1
2/*--------------------------------------------------------------------*/
3/*--- begin                                       guest_arm_toIR.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2013 OpenWorks LLP
11      info@open-works.net
12
13   NEON support is
14   Copyright (C) 2010-2013 Samsung Electronics
15   contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16              and Kirill Batuzov <batuzovk@ispras.ru>
17
18   This program is free software; you can redistribute it and/or
19   modify it under the terms of the GNU General Public License as
20   published by the Free Software Foundation; either version 2 of the
21   License, or (at your option) any later version.
22
23   This program is distributed in the hope that it will be useful, but
24   WITHOUT ANY WARRANTY; without even the implied warranty of
25   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
26   General Public License for more details.
27
28   You should have received a copy of the GNU General Public License
29   along with this program; if not, write to the Free Software
30   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31   02110-1301, USA.
32
33   The GNU General Public License is contained in the file COPYING.
34*/
35
36/* XXXX thumb to check:
37   that all cases where putIRegT writes r15, we generate a jump.
38
39   All uses of newTemp assign to an IRTemp and not a UInt
40
41   For all thumb loads and stores, including VFP ones, new-ITSTATE is
42   backed out before the memory op, and restored afterwards.  This
43   needs to happen even after we go uncond.  (and for sure it doesn't
44   happen for VFP loads/stores right now).
45
46   VFP on thumb: check that we exclude all r13/r15 cases that we
47   should.
48
49   XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
50   taking into account the number of insns guarded by an IT.
51
   remove the nasty hack, in the spechelper, of looking for Or32(...,
   0xE0) as the first arg to armg_calculate_condition, and instead
   use Slice44 as specified in comments in the spechelper.
55
56   add specialisations for armg_calculate_flag_c and _v, as they
57   are moderately often needed in Thumb code.
58
59   Correctness: ITSTATE handling in Thumb SVCs is wrong.
60
61   Correctness (obscure): in m_transtab, when invalidating code
62   address ranges, invalidate up to 18 bytes after the end of the
63   range.  This is because the ITSTATE optimisation at the top of
64   _THUMB_WRK below analyses up to 18 bytes before the start of any
65   given instruction, and so might depend on the invalidated area.
66*/
67
68/* Limitations, etc
69
70   - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
71     These instructions are non-restartable in the case where the
72     transfer(s) fault.
73
74   - SWP: the restart jump back is Ijk_Boring; it should be
75     Ijk_NoRedir but that's expensive.  See comments on casLE() in
76     guest_x86_toIR.c.
77*/
78
79/* "Special" instructions.
80
81   This instruction decoder can decode four special instructions
82   which mean nothing natively (are no-ops as far as regs/mem are
83   concerned) but have meaning for supporting Valgrind.  A special
84   instruction is flagged by a 16-byte preamble:
85
86      E1A0C1EC E1A0C6EC E1A0CEEC E1A0C9EC
87      (mov r12, r12, ROR #3;   mov r12, r12, ROR #13;
88       mov r12, r12, ROR #29;  mov r12, r12, ROR #19)
89
   Following that, one of the following 4 is allowed
   (standard interpretation in parentheses):
92
93      E18AA00A (orr r10,r10,r10)   R3 = client_request ( R4 )
94      E18BB00B (orr r11,r11,r11)   R3 = guest_NRADDR
95      E18CC00C (orr r12,r12,r12)   branch-and-link-to-noredir R4
96      E1899009 (orr r9,r9,r9)      IR injection
97
98   Any other bytes following the 16-byte preamble are illegal and
99   constitute a failure in instruction decoding.  This all assumes
100   that the preamble will never occur except in specific code
101   fragments designed for Valgrind to catch.
102*/
103
104/* Translates ARM(v5) code to IR. */
105
106#include "libvex_basictypes.h"
107#include "libvex_ir.h"
108#include "libvex.h"
109#include "libvex_guest_arm.h"
110
111#include "main_util.h"
112#include "main_globals.h"
113#include "guest_generic_bb_to_IR.h"
114#include "guest_arm_defs.h"
115
116
117/*------------------------------------------------------------*/
118/*--- Globals                                              ---*/
119/*------------------------------------------------------------*/
120
/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means the
   value does not change during translation of the instruction.
*/
125
126/* CONST: what is the host's endianness?  This has to do with float vs
127   double register accesses on VFP, but it's complex and not properly
128   thought out. */
129static VexEndness host_endness;
130
131/* CONST: The guest address for the instruction currently being
132   translated.  This is the real, "decoded" address (not subject
133   to the CPSR.T kludge). */
134static Addr32 guest_R15_curr_instr_notENC;
135
136/* CONST, FOR ASSERTIONS ONLY.  Indicates whether currently processed
137   insn is Thumb (True) or ARM (False). */
138static Bool __curr_is_Thumb;
139
140/* MOD: The IRSB* into which we're generating code. */
141static IRSB* irsb;
142
143/* These are to do with handling writes to r15.  They are initially
144   set at the start of disInstr_ARM_WRK to indicate no update,
145   possibly updated during the routine, and examined again at the end.
146   If they have been set to indicate a r15 update then a jump is
147   generated.  Note, "explicit" jumps (b, bx, etc) are generated
148   directly, not using this mechanism -- this is intended to handle
149   the implicit-style jumps resulting from (eg) assigning to r15 as
150   the result of insns we wouldn't normally consider branchy. */
151
152/* MOD.  Initially False; set to True iff abovementioned handling is
153   required. */
154static Bool r15written;
155
156/* MOD.  Initially IRTemp_INVALID.  If the r15 branch to be generated
157   is conditional, this holds the gating IRTemp :: Ity_I32.  If the
158   branch to be generated is unconditional, this remains
159   IRTemp_INVALID. */
160static IRTemp r15guard; /* :: Ity_I32, 0 or 1 */
161
162/* MOD.  Initially Ijk_Boring.  If an r15 branch is to be generated,
163   this holds the jump kind. */
164static IRTemp r15kind;
165
166
167/*------------------------------------------------------------*/
168/*--- Debugging output                                     ---*/
169/*------------------------------------------------------------*/
170
/* DIP: print a disassembly trace line via vex_printf, but only when
   front-end tracing (VEX_TRACE_FE) is enabled in vex_traceflags.
   Wrapped in do { } while (0) so that the expansion is a single
   statement: a bare 'if' here would capture any 'else' that follows
   a use such as "if (c) DIP(...); else ...". */
#define DIP(format, args...)                 \
   do {                                      \
      if (vex_traceflags & VEX_TRACE_FE)     \
         vex_printf(format, ## args);        \
   } while (0)

/* DIS: as DIP, but formats into 'buf' using vex_sprintf instead of
   printing.  'buf' is left untouched when tracing is disabled. */
#define DIS(buf, format, args...)            \
   do {                                      \
      if (vex_traceflags & VEX_TRACE_FE)     \
         vex_sprintf(buf, format, ## args);  \
   } while (0)

/* Assert that the instruction currently being translated is a Thumb
   instruction. */
#define ASSERT_IS_THUMB \
   do { vassert(__curr_is_Thumb); } while (0)

/* Assert that the instruction currently being translated is an ARM
   (non-Thumb) instruction. */
#define ASSERT_IS_ARM \
   do { vassert(! __curr_is_Thumb); } while (0)
184
185
186/*------------------------------------------------------------*/
187/*--- Helper bits and pieces for deconstructing the        ---*/
188/*--- arm insn stream.                                     ---*/
189/*------------------------------------------------------------*/
190
/* Read the four bytes at p[0 .. 3] and combine them into a 32-bit
   value, treating p[0] as the least significant byte.  The result
   does not depend on the endianness of the host. */
static inline UInt getUIntLittleEndianly ( const UChar* p )
{
   const UInt b0 = p[0];
   const UInt b1 = p[1];
   const UInt b2 = p[2];
   const UInt b3 = p[3];
   return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
}
202
/* Read the two bytes at p[0 .. 1] and combine them into a 16-bit
   value, treating p[0] as the least significant byte.  The result
   does not depend on the endianness of the host. */
static inline UShort getUShortLittleEndianly ( const UChar* p )
{
   const UInt lo = p[0];
   const UInt hi = p[1];
   return (UShort)((hi << 8) | lo);
}
212
/* Rotate the 32-bit value x right by sh bit positions.  sh must be
   in the range 0 .. 31 (asserted). */
static UInt ROR32 ( UInt x, UInt sh ) {
   vassert(sh >= 0 && sh < 32);
   /* A zero rotate is handled separately: computing x << (32-0) would
      be a shift by the full width of the type. */
   return (sh == 0)  ? x  : ((x >> sh) | (x << (32-sh)));
}
220
/* Return the number of bits that are set in x (0 .. 32). */
static Int popcount32 ( UInt x )
{
   Int nSet = 0;
   /* Examine the lowest bit and shift it out, until no set bits
      remain. */
   while (x != 0) {
      if (x & 1)
         nSet++;
      x >>= 1;
   }
   return nSet;
}
230
/* Return x with bit number ix replaced by the least significant bit
   of b; all other bits of x are unchanged.  ix is expected to be in
   0 .. 31.  The mask is built from an unsigned 1 so that ix == 31 is
   well defined (shifting a signed 1 into the sign bit is not). */
static UInt setbit32 ( UInt x, Int ix, UInt b )
{
   UInt mask = 1U << ix;
   x &= ~mask;
   x |= ((b << ix) & mask);
   return x;
}
238
/* BITSn(b(n-1), ..., b0): build an n-bit value out of n individual
   bit arguments, listed most significant first.  Each macro places
   its leading argument at the top position and hands the remaining
   arguments to the next smaller macro. */
#define BITS2(_b1,_b0) \
   ((_b0) | ((_b1) << 1))

#define BITS3(_b2,_b1,_b0) \
   (((_b2) << 2) | BITS2((_b1),(_b0)))

#define BITS4(_b3,_b2,_b1,_b0) \
   (((_b3) << 3) | BITS3((_b2),(_b1),(_b0)))

#define BITS5(_b4,_b3,_b2,_b1,_b0) \
   (((_b4) << 4) | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b5) << 5) | BITS5((_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b6) << 6) | BITS6((_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b7) << 7) \
    | BITS7((_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b9) << 9) \
    | BITS9((_b8),(_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
266
/* Extract bits _bMax down to _bMin (both inclusive) of _uint, i.e.
   _uint[_bMax:_bMin], as a UInt with the field moved down to bit 0.
   The field mask is computed in 64 bits so that a full 32-bit wide
   slice (31 .. 0) works. */
#define SLICE_UInt(_uint,_bMax,_bMin) \
   ( (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL) \
     & (((UInt)(_uint)) >> (_bMin)) )
271
272
273/*------------------------------------------------------------*/
274/*--- Helper bits and pieces for creating IR fragments.    ---*/
275/*------------------------------------------------------------*/
276
277static IRExpr* mkU64 ( ULong i )
278{
279   return IRExpr_Const(IRConst_U64(i));
280}
281
282static IRExpr* mkU32 ( UInt i )
283{
284   return IRExpr_Const(IRConst_U32(i));
285}
286
287static IRExpr* mkU8 ( UInt i )
288{
289   vassert(i < 256);
290   return IRExpr_Const(IRConst_U8( (UChar)i ));
291}
292
293static IRExpr* mkexpr ( IRTemp tmp )
294{
295   return IRExpr_RdTmp(tmp);
296}
297
298static IRExpr* unop ( IROp op, IRExpr* a )
299{
300   return IRExpr_Unop(op, a);
301}
302
303static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
304{
305   return IRExpr_Binop(op, a1, a2);
306}
307
308static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
309{
310   return IRExpr_Triop(op, a1, a2, a3);
311}
312
313static IRExpr* loadLE ( IRType ty, IRExpr* addr )
314{
315   return IRExpr_Load(Iend_LE, ty, addr);
316}
317
318/* Add a statement to the list held by "irbb". */
319static void stmt ( IRStmt* st )
320{
321   addStmtToIRSB( irsb, st );
322}
323
324static void assign ( IRTemp dst, IRExpr* e )
325{
326   stmt( IRStmt_WrTmp(dst, e) );
327}
328
329static void storeLE ( IRExpr* addr, IRExpr* data )
330{
331   stmt( IRStmt_Store(Iend_LE, addr, data) );
332}
333
334static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
335{
336   if (guardT == IRTemp_INVALID) {
337      /* unconditional */
338      storeLE(addr, data);
339   } else {
340      stmt( IRStmt_StoreG(Iend_LE, addr, data,
341                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
342   }
343}
344
345static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
346                            IRExpr* addr, IRExpr* alt,
347                            IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
348{
349   if (guardT == IRTemp_INVALID) {
350      /* unconditional */
351      IRExpr* loaded = NULL;
352      switch (cvt) {
353         case ILGop_Ident32:
354            loaded = loadLE(Ity_I32, addr); break;
355         case ILGop_8Uto32:
356            loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
357         case ILGop_8Sto32:
358            loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
359         case ILGop_16Uto32:
360            loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
361         case ILGop_16Sto32:
362            loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
363         default:
364            vassert(0);
365      }
366      vassert(loaded != NULL);
367      assign(dst, loaded);
368   } else {
369      /* Generate a guarded load into 'dst', but apply 'cvt' to the
370         loaded data before putting the data in 'dst'.  If the load
371         does not take place, 'alt' is placed directly in 'dst'. */
372      stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
373                         binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
374   }
375}
376
377/* Generate a new temporary of the given type. */
378static IRTemp newTemp ( IRType ty )
379{
380   vassert(isPlausibleIRType(ty));
381   return newIRTemp( irsb->tyenv, ty );
382}
383
384/* Produces a value in 0 .. 3, which is encoded as per the type
385   IRRoundingMode. */
386static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
387{
388   return mkU32(Irrm_NEAREST);
389}
390
391/* Generate an expression for SRC rotated right by ROT. */
392static IRExpr* genROR32( IRTemp src, Int rot )
393{
394   vassert(rot >= 0 && rot < 32);
395   if (rot == 0)
396      return mkexpr(src);
397   return
398      binop(Iop_Or32,
399            binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
400            binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
401}
402
403static IRExpr* mkU128 ( ULong i )
404{
405   return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
406}
407
408/* Generate a 4-aligned version of the given expression if
409   the given condition is true.  Else return it unchanged. */
410static IRExpr* align4if ( IRExpr* e, Bool b )
411{
412   if (b)
413      return binop(Iop_And32, e, mkU32(~3));
414   else
415      return e;
416}
417
418
419/*------------------------------------------------------------*/
420/*--- Helpers for accessing guest registers.               ---*/
421/*------------------------------------------------------------*/
422
/* Byte offsets of fields within VexGuestARMState.  ARM_GUEST_OFFB_ is
   a local shorthand used only to build the OFFB_* names below. */
#define ARM_GUEST_OFFB_(_field)  offsetof(VexGuestARMState,_field)

/* Integer registers.  Slot 15 is the guest_R15T field. */
#define OFFB_R0       ARM_GUEST_OFFB_(guest_R0)
#define OFFB_R1       ARM_GUEST_OFFB_(guest_R1)
#define OFFB_R2       ARM_GUEST_OFFB_(guest_R2)
#define OFFB_R3       ARM_GUEST_OFFB_(guest_R3)
#define OFFB_R4       ARM_GUEST_OFFB_(guest_R4)
#define OFFB_R5       ARM_GUEST_OFFB_(guest_R5)
#define OFFB_R6       ARM_GUEST_OFFB_(guest_R6)
#define OFFB_R7       ARM_GUEST_OFFB_(guest_R7)
#define OFFB_R8       ARM_GUEST_OFFB_(guest_R8)
#define OFFB_R9       ARM_GUEST_OFFB_(guest_R9)
#define OFFB_R10      ARM_GUEST_OFFB_(guest_R10)
#define OFFB_R11      ARM_GUEST_OFFB_(guest_R11)
#define OFFB_R12      ARM_GUEST_OFFB_(guest_R12)
#define OFFB_R13      ARM_GUEST_OFFB_(guest_R13)
#define OFFB_R14      ARM_GUEST_OFFB_(guest_R14)
#define OFFB_R15T     ARM_GUEST_OFFB_(guest_R15T)

/* Condition-code thunk fields and the NRADDR field. */
#define OFFB_CC_OP    ARM_GUEST_OFFB_(guest_CC_OP)
#define OFFB_CC_DEP1  ARM_GUEST_OFFB_(guest_CC_DEP1)
#define OFFB_CC_DEP2  ARM_GUEST_OFFB_(guest_CC_DEP2)
#define OFFB_CC_NDEP  ARM_GUEST_OFFB_(guest_CC_NDEP)
#define OFFB_NRADDR   ARM_GUEST_OFFB_(guest_NRADDR)

/* The D registers, D0 .. D31. */
#define OFFB_D0       ARM_GUEST_OFFB_(guest_D0)
#define OFFB_D1       ARM_GUEST_OFFB_(guest_D1)
#define OFFB_D2       ARM_GUEST_OFFB_(guest_D2)
#define OFFB_D3       ARM_GUEST_OFFB_(guest_D3)
#define OFFB_D4       ARM_GUEST_OFFB_(guest_D4)
#define OFFB_D5       ARM_GUEST_OFFB_(guest_D5)
#define OFFB_D6       ARM_GUEST_OFFB_(guest_D6)
#define OFFB_D7       ARM_GUEST_OFFB_(guest_D7)
#define OFFB_D8       ARM_GUEST_OFFB_(guest_D8)
#define OFFB_D9       ARM_GUEST_OFFB_(guest_D9)
#define OFFB_D10      ARM_GUEST_OFFB_(guest_D10)
#define OFFB_D11      ARM_GUEST_OFFB_(guest_D11)
#define OFFB_D12      ARM_GUEST_OFFB_(guest_D12)
#define OFFB_D13      ARM_GUEST_OFFB_(guest_D13)
#define OFFB_D14      ARM_GUEST_OFFB_(guest_D14)
#define OFFB_D15      ARM_GUEST_OFFB_(guest_D15)
#define OFFB_D16      ARM_GUEST_OFFB_(guest_D16)
#define OFFB_D17      ARM_GUEST_OFFB_(guest_D17)
#define OFFB_D18      ARM_GUEST_OFFB_(guest_D18)
#define OFFB_D19      ARM_GUEST_OFFB_(guest_D19)
#define OFFB_D20      ARM_GUEST_OFFB_(guest_D20)
#define OFFB_D21      ARM_GUEST_OFFB_(guest_D21)
#define OFFB_D22      ARM_GUEST_OFFB_(guest_D22)
#define OFFB_D23      ARM_GUEST_OFFB_(guest_D23)
#define OFFB_D24      ARM_GUEST_OFFB_(guest_D24)
#define OFFB_D25      ARM_GUEST_OFFB_(guest_D25)
#define OFFB_D26      ARM_GUEST_OFFB_(guest_D26)
#define OFFB_D27      ARM_GUEST_OFFB_(guest_D27)
#define OFFB_D28      ARM_GUEST_OFFB_(guest_D28)
#define OFFB_D29      ARM_GUEST_OFFB_(guest_D29)
#define OFFB_D30      ARM_GUEST_OFFB_(guest_D30)
#define OFFB_D31      ARM_GUEST_OFFB_(guest_D31)

/* Miscellaneous state: FPSCR, TPIDRURO, ITSTATE, Q flag, GE flags. */
#define OFFB_FPSCR    ARM_GUEST_OFFB_(guest_FPSCR)
#define OFFB_TPIDRURO ARM_GUEST_OFFB_(guest_TPIDRURO)
#define OFFB_ITSTATE  ARM_GUEST_OFFB_(guest_ITSTATE)
#define OFFB_QFLAG32  ARM_GUEST_OFFB_(guest_QFLAG32)
#define OFFB_GEFLAG0  ARM_GUEST_OFFB_(guest_GEFLAG0)
#define OFFB_GEFLAG1  ARM_GUEST_OFFB_(guest_GEFLAG1)
#define OFFB_GEFLAG2  ARM_GUEST_OFFB_(guest_GEFLAG2)
#define OFFB_GEFLAG3  ARM_GUEST_OFFB_(guest_GEFLAG3)

/* The CMSTART and CMLEN fields. */
#define OFFB_CMSTART  ARM_GUEST_OFFB_(guest_CMSTART)
#define OFFB_CMLEN    ARM_GUEST_OFFB_(guest_CMLEN)
490
491
492/* ---------------- Integer registers ---------------- */
493
494static Int integerGuestRegOffset ( UInt iregNo )
495{
496   /* Do we care about endianness here?  We do if sub-parts of integer
497      registers are accessed, but I don't think that ever happens on
498      ARM. */
499   switch (iregNo) {
500      case 0:  return OFFB_R0;
501      case 1:  return OFFB_R1;
502      case 2:  return OFFB_R2;
503      case 3:  return OFFB_R3;
504      case 4:  return OFFB_R4;
505      case 5:  return OFFB_R5;
506      case 6:  return OFFB_R6;
507      case 7:  return OFFB_R7;
508      case 8:  return OFFB_R8;
509      case 9:  return OFFB_R9;
510      case 10: return OFFB_R10;
511      case 11: return OFFB_R11;
512      case 12: return OFFB_R12;
513      case 13: return OFFB_R13;
514      case 14: return OFFB_R14;
515      case 15: return OFFB_R15T;
516      default: vassert(0);
517   }
518}
519
520/* Plain ("low level") read from a reg; no +8 offset magic for r15. */
521static IRExpr* llGetIReg ( UInt iregNo )
522{
523   vassert(iregNo < 16);
524   return IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
525}
526
527/* Architected read from a reg in ARM mode.  This automagically adds 8
528   to all reads of r15. */
529static IRExpr* getIRegA ( UInt iregNo )
530{
531   IRExpr* e;
532   ASSERT_IS_ARM;
533   vassert(iregNo < 16);
534   if (iregNo == 15) {
535      /* If asked for r15, don't read the guest state value, as that
536         may not be up to date in the case where loop unrolling has
537         happened, because the first insn's write to the block is
538         omitted; hence in the 2nd and subsequent unrollings we don't
539         have a correct value in guest r15.  Instead produce the
540         constant that we know would be produced at this point. */
541      vassert(0 == (guest_R15_curr_instr_notENC & 3));
542      e = mkU32(guest_R15_curr_instr_notENC + 8);
543   } else {
544      e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
545   }
546   return e;
547}
548
549/* Architected read from a reg in Thumb mode.  This automagically adds
550   4 to all reads of r15. */
551static IRExpr* getIRegT ( UInt iregNo )
552{
553   IRExpr* e;
554   ASSERT_IS_THUMB;
555   vassert(iregNo < 16);
556   if (iregNo == 15) {
557      /* Ditto comment in getIReg. */
558      vassert(0 == (guest_R15_curr_instr_notENC & 1));
559      e = mkU32(guest_R15_curr_instr_notENC + 4);
560   } else {
561      e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
562   }
563   return e;
564}
565
566/* Plain ("low level") write to a reg; no jump or alignment magic for
567   r15. */
568static void llPutIReg ( UInt iregNo, IRExpr* e )
569{
570   vassert(iregNo < 16);
571   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
572   stmt( IRStmt_Put(integerGuestRegOffset(iregNo), e) );
573}
574
575/* Architected write to an integer register in ARM mode.  If it is to
576   r15, record info so at the end of this insn's translation, a branch
577   to it can be made.  Also handles conditional writes to the
578   register: if guardT == IRTemp_INVALID then the write is
579   unconditional.  If writing r15, also 4-align it. */
580static void putIRegA ( UInt       iregNo,
581                       IRExpr*    e,
582                       IRTemp     guardT /* :: Ity_I32, 0 or 1 */,
583                       IRJumpKind jk /* if a jump is generated */ )
584{
585   /* if writing r15, force e to be 4-aligned. */
586   // INTERWORKING FIXME.  this needs to be relaxed so that
587   // puts caused by LDMxx which load r15 interwork right.
588   // but is no aligned too relaxed?
589   //if (iregNo == 15)
590   //   e = binop(Iop_And32, e, mkU32(~3));
591   ASSERT_IS_ARM;
592   /* So, generate either an unconditional or a conditional write to
593      the reg. */
594   if (guardT == IRTemp_INVALID) {
595      /* unconditional write */
596      llPutIReg( iregNo, e );
597   } else {
598      llPutIReg( iregNo,
599                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
600                             e, llGetIReg(iregNo) ));
601   }
602   if (iregNo == 15) {
603      // assert against competing r15 updates.  Shouldn't
604      // happen; should be ruled out by the instr matching
605      // logic.
606      vassert(r15written == False);
607      vassert(r15guard   == IRTemp_INVALID);
608      vassert(r15kind    == Ijk_Boring);
609      r15written = True;
610      r15guard   = guardT;
611      r15kind    = jk;
612   }
613}
614
615
616/* Architected write to an integer register in Thumb mode.  Writes to
617   r15 are not allowed.  Handles conditional writes to the register:
618   if guardT == IRTemp_INVALID then the write is unconditional. */
619static void putIRegT ( UInt       iregNo,
620                       IRExpr*    e,
621                       IRTemp     guardT /* :: Ity_I32, 0 or 1 */ )
622{
623   /* So, generate either an unconditional or a conditional write to
624      the reg. */
625   ASSERT_IS_THUMB;
626   vassert(iregNo >= 0 && iregNo <= 14);
627   if (guardT == IRTemp_INVALID) {
628      /* unconditional write */
629      llPutIReg( iregNo, e );
630   } else {
631      llPutIReg( iregNo,
632                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
633                             e, llGetIReg(iregNo) ));
634   }
635}
636
637
638/* Thumb16 and Thumb32 only.
639   Returns true if reg is 13 or 15.  Implements the BadReg
640   predicate in the ARM ARM. */
641static Bool isBadRegT ( UInt r )
642{
643   vassert(r <= 15);
644   ASSERT_IS_THUMB;
645   return r == 13 || r == 15;
646}
647
648
649/* ---------------- Double registers ---------------- */
650
651static Int doubleGuestRegOffset ( UInt dregNo )
652{
653   /* Do we care about endianness here?  Probably do if we ever get
654      into the situation of dealing with the single-precision VFP
655      registers. */
656   switch (dregNo) {
657      case 0:  return OFFB_D0;
658      case 1:  return OFFB_D1;
659      case 2:  return OFFB_D2;
660      case 3:  return OFFB_D3;
661      case 4:  return OFFB_D4;
662      case 5:  return OFFB_D5;
663      case 6:  return OFFB_D6;
664      case 7:  return OFFB_D7;
665      case 8:  return OFFB_D8;
666      case 9:  return OFFB_D9;
667      case 10: return OFFB_D10;
668      case 11: return OFFB_D11;
669      case 12: return OFFB_D12;
670      case 13: return OFFB_D13;
671      case 14: return OFFB_D14;
672      case 15: return OFFB_D15;
673      case 16: return OFFB_D16;
674      case 17: return OFFB_D17;
675      case 18: return OFFB_D18;
676      case 19: return OFFB_D19;
677      case 20: return OFFB_D20;
678      case 21: return OFFB_D21;
679      case 22: return OFFB_D22;
680      case 23: return OFFB_D23;
681      case 24: return OFFB_D24;
682      case 25: return OFFB_D25;
683      case 26: return OFFB_D26;
684      case 27: return OFFB_D27;
685      case 28: return OFFB_D28;
686      case 29: return OFFB_D29;
687      case 30: return OFFB_D30;
688      case 31: return OFFB_D31;
689      default: vassert(0);
690   }
691}
692
693/* Plain ("low level") read from a VFP Dreg. */
694static IRExpr* llGetDReg ( UInt dregNo )
695{
696   vassert(dregNo < 32);
697   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_F64 );
698}
699
700/* Architected read from a VFP Dreg. */
701static IRExpr* getDReg ( UInt dregNo ) {
702   return llGetDReg( dregNo );
703}
704
705/* Plain ("low level") write to a VFP Dreg. */
706static void llPutDReg ( UInt dregNo, IRExpr* e )
707{
708   vassert(dregNo < 32);
709   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
710   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
711}
712
713/* Architected write to a VFP Dreg.  Handles conditional writes to the
714   register: if guardT == IRTemp_INVALID then the write is
715   unconditional. */
716static void putDReg ( UInt    dregNo,
717                      IRExpr* e,
718                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
719{
720   /* So, generate either an unconditional or a conditional write to
721      the reg. */
722   if (guardT == IRTemp_INVALID) {
723      /* unconditional write */
724      llPutDReg( dregNo, e );
725   } else {
726      llPutDReg( dregNo,
727                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
728                             e, llGetDReg(dregNo) ));
729   }
730}
731
732/* And now exactly the same stuff all over again, but this time
733   taking/returning I64 rather than F64, to support 64-bit Neon
734   ops. */
735
736/* Plain ("low level") read from a Neon Integer Dreg. */
737static IRExpr* llGetDRegI64 ( UInt dregNo )
738{
739   vassert(dregNo < 32);
740   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_I64 );
741}
742
743/* Architected read from a Neon Integer Dreg. */
744static IRExpr* getDRegI64 ( UInt dregNo ) {
745   return llGetDRegI64( dregNo );
746}
747
748/* Plain ("low level") write to a Neon Integer Dreg. */
749static void llPutDRegI64 ( UInt dregNo, IRExpr* e )
750{
751   vassert(dregNo < 32);
752   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
753   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
754}
755
756/* Architected write to a Neon Integer Dreg.  Handles conditional
757   writes to the register: if guardT == IRTemp_INVALID then the write
758   is unconditional. */
759static void putDRegI64 ( UInt    dregNo,
760                         IRExpr* e,
761                         IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
762{
763   /* So, generate either an unconditional or a conditional write to
764      the reg. */
765   if (guardT == IRTemp_INVALID) {
766      /* unconditional write */
767      llPutDRegI64( dregNo, e );
768   } else {
769      llPutDRegI64( dregNo,
770                    IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
771                                e, llGetDRegI64(dregNo) ));
772   }
773}
774
775/* ---------------- Quad registers ---------------- */
776
777static Int quadGuestRegOffset ( UInt qregNo )
778{
779   /* Do we care about endianness here?  Probably do if we ever get
780      into the situation of dealing with the 64 bit Neon registers. */
781   switch (qregNo) {
782      case 0:  return OFFB_D0;
783      case 1:  return OFFB_D2;
784      case 2:  return OFFB_D4;
785      case 3:  return OFFB_D6;
786      case 4:  return OFFB_D8;
787      case 5:  return OFFB_D10;
788      case 6:  return OFFB_D12;
789      case 7:  return OFFB_D14;
790      case 8:  return OFFB_D16;
791      case 9:  return OFFB_D18;
792      case 10: return OFFB_D20;
793      case 11: return OFFB_D22;
794      case 12: return OFFB_D24;
795      case 13: return OFFB_D26;
796      case 14: return OFFB_D28;
797      case 15: return OFFB_D30;
798      default: vassert(0);
799   }
800}
801
802/* Plain ("low level") read from a Neon Qreg. */
803static IRExpr* llGetQReg ( UInt qregNo )
804{
805   vassert(qregNo < 16);
806   return IRExpr_Get( quadGuestRegOffset(qregNo), Ity_V128 );
807}
808
809/* Architected read from a Neon Qreg. */
810static IRExpr* getQReg ( UInt qregNo ) {
811   return llGetQReg( qregNo );
812}
813
814/* Plain ("low level") write to a Neon Qreg. */
815static void llPutQReg ( UInt qregNo, IRExpr* e )
816{
817   vassert(qregNo < 16);
818   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
819   stmt( IRStmt_Put(quadGuestRegOffset(qregNo), e) );
820}
821
822/* Architected write to a Neon Qreg.  Handles conditional writes to the
823   register: if guardT == IRTemp_INVALID then the write is
824   unconditional. */
825static void putQReg ( UInt    qregNo,
826                      IRExpr* e,
827                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
828{
829   /* So, generate either an unconditional or a conditional write to
830      the reg. */
831   if (guardT == IRTemp_INVALID) {
832      /* unconditional write */
833      llPutQReg( qregNo, e );
834   } else {
835      llPutQReg( qregNo,
836                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
837                             e, llGetQReg(qregNo) ));
838   }
839}
840
841
842/* ---------------- Float registers ---------------- */
843
844static Int floatGuestRegOffset ( UInt fregNo )
845{
846   /* Start with the offset of the containing double, and then correct
847      for endianness.  Actually this is completely bogus and needs
848      careful thought. */
849   Int off;
850   vassert(fregNo < 32);
851   off = doubleGuestRegOffset(fregNo >> 1);
852   if (host_endness == VexEndnessLE) {
853      if (fregNo & 1)
854         off += 4;
855   } else {
856      vassert(0);
857   }
858   return off;
859}
860
861/* Plain ("low level") read from a VFP Freg. */
862static IRExpr* llGetFReg ( UInt fregNo )
863{
864   vassert(fregNo < 32);
865   return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
866}
867
868/* Architected read from a VFP Freg. */
869static IRExpr* getFReg ( UInt fregNo ) {
870   return llGetFReg( fregNo );
871}
872
873/* Plain ("low level") write to a VFP Freg. */
874static void llPutFReg ( UInt fregNo, IRExpr* e )
875{
876   vassert(fregNo < 32);
877   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
878   stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
879}
880
881/* Architected write to a VFP Freg.  Handles conditional writes to the
882   register: if guardT == IRTemp_INVALID then the write is
883   unconditional. */
884static void putFReg ( UInt    fregNo,
885                      IRExpr* e,
886                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
887{
888   /* So, generate either an unconditional or a conditional write to
889      the reg. */
890   if (guardT == IRTemp_INVALID) {
891      /* unconditional write */
892      llPutFReg( fregNo, e );
893   } else {
894      llPutFReg( fregNo,
895                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
896                             e, llGetFReg(fregNo) ));
897   }
898}
899
900
901/* ---------------- Misc registers ---------------- */
902
903static void putMiscReg32 ( UInt    gsoffset,
904                           IRExpr* e, /* :: Ity_I32 */
905                           IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
906{
907   switch (gsoffset) {
908      case OFFB_FPSCR:   break;
909      case OFFB_QFLAG32: break;
910      case OFFB_GEFLAG0: break;
911      case OFFB_GEFLAG1: break;
912      case OFFB_GEFLAG2: break;
913      case OFFB_GEFLAG3: break;
914      default: vassert(0); /* awaiting more cases */
915   }
916   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
917
918   if (guardT == IRTemp_INVALID) {
919      /* unconditional write */
920      stmt(IRStmt_Put(gsoffset, e));
921   } else {
922      stmt(IRStmt_Put(
923         gsoffset,
924         IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
925                     e, IRExpr_Get(gsoffset, Ity_I32) )
926      ));
927   }
928}
929
930static IRTemp get_ITSTATE ( void )
931{
932   ASSERT_IS_THUMB;
933   IRTemp t = newTemp(Ity_I32);
934   assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
935   return t;
936}
937
/* Thumb only.  Unconditionally write the value of temporary 't' to
   the guest ITSTATE field. */
static void put_ITSTATE ( IRTemp t )
{
   ASSERT_IS_THUMB;
   stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
}
943
944static IRTemp get_QFLAG32 ( void )
945{
946   IRTemp t = newTemp(Ity_I32);
947   assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
948   return t;
949}
950
/* Write the value of temporary 't' to the guest QFLAG32 field via
   putMiscReg32, passing condT through as the guard. */
static void put_QFLAG32 ( IRTemp t, IRTemp condT )
{
   putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
}
955
956/* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
957   Status Register) to indicate that overflow or saturation occurred.
958   Nb: t must be zero to denote no saturation, and any nonzero
959   value to indicate saturation. */
960static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
961{
962   IRTemp old = get_QFLAG32();
963   IRTemp nyu = newTemp(Ity_I32);
964   assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
965   put_QFLAG32(nyu, condT);
966}
967
968/* Generate code to set APSR.GE[flagNo]. Each fn call sets 1 bit.
969   flagNo: which flag bit to set [3...0]
970   lowbits_to_ignore:  0 = look at all 32 bits
971                       8 = look at top 24 bits only
972                      16 = look at top 16 bits only
973                      31 = look at the top bit only
974   e: input value to be evaluated.
975   The new value is taken from 'e' with the lowest 'lowbits_to_ignore'
976   masked out.  If the resulting value is zero then the GE flag is
977   set to 0; any other value sets the flag to 1. */
978static void put_GEFLAG32 ( Int flagNo,            /* 0, 1, 2 or 3 */
979                           Int lowbits_to_ignore, /* 0, 8, 16 or 31   */
980                           IRExpr* e,             /* Ity_I32 */
981                           IRTemp condT )
982{
983   vassert( flagNo >= 0 && flagNo <= 3 );
984   vassert( lowbits_to_ignore == 0  ||
985            lowbits_to_ignore == 8  ||
986            lowbits_to_ignore == 16 ||
987            lowbits_to_ignore == 31 );
988   IRTemp masked = newTemp(Ity_I32);
989   assign(masked, binop(Iop_Shr32, e, mkU8(lowbits_to_ignore)));
990
991   switch (flagNo) {
992      case 0: putMiscReg32(OFFB_GEFLAG0, mkexpr(masked), condT); break;
993      case 1: putMiscReg32(OFFB_GEFLAG1, mkexpr(masked), condT); break;
994      case 2: putMiscReg32(OFFB_GEFLAG2, mkexpr(masked), condT); break;
995      case 3: putMiscReg32(OFFB_GEFLAG3, mkexpr(masked), condT); break;
996      default: vassert(0);
997   }
998}
999
1000/* Return the (32-bit, zero-or-nonzero representation scheme) of
1001   the specified GE flag. */
1002static IRExpr* get_GEFLAG32( Int flagNo /* 0, 1, 2, 3 */ )
1003{
1004   switch (flagNo) {
1005      case 0: return IRExpr_Get( OFFB_GEFLAG0, Ity_I32 );
1006      case 1: return IRExpr_Get( OFFB_GEFLAG1, Ity_I32 );
1007      case 2: return IRExpr_Get( OFFB_GEFLAG2, Ity_I32 );
1008      case 3: return IRExpr_Get( OFFB_GEFLAG3, Ity_I32 );
1009      default: vassert(0);
1010   }
1011}
1012
1013/* Set all 4 GE flags from the given 32-bit value as follows: GE 3 and
1014   2 are set from bit 31 of the value, and GE 1 and 0 are set from bit
1015   15 of the value.  All other bits are ignored. */
1016static void set_GE_32_10_from_bits_31_15 ( IRTemp t32, IRTemp condT )
1017{
1018   IRTemp ge10 = newTemp(Ity_I32);
1019   IRTemp ge32 = newTemp(Ity_I32);
1020   assign(ge10, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
1021   assign(ge32, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
1022   put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
1023   put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
1024   put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
1025   put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
1026}
1027
1028
1029/* Set all 4 GE flags from the given 32-bit value as follows: GE 3
1030   from bit 31, GE 2 from bit 23, GE 1 from bit 15, and GE0 from
1031   bit 7.  All other bits are ignored. */
1032static void set_GE_3_2_1_0_from_bits_31_23_15_7 ( IRTemp t32, IRTemp condT )
1033{
1034   IRTemp ge0 = newTemp(Ity_I32);
1035   IRTemp ge1 = newTemp(Ity_I32);
1036   IRTemp ge2 = newTemp(Ity_I32);
1037   IRTemp ge3 = newTemp(Ity_I32);
1038   assign(ge0, binop(Iop_And32, mkexpr(t32), mkU32(0x00000080)));
1039   assign(ge1, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
1040   assign(ge2, binop(Iop_And32, mkexpr(t32), mkU32(0x00800000)));
1041   assign(ge3, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
1042   put_GEFLAG32( 0, 0, mkexpr(ge0), condT );
1043   put_GEFLAG32( 1, 0, mkexpr(ge1), condT );
1044   put_GEFLAG32( 2, 0, mkexpr(ge2), condT );
1045   put_GEFLAG32( 3, 0, mkexpr(ge3), condT );
1046}
1047
1048
1049/* ---------------- FPSCR stuff ---------------- */
1050
1051/* Generate IR to get hold of the rounding mode bits in FPSCR, and
1052   convert them to IR format.  Bind the final result to the
1053   returned temp. */
1054static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1055{
1056   /* The ARMvfp encoding for rounding mode bits is:
1057         00  to nearest
1058         01  to +infinity
1059         10  to -infinity
1060         11  to zero
1061      We need to convert that to the IR encoding:
1062         00  to nearest (the default)
1063         10  to +infinity
1064         01  to -infinity
1065         11  to zero
1066      Which can be done by swapping bits 0 and 1.
1067      The rmode bits are at 23:22 in FPSCR.
1068   */
1069   IRTemp armEncd = newTemp(Ity_I32);
1070   IRTemp swapped = newTemp(Ity_I32);
1071   /* Fish FPSCR[23:22] out, and slide to bottom.  Doesn't matter that
1072      we don't zero out bits 24 and above, since the assignment to
1073      'swapped' will mask them out anyway. */
1074   assign(armEncd,
1075          binop(Iop_Shr32, IRExpr_Get(OFFB_FPSCR, Ity_I32), mkU8(22)));
1076   /* Now swap them. */
1077   assign(swapped,
1078          binop(Iop_Or32,
1079                binop(Iop_And32,
1080                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1081                      mkU32(2)),
1082                binop(Iop_And32,
1083                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1084                      mkU32(1))
1085         ));
1086   return swapped;
1087}
1088
1089
1090/*------------------------------------------------------------*/
1091/*--- Helpers for flag handling and conditional insns      ---*/
1092/*------------------------------------------------------------*/
1093
/* Return the printable name of an ARM condition code, in the form
   "{xx}".  ARMCondAL yields the empty string, since "always" is the
   default.  Any value not listed below causes a panic. */
static const HChar* name_ARMCondcode ( ARMCondcode cond )
{
   switch (cond) {
      case ARMCondEQ:  return "{eq}";
      case ARMCondNE:  return "{ne}";
      case ARMCondHS:  return "{hs}";  // or 'cs'
      case ARMCondLO:  return "{lo}";  // or 'cc'
      case ARMCondMI:  return "{mi}";
      case ARMCondPL:  return "{pl}";
      case ARMCondVS:  return "{vs}";
      case ARMCondVC:  return "{vc}";
      case ARMCondHI:  return "{hi}";
      case ARMCondLS:  return "{ls}";
      case ARMCondGE:  return "{ge}";
      case ARMCondLT:  return "{lt}";
      case ARMCondGT:  return "{gt}";
      case ARMCondLE:  return "{le}";
      case ARMCondAL:  return ""; // {al}: is the default
      case ARMCondNV:  return "{nv}";
      default: vpanic("name_ARMCondcode");
   }
}
1116/* and a handy shorthand for it */
1117static const HChar* nCC ( ARMCondcode cond ) {
1118   return name_ARMCondcode(cond);
1119}
1120
1121
1122/* Build IR to calculate some particular condition from stored
1123   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1124   Ity_I32, suitable for narrowing.  Although the return type is
1125   Ity_I32, the returned value is either 0 or 1.  'cond' must be
1126   :: Ity_I32 and must denote the condition to compute in
1127   bits 7:4, and be zero everywhere else.
1128*/
1129static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
1130{
1131   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
1132   /* And 'cond' had better produce a value in which only bits 7:4 are
1133      nonzero.  However, obviously we can't assert for that. */
1134
1135   /* So what we're constructing for the first argument is
1136      "(cond << 4) | stored-operation".
1137      However, as per comments above, 'cond' must be supplied
1138      pre-shifted to this function.
1139
1140      This pairing scheme requires that the ARM_CC_OP_ values all fit
1141      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
1142      8 bits of the first argument. */
1143   IRExpr** args
1144      = mkIRExprVec_4(
1145           binop(Iop_Or32, IRExpr_Get(OFFB_CC_OP, Ity_I32), cond),
1146           IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1147           IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1148           IRExpr_Get(OFFB_CC_NDEP, Ity_I32)
1149        );
1150   IRExpr* call
1151      = mkIRExprCCall(
1152           Ity_I32,
1153           0/*regparm*/,
1154           "armg_calculate_condition", &armg_calculate_condition,
1155           args
1156        );
1157
1158   /* Exclude the requested condition, OP and NDEP from definedness
1159      checking.  We're only interested in DEP1 and DEP2. */
1160   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1161   return call;
1162}
1163
1164
1165/* Build IR to calculate some particular condition from stored
1166   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1167   Ity_I32, suitable for narrowing.  Although the return type is
1168   Ity_I32, the returned value is either 0 or 1.
1169*/
1170static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
1171{
1172  /* First arg is "(cond << 4) | condition".  This requires that the
1173     ARM_CC_OP_ values all fit in 4 bits.  Hence we are passing a
1174     (COND, OP) pair in the lowest 8 bits of the first argument. */
1175   vassert(cond >= 0 && cond <= 15);
1176   return mk_armg_calculate_condition_dyn( mkU32(cond << 4) );
1177}
1178
1179
1180/* Build IR to calculate just the carry flag from stored
1181   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
1182   Ity_I32. */
1183static IRExpr* mk_armg_calculate_flag_c ( void )
1184{
1185   IRExpr** args
1186      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1187                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1188                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1189                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1190   IRExpr* call
1191      = mkIRExprCCall(
1192           Ity_I32,
1193           0/*regparm*/,
1194           "armg_calculate_flag_c", &armg_calculate_flag_c,
1195           args
1196        );
1197   /* Exclude OP and NDEP from definedness checking.  We're only
1198      interested in DEP1 and DEP2. */
1199   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1200   return call;
1201}
1202
1203
1204/* Build IR to calculate just the overflow flag from stored
1205   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
1206   Ity_I32. */
1207static IRExpr* mk_armg_calculate_flag_v ( void )
1208{
1209   IRExpr** args
1210      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1211                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1212                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1213                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1214   IRExpr* call
1215      = mkIRExprCCall(
1216           Ity_I32,
1217           0/*regparm*/,
1218           "armg_calculate_flag_v", &armg_calculate_flag_v,
1219           args
1220        );
1221   /* Exclude OP and NDEP from definedness checking.  We're only
1222      interested in DEP1 and DEP2. */
1223   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1224   return call;
1225}
1226
1227
1228/* Build IR to calculate N Z C V in bits 31:28 of the
1229   returned word. */
1230static IRExpr* mk_armg_calculate_flags_nzcv ( void )
1231{
1232   IRExpr** args
1233      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1234                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1235                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1236                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1237   IRExpr* call
1238      = mkIRExprCCall(
1239           Ity_I32,
1240           0/*regparm*/,
1241           "armg_calculate_flags_nzcv", &armg_calculate_flags_nzcv,
1242           args
1243        );
1244   /* Exclude OP and NDEP from definedness checking.  We're only
1245      interested in DEP1 and DEP2. */
1246   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1247   return call;
1248}
1249
1250static IRExpr* mk_armg_calculate_flag_qc ( IRExpr* resL, IRExpr* resR, Bool Q )
1251{
1252   IRExpr** args1;
1253   IRExpr** args2;
1254   IRExpr *call1, *call2, *res;
1255
1256   if (Q) {
1257      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(0)),
1258                              binop(Iop_GetElem32x4, resL, mkU8(1)),
1259                              binop(Iop_GetElem32x4, resR, mkU8(0)),
1260                              binop(Iop_GetElem32x4, resR, mkU8(1)) );
1261      args2 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(2)),
1262                              binop(Iop_GetElem32x4, resL, mkU8(3)),
1263                              binop(Iop_GetElem32x4, resR, mkU8(2)),
1264                              binop(Iop_GetElem32x4, resR, mkU8(3)) );
1265   } else {
1266      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x2, resL, mkU8(0)),
1267                              binop(Iop_GetElem32x2, resL, mkU8(1)),
1268                              binop(Iop_GetElem32x2, resR, mkU8(0)),
1269                              binop(Iop_GetElem32x2, resR, mkU8(1)) );
1270   }
1271
1272   call1 = mkIRExprCCall(
1273             Ity_I32,
1274             0/*regparm*/,
1275             "armg_calculate_flag_qc", &armg_calculate_flag_qc,
1276             args1
1277          );
1278   if (Q) {
1279      call2 = mkIRExprCCall(
1280                Ity_I32,
1281                0/*regparm*/,
1282                "armg_calculate_flag_qc", &armg_calculate_flag_qc,
1283                args2
1284             );
1285   }
1286   if (Q) {
1287      res = binop(Iop_Or32, call1, call2);
1288   } else {
1289      res = call1;
1290   }
1291   return res;
1292}
1293
1294// FIXME: this is named wrongly .. looks like a sticky set of
1295// QC, not a write to it.
1296static void setFlag_QC ( IRExpr* resL, IRExpr* resR, Bool Q,
1297                         IRTemp condT )
1298{
1299   putMiscReg32 (OFFB_FPSCR,
1300                 binop(Iop_Or32,
1301                       IRExpr_Get(OFFB_FPSCR, Ity_I32),
1302                       binop(Iop_Shl32,
1303                             mk_armg_calculate_flag_qc(resL, resR, Q),
1304                             mkU8(27))),
1305                 condT);
1306}
1307
1308/* Build IR to conditionally set the flags thunk.  As with putIReg, if
1309   guard is IRTemp_INVALID then it's unconditional, else it holds a
1310   condition :: Ity_I32. */
1311static
1312void setFlags_D1_D2_ND ( UInt cc_op, IRTemp t_dep1,
1313                         IRTemp t_dep2, IRTemp t_ndep,
1314                         IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1315{
1316   vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I32));
1317   vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I32));
1318   vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I32));
1319   vassert(cc_op >= ARMG_CC_OP_COPY && cc_op < ARMG_CC_OP_NUMBER);
1320   if (guardT == IRTemp_INVALID) {
1321      /* unconditional */
1322      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(cc_op) ));
1323      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1324      stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1325      stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1326   } else {
1327      /* conditional */
1328      IRTemp c1 = newTemp(Ity_I1);
1329      assign( c1, binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)) );
1330      stmt( IRStmt_Put(
1331               OFFB_CC_OP,
1332               IRExpr_ITE( mkexpr(c1),
1333                           mkU32(cc_op),
1334                           IRExpr_Get(OFFB_CC_OP, Ity_I32) ) ));
1335      stmt( IRStmt_Put(
1336               OFFB_CC_DEP1,
1337               IRExpr_ITE( mkexpr(c1),
1338                           mkexpr(t_dep1),
1339                           IRExpr_Get(OFFB_CC_DEP1, Ity_I32) ) ));
1340      stmt( IRStmt_Put(
1341               OFFB_CC_DEP2,
1342               IRExpr_ITE( mkexpr(c1),
1343                           mkexpr(t_dep2),
1344                           IRExpr_Get(OFFB_CC_DEP2, Ity_I32) ) ));
1345      stmt( IRStmt_Put(
1346               OFFB_CC_NDEP,
1347               IRExpr_ITE( mkexpr(c1),
1348                           mkexpr(t_ndep),
1349                           IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ) ));
1350   }
1351}
1352
1353
1354/* Minor variant of the above that sets NDEP to zero (if it
1355   sets it at all) */
1356static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
1357                             IRTemp t_dep2,
1358                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1359{
1360   IRTemp z32 = newTemp(Ity_I32);
1361   assign( z32, mkU32(0) );
1362   setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
1363}
1364
1365
1366/* Minor variant of the above that sets DEP2 to zero (if it
1367   sets it at all) */
1368static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
1369                             IRTemp t_ndep,
1370                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1371{
1372   IRTemp z32 = newTemp(Ity_I32);
1373   assign( z32, mkU32(0) );
1374   setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
1375}
1376
1377
1378/* Minor variant of the above that sets DEP2 and NDEP to zero (if it
1379   sets them at all) */
1380static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
1381                          IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1382{
1383   IRTemp z32 = newTemp(Ity_I32);
1384   assign( z32, mkU32(0) );
1385   setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
1386}
1387
1388
/* ARM only */
/* Generate a side-exit to the next instruction, if the given guard
   expression :: Ity_I32 is 0 (note!  the side exit is taken if the
   condition is false!)  This is used to skip over conditional
   instructions which we can't generate straight-line code for, either
   because they are too complex or (more likely) they potentially
   generate exceptions.

   guardT must be a valid temporary, and the address of the current
   instruction must be 4-aligned; both are asserted.
*/
static void mk_skip_over_A32_if_cond_is_false (
               IRTemp guardT /* :: Ity_I32, 0 or 1 */
            )
{
   ASSERT_IS_ARM;
   vassert(guardT != IRTemp_INVALID);
   vassert(0 == (guest_R15_curr_instr_notENC & 3));
   /* Exit condition is the inverted low bit of guardT; the exit
      target is the current instruction address plus 4. */
   stmt( IRStmt_Exit(
            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
            Ijk_Boring,
            IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)),
            OFFB_R15T
       ));
}
1411
/* Thumb16 only */
/* Generate a side-exit that jumps over a 16-bit thumb insn if the
   given guard :: Ity_I32 is 0 (the exit is taken when the condition
   is false).  guardT must be a valid temporary, and the address of
   the current instruction must be 2-aligned; both are asserted. */
static void mk_skip_over_T16_if_cond_is_false (
               IRTemp guardT /* :: Ity_I32, 0 or 1 */
            )
{
   ASSERT_IS_THUMB;
   vassert(guardT != IRTemp_INVALID);
   vassert(0 == (guest_R15_curr_instr_notENC & 1));
   /* Exit condition is the inverted low bit of guardT; the exit
      target is the current instruction address plus 2, with bit 0
      set (presumably the Thumb marker carried in R15T -- confirm). */
   stmt( IRStmt_Exit(
            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
            Ijk_Boring,
            IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)),
            OFFB_R15T
       ));
}
1428
1429
/* Thumb32 only */
/* Generate a side-exit that jumps over a 32-bit thumb insn if the
   given guard :: Ity_I32 is 0 (the exit is taken when the condition
   is false).  guardT must be a valid temporary, and the address of
   the current instruction must be 2-aligned; both are asserted. */
static void mk_skip_over_T32_if_cond_is_false (
               IRTemp guardT /* :: Ity_I32, 0 or 1 */
            )
{
   ASSERT_IS_THUMB;
   vassert(guardT != IRTemp_INVALID);
   vassert(0 == (guest_R15_curr_instr_notENC & 1));
   /* Exit condition is the inverted low bit of guardT; the exit
      target is the current instruction address plus 4, with bit 0
      set (presumably the Thumb marker carried in R15T -- confirm). */
   stmt( IRStmt_Exit(
            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
            Ijk_Boring,
            IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)),
            OFFB_R15T
       ));
}
1446
1447
/* Thumb16 and Thumb32 only
   Generate a SIGILL followed by a restart of the current instruction
   if the given temp is nonzero.  't' must be a valid temporary, and
   the address of the current instruction must be 2-aligned; both are
   asserted. */
static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ )
{
   ASSERT_IS_THUMB;
   vassert(t != IRTemp_INVALID);
   vassert(0 == (guest_R15_curr_instr_notENC & 1));
   /* Side exit of kind Ijk_NoDecode, taken when t != 0.  The target
      is the current instruction's own address with bit 0 set. */
   stmt(
      IRStmt_Exit(
         binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
         Ijk_NoDecode,
         IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1)),
         OFFB_R15T
      )
   );
}
1465
1466
1467/* Inspect the old_itstate, and generate a SIGILL if it indicates that
1468   we are currently in an IT block and are not the last in the block.
1469   This also rolls back guest_ITSTATE to its old value before the exit
1470   and restores it to its new value afterwards.  This is so that if
1471   the exit is taken, we have an up to date version of ITSTATE
1472   available.  Without doing that, we have no hope of making precise
1473   exceptions work. */
1474static void gen_SIGILL_T_if_in_but_NLI_ITBlock (
1475               IRTemp old_itstate /* :: Ity_I32 */,
1476               IRTemp new_itstate /* :: Ity_I32 */
1477            )
1478{
1479   ASSERT_IS_THUMB;
1480   put_ITSTATE(old_itstate); // backout
1481   IRTemp guards_for_next3 = newTemp(Ity_I32);
1482   assign(guards_for_next3,
1483          binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
1484   gen_SIGILL_T_if_nonzero(guards_for_next3);
1485   put_ITSTATE(new_itstate); //restore
1486}
1487
1488
/* Generate a SIGILL if we're anywhere within an IT block, that is,
   if old_itstate as a whole is nonzero.  guest_ITSTATE is set to
   old_itstate before the possible exit and to new_itstate after
   it. */
static void gen_SIGILL_T_if_in_ITBlock (
               IRTemp old_itstate /* :: Ity_I32 */,
               IRTemp new_itstate /* :: Ity_I32 */
            )
{
   put_ITSTATE(old_itstate); // backout
   gen_SIGILL_T_if_nonzero(old_itstate);
   put_ITSTATE(new_itstate); //restore
}
1500
1501
1502/* Generate an APSR value, from the NZCV thunk, and
1503   from QFLAG32 and GEFLAG0 .. GEFLAG3. */
1504static IRTemp synthesise_APSR ( void )
1505{
1506   IRTemp res1 = newTemp(Ity_I32);
1507   // Get NZCV
1508   assign( res1, mk_armg_calculate_flags_nzcv() );
1509   // OR in the Q value
1510   IRTemp res2 = newTemp(Ity_I32);
1511   assign(
1512      res2,
1513      binop(Iop_Or32,
1514            mkexpr(res1),
1515            binop(Iop_Shl32,
1516                  unop(Iop_1Uto32,
1517                       binop(Iop_CmpNE32,
1518                             mkexpr(get_QFLAG32()),
1519                             mkU32(0))),
1520                  mkU8(ARMG_CC_SHIFT_Q)))
1521   );
1522   // OR in GE0 .. GE3
1523   IRExpr* ge0
1524      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(0), mkU32(0)));
1525   IRExpr* ge1
1526      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(1), mkU32(0)));
1527   IRExpr* ge2
1528      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(2), mkU32(0)));
1529   IRExpr* ge3
1530      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(3), mkU32(0)));
1531   IRTemp res3 = newTemp(Ity_I32);
1532   assign(res3,
1533          binop(Iop_Or32,
1534                mkexpr(res2),
1535                binop(Iop_Or32,
1536                      binop(Iop_Or32,
1537                            binop(Iop_Shl32, ge0, mkU8(16)),
1538                            binop(Iop_Shl32, ge1, mkU8(17))),
1539                      binop(Iop_Or32,
1540                            binop(Iop_Shl32, ge2, mkU8(18)),
1541                            binop(Iop_Shl32, ge3, mkU8(19))) )));
1542   return res3;
1543}
1544
1545
1546/* and the inverse transformation: given an APSR value,
1547   set the NZCV thunk, the Q flag, and the GE flags. */
1548static void desynthesise_APSR ( Bool write_nzcvq, Bool write_ge,
1549                                IRTemp apsrT, IRTemp condT )
1550{
1551   vassert(write_nzcvq || write_ge);
1552   if (write_nzcvq) {
1553      // Do NZCV
1554      IRTemp immT = newTemp(Ity_I32);
1555      assign(immT, binop(Iop_And32, mkexpr(apsrT), mkU32(0xF0000000)) );
1556      setFlags_D1(ARMG_CC_OP_COPY, immT, condT);
1557      // Do Q
1558      IRTemp qnewT = newTemp(Ity_I32);
1559      assign(qnewT, binop(Iop_And32, mkexpr(apsrT), mkU32(ARMG_CC_MASK_Q)));
1560      put_QFLAG32(qnewT, condT);
1561   }
1562   if (write_ge) {
1563      // Do GE3..0
1564      put_GEFLAG32(0, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<16)),
1565                   condT);
1566      put_GEFLAG32(1, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<17)),
1567                   condT);
1568      put_GEFLAG32(2, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<18)),
1569                   condT);
1570      put_GEFLAG32(3, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<19)),
1571                   condT);
1572   }
1573}
1574
1575
1576/*------------------------------------------------------------*/
1577/*--- Helpers for saturation                               ---*/
1578/*------------------------------------------------------------*/
1579
/* FIXME: apart from parameter order, the only difference between
   (a) armUnsignedSatQ and (b) armSignedSatQ is how the clamping
   bounds are derived from imm5: in (a) the ceiling is (2^imm5)-1 and
   the floor is 0, whereas in (b) the ceiling is (2^(imm5-1))-1 and
   the floor is -(2^(imm5-1)).  These two functions should be
   commoned up. */
1584
1585/* UnsignedSatQ(): 'clamp' each value so it lies between 0 <= x <= (2^N)-1
1586   Optionally return flag resQ saying whether saturation occurred.
1587   See definition in manual, section A2.2.1, page 41
1588   (bits(N), boolean) UnsignedSatQ( integer i, integer N )
1589   {
1590     if ( i > (2^N)-1 ) { result = (2^N)-1; saturated = TRUE; }
1591     elsif ( i < 0 )    { result = 0; saturated = TRUE; }
1592     else               { result = i; saturated = FALSE; }
1593     return ( result<N-1:0>, saturated );
1594   }
1595*/
1596static void armUnsignedSatQ( IRTemp* res,  /* OUT - Ity_I32 */
1597                             IRTemp* resQ, /* OUT - Ity_I32  */
1598                             IRTemp regT,  /* value to clamp - Ity_I32 */
1599                             UInt imm5 )   /* saturation ceiling */
1600{
1601   UInt ceil  = (1 << imm5) - 1;    // (2^imm5)-1
1602   UInt floor = 0;
1603
1604   IRTemp nd0 = newTemp(Ity_I32);
1605   IRTemp nd1 = newTemp(Ity_I32);
1606   IRTemp nd2 = newTemp(Ity_I1);
1607   IRTemp nd3 = newTemp(Ity_I32);
1608   IRTemp nd4 = newTemp(Ity_I32);
1609   IRTemp nd5 = newTemp(Ity_I1);
1610   IRTemp nd6 = newTemp(Ity_I32);
1611
1612   assign( nd0, mkexpr(regT) );
1613   assign( nd1, mkU32(ceil) );
1614   assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
1615   assign( nd3, IRExpr_ITE(mkexpr(nd2), mkexpr(nd1), mkexpr(nd0)) );
1616   assign( nd4, mkU32(floor) );
1617   assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
1618   assign( nd6, IRExpr_ITE(mkexpr(nd5), mkexpr(nd4), mkexpr(nd3)) );
1619   assign( *res, mkexpr(nd6) );
1620
1621   /* if saturation occurred, then resQ is set to some nonzero value
1622      if sat did not occur, resQ is guaranteed to be zero. */
1623   if (resQ) {
1624      assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
1625   }
1626}
1627
1628
1629/* SignedSatQ(): 'clamp' each value so it lies between  -2^N <= x <= (2^N) - 1
1630   Optionally return flag resQ saying whether saturation occurred.
1631   - see definition in manual, section A2.2.1, page 41
1632   (bits(N), boolean ) SignedSatQ( integer i, integer N )
1633   {
1634     if ( i > 2^(N-1) - 1 )    { result = 2^(N-1) - 1; saturated = TRUE; }
1635     elsif ( i < -(2^(N-1)) )  { result = -(2^(N-1));  saturated = FALSE; }
1636     else                      { result = i;           saturated = FALSE; }
1637     return ( result[N-1:0], saturated );
1638   }
1639*/
1640static void armSignedSatQ( IRTemp regT,    /* value to clamp - Ity_I32 */
1641                           UInt imm5,      /* saturation ceiling */
1642                           IRTemp* res,    /* OUT - Ity_I32 */
1643                           IRTemp* resQ )  /* OUT - Ity_I32  */
1644{
1645   Int ceil  =  (1 << (imm5-1)) - 1;  //  (2^(imm5-1))-1
1646   Int floor = -(1 << (imm5-1));      // -(2^(imm5-1))
1647
1648   IRTemp nd0 = newTemp(Ity_I32);
1649   IRTemp nd1 = newTemp(Ity_I32);
1650   IRTemp nd2 = newTemp(Ity_I1);
1651   IRTemp nd3 = newTemp(Ity_I32);
1652   IRTemp nd4 = newTemp(Ity_I32);
1653   IRTemp nd5 = newTemp(Ity_I1);
1654   IRTemp nd6 = newTemp(Ity_I32);
1655
1656   assign( nd0, mkexpr(regT) );
1657   assign( nd1, mkU32(ceil) );
1658   assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
1659   assign( nd3, IRExpr_ITE( mkexpr(nd2), mkexpr(nd1), mkexpr(nd0) ) );
1660   assign( nd4, mkU32(floor) );
1661   assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
1662   assign( nd6, IRExpr_ITE( mkexpr(nd5), mkexpr(nd4), mkexpr(nd3) ) );
1663   assign( *res, mkexpr(nd6) );
1664
1665   /* if saturation occurred, then resQ is set to some nonzero value
1666      if sat did not occur, resQ is guaranteed to be zero. */
1667   if (resQ) {
1668     assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
1669   }
1670}
1671
1672
1673/* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
1674   overflow occurred for 32-bit addition.  Needs both args and the
1675   result.  HD p27. */
1676static
1677IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
1678                                      IRTemp argL, IRTemp argR )
1679{
1680   IRTemp res = newTemp(Ity_I32);
1681   assign(res, resE);
1682   return
1683      binop( Iop_Shr32,
1684             binop( Iop_And32,
1685                    binop( Iop_Xor32, mkexpr(res), mkexpr(argL) ),
1686                    binop( Iop_Xor32, mkexpr(res), mkexpr(argR) )),
1687             mkU8(31) );
1688}
1689
1690/* Similarly .. also from HD p27 .. */
1691static
1692IRExpr* signed_overflow_after_Sub32 ( IRExpr* resE,
1693                                      IRTemp argL, IRTemp argR )
1694{
1695   IRTemp res = newTemp(Ity_I32);
1696   assign(res, resE);
1697   return
1698      binop( Iop_Shr32,
1699             binop( Iop_And32,
1700                    binop( Iop_Xor32, mkexpr(argL), mkexpr(argR) ),
1701                    binop( Iop_Xor32, mkexpr(res),  mkexpr(argL) )),
1702             mkU8(31) );
1703}
1704
1705
1706/*------------------------------------------------------------*/
1707/*--- Larger helpers                                       ---*/
1708/*------------------------------------------------------------*/
1709
1710/* Compute both the result and new C flag value for a LSL by an imm5
1711   or by a register operand.  May generate reads of the old C value
1712   (hence only safe to use before any writes to guest state happen).
1713   Are factored out so can be used by both ARM and Thumb.
1714
1715   Note that in compute_result_and_C_after_{LSL,LSR,ASR}_by{imm5,reg},
1716   "res" (the result)  is a.k.a. "shop", shifter operand
1717   "newC" (the new C)  is a.k.a. "shco", shifter carry out
1718
1719   The calling convention for res and newC is a bit funny.  They could
1720   be passed by value, but instead are passed by ref.
1721
1722   The C (shco) value computed must be zero in bits 31:1, as the IR
1723   optimisations for flag handling (guest_arm_spechelper) rely on
1724   that, and the slow-path handlers (armg_calculate_flags_nzcv) assert
1725   for it.  Same applies to all these functions that compute shco
1726   after a shift or rotate, not just this one.
1727*/
1728
1729static void compute_result_and_C_after_LSL_by_imm5 (
1730               /*OUT*/HChar* buf,
1731               IRTemp* res,
1732               IRTemp* newC,
1733               IRTemp rMt, UInt shift_amt, /* operands */
1734               UInt rM      /* only for debug printing */
1735            )
1736{
1737   if (shift_amt == 0) {
1738      if (newC) {
1739         assign( *newC, mk_armg_calculate_flag_c() );
1740      }
1741      assign( *res, mkexpr(rMt) );
1742      DIS(buf, "r%u", rM);
1743   } else {
1744      vassert(shift_amt >= 1 && shift_amt <= 31);
1745      if (newC) {
1746         assign( *newC,
1747                 binop(Iop_And32,
1748                       binop(Iop_Shr32, mkexpr(rMt),
1749                                        mkU8(32 - shift_amt)),
1750                       mkU32(1)));
1751      }
1752      assign( *res,
1753              binop(Iop_Shl32, mkexpr(rMt), mkU8(shift_amt)) );
1754      DIS(buf, "r%u, LSL #%u", rM, shift_amt);
1755   }
1756}
1757
1758
1759static void compute_result_and_C_after_LSL_by_reg (
1760               /*OUT*/HChar* buf,
1761               IRTemp* res,
1762               IRTemp* newC,
1763               IRTemp rMt, IRTemp rSt,  /* operands */
1764               UInt rM,    UInt rS      /* only for debug printing */
1765            )
1766{
1767   // shift left in range 0 .. 255
1768   // amt  = rS & 255
1769   // res  = amt < 32 ?  Rm << amt  : 0
1770   // newC = amt == 0     ? oldC  :
1771   //        amt in 1..32 ?  Rm[32-amt]  : 0
1772   IRTemp amtT = newTemp(Ity_I32);
1773   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1774   if (newC) {
1775      /* mux0X(amt == 0,
1776               mux0X(amt < 32,
1777                     0,
1778                     Rm[(32-amt) & 31]),
1779               oldC)
1780      */
1781      /* About the best you can do is pray that iropt is able
1782         to nuke most or all of the following junk. */
1783      IRTemp oldC = newTemp(Ity_I32);
1784      assign(oldC, mk_armg_calculate_flag_c() );
1785      assign(
1786         *newC,
1787         IRExpr_ITE(
1788            binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
1789            mkexpr(oldC),
1790            IRExpr_ITE(
1791               binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
1792               binop(Iop_And32,
1793                     binop(Iop_Shr32,
1794                           mkexpr(rMt),
1795                           unop(Iop_32to8,
1796                                binop(Iop_And32,
1797                                      binop(Iop_Sub32,
1798                                            mkU32(32),
1799                                            mkexpr(amtT)),
1800                                      mkU32(31)
1801                                )
1802                           )
1803                     ),
1804                     mkU32(1)
1805                     ),
1806               mkU32(0)
1807            )
1808         )
1809      );
1810   }
1811   // (Rm << (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
1812   // Lhs of the & limits the shift to 31 bits, so as to
1813   // give known IR semantics.  Rhs of the & is all 1s for
1814   // Rs <= 31 and all 0s for Rs >= 32.
1815   assign(
1816      *res,
1817      binop(
1818         Iop_And32,
1819         binop(Iop_Shl32,
1820               mkexpr(rMt),
1821               unop(Iop_32to8,
1822                    binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
1823         binop(Iop_Sar32,
1824               binop(Iop_Sub32,
1825                     mkexpr(amtT),
1826                     mkU32(32)),
1827               mkU8(31))));
1828    DIS(buf, "r%u, LSL r%u", rM, rS);
1829}
1830
1831
1832static void compute_result_and_C_after_LSR_by_imm5 (
1833               /*OUT*/HChar* buf,
1834               IRTemp* res,
1835               IRTemp* newC,
1836               IRTemp rMt, UInt shift_amt, /* operands */
1837               UInt rM      /* only for debug printing */
1838            )
1839{
1840   if (shift_amt == 0) {
1841      // conceptually a 32-bit shift, however:
1842      // res  = 0
1843      // newC = Rm[31]
1844      if (newC) {
1845         assign( *newC,
1846                 binop(Iop_And32,
1847                       binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
1848                       mkU32(1)));
1849      }
1850      assign( *res, mkU32(0) );
1851      DIS(buf, "r%u, LSR #0(a.k.a. 32)", rM);
1852   } else {
1853      // shift in range 1..31
1854      // res  = Rm >>u shift_amt
1855      // newC = Rm[shift_amt - 1]
1856      vassert(shift_amt >= 1 && shift_amt <= 31);
1857      if (newC) {
1858         assign( *newC,
1859                 binop(Iop_And32,
1860                       binop(Iop_Shr32, mkexpr(rMt),
1861                                        mkU8(shift_amt - 1)),
1862                       mkU32(1)));
1863      }
1864      assign( *res,
1865              binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)) );
1866      DIS(buf, "r%u, LSR #%u", rM, shift_amt);
1867   }
1868}
1869
1870
1871static void compute_result_and_C_after_LSR_by_reg (
1872               /*OUT*/HChar* buf,
1873               IRTemp* res,
1874               IRTemp* newC,
1875               IRTemp rMt, IRTemp rSt,  /* operands */
1876               UInt rM,    UInt rS      /* only for debug printing */
1877            )
1878{
1879   // shift right in range 0 .. 255
1880   // amt = rS & 255
1881   // res  = amt < 32 ?  Rm >>u amt  : 0
1882   // newC = amt == 0     ? oldC  :
1883   //        amt in 1..32 ?  Rm[amt-1]  : 0
1884   IRTemp amtT = newTemp(Ity_I32);
1885   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1886   if (newC) {
1887      /* mux0X(amt == 0,
1888               mux0X(amt < 32,
1889                     0,
1890                     Rm[(amt-1) & 31]),
1891               oldC)
1892      */
1893      IRTemp oldC = newTemp(Ity_I32);
1894      assign(oldC, mk_armg_calculate_flag_c() );
1895      assign(
1896         *newC,
1897         IRExpr_ITE(
1898            binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
1899            mkexpr(oldC),
1900            IRExpr_ITE(
1901               binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
1902               binop(Iop_And32,
1903                     binop(Iop_Shr32,
1904                           mkexpr(rMt),
1905                           unop(Iop_32to8,
1906                                binop(Iop_And32,
1907                                      binop(Iop_Sub32,
1908                                            mkexpr(amtT),
1909                                            mkU32(1)),
1910                                      mkU32(31)
1911                                )
1912                           )
1913                     ),
1914                     mkU32(1)
1915                     ),
1916               mkU32(0)
1917            )
1918         )
1919      );
1920   }
1921   // (Rm >>u (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
1922   // Lhs of the & limits the shift to 31 bits, so as to
1923   // give known IR semantics.  Rhs of the & is all 1s for
1924   // Rs <= 31 and all 0s for Rs >= 32.
1925   assign(
1926      *res,
1927      binop(
1928         Iop_And32,
1929         binop(Iop_Shr32,
1930               mkexpr(rMt),
1931               unop(Iop_32to8,
1932                    binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
1933         binop(Iop_Sar32,
1934               binop(Iop_Sub32,
1935                     mkexpr(amtT),
1936                     mkU32(32)),
1937               mkU8(31))));
1938    DIS(buf, "r%u, LSR r%u", rM, rS);
1939}
1940
1941
1942static void compute_result_and_C_after_ASR_by_imm5 (
1943               /*OUT*/HChar* buf,
1944               IRTemp* res,
1945               IRTemp* newC,
1946               IRTemp rMt, UInt shift_amt, /* operands */
1947               UInt rM      /* only for debug printing */
1948            )
1949{
1950   if (shift_amt == 0) {
1951      // conceptually a 32-bit shift, however:
1952      // res  = Rm >>s 31
1953      // newC = Rm[31]
1954      if (newC) {
1955         assign( *newC,
1956                 binop(Iop_And32,
1957                       binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
1958                       mkU32(1)));
1959      }
1960      assign( *res, binop(Iop_Sar32, mkexpr(rMt), mkU8(31)) );
1961      DIS(buf, "r%u, ASR #0(a.k.a. 32)", rM);
1962   } else {
1963      // shift in range 1..31
1964      // res = Rm >>s shift_amt
1965      // newC = Rm[shift_amt - 1]
1966      vassert(shift_amt >= 1 && shift_amt <= 31);
1967      if (newC) {
1968         assign( *newC,
1969                 binop(Iop_And32,
1970                       binop(Iop_Shr32, mkexpr(rMt),
1971                                        mkU8(shift_amt - 1)),
1972                       mkU32(1)));
1973      }
1974      assign( *res,
1975              binop(Iop_Sar32, mkexpr(rMt), mkU8(shift_amt)) );
1976      DIS(buf, "r%u, ASR #%u", rM, shift_amt);
1977   }
1978}
1979
1980
1981static void compute_result_and_C_after_ASR_by_reg (
1982               /*OUT*/HChar* buf,
1983               IRTemp* res,
1984               IRTemp* newC,
1985               IRTemp rMt, IRTemp rSt,  /* operands */
1986               UInt rM,    UInt rS      /* only for debug printing */
1987            )
1988{
1989   // arithmetic shift right in range 0 .. 255
1990   // amt = rS & 255
1991   // res  = amt < 32 ?  Rm >>s amt  : Rm >>s 31
1992   // newC = amt == 0     ? oldC  :
1993   //        amt in 1..32 ?  Rm[amt-1]  : Rm[31]
1994   IRTemp amtT = newTemp(Ity_I32);
1995   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1996   if (newC) {
1997      /* mux0X(amt == 0,
1998               mux0X(amt < 32,
1999                     Rm[31],
2000                     Rm[(amt-1) & 31])
2001               oldC)
2002      */
2003      IRTemp oldC = newTemp(Ity_I32);
2004      assign(oldC, mk_armg_calculate_flag_c() );
2005      assign(
2006         *newC,
2007         IRExpr_ITE(
2008            binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
2009            mkexpr(oldC),
2010            IRExpr_ITE(
2011               binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
2012               binop(Iop_And32,
2013                     binop(Iop_Shr32,
2014                           mkexpr(rMt),
2015                           unop(Iop_32to8,
2016                                binop(Iop_And32,
2017                                      binop(Iop_Sub32,
2018                                            mkexpr(amtT),
2019                                            mkU32(1)),
2020                                      mkU32(31)
2021                                )
2022                           )
2023                     ),
2024                     mkU32(1)
2025                     ),
2026               binop(Iop_And32,
2027                     binop(Iop_Shr32,
2028                           mkexpr(rMt),
2029                           mkU8(31)
2030                     ),
2031                     mkU32(1)
2032               )
2033            )
2034         )
2035      );
2036   }
2037   // (Rm >>s (amt <u 32 ? amt : 31))
2038   assign(
2039      *res,
2040      binop(
2041         Iop_Sar32,
2042         mkexpr(rMt),
2043         unop(
2044            Iop_32to8,
2045            IRExpr_ITE(
2046               binop(Iop_CmpLT32U, mkexpr(amtT), mkU32(32)),
2047               mkexpr(amtT),
2048               mkU32(31)))));
2049    DIS(buf, "r%u, ASR r%u", rM, rS);
2050}
2051
2052
2053static void compute_result_and_C_after_ROR_by_reg (
2054               /*OUT*/HChar* buf,
2055               IRTemp* res,
2056               IRTemp* newC,
2057               IRTemp rMt, IRTemp rSt,  /* operands */
2058               UInt rM,    UInt rS      /* only for debug printing */
2059            )
2060{
2061   // rotate right in range 0 .. 255
2062   // amt = rS & 255
2063   // shop =  Rm `ror` (amt & 31)
2064   // shco =  amt == 0 ? oldC : Rm[(amt-1) & 31]
2065   IRTemp amtT = newTemp(Ity_I32);
2066   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
2067   IRTemp amt5T = newTemp(Ity_I32);
2068   assign( amt5T, binop(Iop_And32, mkexpr(rSt), mkU32(31)) );
2069   IRTemp oldC = newTemp(Ity_I32);
2070   assign(oldC, mk_armg_calculate_flag_c() );
2071   if (newC) {
2072      assign(
2073         *newC,
2074         IRExpr_ITE(
2075            binop(Iop_CmpNE32, mkexpr(amtT), mkU32(0)),
2076            binop(Iop_And32,
2077                  binop(Iop_Shr32,
2078                        mkexpr(rMt),
2079                        unop(Iop_32to8,
2080                             binop(Iop_And32,
2081                                   binop(Iop_Sub32,
2082                                         mkexpr(amtT),
2083                                         mkU32(1)
2084                                   ),
2085                                   mkU32(31)
2086                             )
2087                        )
2088                  ),
2089                  mkU32(1)
2090            ),
2091            mkexpr(oldC)
2092         )
2093      );
2094   }
2095   assign(
2096      *res,
2097      IRExpr_ITE(
2098         binop(Iop_CmpNE32, mkexpr(amt5T), mkU32(0)),
2099         binop(Iop_Or32,
2100               binop(Iop_Shr32,
2101                     mkexpr(rMt),
2102                     unop(Iop_32to8, mkexpr(amt5T))
2103               ),
2104               binop(Iop_Shl32,
2105                     mkexpr(rMt),
2106                     unop(Iop_32to8,
2107                          binop(Iop_Sub32, mkU32(32), mkexpr(amt5T))
2108                     )
2109               )
2110               ),
2111         mkexpr(rMt)
2112      )
2113   );
2114   DIS(buf, "r%u, ROR r#%u", rM, rS);
2115}
2116
2117
2118/* Generate an expression corresponding to the immediate-shift case of
2119   a shifter operand.  This is used both for ARM and Thumb2.
2120
2121   Bind it to a temporary, and return that via *res.  If newC is
2122   non-NULL, also compute a value for the shifter's carry out (in the
2123   LSB of a word), bind it to a temporary, and return that via *shco.
2124
2125   Generates GETs from the guest state and is therefore not safe to
2126   use once we start doing PUTs to it, for any given instruction.
2127
2128   'how' is encoded thusly:
2129      00b LSL,  01b LSR,  10b ASR,  11b ROR
2130   Most but not all ARM and Thumb integer insns use this encoding.
2131   Be careful to ensure the right value is passed here.
2132*/
2133static void compute_result_and_C_after_shift_by_imm5 (
2134               /*OUT*/HChar* buf,
2135               /*OUT*/IRTemp* res,
2136               /*OUT*/IRTemp* newC,
2137               IRTemp  rMt,       /* reg to shift */
2138               UInt    how,       /* what kind of shift */
2139               UInt    shift_amt, /* shift amount (0..31) */
2140               UInt    rM         /* only for debug printing */
2141            )
2142{
2143   vassert(shift_amt < 32);
2144   vassert(how < 4);
2145
2146   switch (how) {
2147
2148      case 0:
2149         compute_result_and_C_after_LSL_by_imm5(
2150            buf, res, newC, rMt, shift_amt, rM
2151         );
2152         break;
2153
2154      case 1:
2155         compute_result_and_C_after_LSR_by_imm5(
2156            buf, res, newC, rMt, shift_amt, rM
2157         );
2158         break;
2159
2160      case 2:
2161         compute_result_and_C_after_ASR_by_imm5(
2162            buf, res, newC, rMt, shift_amt, rM
2163         );
2164         break;
2165
2166      case 3:
2167         if (shift_amt == 0) {
2168            IRTemp oldcT = newTemp(Ity_I32);
2169            // rotate right 1 bit through carry (?)
2170            // RRX -- described at ARM ARM A5-17
2171            // res  = (oldC << 31) | (Rm >>u 1)
2172            // newC = Rm[0]
2173            if (newC) {
2174               assign( *newC,
2175                       binop(Iop_And32, mkexpr(rMt), mkU32(1)));
2176            }
2177            assign( oldcT, mk_armg_calculate_flag_c() );
2178            assign( *res,
2179                    binop(Iop_Or32,
2180                          binop(Iop_Shl32, mkexpr(oldcT), mkU8(31)),
2181                          binop(Iop_Shr32, mkexpr(rMt), mkU8(1))) );
2182            DIS(buf, "r%u, RRX", rM);
2183         } else {
2184            // rotate right in range 1..31
2185            // res  = Rm `ror` shift_amt
2186            // newC = Rm[shift_amt - 1]
2187            vassert(shift_amt >= 1 && shift_amt <= 31);
2188            if (newC) {
2189               assign( *newC,
2190                       binop(Iop_And32,
2191                             binop(Iop_Shr32, mkexpr(rMt),
2192                                              mkU8(shift_amt - 1)),
2193                             mkU32(1)));
2194            }
2195            assign( *res,
2196                    binop(Iop_Or32,
2197                          binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)),
2198                          binop(Iop_Shl32, mkexpr(rMt),
2199                                           mkU8(32-shift_amt))));
2200            DIS(buf, "r%u, ROR #%u", rM, shift_amt);
2201         }
2202         break;
2203
2204      default:
2205         /*NOTREACHED*/
2206         vassert(0);
2207   }
2208}
2209
2210
2211/* Generate an expression corresponding to the register-shift case of
2212   a shifter operand.  This is used both for ARM and Thumb2.
2213
2214   Bind it to a temporary, and return that via *res.  If newC is
2215   non-NULL, also compute a value for the shifter's carry out (in the
2216   LSB of a word), bind it to a temporary, and return that via *shco.
2217
2218   Generates GETs from the guest state and is therefore not safe to
2219   use once we start doing PUTs to it, for any given instruction.
2220
2221   'how' is encoded thusly:
2222      00b LSL,  01b LSR,  10b ASR,  11b ROR
2223   Most but not all ARM and Thumb integer insns use this encoding.
2224   Be careful to ensure the right value is passed here.
2225*/
2226static void compute_result_and_C_after_shift_by_reg (
2227               /*OUT*/HChar*  buf,
2228               /*OUT*/IRTemp* res,
2229               /*OUT*/IRTemp* newC,
2230               IRTemp  rMt,       /* reg to shift */
2231               UInt    how,       /* what kind of shift */
2232               IRTemp  rSt,       /* shift amount */
2233               UInt    rM,        /* only for debug printing */
2234               UInt    rS         /* only for debug printing */
2235            )
2236{
2237   vassert(how < 4);
2238   switch (how) {
2239      case 0: { /* LSL */
2240         compute_result_and_C_after_LSL_by_reg(
2241            buf, res, newC, rMt, rSt, rM, rS
2242         );
2243         break;
2244      }
2245      case 1: { /* LSR */
2246         compute_result_and_C_after_LSR_by_reg(
2247            buf, res, newC, rMt, rSt, rM, rS
2248         );
2249         break;
2250      }
2251      case 2: { /* ASR */
2252         compute_result_and_C_after_ASR_by_reg(
2253            buf, res, newC, rMt, rSt, rM, rS
2254         );
2255         break;
2256      }
2257      case 3: { /* ROR */
2258         compute_result_and_C_after_ROR_by_reg(
2259             buf, res, newC, rMt, rSt, rM, rS
2260         );
2261         break;
2262      }
2263      default:
2264         /*NOTREACHED*/
2265         vassert(0);
2266   }
2267}
2268
2269
2270/* Generate an expression corresponding to a shifter_operand, bind it
2271   to a temporary, and return that via *shop.  If shco is non-NULL,
2272   also compute a value for the shifter's carry out (in the LSB of a
2273   word), bind it to a temporary, and return that via *shco.
2274
2275   If for some reason we can't come up with a shifter operand (missing
2276   case?  not really a shifter operand?) return False.
2277
2278   Generates GETs from the guest state and is therefore not safe to
2279   use once we start doing PUTs to it, for any given instruction.
2280
2281   For ARM insns only; not for Thumb.
2282*/
2283static Bool mk_shifter_operand ( UInt insn_25, UInt insn_11_0,
2284                                 /*OUT*/IRTemp* shop,
2285                                 /*OUT*/IRTemp* shco,
2286                                 /*OUT*/HChar* buf )
2287{
2288   UInt insn_4 = (insn_11_0 >> 4) & 1;
2289   UInt insn_7 = (insn_11_0 >> 7) & 1;
2290   vassert(insn_25 <= 0x1);
2291   vassert(insn_11_0 <= 0xFFF);
2292
2293   vassert(shop && *shop == IRTemp_INVALID);
2294   *shop = newTemp(Ity_I32);
2295
2296   if (shco) {
2297      vassert(*shco == IRTemp_INVALID);
2298      *shco = newTemp(Ity_I32);
2299   }
2300
2301   /* 32-bit immediate */
2302
2303   if (insn_25 == 1) {
2304      /* immediate: (7:0) rotated right by 2 * (11:8) */
2305      UInt imm = (insn_11_0 >> 0) & 0xFF;
2306      UInt rot = 2 * ((insn_11_0 >> 8) & 0xF);
2307      vassert(rot <= 30);
2308      imm = ROR32(imm, rot);
2309      if (shco) {
2310         if (rot == 0) {
2311            assign( *shco, mk_armg_calculate_flag_c() );
2312         } else {
2313            assign( *shco, mkU32( (imm >> 31) & 1 ) );
2314         }
2315      }
2316      DIS(buf, "#0x%x", imm);
2317      assign( *shop, mkU32(imm) );
2318      return True;
2319   }
2320
2321   /* Shift/rotate by immediate */
2322
2323   if (insn_25 == 0 && insn_4 == 0) {
2324      /* Rm (3:0) shifted (6:5) by immediate (11:7) */
2325      UInt shift_amt = (insn_11_0 >> 7) & 0x1F;
2326      UInt rM        = (insn_11_0 >> 0) & 0xF;
2327      UInt how       = (insn_11_0 >> 5) & 3;
2328      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
2329      IRTemp rMt = newTemp(Ity_I32);
2330      assign(rMt, getIRegA(rM));
2331
2332      vassert(shift_amt <= 31);
2333
2334      compute_result_and_C_after_shift_by_imm5(
2335         buf, shop, shco, rMt, how, shift_amt, rM
2336      );
2337      return True;
2338   }
2339
2340   /* Shift/rotate by register */
2341   if (insn_25 == 0 && insn_4 == 1) {
2342      /* Rm (3:0) shifted (6:5) by Rs (11:8) */
2343      UInt rM  = (insn_11_0 >> 0) & 0xF;
2344      UInt rS  = (insn_11_0 >> 8) & 0xF;
2345      UInt how = (insn_11_0 >> 5) & 3;
2346      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
2347      IRTemp rMt = newTemp(Ity_I32);
2348      IRTemp rSt = newTemp(Ity_I32);
2349
2350      if (insn_7 == 1)
2351         return False; /* not really a shifter operand */
2352
2353      assign(rMt, getIRegA(rM));
2354      assign(rSt, getIRegA(rS));
2355
2356      compute_result_and_C_after_shift_by_reg(
2357         buf, shop, shco, rMt, how, rSt, rM, rS
2358      );
2359      return True;
2360   }
2361
2362   vex_printf("mk_shifter_operand(0x%x,0x%x)\n", insn_25, insn_11_0 );
2363   return False;
2364}
2365
2366
2367/* ARM only */
2368static
2369IRExpr* mk_EA_reg_plusminus_imm12 ( UInt rN, UInt bU, UInt imm12,
2370                                    /*OUT*/HChar* buf )
2371{
2372   vassert(rN < 16);
2373   vassert(bU < 2);
2374   vassert(imm12 < 0x1000);
2375   HChar opChar = bU == 1 ? '+' : '-';
2376   DIS(buf, "[r%u, #%c%u]", rN, opChar, imm12);
2377   return
2378      binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2379             getIRegA(rN),
2380             mkU32(imm12) );
2381}
2382
2383
2384/* ARM only.
2385   NB: This is "DecodeImmShift" in newer versions of the the ARM ARM.
2386*/
2387static
2388IRExpr* mk_EA_reg_plusminus_shifted_reg ( UInt rN, UInt bU, UInt rM,
2389                                          UInt sh2, UInt imm5,
2390                                          /*OUT*/HChar* buf )
2391{
2392   vassert(rN < 16);
2393   vassert(bU < 2);
2394   vassert(rM < 16);
2395   vassert(sh2 < 4);
2396   vassert(imm5 < 32);
2397   HChar   opChar = bU == 1 ? '+' : '-';
2398   IRExpr* index  = NULL;
2399   switch (sh2) {
2400      case 0: /* LSL */
2401         /* imm5 can be in the range 0 .. 31 inclusive. */
2402         index = binop(Iop_Shl32, getIRegA(rM), mkU8(imm5));
2403         DIS(buf, "[r%u, %c r%u LSL #%u]", rN, opChar, rM, imm5);
2404         break;
2405      case 1: /* LSR */
2406         if (imm5 == 0) {
2407            index = mkU32(0);
2408            vassert(0); // ATC
2409         } else {
2410            index = binop(Iop_Shr32, getIRegA(rM), mkU8(imm5));
2411         }
2412         DIS(buf, "[r%u, %cr%u, LSR #%u]",
2413                  rN, opChar, rM, imm5 == 0 ? 32 : imm5);
2414         break;
2415      case 2: /* ASR */
2416         /* Doesn't this just mean that the behaviour with imm5 == 0
2417            is the same as if it had been 31 ? */
2418         if (imm5 == 0) {
2419            index = binop(Iop_Sar32, getIRegA(rM), mkU8(31));
2420            vassert(0); // ATC
2421         } else {
2422            index = binop(Iop_Sar32, getIRegA(rM), mkU8(imm5));
2423         }
2424         DIS(buf, "[r%u, %cr%u, ASR #%u]",
2425                  rN, opChar, rM, imm5 == 0 ? 32 : imm5);
2426         break;
2427      case 3: /* ROR or RRX */
2428         if (imm5 == 0) {
2429            IRTemp rmT    = newTemp(Ity_I32);
2430            IRTemp cflagT = newTemp(Ity_I32);
2431            assign(rmT, getIRegA(rM));
2432            assign(cflagT, mk_armg_calculate_flag_c());
2433            index = binop(Iop_Or32,
2434                          binop(Iop_Shl32, mkexpr(cflagT), mkU8(31)),
2435                          binop(Iop_Shr32, mkexpr(rmT), mkU8(1)));
2436            DIS(buf, "[r%u, %cr%u, RRX]", rN, opChar, rM);
2437         } else {
2438            IRTemp rmT = newTemp(Ity_I32);
2439            assign(rmT, getIRegA(rM));
2440            vassert(imm5 >= 1 && imm5 <= 31);
2441            index = binop(Iop_Or32,
2442                          binop(Iop_Shl32, mkexpr(rmT), mkU8(32-imm5)),
2443                          binop(Iop_Shr32, mkexpr(rmT), mkU8(imm5)));
2444            DIS(buf, "[r%u, %cr%u, ROR #%u]", rN, opChar, rM, imm5);
2445         }
2446         break;
2447      default:
2448         vassert(0);
2449   }
2450   vassert(index);
2451   return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
2452                getIRegA(rN), index);
2453}
2454
2455
2456/* ARM only */
2457static
2458IRExpr* mk_EA_reg_plusminus_imm8 ( UInt rN, UInt bU, UInt imm8,
2459                                   /*OUT*/HChar* buf )
2460{
2461   vassert(rN < 16);
2462   vassert(bU < 2);
2463   vassert(imm8 < 0x100);
2464   HChar opChar = bU == 1 ? '+' : '-';
2465   DIS(buf, "[r%u, #%c%u]", rN, opChar, imm8);
2466   return
2467      binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2468             getIRegA(rN),
2469             mkU32(imm8) );
2470}
2471
2472
2473/* ARM only */
2474static
2475IRExpr* mk_EA_reg_plusminus_reg ( UInt rN, UInt bU, UInt rM,
2476                                  /*OUT*/HChar* buf )
2477{
2478   vassert(rN < 16);
2479   vassert(bU < 2);
2480   vassert(rM < 16);
2481   HChar   opChar = bU == 1 ? '+' : '-';
2482   IRExpr* index  = getIRegA(rM);
2483   DIS(buf, "[r%u, %c r%u]", rN, opChar, rM);
2484   return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
2485                getIRegA(rN), index);
2486}
2487
2488
2489/* irRes :: Ity_I32 holds a floating point comparison result encoded
2490   as an IRCmpF64Result.  Generate code to convert it to an
2491   ARM-encoded (N,Z,C,V) group in the lowest 4 bits of an I32 value.
2492   Assign a new temp to hold that value, and return the temp. */
2493static
2494IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes )
2495{
2496   IRTemp ix       = newTemp(Ity_I32);
2497   IRTemp termL    = newTemp(Ity_I32);
2498   IRTemp termR    = newTemp(Ity_I32);
2499   IRTemp nzcv     = newTemp(Ity_I32);
2500
2501   /* This is where the fun starts.  We have to convert 'irRes' from
2502      an IR-convention return result (IRCmpF64Result) to an
2503      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
2504      4 bits of 'nzcv'. */
2505   /* Map compare result from IR to ARM(nzcv) */
2506   /*
2507      FP cmp result | IR   | ARM(nzcv)
2508      --------------------------------
2509      UN              0x45   0011
2510      LT              0x01   1000
2511      GT              0x00   0010
2512      EQ              0x40   0110
2513   */
2514   /* Now since you're probably wondering WTF ..
2515
2516      ix fishes the useful bits out of the IR value, bits 6 and 0, and
2517      places them side by side, giving a number which is 0, 1, 2 or 3.
2518
2519      termL is a sequence cooked up by GNU superopt.  It converts ix
2520         into an almost correct value NZCV value (incredibly), except
2521         for the case of UN, where it produces 0100 instead of the
2522         required 0011.
2523
2524      termR is therefore a correction term, also computed from ix.  It
2525         is 1 in the UN case and 0 for LT, GT and UN.  Hence, to get
2526         the final correct value, we subtract termR from termL.
2527
2528      Don't take my word for it.  There's a test program at the bottom
2529      of this file, to try this out with.
2530   */
2531   assign(
2532      ix,
2533      binop(Iop_Or32,
2534            binop(Iop_And32,
2535                  binop(Iop_Shr32, mkexpr(irRes), mkU8(5)),
2536                  mkU32(3)),
2537            binop(Iop_And32, mkexpr(irRes), mkU32(1))));
2538
2539   assign(
2540      termL,
2541      binop(Iop_Add32,
2542            binop(Iop_Shr32,
2543                  binop(Iop_Sub32,
2544                        binop(Iop_Shl32,
2545                              binop(Iop_Xor32, mkexpr(ix), mkU32(1)),
2546                              mkU8(30)),
2547                        mkU32(1)),
2548                  mkU8(29)),
2549            mkU32(1)));
2550
2551   assign(
2552      termR,
2553      binop(Iop_And32,
2554            binop(Iop_And32,
2555                  mkexpr(ix),
2556                  binop(Iop_Shr32, mkexpr(ix), mkU8(1))),
2557            mkU32(1)));
2558
2559   assign(nzcv, binop(Iop_Sub32, mkexpr(termL), mkexpr(termR)));
2560   return nzcv;
2561}
2562
2563
2564/* Thumb32 only.  This is "ThumbExpandImm" in the ARM ARM.  If
2565   updatesC is non-NULL, a boolean is written to it indicating whether
2566   or not the C flag is updated, as per ARM ARM "ThumbExpandImm_C".
2567*/
2568static UInt thumbExpandImm ( Bool* updatesC,
2569                             UInt imm1, UInt imm3, UInt imm8 )
2570{
2571   vassert(imm1 < (1<<1));
2572   vassert(imm3 < (1<<3));
2573   vassert(imm8 < (1<<8));
2574   UInt i_imm3_a = (imm1 << 4) | (imm3 << 1) | ((imm8 >> 7) & 1);
2575   UInt abcdefgh = imm8;
2576   UInt lbcdefgh = imm8 | 0x80;
2577   if (updatesC) {
2578      *updatesC = i_imm3_a >= 8;
2579   }
2580   switch (i_imm3_a) {
2581      case 0: case 1:
2582         return abcdefgh;
2583      case 2: case 3:
2584         return (abcdefgh << 16) | abcdefgh;
2585      case 4: case 5:
2586         return (abcdefgh << 24) | (abcdefgh << 8);
2587      case 6: case 7:
2588         return (abcdefgh << 24) | (abcdefgh << 16)
2589                | (abcdefgh << 8) | abcdefgh;
2590      case 8 ... 31:
2591         return lbcdefgh << (32 - i_imm3_a);
2592      default:
2593         break;
2594   }
2595   /*NOTREACHED*/vassert(0);
2596}
2597
2598
2599/* Version of thumbExpandImm where we simply feed it the
2600   instruction halfwords (the lowest addressed one is I0). */
2601static UInt thumbExpandImm_from_I0_I1 ( Bool* updatesC,
2602                                        UShort i0s, UShort i1s )
2603{
2604   UInt i0    = (UInt)i0s;
2605   UInt i1    = (UInt)i1s;
2606   UInt imm1  = SLICE_UInt(i0,10,10);
2607   UInt imm3  = SLICE_UInt(i1,14,12);
2608   UInt imm8  = SLICE_UInt(i1,7,0);
2609   return thumbExpandImm(updatesC, imm1, imm3, imm8);
2610}
2611
2612
2613/* Thumb16 only.  Given the firstcond and mask fields from an IT
2614   instruction, compute the 32-bit ITSTATE value implied, as described
2615   in libvex_guest_arm.h.  This is not the ARM ARM representation.
2616   Also produce the t/e chars for the 2nd, 3rd, 4th insns, for
2617   disassembly printing.  Returns False if firstcond or mask
2618   denote something invalid.
2619
2620   The number and conditions for the instructions to be
2621   conditionalised depend on firstcond and mask:
2622
2623   mask      cond 1    cond 2      cond 3      cond 4
2624
2625   1000      fc[3:0]
2626   x100      fc[3:0]   fc[3:1]:x
2627   xy10      fc[3:0]   fc[3:1]:x   fc[3:1]:y
2628   xyz1      fc[3:0]   fc[3:1]:x   fc[3:1]:y   fc[3:1]:z
2629
2630   The condition fields are assembled in *itstate backwards (cond 4 at
2631   the top, cond 1 at the bottom).  Conditions are << 4'd and then
2632   ^0xE'd, and those fields that correspond to instructions in the IT
2633   block are tagged with a 1 bit.
2634*/
2635static Bool compute_ITSTATE ( /*OUT*/UInt*  itstate,
2636                              /*OUT*/HChar* ch1,
2637                              /*OUT*/HChar* ch2,
2638                              /*OUT*/HChar* ch3,
2639                              UInt firstcond, UInt mask )
2640{
2641   vassert(firstcond <= 0xF);
2642   vassert(mask <= 0xF);
2643   *itstate = 0;
2644   *ch1 = *ch2 = *ch3 = '.';
2645   if (mask == 0)
2646      return False; /* the logic below actually ensures this anyway,
2647                       but clearer to make it explicit. */
2648   if (firstcond == 0xF)
2649      return False; /* NV is not allowed */
2650   if (firstcond == 0xE && popcount32(mask) != 1)
2651      return False; /* if firstcond is AL then all the rest must be too */
2652
2653   UInt m3 = (mask >> 3) & 1;
2654   UInt m2 = (mask >> 2) & 1;
2655   UInt m1 = (mask >> 1) & 1;
2656   UInt m0 = (mask >> 0) & 1;
2657
2658   UInt fc = (firstcond << 4) | 1/*in-IT-block*/;
2659   UInt ni = (0xE/*AL*/ << 4) | 0/*not-in-IT-block*/;
2660
2661   if (m3 == 1 && (m2|m1|m0) == 0) {
2662      *itstate = (ni << 24) | (ni << 16) | (ni << 8) | fc;
2663      *itstate ^= 0xE0E0E0E0;
2664      return True;
2665   }
2666
2667   if (m2 == 1 && (m1|m0) == 0) {
2668      *itstate = (ni << 24) | (ni << 16) | (setbit32(fc, 4, m3) << 8) | fc;
2669      *itstate ^= 0xE0E0E0E0;
2670      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2671      return True;
2672   }
2673
2674   if (m1 == 1 && m0 == 0) {
2675      *itstate = (ni << 24)
2676                 | (setbit32(fc, 4, m2) << 16)
2677                 | (setbit32(fc, 4, m3) << 8) | fc;
2678      *itstate ^= 0xE0E0E0E0;
2679      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2680      *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
2681      return True;
2682   }
2683
2684   if (m0 == 1) {
2685      *itstate = (setbit32(fc, 4, m1) << 24)
2686                 | (setbit32(fc, 4, m2) << 16)
2687                 | (setbit32(fc, 4, m3) << 8) | fc;
2688      *itstate ^= 0xE0E0E0E0;
2689      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2690      *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
2691      *ch3 = m1 == (firstcond & 1) ? 't' : 'e';
2692      return True;
2693   }
2694
2695   return False;
2696}
2697
2698
2699/* Generate IR to do 32-bit bit reversal, a la Hacker's Delight
2700   Chapter 7 Section 1. */
2701static IRTemp gen_BITREV ( IRTemp x0 )
2702{
2703   IRTemp x1 = newTemp(Ity_I32);
2704   IRTemp x2 = newTemp(Ity_I32);
2705   IRTemp x3 = newTemp(Ity_I32);
2706   IRTemp x4 = newTemp(Ity_I32);
2707   IRTemp x5 = newTemp(Ity_I32);
2708   UInt   c1 = 0x55555555;
2709   UInt   c2 = 0x33333333;
2710   UInt   c3 = 0x0F0F0F0F;
2711   UInt   c4 = 0x00FF00FF;
2712   UInt   c5 = 0x0000FFFF;
2713   assign(x1,
2714          binop(Iop_Or32,
2715                binop(Iop_Shl32,
2716                      binop(Iop_And32, mkexpr(x0), mkU32(c1)),
2717                      mkU8(1)),
2718                binop(Iop_Shr32,
2719                      binop(Iop_And32, mkexpr(x0), mkU32(~c1)),
2720                      mkU8(1))
2721   ));
2722   assign(x2,
2723          binop(Iop_Or32,
2724                binop(Iop_Shl32,
2725                      binop(Iop_And32, mkexpr(x1), mkU32(c2)),
2726                      mkU8(2)),
2727                binop(Iop_Shr32,
2728                      binop(Iop_And32, mkexpr(x1), mkU32(~c2)),
2729                      mkU8(2))
2730   ));
2731   assign(x3,
2732          binop(Iop_Or32,
2733                binop(Iop_Shl32,
2734                      binop(Iop_And32, mkexpr(x2), mkU32(c3)),
2735                      mkU8(4)),
2736                binop(Iop_Shr32,
2737                      binop(Iop_And32, mkexpr(x2), mkU32(~c3)),
2738                      mkU8(4))
2739   ));
2740   assign(x4,
2741          binop(Iop_Or32,
2742                binop(Iop_Shl32,
2743                      binop(Iop_And32, mkexpr(x3), mkU32(c4)),
2744                      mkU8(8)),
2745                binop(Iop_Shr32,
2746                      binop(Iop_And32, mkexpr(x3), mkU32(~c4)),
2747                      mkU8(8))
2748   ));
2749   assign(x5,
2750          binop(Iop_Or32,
2751                binop(Iop_Shl32,
2752                      binop(Iop_And32, mkexpr(x4), mkU32(c5)),
2753                      mkU8(16)),
2754                binop(Iop_Shr32,
2755                      binop(Iop_And32, mkexpr(x4), mkU32(~c5)),
2756                      mkU8(16))
2757   ));
2758   return x5;
2759}
2760
2761
2762/* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
2763   0:1:2:3 (aka byte-swap). */
2764static IRTemp gen_REV ( IRTemp arg )
2765{
2766   IRTemp res = newTemp(Ity_I32);
2767   assign(res,
2768          binop(Iop_Or32,
2769                binop(Iop_Shl32, mkexpr(arg), mkU8(24)),
2770          binop(Iop_Or32,
2771                binop(Iop_And32, binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
2772                                 mkU32(0x00FF0000)),
2773          binop(Iop_Or32,
2774                binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
2775                                       mkU32(0x0000FF00)),
2776                binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(24)),
2777                                       mkU32(0x000000FF) )
2778   ))));
2779   return res;
2780}
2781
2782
2783/* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
2784   2:3:0:1 (swap within lo and hi halves). */
2785static IRTemp gen_REV16 ( IRTemp arg )
2786{
2787   IRTemp res = newTemp(Ity_I32);
2788   assign(res,
2789          binop(Iop_Or32,
2790                binop(Iop_And32,
2791                      binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
2792                      mkU32(0xFF00FF00)),
2793                binop(Iop_And32,
2794                      binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
2795                      mkU32(0x00FF00FF))));
2796   return res;
2797}
2798
2799
2800/*------------------------------------------------------------*/
2801/*--- Advanced SIMD (NEON) instructions                    ---*/
2802/*------------------------------------------------------------*/
2803
2804/*------------------------------------------------------------*/
2805/*--- NEON data processing                                 ---*/
2806/*------------------------------------------------------------*/
2807
2808/* For all NEON DP ops, we use the normal scheme to handle conditional
2809   writes to registers -- pass in condT and hand that on to the
2810   put*Reg functions.  In ARM mode condT is always IRTemp_INVALID
2811   since NEON is unconditional for ARM.  In Thumb mode condT is
2812   derived from the ITSTATE shift register in the normal way. */
2813
2814static
2815UInt get_neon_d_regno(UInt theInstr)
2816{
2817   UInt x = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
2818   if (theInstr & 0x40) {
2819      if (x & 1) {
2820         x = x + 0x100;
2821      } else {
2822         x = x >> 1;
2823      }
2824   }
2825   return x;
2826}
2827
2828static
2829UInt get_neon_n_regno(UInt theInstr)
2830{
2831   UInt x = ((theInstr >> 3) & 0x10) | ((theInstr >> 16) & 0xF);
2832   if (theInstr & 0x40) {
2833      if (x & 1) {
2834         x = x + 0x100;
2835      } else {
2836         x = x >> 1;
2837      }
2838   }
2839   return x;
2840}
2841
2842static
2843UInt get_neon_m_regno(UInt theInstr)
2844{
2845   UInt x = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
2846   if (theInstr & 0x40) {
2847      if (x & 1) {
2848         x = x + 0x100;
2849      } else {
2850         x = x >> 1;
2851      }
2852   }
2853   return x;
2854}
2855
2856static
2857Bool dis_neon_vext ( UInt theInstr, IRTemp condT )
2858{
2859   UInt dreg = get_neon_d_regno(theInstr);
2860   UInt mreg = get_neon_m_regno(theInstr);
2861   UInt nreg = get_neon_n_regno(theInstr);
2862   UInt imm4 = (theInstr >> 8) & 0xf;
2863   UInt Q = (theInstr >> 6) & 1;
2864   HChar reg_t = Q ? 'q' : 'd';
2865
2866   if (Q) {
2867      putQReg(dreg, triop(Iop_SliceV128, /*hiV128*/getQReg(mreg),
2868                          /*loV128*/getQReg(nreg), mkU8(imm4)), condT);
2869   } else {
2870      putDRegI64(dreg, triop(Iop_Slice64, /*hiI64*/getDRegI64(mreg),
2871                             /*loI64*/getDRegI64(nreg), mkU8(imm4)), condT);
2872   }
2873   DIP("vext.8 %c%d, %c%d, %c%d, #%d\n", reg_t, dreg, reg_t, nreg,
2874                                         reg_t, mreg, imm4);
2875   return True;
2876}
2877
2878/* Generate specific vector FP binary ops, possibly with a fake
2879   rounding mode as required by the primop. */
2880static
2881IRExpr* binop_w_fake_RM ( IROp op, IRExpr* argL, IRExpr* argR )
2882{
2883   switch (op) {
2884      case Iop_Add32Fx4:
2885      case Iop_Sub32Fx4:
2886      case Iop_Mul32Fx4:
2887         return triop(op, get_FAKE_roundingmode(), argL, argR );
2888      case Iop_Add32x4: case Iop_Add16x8:
2889      case Iop_Sub32x4: case Iop_Sub16x8:
2890      case Iop_Mul32x4: case Iop_Mul16x8:
2891      case Iop_Mul32x2: case Iop_Mul16x4:
2892      case Iop_Add32Fx2:
2893      case Iop_Sub32Fx2:
2894      case Iop_Mul32Fx2:
2895      case Iop_PwAdd32Fx2:
2896         return binop(op, argL, argR);
2897      default:
2898        ppIROp(op);
2899        vassert(0);
2900   }
2901}
2902
2903/* VTBL, VTBX */
2904static
2905Bool dis_neon_vtb ( UInt theInstr, IRTemp condT )
2906{
2907   UInt op = (theInstr >> 6) & 1;
2908   UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
2909   UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
2910   UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
2911   UInt len = (theInstr >> 8) & 3;
2912   Int i;
2913   IROp cmp;
2914   ULong imm;
2915   IRTemp arg_l;
2916   IRTemp old_mask, new_mask, cur_mask;
2917   IRTemp old_res, new_res;
2918   IRTemp old_arg, new_arg;
2919
2920   if (dreg >= 0x100 || mreg >= 0x100 || nreg >= 0x100)
2921      return False;
2922   if (nreg + len > 31)
2923      return False;
2924
2925   cmp = Iop_CmpGT8Ux8;
2926
2927   old_mask = newTemp(Ity_I64);
2928   old_res = newTemp(Ity_I64);
2929   old_arg = newTemp(Ity_I64);
2930   assign(old_mask, mkU64(0));
2931   assign(old_res, mkU64(0));
2932   assign(old_arg, getDRegI64(mreg));
2933   imm = 8;
2934   imm = (imm <<  8) | imm;
2935   imm = (imm << 16) | imm;
2936   imm = (imm << 32) | imm;
2937
2938   for (i = 0; i <= len; i++) {
2939      arg_l = newTemp(Ity_I64);
2940      new_mask = newTemp(Ity_I64);
2941      cur_mask = newTemp(Ity_I64);
2942      new_res = newTemp(Ity_I64);
2943      new_arg = newTemp(Ity_I64);
2944      assign(arg_l, getDRegI64(nreg+i));
2945      assign(new_arg, binop(Iop_Sub8x8, mkexpr(old_arg), mkU64(imm)));
2946      assign(cur_mask, binop(cmp, mkU64(imm), mkexpr(old_arg)));
2947      assign(new_mask, binop(Iop_Or64, mkexpr(old_mask), mkexpr(cur_mask)));
2948      assign(new_res, binop(Iop_Or64,
2949                            mkexpr(old_res),
2950                            binop(Iop_And64,
2951                                  binop(Iop_Perm8x8,
2952                                        mkexpr(arg_l),
2953                                        binop(Iop_And64,
2954                                              mkexpr(old_arg),
2955                                              mkexpr(cur_mask))),
2956                                  mkexpr(cur_mask))));
2957
2958      old_arg = new_arg;
2959      old_mask = new_mask;
2960      old_res = new_res;
2961   }
2962   if (op) {
2963      new_res = newTemp(Ity_I64);
2964      assign(new_res, binop(Iop_Or64,
2965                            binop(Iop_And64,
2966                                  getDRegI64(dreg),
2967                                  unop(Iop_Not64, mkexpr(old_mask))),
2968                            mkexpr(old_res)));
2969      old_res = new_res;
2970   }
2971
2972   putDRegI64(dreg, mkexpr(old_res), condT);
2973   DIP("vtb%c.8 d%u, {", op ? 'x' : 'l', dreg);
2974   if (len > 0) {
2975      DIP("d%u-d%u", nreg, nreg + len);
2976   } else {
2977      DIP("d%u", nreg);
2978   }
2979   DIP("}, d%u\n", mreg);
2980   return True;
2981}
2982
2983/* VDUP (scalar)  */
2984static
2985Bool dis_neon_vdup ( UInt theInstr, IRTemp condT )
2986{
2987   UInt Q = (theInstr >> 6) & 1;
2988   UInt dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
2989   UInt mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
2990   UInt imm4 = (theInstr >> 16) & 0xF;
2991   UInt index;
2992   UInt size;
2993   IRTemp arg_m;
2994   IRTemp res;
2995   IROp op, op2;
2996
2997   if ((imm4 == 0) || (imm4 == 8))
2998      return False;
2999   if ((Q == 1) && ((dreg & 1) == 1))
3000      return False;
3001   if (Q)
3002      dreg >>= 1;
3003   arg_m = newTemp(Ity_I64);
3004   assign(arg_m, getDRegI64(mreg));
3005   if (Q)
3006      res = newTemp(Ity_V128);
3007   else
3008      res = newTemp(Ity_I64);
3009   if ((imm4 & 1) == 1) {
3010      op = Q ? Iop_Dup8x16 : Iop_Dup8x8;
3011      op2 = Iop_GetElem8x8;
3012      index = imm4 >> 1;
3013      size = 8;
3014   } else if ((imm4 & 3) == 2) {
3015      op = Q ? Iop_Dup16x8 : Iop_Dup16x4;
3016      op2 = Iop_GetElem16x4;
3017      index = imm4 >> 2;
3018      size = 16;
3019   } else if ((imm4 & 7) == 4) {
3020      op = Q ? Iop_Dup32x4 : Iop_Dup32x2;
3021      op2 = Iop_GetElem32x2;
3022      index = imm4 >> 3;
3023      size = 32;
3024   } else {
3025      return False; // can this ever happen?
3026   }
3027   assign(res, unop(op, binop(op2, mkexpr(arg_m), mkU8(index))));
3028   if (Q) {
3029      putQReg(dreg, mkexpr(res), condT);
3030   } else {
3031      putDRegI64(dreg, mkexpr(res), condT);
3032   }
3033   DIP("vdup.%d %c%d, d%d[%d]\n", size, Q ? 'q' : 'd', dreg, mreg, index);
3034   return True;
3035}
3036
3037/* A7.4.1 Three registers of the same length */
3038static
3039Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT )
3040{
3041   UInt Q = (theInstr >> 6) & 1;
3042   UInt dreg = get_neon_d_regno(theInstr);
3043   UInt nreg = get_neon_n_regno(theInstr);
3044   UInt mreg = get_neon_m_regno(theInstr);
3045   UInt A = (theInstr >> 8) & 0xF;
3046   UInt B = (theInstr >> 4) & 1;
3047   UInt C = (theInstr >> 20) & 0x3;
3048   UInt U = (theInstr >> 24) & 1;
3049   UInt size = C;
3050
3051   IRTemp arg_n;
3052   IRTemp arg_m;
3053   IRTemp res;
3054
3055   if (Q) {
3056      arg_n = newTemp(Ity_V128);
3057      arg_m = newTemp(Ity_V128);
3058      res = newTemp(Ity_V128);
3059      assign(arg_n, getQReg(nreg));
3060      assign(arg_m, getQReg(mreg));
3061   } else {
3062      arg_n = newTemp(Ity_I64);
3063      arg_m = newTemp(Ity_I64);
3064      res = newTemp(Ity_I64);
3065      assign(arg_n, getDRegI64(nreg));
3066      assign(arg_m, getDRegI64(mreg));
3067   }
3068
3069   switch(A) {
3070      case 0:
3071         if (B == 0) {
3072            /* VHADD */
3073            ULong imm = 0;
3074            IRExpr *imm_val;
3075            IROp addOp;
3076            IROp andOp;
3077            IROp shOp;
3078            HChar regType = Q ? 'q' : 'd';
3079
3080            if (size == 3)
3081               return False;
3082            switch(size) {
3083               case 0: imm = 0x101010101010101LL; break;
3084               case 1: imm = 0x1000100010001LL; break;
3085               case 2: imm = 0x100000001LL; break;
3086               default: vassert(0);
3087            }
3088            if (Q) {
3089               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
3090               andOp = Iop_AndV128;
3091            } else {
3092               imm_val = mkU64(imm);
3093               andOp = Iop_And64;
3094            }
3095            if (U) {
3096               switch(size) {
3097                  case 0:
3098                     addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
3099                     shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3100                     break;
3101                  case 1:
3102                     addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
3103                     shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3104                     break;
3105                  case 2:
3106                     addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
3107                     shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3108                     break;
3109                  default:
3110                     vassert(0);
3111               }
3112            } else {
3113               switch(size) {
3114                  case 0:
3115                     addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
3116                     shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
3117                     break;
3118                  case 1:
3119                     addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
3120                     shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
3121                     break;
3122                  case 2:
3123                     addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
3124                     shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
3125                     break;
3126                  default:
3127                     vassert(0);
3128               }
3129            }
3130            assign(res,
3131                   binop(addOp,
3132                         binop(addOp,
3133                               binop(shOp, mkexpr(arg_m), mkU8(1)),
3134                               binop(shOp, mkexpr(arg_n), mkU8(1))),
3135                         binop(shOp,
3136                               binop(addOp,
3137                                     binop(andOp, mkexpr(arg_m), imm_val),
3138                                     binop(andOp, mkexpr(arg_n), imm_val)),
3139                               mkU8(1))));
3140            DIP("vhadd.%c%d %c%d, %c%d, %c%d\n",
3141                U ? 'u' : 's', 8 << size, regType,
3142                dreg, regType, nreg, regType, mreg);
3143         } else {
3144            /* VQADD */
3145            IROp op, op2;
3146            IRTemp tmp;
3147            HChar reg_t = Q ? 'q' : 'd';
3148            if (Q) {
3149               switch (size) {
3150                  case 0:
3151                     op = U ? Iop_QAdd8Ux16 : Iop_QAdd8Sx16;
3152                     op2 = Iop_Add8x16;
3153                     break;
3154                  case 1:
3155                     op = U ? Iop_QAdd16Ux8 : Iop_QAdd16Sx8;
3156                     op2 = Iop_Add16x8;
3157                     break;
3158                  case 2:
3159                     op = U ? Iop_QAdd32Ux4 : Iop_QAdd32Sx4;
3160                     op2 = Iop_Add32x4;
3161                     break;
3162                  case 3:
3163                     op = U ? Iop_QAdd64Ux2 : Iop_QAdd64Sx2;
3164                     op2 = Iop_Add64x2;
3165                     break;
3166                  default:
3167                     vassert(0);
3168               }
3169            } else {
3170               switch (size) {
3171                  case 0:
3172                     op = U ? Iop_QAdd8Ux8 : Iop_QAdd8Sx8;
3173                     op2 = Iop_Add8x8;
3174                     break;
3175                  case 1:
3176                     op = U ? Iop_QAdd16Ux4 : Iop_QAdd16Sx4;
3177                     op2 = Iop_Add16x4;
3178                     break;
3179                  case 2:
3180                     op = U ? Iop_QAdd32Ux2 : Iop_QAdd32Sx2;
3181                     op2 = Iop_Add32x2;
3182                     break;
3183                  case 3:
3184                     op = U ? Iop_QAdd64Ux1 : Iop_QAdd64Sx1;
3185                     op2 = Iop_Add64;
3186                     break;
3187                  default:
3188                     vassert(0);
3189               }
3190            }
3191            if (Q) {
3192               tmp = newTemp(Ity_V128);
3193            } else {
3194               tmp = newTemp(Ity_I64);
3195            }
3196            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3197            assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
3198            setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
3199            DIP("vqadd.%c%d %c%d, %c%d, %c%d\n",
3200                U ? 'u' : 's',
3201                8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
3202         }
3203         break;
3204      case 1:
3205         if (B == 0) {
3206            /* VRHADD */
3207            /* VRHADD C, A, B ::=
3208                 C = (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) */
3209            IROp shift_op, add_op;
3210            IRTemp cc;
3211            ULong one = 1;
3212            HChar reg_t = Q ? 'q' : 'd';
3213            switch (size) {
3214               case 0: one = (one <<  8) | one; /* fall through */
3215               case 1: one = (one << 16) | one; /* fall through */
3216               case 2: one = (one << 32) | one; break;
3217               case 3: return False;
3218               default: vassert(0);
3219            }
3220            if (Q) {
3221               switch (size) {
3222                  case 0:
3223                     shift_op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
3224                     add_op = Iop_Add8x16;
3225                     break;
3226                  case 1:
3227                     shift_op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
3228                     add_op = Iop_Add16x8;
3229                     break;
3230                  case 2:
3231                     shift_op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
3232                     add_op = Iop_Add32x4;
3233                     break;
3234                  case 3:
3235                     return False;
3236                  default:
3237                     vassert(0);
3238               }
3239            } else {
3240               switch (size) {
3241                  case 0:
3242                     shift_op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
3243                     add_op = Iop_Add8x8;
3244                     break;
3245                  case 1:
3246                     shift_op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
3247                     add_op = Iop_Add16x4;
3248                     break;
3249                  case 2:
3250                     shift_op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
3251                     add_op = Iop_Add32x2;
3252                     break;
3253                  case 3:
3254                     return False;
3255                  default:
3256                     vassert(0);
3257               }
3258            }
3259            if (Q) {
3260               cc = newTemp(Ity_V128);
3261               assign(cc, binop(shift_op,
3262                                binop(add_op,
3263                                      binop(add_op,
3264                                            binop(Iop_AndV128,
3265                                                  mkexpr(arg_n),
3266                                                  binop(Iop_64HLtoV128,
3267                                                        mkU64(one),
3268                                                        mkU64(one))),
3269                                            binop(Iop_AndV128,
3270                                                  mkexpr(arg_m),
3271                                                  binop(Iop_64HLtoV128,
3272                                                        mkU64(one),
3273                                                        mkU64(one)))),
3274                                      binop(Iop_64HLtoV128,
3275                                            mkU64(one),
3276                                            mkU64(one))),
3277                                mkU8(1)));
3278               assign(res, binop(add_op,
3279                                 binop(add_op,
3280                                       binop(shift_op,
3281                                             mkexpr(arg_n),
3282                                             mkU8(1)),
3283                                       binop(shift_op,
3284                                             mkexpr(arg_m),
3285                                             mkU8(1))),
3286                                 mkexpr(cc)));
3287            } else {
3288               cc = newTemp(Ity_I64);
3289               assign(cc, binop(shift_op,
3290                                binop(add_op,
3291                                      binop(add_op,
3292                                            binop(Iop_And64,
3293                                                  mkexpr(arg_n),
3294                                                  mkU64(one)),
3295                                            binop(Iop_And64,
3296                                                  mkexpr(arg_m),
3297                                                  mkU64(one))),
3298                                      mkU64(one)),
3299                                mkU8(1)));
3300               assign(res, binop(add_op,
3301                                 binop(add_op,
3302                                       binop(shift_op,
3303                                             mkexpr(arg_n),
3304                                             mkU8(1)),
3305                                       binop(shift_op,
3306                                             mkexpr(arg_m),
3307                                             mkU8(1))),
3308                                 mkexpr(cc)));
3309            }
3310            DIP("vrhadd.%c%d %c%d, %c%d, %c%d\n",
3311                U ? 'u' : 's',
3312                8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
3313         } else {
3314            if (U == 0)  {
3315               switch(C) {
3316                  case 0: {
3317                     /* VAND  */
3318                     HChar reg_t = Q ? 'q' : 'd';
3319                     if (Q) {
3320                        assign(res, binop(Iop_AndV128, mkexpr(arg_n),
3321                                                       mkexpr(arg_m)));
3322                     } else {
3323                        assign(res, binop(Iop_And64, mkexpr(arg_n),
3324                                                     mkexpr(arg_m)));
3325                     }
3326                     DIP("vand %c%d, %c%d, %c%d\n",
3327                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
3328                     break;
3329                  }
3330                  case 1: {
3331                     /* VBIC  */
3332                     HChar reg_t = Q ? 'q' : 'd';
3333                     if (Q) {
3334                        assign(res, binop(Iop_AndV128,mkexpr(arg_n),
3335                               unop(Iop_NotV128, mkexpr(arg_m))));
3336                     } else {
3337                        assign(res, binop(Iop_And64, mkexpr(arg_n),
3338                               unop(Iop_Not64, mkexpr(arg_m))));
3339                     }
3340                     DIP("vbic %c%d, %c%d, %c%d\n",
3341                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
3342                     break;
3343                  }
3344                  case 2:
3345                     if ( nreg != mreg) {
3346                        /* VORR  */
3347                        HChar reg_t = Q ? 'q' : 'd';
3348                        if (Q) {
3349                           assign(res, binop(Iop_OrV128, mkexpr(arg_n),
3350                                                         mkexpr(arg_m)));
3351                        } else {
3352                           assign(res, binop(Iop_Or64, mkexpr(arg_n),
3353                                                       mkexpr(arg_m)));
3354                        }
3355                        DIP("vorr %c%d, %c%d, %c%d\n",
3356                            reg_t, dreg, reg_t, nreg, reg_t, mreg);
3357                     } else {
3358                        /* VMOV  */
3359                        HChar reg_t = Q ? 'q' : 'd';
3360                        assign(res, mkexpr(arg_m));
3361                        DIP("vmov %c%d, %c%d\n", reg_t, dreg, reg_t, mreg);
3362                     }
3363                     break;
3364                  case 3:{
3365                     /* VORN  */
3366                     HChar reg_t = Q ? 'q' : 'd';
3367                     if (Q) {
3368                        assign(res, binop(Iop_OrV128,mkexpr(arg_n),
3369                               unop(Iop_NotV128, mkexpr(arg_m))));
3370                     } else {
3371                        assign(res, binop(Iop_Or64, mkexpr(arg_n),
3372                               unop(Iop_Not64, mkexpr(arg_m))));
3373                     }
3374                     DIP("vorn %c%d, %c%d, %c%d\n",
3375                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
3376                     break;
3377                  }
3378               }
3379            } else {
3380               switch(C) {
3381                  case 0:
3382                     /* VEOR (XOR)  */
3383                     if (Q) {
3384                        assign(res, binop(Iop_XorV128, mkexpr(arg_n),
3385                                                       mkexpr(arg_m)));
3386                     } else {
3387                        assign(res, binop(Iop_Xor64, mkexpr(arg_n),
3388                                                     mkexpr(arg_m)));
3389                     }
3390                     DIP("veor %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
3391                           Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3392                     break;
3393                  case 1:
3394                     /* VBSL  */
3395                     if (Q) {
3396                        IRTemp reg_d = newTemp(Ity_V128);
3397                        assign(reg_d, getQReg(dreg));
3398                        assign(res,
3399                               binop(Iop_OrV128,
3400                                     binop(Iop_AndV128, mkexpr(arg_n),
3401                                                        mkexpr(reg_d)),
3402                                     binop(Iop_AndV128,
3403                                           mkexpr(arg_m),
3404                                           unop(Iop_NotV128,
3405                                                 mkexpr(reg_d)) ) ) );
3406                     } else {
3407                        IRTemp reg_d = newTemp(Ity_I64);
3408                        assign(reg_d, getDRegI64(dreg));
3409                        assign(res,
3410                               binop(Iop_Or64,
3411                                     binop(Iop_And64, mkexpr(arg_n),
3412                                                      mkexpr(reg_d)),
3413                                     binop(Iop_And64,
3414                                           mkexpr(arg_m),
3415                                           unop(Iop_Not64, mkexpr(reg_d)))));
3416                     }
3417                     DIP("vbsl %c%u, %c%u, %c%u\n",
3418                         Q ? 'q' : 'd', dreg,
3419                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3420                     break;
3421                  case 2:
3422                     /* VBIT  */
3423                     if (Q) {
3424                        IRTemp reg_d = newTemp(Ity_V128);
3425                        assign(reg_d, getQReg(dreg));
3426                        assign(res,
3427                               binop(Iop_OrV128,
3428                                     binop(Iop_AndV128, mkexpr(arg_n),
3429                                                        mkexpr(arg_m)),
3430                                     binop(Iop_AndV128,
3431                                           mkexpr(reg_d),
3432                                           unop(Iop_NotV128, mkexpr(arg_m)))));
3433                     } else {
3434                        IRTemp reg_d = newTemp(Ity_I64);
3435                        assign(reg_d, getDRegI64(dreg));
3436                        assign(res,
3437                               binop(Iop_Or64,
3438                                     binop(Iop_And64, mkexpr(arg_n),
3439                                                      mkexpr(arg_m)),
3440                                     binop(Iop_And64,
3441                                           mkexpr(reg_d),
3442                                           unop(Iop_Not64, mkexpr(arg_m)))));
3443                     }
3444                     DIP("vbit %c%u, %c%u, %c%u\n",
3445                         Q ? 'q' : 'd', dreg,
3446                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3447                     break;
3448                  case 3:
3449                     /* VBIF  */
3450                     if (Q) {
3451                        IRTemp reg_d = newTemp(Ity_V128);
3452                        assign(reg_d, getQReg(dreg));
3453                        assign(res,
3454                               binop(Iop_OrV128,
3455                                     binop(Iop_AndV128, mkexpr(reg_d),
3456                                                        mkexpr(arg_m)),
3457                                     binop(Iop_AndV128,
3458                                           mkexpr(arg_n),
3459                                           unop(Iop_NotV128, mkexpr(arg_m)))));
3460                     } else {
3461                        IRTemp reg_d = newTemp(Ity_I64);
3462                        assign(reg_d, getDRegI64(dreg));
3463                        assign(res,
3464                               binop(Iop_Or64,
3465                                     binop(Iop_And64, mkexpr(reg_d),
3466                                                      mkexpr(arg_m)),
3467                                     binop(Iop_And64,
3468                                           mkexpr(arg_n),
3469                                           unop(Iop_Not64, mkexpr(arg_m)))));
3470                     }
3471                     DIP("vbif %c%u, %c%u, %c%u\n",
3472                         Q ? 'q' : 'd', dreg,
3473                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3474                     break;
3475               }
3476            }
3477         }
3478         break;
3479      case 2:
3480         if (B == 0) {
3481            /* VHSUB */
3482            /* (A >> 1) - (B >> 1) - (NOT (A) & B & 1)   */
3483            ULong imm = 0;
3484            IRExpr *imm_val;
3485            IROp subOp;
3486            IROp notOp;
3487            IROp andOp;
3488            IROp shOp;
3489            if (size == 3)
3490               return False;
3491            switch(size) {
3492               case 0: imm = 0x101010101010101LL; break;
3493               case 1: imm = 0x1000100010001LL; break;
3494               case 2: imm = 0x100000001LL; break;
3495               default: vassert(0);
3496            }
3497            if (Q) {
3498               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
3499               andOp = Iop_AndV128;
3500               notOp = Iop_NotV128;
3501            } else {
3502               imm_val = mkU64(imm);
3503               andOp = Iop_And64;
3504               notOp = Iop_Not64;
3505            }
3506            if (U) {
3507               switch(size) {
3508                  case 0:
3509                     subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3510                     shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3511                     break;
3512                  case 1:
3513                     subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3514                     shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3515                     break;
3516                  case 2:
3517                     subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3518                     shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3519                     break;
3520                  default:
3521                     vassert(0);
3522               }
3523            } else {
3524               switch(size) {
3525                  case 0:
3526                     subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3527                     shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
3528                     break;
3529                  case 1:
3530                     subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3531                     shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
3532                     break;
3533                  case 2:
3534                     subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3535                     shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
3536                     break;
3537                  default:
3538                     vassert(0);
3539               }
3540            }
3541            assign(res,
3542                   binop(subOp,
3543                         binop(subOp,
3544                               binop(shOp, mkexpr(arg_n), mkU8(1)),
3545                               binop(shOp, mkexpr(arg_m), mkU8(1))),
3546                         binop(andOp,
3547                               binop(andOp,
3548                                     unop(notOp, mkexpr(arg_n)),
3549                                     mkexpr(arg_m)),
3550                               imm_val)));
3551            DIP("vhsub.%c%u %c%u, %c%u, %c%u\n",
3552                U ? 'u' : 's', 8 << size,
3553                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3554                mreg);
3555         } else {
3556            /* VQSUB */
3557            IROp op, op2;
3558            IRTemp tmp;
3559            if (Q) {
3560               switch (size) {
3561                  case 0:
3562                     op = U ? Iop_QSub8Ux16 : Iop_QSub8Sx16;
3563                     op2 = Iop_Sub8x16;
3564                     break;
3565                  case 1:
3566                     op = U ? Iop_QSub16Ux8 : Iop_QSub16Sx8;
3567                     op2 = Iop_Sub16x8;
3568                     break;
3569                  case 2:
3570                     op = U ? Iop_QSub32Ux4 : Iop_QSub32Sx4;
3571                     op2 = Iop_Sub32x4;
3572                     break;
3573                  case 3:
3574                     op = U ? Iop_QSub64Ux2 : Iop_QSub64Sx2;
3575                     op2 = Iop_Sub64x2;
3576                     break;
3577                  default:
3578                     vassert(0);
3579               }
3580            } else {
3581               switch (size) {
3582                  case 0:
3583                     op = U ? Iop_QSub8Ux8 : Iop_QSub8Sx8;
3584                     op2 = Iop_Sub8x8;
3585                     break;
3586                  case 1:
3587                     op = U ? Iop_QSub16Ux4 : Iop_QSub16Sx4;
3588                     op2 = Iop_Sub16x4;
3589                     break;
3590                  case 2:
3591                     op = U ? Iop_QSub32Ux2 : Iop_QSub32Sx2;
3592                     op2 = Iop_Sub32x2;
3593                     break;
3594                  case 3:
3595                     op = U ? Iop_QSub64Ux1 : Iop_QSub64Sx1;
3596                     op2 = Iop_Sub64;
3597                     break;
3598                  default:
3599                     vassert(0);
3600               }
3601            }
3602            if (Q)
3603               tmp = newTemp(Ity_V128);
3604            else
3605               tmp = newTemp(Ity_I64);
3606            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3607            assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
3608            setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
3609            DIP("vqsub.%c%u %c%u, %c%u, %c%u\n",
3610                U ? 'u' : 's', 8 << size,
3611                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3612                mreg);
3613         }
3614         break;
3615      case 3: {
3616            IROp op;
3617            if (Q) {
3618               switch (size) {
3619                  case 0: op = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; break;
3620                  case 1: op = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; break;
3621                  case 2: op = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; break;
3622                  case 3: return False;
3623                  default: vassert(0);
3624               }
3625            } else {
3626               switch (size) {
3627                  case 0: op = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; break;
3628                  case 1: op = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; break;
3629                  case 2: op = U ? Iop_CmpGT32Ux2: Iop_CmpGT32Sx2; break;
3630                  case 3: return False;
3631                  default: vassert(0);
3632               }
3633            }
3634            if (B == 0) {
3635               /* VCGT  */
3636               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3637               DIP("vcgt.%c%u %c%u, %c%u, %c%u\n",
3638                   U ? 'u' : 's', 8 << size,
3639                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3640                   mreg);
3641            } else {
3642               /* VCGE  */
3643               /* VCGE res, argn, argm
3644                    is equal to
3645                  VCGT tmp, argm, argn
3646                  VNOT res, tmp */
3647               assign(res,
3648                      unop(Q ? Iop_NotV128 : Iop_Not64,
3649                           binop(op, mkexpr(arg_m), mkexpr(arg_n))));
3650               DIP("vcge.%c%u %c%u, %c%u, %c%u\n",
3651                   U ? 'u' : 's', 8 << size,
3652                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3653                   mreg);
3654            }
3655         }
3656         break;
3657      case 4:
3658         if (B == 0) {
3659            /* VSHL */
3660            IROp op = Iop_INVALID, sub_op = Iop_INVALID;
3661            IRTemp tmp = IRTemp_INVALID;
3662            if (U) {
3663               switch (size) {
3664                  case 0: op = Q ? Iop_Shl8x16 : Iop_Shl8x8; break;
3665                  case 1: op = Q ? Iop_Shl16x8 : Iop_Shl16x4; break;
3666                  case 2: op = Q ? Iop_Shl32x4 : Iop_Shl32x2; break;
3667                  case 3: op = Q ? Iop_Shl64x2 : Iop_Shl64; break;
3668                  default: vassert(0);
3669               }
3670            } else {
3671               tmp = newTemp(Q ? Ity_V128 : Ity_I64);
3672               switch (size) {
3673                  case 0:
3674                     op = Q ? Iop_Sar8x16 : Iop_Sar8x8;
3675                     sub_op = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3676                     break;
3677                  case 1:
3678                     op = Q ? Iop_Sar16x8 : Iop_Sar16x4;
3679                     sub_op = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3680                     break;
3681                  case 2:
3682                     op = Q ? Iop_Sar32x4 : Iop_Sar32x2;
3683                     sub_op = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3684                     break;
3685                  case 3:
3686                     op = Q ? Iop_Sar64x2 : Iop_Sar64;
3687                     sub_op = Q ? Iop_Sub64x2 : Iop_Sub64;
3688                     break;
3689                  default:
3690                     vassert(0);
3691               }
3692            }
3693            if (U) {
3694               if (!Q && (size == 3))
3695                  assign(res, binop(op, mkexpr(arg_m),
3696                                        unop(Iop_64to8, mkexpr(arg_n))));
3697               else
3698                  assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3699            } else {
3700               if (Q)
3701                  assign(tmp, binop(sub_op,
3702                                    binop(Iop_64HLtoV128, mkU64(0), mkU64(0)),
3703                                    mkexpr(arg_n)));
3704               else
3705                  assign(tmp, binop(sub_op, mkU64(0), mkexpr(arg_n)));
3706               if (!Q && (size == 3))
3707                  assign(res, binop(op, mkexpr(arg_m),
3708                                        unop(Iop_64to8, mkexpr(tmp))));
3709               else
3710                  assign(res, binop(op, mkexpr(arg_m), mkexpr(tmp)));
3711            }
3712            DIP("vshl.%c%u %c%u, %c%u, %c%u\n",
3713                U ? 'u' : 's', 8 << size,
3714                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3715                nreg);
3716         } else {
3717            /* VQSHL */
3718            IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt;
3719            IRTemp tmp, shval, mask, old_shval;
3720            UInt i;
3721            ULong esize;
3722            cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
3723            cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3724            if (U) {
3725               switch (size) {
3726                  case 0:
3727                     op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
3728                     op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
3729                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3730                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3731                     break;
3732                  case 1:
3733                     op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
3734                     op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
3735                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3736                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3737                     break;
3738                  case 2:
3739                     op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
3740                     op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
3741                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3742                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3743                     break;
3744                  case 3:
3745                     op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
3746                     op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
3747                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3748                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3749                     break;
3750                  default:
3751                     vassert(0);
3752               }
3753            } else {
3754               switch (size) {
3755                  case 0:
3756                     op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
3757                     op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
3758                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3759                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3760                     break;
3761                  case 1:
3762                     op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
3763                     op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
3764                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3765                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3766                     break;
3767                  case 2:
3768                     op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
3769                     op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
3770                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3771                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3772                     break;
3773                  case 3:
3774                     op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
3775                     op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
3776                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3777                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3778                     break;
3779                  default:
3780                     vassert(0);
3781               }
3782            }
3783            if (Q) {
3784               tmp = newTemp(Ity_V128);
3785               shval = newTemp(Ity_V128);
3786               mask = newTemp(Ity_V128);
3787            } else {
3788               tmp = newTemp(Ity_I64);
3789               shval = newTemp(Ity_I64);
3790               mask = newTemp(Ity_I64);
3791            }
3792            assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3793            /* Only least significant byte from second argument is used.
3794               Copy this byte to the whole vector element. */
3795            assign(shval, binop(op_shrn,
3796                                binop(op_shln,
3797                                       mkexpr(arg_n),
3798                                       mkU8((8 << size) - 8)),
3799                                mkU8((8 << size) - 8)));
3800            for(i = 0; i < size; i++) {
3801               old_shval = shval;
3802               shval = newTemp(Q ? Ity_V128 : Ity_I64);
3803               assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3804                                   mkexpr(old_shval),
3805                                   binop(op_shln,
3806                                         mkexpr(old_shval),
3807                                         mkU8(8 << i))));
3808            }
3809            /* If shift is greater or equal to the element size and
3810               element is non-zero, then QC flag should be set. */
3811            esize = (8 << size) - 1;
3812            esize = (esize <<  8) | esize;
3813            esize = (esize << 16) | esize;
3814            esize = (esize << 32) | esize;
3815            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3816                             binop(cmp_gt, mkexpr(shval),
3817                                           Q ? mkU128(esize) : mkU64(esize)),
3818                             unop(cmp_neq, mkexpr(arg_m))),
3819                       Q ? mkU128(0) : mkU64(0),
3820                       Q, condT);
3821            /* Otherwise the QC flag should be set if the shift value is
3822               positive and the result, right-shifted by the same value, is
3823               not equal to the left argument. */
3824            assign(mask, binop(cmp_gt, mkexpr(shval),
3825                                       Q ? mkU128(0) : mkU64(0)));
3826            if (!Q && size == 3)
3827               assign(tmp, binop(op_rev, mkexpr(res),
3828                                         unop(Iop_64to8, mkexpr(arg_n))));
3829            else
3830               assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
3831            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3832                             mkexpr(tmp), mkexpr(mask)),
3833                       binop(Q ? Iop_AndV128 : Iop_And64,
3834                             mkexpr(arg_m), mkexpr(mask)),
3835                       Q, condT);
3836            DIP("vqshl.%c%u %c%u, %c%u, %c%u\n",
3837                U ? 'u' : 's', 8 << size,
3838                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3839                nreg);
3840         }
3841         break;
3842      case 5:
3843         if (B == 0) {
3844            /* VRSHL */
3845            IROp op, op_shrn, op_shln, cmp_gt, op_add;
3846            IRTemp shval, old_shval, imm_val, round;
3847            UInt i;
3848            ULong imm;
3849            cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3850            imm = 1L;
3851            switch (size) {
3852               case 0: imm = (imm <<  8) | imm; /* fall through */
3853               case 1: imm = (imm << 16) | imm; /* fall through */
3854               case 2: imm = (imm << 32) | imm; /* fall through */
3855               case 3: break;
3856               default: vassert(0);
3857            }
3858            imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
3859            round = newTemp(Q ? Ity_V128 : Ity_I64);
3860            assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
3861            if (U) {
3862               switch (size) {
3863                  case 0:
3864                     op = Q ? Iop_Shl8x16 : Iop_Shl8x8;
3865                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3866                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3867                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3868                     break;
3869                  case 1:
3870                     op = Q ? Iop_Shl16x8 : Iop_Shl16x4;
3871                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3872                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3873                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3874                     break;
3875                  case 2:
3876                     op = Q ? Iop_Shl32x4 : Iop_Shl32x2;
3877                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3878                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3879                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3880                     break;
3881                  case 3:
3882                     op = Q ? Iop_Shl64x2 : Iop_Shl64;
3883                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
3884                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3885                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3886                     break;
3887                  default:
3888                     vassert(0);
3889               }
3890            } else {
3891               switch (size) {
3892                  case 0:
3893                     op = Q ? Iop_Sal8x16 : Iop_Sal8x8;
3894                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3895                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3896                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3897                     break;
3898                  case 1:
3899                     op = Q ? Iop_Sal16x8 : Iop_Sal16x4;
3900                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3901                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3902                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3903                     break;
3904                  case 2:
3905                     op = Q ? Iop_Sal32x4 : Iop_Sal32x2;
3906                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3907                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3908                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3909                     break;
3910                  case 3:
3911                     op = Q ? Iop_Sal64x2 : Iop_Sal64x1;
3912                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
3913                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3914                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3915                     break;
3916                  default:
3917                     vassert(0);
3918               }
3919            }
3920            if (Q) {
3921               shval = newTemp(Ity_V128);
3922            } else {
3923               shval = newTemp(Ity_I64);
3924            }
3925            /* Only least significant byte from second argument is used.
3926               Copy this byte to the whole vector element. */
3927            assign(shval, binop(op_shrn,
3928                                binop(op_shln,
3929                                       mkexpr(arg_n),
3930                                       mkU8((8 << size) - 8)),
3931                                mkU8((8 << size) - 8)));
3932            for (i = 0; i < size; i++) {
3933               old_shval = shval;
3934               shval = newTemp(Q ? Ity_V128 : Ity_I64);
3935               assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3936                                   mkexpr(old_shval),
3937                                   binop(op_shln,
3938                                         mkexpr(old_shval),
3939                                         mkU8(8 << i))));
3940            }
3941            /* Compute the result */
3942            if (!Q && size == 3 && U) {
3943               assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
3944                                   binop(op,
3945                                         mkexpr(arg_m),
3946                                         unop(Iop_64to8,
3947                                              binop(op_add,
3948                                                    mkexpr(arg_n),
3949                                                    mkexpr(imm_val)))),
3950                                   binop(Q ? Iop_AndV128 : Iop_And64,
3951                                         mkexpr(imm_val),
3952                                         binop(cmp_gt,
3953                                               Q ? mkU128(0) : mkU64(0),
3954                                               mkexpr(arg_n)))));
3955               assign(res, binop(op_add,
3956                                 binop(op,
3957                                       mkexpr(arg_m),
3958                                       unop(Iop_64to8, mkexpr(arg_n))),
3959                                 mkexpr(round)));
3960            } else {
3961               assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
3962                                   binop(op,
3963                                         mkexpr(arg_m),
3964                                         binop(op_add,
3965                                               mkexpr(arg_n),
3966                                               mkexpr(imm_val))),
3967                                   binop(Q ? Iop_AndV128 : Iop_And64,
3968                                         mkexpr(imm_val),
3969                                         binop(cmp_gt,
3970                                               Q ? mkU128(0) : mkU64(0),
3971                                               mkexpr(arg_n)))));
3972               assign(res, binop(op_add,
3973                                 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
3974                                 mkexpr(round)));
3975            }
3976            DIP("vrshl.%c%u %c%u, %c%u, %c%u\n",
3977                U ? 'u' : 's', 8 << size,
3978                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3979                nreg);
3980         } else {
3981            /* VQRSHL */
3982            IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_add;
3983            IRTemp tmp, shval, mask, old_shval, imm_val, round;
3984            UInt i;
3985            ULong esize, imm;
3986            cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
3987            cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3988            imm = 1L;
3989            switch (size) {
3990               case 0: imm = (imm <<  8) | imm; /* fall through */
3991               case 1: imm = (imm << 16) | imm; /* fall through */
3992               case 2: imm = (imm << 32) | imm; /* fall through */
3993               case 3: break;
3994               default: vassert(0);
3995            }
3996            imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
3997            round = newTemp(Q ? Ity_V128 : Ity_I64);
3998            assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
3999            if (U) {
4000               switch (size) {
4001                  case 0:
4002                     op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
4003                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
4004                     op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
4005                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
4006                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
4007                     break;
4008                  case 1:
4009                     op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
4010                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
4011                     op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
4012                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
4013                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
4014                     break;
4015                  case 2:
4016                     op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
4017                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
4018                     op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
4019                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
4020                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
4021                     break;
4022                  case 3:
4023                     op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
4024                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
4025                     op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
4026                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
4027                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
4028                     break;
4029                  default:
4030                     vassert(0);
4031               }
4032            } else {
4033               switch (size) {
4034                  case 0:
4035                     op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
4036                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
4037                     op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
4038                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
4039                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
4040                     break;
4041                  case 1:
4042                     op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
4043                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
4044                     op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
4045                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
4046                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
4047                     break;
4048                  case 2:
4049                     op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
4050                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
4051                     op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
4052                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
4053                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
4054                     break;
4055                  case 3:
4056                     op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
4057                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
4058                     op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
4059                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
4060                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
4061                     break;
4062                  default:
4063                     vassert(0);
4064               }
4065            }
4066            if (Q) {
4067               tmp = newTemp(Ity_V128);
4068               shval = newTemp(Ity_V128);
4069               mask = newTemp(Ity_V128);
4070            } else {
4071               tmp = newTemp(Ity_I64);
4072               shval = newTemp(Ity_I64);
4073               mask = newTemp(Ity_I64);
4074            }
4075            /* Only least significant byte from second argument is used.
4076               Copy this byte to the whole vector element. */
4077            assign(shval, binop(op_shrn,
4078                                binop(op_shln,
4079                                       mkexpr(arg_n),
4080                                       mkU8((8 << size) - 8)),
4081                                mkU8((8 << size) - 8)));
4082            for (i = 0; i < size; i++) {
4083               old_shval = shval;
4084               shval = newTemp(Q ? Ity_V128 : Ity_I64);
4085               assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
4086                                   mkexpr(old_shval),
4087                                   binop(op_shln,
4088                                         mkexpr(old_shval),
4089                                         mkU8(8 << i))));
4090            }
4091            /* Compute the result */
4092            assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
4093                                binop(op,
4094                                      mkexpr(arg_m),
4095                                      binop(op_add,
4096                                            mkexpr(arg_n),
4097                                            mkexpr(imm_val))),
4098                                binop(Q ? Iop_AndV128 : Iop_And64,
4099                                      mkexpr(imm_val),
4100                                      binop(cmp_gt,
4101                                            Q ? mkU128(0) : mkU64(0),
4102                                            mkexpr(arg_n)))));
4103            assign(res, binop(op_add,
4104                              binop(op, mkexpr(arg_m), mkexpr(arg_n)),
4105                              mkexpr(round)));
4106            /* If shift is greater or equal to the element size and element is
4107               non-zero, then QC flag should be set. */
4108            esize = (8 << size) - 1;
4109            esize = (esize <<  8) | esize;
4110            esize = (esize << 16) | esize;
4111            esize = (esize << 32) | esize;
4112            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4113                             binop(cmp_gt, mkexpr(shval),
4114                                           Q ? mkU128(esize) : mkU64(esize)),
4115                             unop(cmp_neq, mkexpr(arg_m))),
4116                       Q ? mkU128(0) : mkU64(0),
4117                       Q, condT);
4118            /* Otherwise the QC flag should be set if the shift value is positive
4119               and the result, shifted right by the same value, is not equal to
4120               the left argument. */
4121            assign(mask, binop(cmp_gt, mkexpr(shval),
4122                               Q ? mkU128(0) : mkU64(0)));
4123            if (!Q && size == 3)
4124               assign(tmp, binop(op_rev, mkexpr(res),
4125                                         unop(Iop_64to8, mkexpr(arg_n))));
4126            else
4127               assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
4128            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4129                             mkexpr(tmp), mkexpr(mask)),
4130                       binop(Q ? Iop_AndV128 : Iop_And64,
4131                             mkexpr(arg_m), mkexpr(mask)),
4132                       Q, condT);
4133            DIP("vqrshl.%c%u %c%u, %c%u, %c%u\n",
4134                U ? 'u' : 's', 8 << size,
4135                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
4136                nreg);
4137         }
4138         break;
4139      case 6:
4140         /* VMAX, VMIN  */
4141         if (B == 0) {
4142            /* VMAX */
4143            IROp op;
4144            if (U == 0) {
4145               switch (size) {
4146                  case 0: op = Q ? Iop_Max8Sx16 : Iop_Max8Sx8; break;
4147                  case 1: op = Q ? Iop_Max16Sx8 : Iop_Max16Sx4; break;
4148                  case 2: op = Q ? Iop_Max32Sx4 : Iop_Max32Sx2; break;
4149                  case 3: return False;
4150                  default: vassert(0);
4151               }
4152            } else {
4153               switch (size) {
4154                  case 0: op = Q ? Iop_Max8Ux16 : Iop_Max8Ux8; break;
4155                  case 1: op = Q ? Iop_Max16Ux8 : Iop_Max16Ux4; break;
4156                  case 2: op = Q ? Iop_Max32Ux4 : Iop_Max32Ux2; break;
4157                  case 3: return False;
4158                  default: vassert(0);
4159               }
4160            }
4161            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4162            DIP("vmax.%c%u %c%u, %c%u, %c%u\n",
4163                U ? 'u' : 's', 8 << size,
4164                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4165                mreg);
4166         } else {
4167            /* VMIN */
4168            IROp op;
4169            if (U == 0) {
4170               switch (size) {
4171                  case 0: op = Q ? Iop_Min8Sx16 : Iop_Min8Sx8; break;
4172                  case 1: op = Q ? Iop_Min16Sx8 : Iop_Min16Sx4; break;
4173                  case 2: op = Q ? Iop_Min32Sx4 : Iop_Min32Sx2; break;
4174                  case 3: return False;
4175                  default: vassert(0);
4176               }
4177            } else {
4178               switch (size) {
4179                  case 0: op = Q ? Iop_Min8Ux16 : Iop_Min8Ux8; break;
4180                  case 1: op = Q ? Iop_Min16Ux8 : Iop_Min16Ux4; break;
4181                  case 2: op = Q ? Iop_Min32Ux4 : Iop_Min32Ux2; break;
4182                  case 3: return False;
4183                  default: vassert(0);
4184               }
4185            }
4186            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4187            DIP("vmin.%c%u %c%u, %c%u, %c%u\n",
4188                U ? 'u' : 's', 8 << size,
4189                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4190                mreg);
4191         }
4192         break;
4193      case 7:
4194         if (B == 0) {
4195            /* VABD */
4196            IROp op_cmp, op_sub;
4197            IRTemp cond;
4198            if ((theInstr >> 23) & 1) {
4199               vpanic("VABDL should not be in dis_neon_data_3same\n");
4200            }
4201            if (Q) {
4202               switch (size) {
4203                  case 0:
4204                     op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
4205                     op_sub = Iop_Sub8x16;
4206                     break;
4207                  case 1:
4208                     op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
4209                     op_sub = Iop_Sub16x8;
4210                     break;
4211                  case 2:
4212                     op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
4213                     op_sub = Iop_Sub32x4;
4214                     break;
4215                  case 3:
4216                     return False;
4217                  default:
4218                     vassert(0);
4219               }
4220            } else {
4221               switch (size) {
4222                  case 0:
4223                     op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4224                     op_sub = Iop_Sub8x8;
4225                     break;
4226                  case 1:
4227                     op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4228                     op_sub = Iop_Sub16x4;
4229                     break;
4230                  case 2:
4231                     op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4232                     op_sub = Iop_Sub32x2;
4233                     break;
4234                  case 3:
4235                     return False;
4236                  default:
4237                     vassert(0);
4238               }
4239            }
4240            if (Q) {
4241               cond = newTemp(Ity_V128);
4242            } else {
4243               cond = newTemp(Ity_I64);
4244            }
4245            assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
4246            assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
4247                              binop(Q ? Iop_AndV128 : Iop_And64,
4248                                    binop(op_sub, mkexpr(arg_n),
4249                                                  mkexpr(arg_m)),
4250                                    mkexpr(cond)),
4251                              binop(Q ? Iop_AndV128 : Iop_And64,
4252                                    binop(op_sub, mkexpr(arg_m),
4253                                                  mkexpr(arg_n)),
4254                                    unop(Q ? Iop_NotV128 : Iop_Not64,
4255                                         mkexpr(cond)))));
4256            DIP("vabd.%c%u %c%u, %c%u, %c%u\n",
4257                U ? 'u' : 's', 8 << size,
4258                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4259                mreg);
4260         } else {
4261            /* VABA */
4262            IROp op_cmp, op_sub, op_add;
4263            IRTemp cond, acc, tmp;
4264            if ((theInstr >> 23) & 1) {
4265               vpanic("VABAL should not be in dis_neon_data_3same");
4266            }
4267            if (Q) {
4268               switch (size) {
4269                  case 0:
4270                     op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
4271                     op_sub = Iop_Sub8x16;
4272                     op_add = Iop_Add8x16;
4273                     break;
4274                  case 1:
4275                     op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
4276                     op_sub = Iop_Sub16x8;
4277                     op_add = Iop_Add16x8;
4278                     break;
4279                  case 2:
4280                     op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
4281                     op_sub = Iop_Sub32x4;
4282                     op_add = Iop_Add32x4;
4283                     break;
4284                  case 3:
4285                     return False;
4286                  default:
4287                     vassert(0);
4288               }
4289            } else {
4290               switch (size) {
4291                  case 0:
4292                     op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4293                     op_sub = Iop_Sub8x8;
4294                     op_add = Iop_Add8x8;
4295                     break;
4296                  case 1:
4297                     op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4298                     op_sub = Iop_Sub16x4;
4299                     op_add = Iop_Add16x4;
4300                     break;
4301                  case 2:
4302                     op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4303                     op_sub = Iop_Sub32x2;
4304                     op_add = Iop_Add32x2;
4305                     break;
4306                  case 3:
4307                     return False;
4308                  default:
4309                     vassert(0);
4310               }
4311            }
4312            if (Q) {
4313               cond = newTemp(Ity_V128);
4314               acc = newTemp(Ity_V128);
4315               tmp = newTemp(Ity_V128);
4316               assign(acc, getQReg(dreg));
4317            } else {
4318               cond = newTemp(Ity_I64);
4319               acc = newTemp(Ity_I64);
4320               tmp = newTemp(Ity_I64);
4321               assign(acc, getDRegI64(dreg));
4322            }
4323            assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
4324            assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
4325                              binop(Q ? Iop_AndV128 : Iop_And64,
4326                                    binop(op_sub, mkexpr(arg_n),
4327                                                  mkexpr(arg_m)),
4328                                    mkexpr(cond)),
4329                              binop(Q ? Iop_AndV128 : Iop_And64,
4330                                    binop(op_sub, mkexpr(arg_m),
4331                                                  mkexpr(arg_n)),
4332                                    unop(Q ? Iop_NotV128 : Iop_Not64,
4333                                         mkexpr(cond)))));
4334            assign(res, binop(op_add, mkexpr(acc), mkexpr(tmp)));
4335            DIP("vaba.%c%u %c%u, %c%u, %c%u\n",
4336                U ? 'u' : 's', 8 << size,
4337                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4338                mreg);
4339         }
4340         break;
4341      case 8:
4342         if (B == 0) {
4343            IROp op;
4344            if (U == 0) {
4345               /* VADD  */
4346               switch (size) {
4347                  case 0: op = Q ? Iop_Add8x16 : Iop_Add8x8; break;
4348                  case 1: op = Q ? Iop_Add16x8 : Iop_Add16x4; break;
4349                  case 2: op = Q ? Iop_Add32x4 : Iop_Add32x2; break;
4350                  case 3: op = Q ? Iop_Add64x2 : Iop_Add64; break;
4351                  default: vassert(0);
4352               }
4353               DIP("vadd.i%u %c%u, %c%u, %c%u\n",
4354                   8 << size, Q ? 'q' : 'd',
4355                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4356            } else {
4357               /* VSUB  */
4358               switch (size) {
4359                  case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
4360                  case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
4361                  case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
4362                  case 3: op = Q ? Iop_Sub64x2 : Iop_Sub64; break;
4363                  default: vassert(0);
4364               }
4365               DIP("vsub.i%u %c%u, %c%u, %c%u\n",
4366                   8 << size, Q ? 'q' : 'd',
4367                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4368            }
4369            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4370         } else {
4371            IROp op;
4372            switch (size) {
4373               case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
4374               case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
4375               case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
4376               case 3: op = Q ? Iop_CmpNEZ64x2 : Iop_CmpwNEZ64; break;
4377               default: vassert(0);
4378            }
4379            if (U == 0) {
4380               /* VTST  */
4381               assign(res, unop(op, binop(Q ? Iop_AndV128 : Iop_And64,
4382                                          mkexpr(arg_n),
4383                                          mkexpr(arg_m))));
4384               DIP("vtst.%u %c%u, %c%u, %c%u\n",
4385                   8 << size, Q ? 'q' : 'd',
4386                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4387            } else {
4388               /* VCEQ  */
4389               assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
4390                                unop(op,
4391                                     binop(Q ? Iop_XorV128 : Iop_Xor64,
4392                                           mkexpr(arg_n),
4393                                           mkexpr(arg_m)))));
4394               DIP("vceq.i%u %c%u, %c%u, %c%u\n",
4395                   8 << size, Q ? 'q' : 'd',
4396                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4397            }
4398         }
4399         break;
4400      case 9:
4401         if (B == 0) {
4402            /* VMLA, VMLS (integer) */
4403            IROp op, op2;
4404            UInt P = (theInstr >> 24) & 1;
4405            if (P) {
4406               switch (size) {
4407                  case 0:
4408                     op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
4409                     op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
4410                     break;
4411                  case 1:
4412                     op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
4413                     op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
4414                     break;
4415                  case 2:
4416                     op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
4417                     op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
4418                     break;
4419                  case 3:
4420                     return False;
4421                  default:
4422                     vassert(0);
4423               }
4424            } else {
4425               switch (size) {
4426                  case 0:
4427                     op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
4428                     op2 = Q ? Iop_Add8x16 : Iop_Add8x8;
4429                     break;
4430                  case 1:
4431                     op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
4432                     op2 = Q ? Iop_Add16x8 : Iop_Add16x4;
4433                     break;
4434                  case 2:
4435                     op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
4436                     op2 = Q ? Iop_Add32x4 : Iop_Add32x2;
4437                     break;
4438                  case 3:
4439                     return False;
4440                  default:
4441                     vassert(0);
4442               }
4443            }
4444            assign(res, binop(op2,
4445                              Q ? getQReg(dreg) : getDRegI64(dreg),
4446                              binop(op, mkexpr(arg_n), mkexpr(arg_m))));
4447            DIP("vml%c.i%u %c%u, %c%u, %c%u\n",
4448                P ? 's' : 'a', 8 << size,
4449                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4450                mreg);
4451         } else {
4452            /* VMUL */
4453            IROp op;
4454            UInt P = (theInstr >> 24) & 1;
4455            if (P) {
4456               switch (size) {
4457                  case 0:
4458                     op = Q ? Iop_PolynomialMul8x16 : Iop_PolynomialMul8x8;
4459                     break;
4460                  case 1: case 2: case 3: return False;
4461                  default: vassert(0);
4462               }
4463            } else {
4464               switch (size) {
4465                  case 0: op = Q ? Iop_Mul8x16 : Iop_Mul8x8; break;
4466                  case 1: op = Q ? Iop_Mul16x8 : Iop_Mul16x4; break;
4467                  case 2: op = Q ? Iop_Mul32x4 : Iop_Mul32x2; break;
4468                  case 3: return False;
4469                  default: vassert(0);
4470               }
4471            }
4472            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4473            DIP("vmul.%c%u %c%u, %c%u, %c%u\n",
4474                P ? 'p' : 'i', 8 << size,
4475                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4476                mreg);
4477         }
4478         break;
4479      case 10: {
4480         /* VPMAX, VPMIN  */
4481         UInt P = (theInstr >> 4) & 1;
4482         IROp op;
4483         if (Q)
4484            return False;
4485         if (P) {
4486            switch (size) {
4487               case 0: op = U ? Iop_PwMin8Ux8  : Iop_PwMin8Sx8; break;
4488               case 1: op = U ? Iop_PwMin16Ux4 : Iop_PwMin16Sx4; break;
4489               case 2: op = U ? Iop_PwMin32Ux2 : Iop_PwMin32Sx2; break;
4490               case 3: return False;
4491               default: vassert(0);
4492            }
4493         } else {
4494            switch (size) {
4495               case 0: op = U ? Iop_PwMax8Ux8  : Iop_PwMax8Sx8; break;
4496               case 1: op = U ? Iop_PwMax16Ux4 : Iop_PwMax16Sx4; break;
4497               case 2: op = U ? Iop_PwMax32Ux2 : Iop_PwMax32Sx2; break;
4498               case 3: return False;
4499               default: vassert(0);
4500            }
4501         }
4502         assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4503         DIP("vp%s.%c%u %c%u, %c%u, %c%u\n",
4504             P ? "min" : "max", U ? 'u' : 's',
4505             8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
4506             Q ? 'q' : 'd', mreg);
4507         break;
4508      }
4509      case 11:
4510         if (B == 0) {
4511            if (U == 0) {
4512               /* VQDMULH  */
4513               IROp op ,op2;
4514               ULong imm;
4515               switch (size) {
4516                  case 0: case 3:
4517                     return False;
4518                  case 1:
4519                     op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
4520                     op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
4521                     imm = 1LL << 15;
4522                     imm = (imm << 16) | imm;
4523                     imm = (imm << 32) | imm;
4524                     break;
4525                  case 2:
4526                     op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
4527                     op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
4528                     imm = 1LL << 31;
4529                     imm = (imm << 32) | imm;
4530                     break;
4531                  default:
4532                     vassert(0);
4533               }
4534               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4535               setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4536                                binop(op2, mkexpr(arg_n),
4537                                           Q ? mkU128(imm) : mkU64(imm)),
4538                                binop(op2, mkexpr(arg_m),
4539                                           Q ? mkU128(imm) : mkU64(imm))),
4540                          Q ? mkU128(0) : mkU64(0),
4541                          Q, condT);
4542               DIP("vqdmulh.s%u %c%u, %c%u, %c%u\n",
4543                   8 << size, Q ? 'q' : 'd',
4544                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4545            } else {
4546               /* VQRDMULH */
4547               IROp op ,op2;
4548               ULong imm;
4549               switch(size) {
4550                  case 0: case 3:
4551                     return False;
4552                  case 1:
4553                     imm = 1LL << 15;
4554                     imm = (imm << 16) | imm;
4555                     imm = (imm << 32) | imm;
4556                     op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
4557                     op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
4558                     break;
4559                  case 2:
4560                     imm = 1LL << 31;
4561                     imm = (imm << 32) | imm;
4562                     op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
4563                     op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
4564                     break;
4565                  default:
4566                     vassert(0);
4567               }
4568               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4569               setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4570                                binop(op2, mkexpr(arg_n),
4571                                           Q ? mkU128(imm) : mkU64(imm)),
4572                                binop(op2, mkexpr(arg_m),
4573                                           Q ? mkU128(imm) : mkU64(imm))),
4574                          Q ? mkU128(0) : mkU64(0),
4575                          Q, condT);
4576               DIP("vqrdmulh.s%u %c%u, %c%u, %c%u\n",
4577                   8 << size, Q ? 'q' : 'd',
4578                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4579            }
4580         } else {
4581            if (U == 0) {
4582               /* VPADD */
4583               IROp op;
4584               if (Q)
4585                  return False;
4586               switch (size) {
4587                  case 0: op = Q ? Iop_PwAdd8x16 : Iop_PwAdd8x8;  break;
4588                  case 1: op = Q ? Iop_PwAdd16x8 : Iop_PwAdd16x4; break;
4589                  case 2: op = Q ? Iop_PwAdd32x4 : Iop_PwAdd32x2; break;
4590                  case 3: return False;
4591                  default: vassert(0);
4592               }
4593               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4594               DIP("vpadd.i%d %c%u, %c%u, %c%u\n",
4595                   8 << size, Q ? 'q' : 'd',
4596                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4597            }
4598         }
4599         break;
4600      /* Starting from here these are FP SIMD cases */
4601      case 13:
4602         if (B == 0) {
4603            IROp op;
4604            if (U == 0) {
4605               if ((C >> 1) == 0) {
4606                  /* VADD  */
4607                  op = Q ? Iop_Add32Fx4 : Iop_Add32Fx2 ;
4608                  DIP("vadd.f32 %c%u, %c%u, %c%u\n",
4609                      Q ? 'q' : 'd', dreg,
4610                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4611               } else {
4612                  /* VSUB  */
4613                  op = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2 ;
4614                  DIP("vsub.f32 %c%u, %c%u, %c%u\n",
4615                      Q ? 'q' : 'd', dreg,
4616                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4617               }
4618            } else {
4619               if ((C >> 1) == 0) {
4620                  /* VPADD */
4621                  if (Q)
4622                     return False;
4623                  op = Iop_PwAdd32Fx2;
4624                  DIP("vpadd.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4625               } else {
4626                  /* VABD  */
4627                  if (Q) {
4628                     assign(res, unop(Iop_Abs32Fx4,
4629                                      triop(Iop_Sub32Fx4,
4630                                            get_FAKE_roundingmode(),
4631                                            mkexpr(arg_n),
4632                                            mkexpr(arg_m))));
4633                  } else {
4634                     assign(res, unop(Iop_Abs32Fx2,
4635                                      binop(Iop_Sub32Fx2,
4636                                            mkexpr(arg_n),
4637                                            mkexpr(arg_m))));
4638                  }
4639                  DIP("vabd.f32 %c%u, %c%u, %c%u\n",
4640                      Q ? 'q' : 'd', dreg,
4641                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4642                  break;
4643               }
4644            }
4645            assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
4646         } else {
4647            if (U == 0) {
4648               /* VMLA, VMLS  */
4649               IROp op, op2;
4650               UInt P = (theInstr >> 21) & 1;
4651               if (P) {
4652                  switch (size & 1) {
4653                     case 0:
4654                        op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
4655                        op2 = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
4656                        break;
4657                     case 1: return False;
4658                     default: vassert(0);
4659                  }
4660               } else {
4661                  switch (size & 1) {
4662                     case 0:
4663                        op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
4664                        op2 = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
4665                        break;
4666                     case 1: return False;
4667                     default: vassert(0);
4668                  }
4669               }
4670               assign(res, binop_w_fake_RM(
4671                              op2,
4672                              Q ? getQReg(dreg) : getDRegI64(dreg),
4673                              binop_w_fake_RM(op, mkexpr(arg_n),
4674                                                  mkexpr(arg_m))));
4675
4676               DIP("vml%c.f32 %c%u, %c%u, %c%u\n",
4677                   P ? 's' : 'a', Q ? 'q' : 'd',
4678                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4679            } else {
4680               /* VMUL  */
4681               IROp op;
4682               if ((C >> 1) != 0)
4683                  return False;
4684               op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2 ;
4685               assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
4686               DIP("vmul.f32 %c%u, %c%u, %c%u\n",
4687                   Q ? 'q' : 'd', dreg,
4688                   Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4689            }
4690         }
4691         break;
4692      case 14:
4693         if (B == 0) {
4694            if (U == 0) {
4695               if ((C >> 1) == 0) {
4696                  /* VCEQ  */
4697                  IROp op;
4698                  if ((theInstr >> 20) & 1)
4699                     return False;
4700                  op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2;
4701                  assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4702                  DIP("vceq.f32 %c%u, %c%u, %c%u\n",
4703                      Q ? 'q' : 'd', dreg,
4704                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4705               } else {
4706                  return False;
4707               }
4708            } else {
4709               if ((C >> 1) == 0) {
4710                  /* VCGE  */
4711                  IROp op;
4712                  if ((theInstr >> 20) & 1)
4713                     return False;
4714                  op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
4715                  assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4716                  DIP("vcge.f32 %c%u, %c%u, %c%u\n",
4717                      Q ? 'q' : 'd', dreg,
4718                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4719               } else {
4720                  /* VCGT  */
4721                  IROp op;
4722                  if ((theInstr >> 20) & 1)
4723                     return False;
4724                  op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
4725                  assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4726                  DIP("vcgt.f32 %c%u, %c%u, %c%u\n",
4727                      Q ? 'q' : 'd', dreg,
4728                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4729               }
4730            }
4731         } else {
4732            if (U == 1) {
4733               /* VACGE, VACGT */
4734               UInt op_bit = (theInstr >> 21) & 1;
4735               IROp op, op2;
4736               op2 = Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2;
4737               if (op_bit) {
4738                  op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
4739                  assign(res, binop(op,
4740                                    unop(op2, mkexpr(arg_n)),
4741                                    unop(op2, mkexpr(arg_m))));
4742               } else {
4743                  op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
4744                  assign(res, binop(op,
4745                                    unop(op2, mkexpr(arg_n)),
4746                                    unop(op2, mkexpr(arg_m))));
4747               }
4748               DIP("vacg%c.f32 %c%u, %c%u, %c%u\n", op_bit ? 't' : 'e',
4749                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
4750                   Q ? 'q' : 'd', mreg);
4751            }
4752         }
4753         break;
4754      case 15:
4755         if (B == 0) {
4756            if (U == 0) {
4757               /* VMAX, VMIN  */
4758               IROp op;
4759               if ((theInstr >> 20) & 1)
4760                  return False;
4761               if ((theInstr >> 21) & 1) {
4762                  op = Q ? Iop_Min32Fx4 : Iop_Min32Fx2;
4763                  DIP("vmin.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4764                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4765               } else {
4766                  op = Q ? Iop_Max32Fx4 : Iop_Max32Fx2;
4767                  DIP("vmax.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4768                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4769               }
4770               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4771            } else {
4772               /* VPMAX, VPMIN   */
4773               IROp op;
4774               if (Q)
4775                  return False;
4776               if ((theInstr >> 20) & 1)
4777                  return False;
4778               if ((theInstr >> 21) & 1) {
4779                  op = Iop_PwMin32Fx2;
4780                  DIP("vpmin.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4781               } else {
4782                  op = Iop_PwMax32Fx2;
4783                  DIP("vpmax.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4784               }
4785               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4786            }
4787         } else {
4788            if (U == 0) {
4789               if ((C >> 1) == 0) {
4790                  /* VRECPS */
4791                  if ((theInstr >> 20) & 1)
4792                     return False;
4793                  assign(res, binop(Q ? Iop_RecipStep32Fx4
4794                                      : Iop_RecipStep32Fx2,
4795                                    mkexpr(arg_n),
4796                                    mkexpr(arg_m)));
4797                  DIP("vrecps.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4798                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4799               } else {
4800                  /* VRSQRTS  */
4801                  if ((theInstr >> 20) & 1)
4802                     return False;
4803                  assign(res, binop(Q ? Iop_RSqrtStep32Fx4
4804                                      : Iop_RSqrtStep32Fx2,
4805                                    mkexpr(arg_n),
4806                                    mkexpr(arg_m)));
4807                  DIP("vrsqrts.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4808                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4809               }
4810            }
4811         }
4812         break;
4813   }
4814
4815   if (Q) {
4816      putQReg(dreg, mkexpr(res), condT);
4817   } else {
4818      putDRegI64(dreg, mkexpr(res), condT);
4819   }
4820
4821   return True;
4822}
4823
4824/* A7.4.2 Three registers of different length */
4825static
4826Bool dis_neon_data_3diff ( UInt theInstr, IRTemp condT )
4827{
4828   UInt A = (theInstr >> 8) & 0xf;
4829   UInt B = (theInstr >> 20) & 3;
4830   UInt U = (theInstr >> 24) & 1;
4831   UInt P = (theInstr >> 9) & 1;
4832   UInt mreg = get_neon_m_regno(theInstr);
4833   UInt nreg = get_neon_n_regno(theInstr);
4834   UInt dreg = get_neon_d_regno(theInstr);
4835   UInt size = B;
4836   ULong imm;
4837   IRTemp res, arg_m, arg_n, cond, tmp;
4838   IROp cvt, cvt2, cmp, op, op2, sh, add;
4839   switch (A) {
4840      case 0: case 1: case 2: case 3:
4841         /* VADDL, VADDW, VSUBL, VSUBW */
4842         if (dreg & 1)
4843            return False;
4844         dreg >>= 1;
4845         size = B;
4846         switch (size) {
4847            case 0:
4848               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
4849               op = (A & 2) ? Iop_Sub16x8 : Iop_Add16x8;
4850               break;
4851            case 1:
4852               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
4853               op = (A & 2) ? Iop_Sub32x4 : Iop_Add32x4;
4854               break;
4855            case 2:
4856               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
4857               op = (A & 2) ? Iop_Sub64x2 : Iop_Add64x2;
4858               break;
4859            case 3:
4860               return False;
4861            default:
4862               vassert(0);
4863         }
4864         arg_n = newTemp(Ity_V128);
4865         arg_m = newTemp(Ity_V128);
4866         if (A & 1) {
4867            if (nreg & 1)
4868               return False;
4869            nreg >>= 1;
4870            assign(arg_n, getQReg(nreg));
4871         } else {
4872            assign(arg_n, unop(cvt, getDRegI64(nreg)));
4873         }
4874         assign(arg_m, unop(cvt, getDRegI64(mreg)));
4875         putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
4876                       condT);
4877         DIP("v%s%c.%c%u q%u, %c%u, d%u\n", (A & 2) ? "sub" : "add",
4878             (A & 1) ? 'w' : 'l', U ? 'u' : 's', 8 << size, dreg,
4879             (A & 1) ? 'q' : 'd', nreg, mreg);
4880         return True;
4881      case 4:
4882         /* VADDHN, VRADDHN */
4883         if (mreg & 1)
4884            return False;
4885         mreg >>= 1;
4886         if (nreg & 1)
4887            return False;
4888         nreg >>= 1;
4889         size = B;
4890         switch (size) {
4891            case 0:
4892               op = Iop_Add16x8;
4893               cvt = Iop_NarrowUn16to8x8;
4894               sh = Iop_ShrN16x8;
4895               imm = 1U << 7;
4896               imm = (imm << 16) | imm;
4897               imm = (imm << 32) | imm;
4898               break;
4899            case 1:
4900               op = Iop_Add32x4;
4901               cvt = Iop_NarrowUn32to16x4;
4902               sh = Iop_ShrN32x4;
4903               imm = 1U << 15;
4904               imm = (imm << 32) | imm;
4905               break;
4906            case 2:
4907               op = Iop_Add64x2;
4908               cvt = Iop_NarrowUn64to32x2;
4909               sh = Iop_ShrN64x2;
4910               imm = 1U << 31;
4911               break;
4912            case 3:
4913               return False;
4914            default:
4915               vassert(0);
4916         }
4917         tmp = newTemp(Ity_V128);
4918         res = newTemp(Ity_V128);
4919         assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
4920         if (U) {
4921            /* VRADDHN */
4922            assign(res, binop(op, mkexpr(tmp),
4923                     binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
4924         } else {
4925            assign(res, mkexpr(tmp));
4926         }
4927         putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
4928                    condT);
4929         DIP("v%saddhn.i%u d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
4930             nreg, mreg);
4931         return True;
4932      case 5:
4933         /* VABAL */
4934         if (!((theInstr >> 23) & 1)) {
4935            vpanic("VABA should not be in dis_neon_data_3diff\n");
4936         }
4937         if (dreg & 1)
4938            return False;
4939         dreg >>= 1;
4940         switch (size) {
4941            case 0:
4942               cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4943               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
4944               cvt2 = Iop_Widen8Sto16x8;
4945               op = Iop_Sub16x8;
4946               op2 = Iop_Add16x8;
4947               break;
4948            case 1:
4949               cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4950               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
4951               cvt2 = Iop_Widen16Sto32x4;
4952               op = Iop_Sub32x4;
4953               op2 = Iop_Add32x4;
4954               break;
4955            case 2:
4956               cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4957               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
4958               cvt2 = Iop_Widen32Sto64x2;
4959               op = Iop_Sub64x2;
4960               op2 = Iop_Add64x2;
4961               break;
4962            case 3:
4963               return False;
4964            default:
4965               vassert(0);
4966         }
4967         arg_n = newTemp(Ity_V128);
4968         arg_m = newTemp(Ity_V128);
4969         cond = newTemp(Ity_V128);
4970         res = newTemp(Ity_V128);
4971         assign(arg_n, unop(cvt, getDRegI64(nreg)));
4972         assign(arg_m, unop(cvt, getDRegI64(mreg)));
4973         assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
4974                                            getDRegI64(mreg))));
4975         assign(res, binop(op2,
4976                           binop(Iop_OrV128,
4977                                 binop(Iop_AndV128,
4978                                       binop(op, mkexpr(arg_n), mkexpr(arg_m)),
4979                                       mkexpr(cond)),
4980                                 binop(Iop_AndV128,
4981                                       binop(op, mkexpr(arg_m), mkexpr(arg_n)),
4982                                       unop(Iop_NotV128, mkexpr(cond)))),
4983                           getQReg(dreg)));
4984         putQReg(dreg, mkexpr(res), condT);
4985         DIP("vabal.%c%u q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
4986             nreg, mreg);
4987         return True;
4988      case 6:
4989         /* VSUBHN, VRSUBHN */
4990         if (mreg & 1)
4991            return False;
4992         mreg >>= 1;
4993         if (nreg & 1)
4994            return False;
4995         nreg >>= 1;
4996         size = B;
4997         switch (size) {
4998            case 0:
4999               op = Iop_Sub16x8;
5000               op2 = Iop_Add16x8;
5001               cvt = Iop_NarrowUn16to8x8;
5002               sh = Iop_ShrN16x8;
5003               imm = 1U << 7;
5004               imm = (imm << 16) | imm;
5005               imm = (imm << 32) | imm;
5006               break;
5007            case 1:
5008               op = Iop_Sub32x4;
5009               op2 = Iop_Add32x4;
5010               cvt = Iop_NarrowUn32to16x4;
5011               sh = Iop_ShrN32x4;
5012               imm = 1U << 15;
5013               imm = (imm << 32) | imm;
5014               break;
5015            case 2:
5016               op = Iop_Sub64x2;
5017               op2 = Iop_Add64x2;
5018               cvt = Iop_NarrowUn64to32x2;
5019               sh = Iop_ShrN64x2;
5020               imm = 1U << 31;
5021               break;
5022            case 3:
5023               return False;
5024            default:
5025               vassert(0);
5026         }
5027         tmp = newTemp(Ity_V128);
5028         res = newTemp(Ity_V128);
5029         assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
5030         if (U) {
5031            /* VRSUBHN */
5032            assign(res, binop(op2, mkexpr(tmp),
5033                     binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
5034         } else {
5035            assign(res, mkexpr(tmp));
5036         }
5037         putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
5038                    condT);
5039         DIP("v%ssubhn.i%u d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
5040             nreg, mreg);
5041         return True;
5042      case 7:
5043         /* VABDL */
5044         if (!((theInstr >> 23) & 1)) {
5045            vpanic("VABL should not be in dis_neon_data_3diff\n");
5046         }
5047         if (dreg & 1)
5048            return False;
5049         dreg >>= 1;
5050         switch (size) {
5051            case 0:
5052               cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
5053               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
5054               cvt2 = Iop_Widen8Sto16x8;
5055               op = Iop_Sub16x8;
5056               break;
5057            case 1:
5058               cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
5059               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
5060               cvt2 = Iop_Widen16Sto32x4;
5061               op = Iop_Sub32x4;
5062               break;
5063            case 2:
5064               cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
5065               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
5066               cvt2 = Iop_Widen32Sto64x2;
5067               op = Iop_Sub64x2;
5068               break;
5069            case 3:
5070               return False;
5071            default:
5072               vassert(0);
5073         }
5074         arg_n = newTemp(Ity_V128);
5075         arg_m = newTemp(Ity_V128);
5076         cond = newTemp(Ity_V128);
5077         res = newTemp(Ity_V128);
5078         assign(arg_n, unop(cvt, getDRegI64(nreg)));
5079         assign(arg_m, unop(cvt, getDRegI64(mreg)));
5080         assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
5081                                            getDRegI64(mreg))));
5082         assign(res, binop(Iop_OrV128,
5083                           binop(Iop_AndV128,
5084                                 binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5085                                 mkexpr(cond)),
5086                           binop(Iop_AndV128,
5087                                 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
5088                                 unop(Iop_NotV128, mkexpr(cond)))));
5089         putQReg(dreg, mkexpr(res), condT);
5090         DIP("vabdl.%c%u q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
5091             nreg, mreg);
5092         return True;
5093      case 8:
5094      case 10:
5095         /* VMLAL, VMLSL (integer) */
5096         if (dreg & 1)
5097            return False;
5098         dreg >>= 1;
5099         size = B;
5100         switch (size) {
5101            case 0:
5102               op = U ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
5103               op2 = P ? Iop_Sub16x8 : Iop_Add16x8;
5104               break;
5105            case 1:
5106               op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5107               op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5108               break;
5109            case 2:
5110               op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5111               op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5112               break;
5113            case 3:
5114               return False;
5115            default:
5116               vassert(0);
5117         }
5118         res = newTemp(Ity_V128);
5119         assign(res, binop(op, getDRegI64(nreg),getDRegI64(mreg)));
5120         putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
5121         DIP("vml%cl.%c%u q%u, d%u, d%u\n", P ? 's' : 'a', U ? 'u' : 's',
5122             8 << size, dreg, nreg, mreg);
5123         return True;
5124      case 9:
5125      case 11:
5126         /* VQDMLAL, VQDMLSL */
5127         if (U)
5128            return False;
5129         if (dreg & 1)
5130            return False;
5131         dreg >>= 1;
5132         size = B;
5133         switch (size) {
5134            case 0: case 3:
5135               return False;
5136            case 1:
5137               op = Iop_QDMull16Sx4;
5138               cmp = Iop_CmpEQ16x4;
5139               add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
5140               op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5141               imm = 1LL << 15;
5142               imm = (imm << 16) | imm;
5143               imm = (imm << 32) | imm;
5144               break;
5145            case 2:
5146               op = Iop_QDMull32Sx2;
5147               cmp = Iop_CmpEQ32x2;
5148               add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
5149               op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5150               imm = 1LL << 31;
5151               imm = (imm << 32) | imm;
5152               break;
5153            default:
5154               vassert(0);
5155         }
5156         res = newTemp(Ity_V128);
5157         tmp = newTemp(Ity_V128);
5158         assign(res, binop(op, getDRegI64(nreg), getDRegI64(mreg)));
5159         assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
5160         setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
5161                    True, condT);
5162         setFlag_QC(binop(Iop_And64,
5163                          binop(cmp, getDRegI64(nreg), mkU64(imm)),
5164                          binop(cmp, getDRegI64(mreg), mkU64(imm))),
5165                    mkU64(0),
5166                    False, condT);
5167         putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
5168         DIP("vqdml%cl.s%u q%u, d%u, d%u\n", P ? 's' : 'a', 8 << size, dreg,
5169             nreg, mreg);
5170         return True;
5171      case 12:
5172      case 14:
5173         /* VMULL (integer or polynomial) */
5174         if (dreg & 1)
5175            return False;
5176         dreg >>= 1;
5177         size = B;
5178         switch (size) {
5179            case 0:
5180               op = (U) ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
5181               if (P)
5182                  op = Iop_PolynomialMull8x8;
5183               break;
5184            case 1:
5185               op = (U) ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5186               break;
5187            case 2:
5188               op = (U) ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5189               break;
5190            default:
5191               vassert(0);
5192         }
5193         putQReg(dreg, binop(op, getDRegI64(nreg),
5194                                 getDRegI64(mreg)), condT);
5195         DIP("vmull.%c%u q%u, d%u, d%u\n", P ? 'p' : (U ? 'u' : 's'),
5196               8 << size, dreg, nreg, mreg);
5197         return True;
5198      case 13:
5199         /* VQDMULL */
5200         if (U)
5201            return False;
5202         if (dreg & 1)
5203            return False;
5204         dreg >>= 1;
5205         size = B;
5206         switch (size) {
5207            case 0:
5208            case 3:
5209               return False;
5210            case 1:
5211               op = Iop_QDMull16Sx4;
5212               op2 = Iop_CmpEQ16x4;
5213               imm = 1LL << 15;
5214               imm = (imm << 16) | imm;
5215               imm = (imm << 32) | imm;
5216               break;
5217            case 2:
5218               op = Iop_QDMull32Sx2;
5219               op2 = Iop_CmpEQ32x2;
5220               imm = 1LL << 31;
5221               imm = (imm << 32) | imm;
5222               break;
5223            default:
5224               vassert(0);
5225         }
5226         putQReg(dreg, binop(op, getDRegI64(nreg), getDRegI64(mreg)),
5227               condT);
5228         setFlag_QC(binop(Iop_And64,
5229                          binop(op2, getDRegI64(nreg), mkU64(imm)),
5230                          binop(op2, getDRegI64(mreg), mkU64(imm))),
5231                    mkU64(0),
5232                    False, condT);
5233         DIP("vqdmull.s%u q%u, d%u, d%u\n", 8 << size, dreg, nreg, mreg);
5234         return True;
5235      default:
5236         return False;
5237   }
5238   return False;
5239}
5240
5241/* A7.4.3 Two registers and a scalar */
5242static
5243Bool dis_neon_data_2reg_and_scalar ( UInt theInstr, IRTemp condT )
5244{
5245#  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
5246   UInt U = INSN(24,24);
5247   UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
5248   UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
5249   UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
5250   UInt size = INSN(21,20);
5251   UInt index;
5252   UInt Q = INSN(24,24);
5253
5254   if (INSN(27,25) != 1 || INSN(23,23) != 1
5255       || INSN(6,6) != 1 || INSN(4,4) != 0)
5256      return False;
5257
5258   /* VMLA, VMLS (scalar)  */
5259   if ((INSN(11,8) & BITS4(1,0,1,0)) == BITS4(0,0,0,0)) {
5260      IRTemp res, arg_m, arg_n;
5261      IROp dup, get, op, op2, add, sub;
5262      if (Q) {
5263         if ((dreg & 1) || (nreg & 1))
5264            return False;
5265         dreg >>= 1;
5266         nreg >>= 1;
5267         res = newTemp(Ity_V128);
5268         arg_m = newTemp(Ity_V128);
5269         arg_n = newTemp(Ity_V128);
5270         assign(arg_n, getQReg(nreg));
5271         switch(size) {
5272            case 1:
5273               dup = Iop_Dup16x8;
5274               get = Iop_GetElem16x4;
5275               index = mreg >> 3;
5276               mreg &= 7;
5277               break;
5278            case 2:
5279               dup = Iop_Dup32x4;
5280               get = Iop_GetElem32x2;
5281               index = mreg >> 4;
5282               mreg &= 0xf;
5283               break;
5284            case 0:
5285            case 3:
5286               return False;
5287            default:
5288               vassert(0);
5289         }
5290         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5291      } else {
5292         res = newTemp(Ity_I64);
5293         arg_m = newTemp(Ity_I64);
5294         arg_n = newTemp(Ity_I64);
5295         assign(arg_n, getDRegI64(nreg));
5296         switch(size) {
5297            case 1:
5298               dup = Iop_Dup16x4;
5299               get = Iop_GetElem16x4;
5300               index = mreg >> 3;
5301               mreg &= 7;
5302               break;
5303            case 2:
5304               dup = Iop_Dup32x2;
5305               get = Iop_GetElem32x2;
5306               index = mreg >> 4;
5307               mreg &= 0xf;
5308               break;
5309            case 0:
5310            case 3:
5311               return False;
5312            default:
5313               vassert(0);
5314         }
5315         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5316      }
5317      if (INSN(8,8)) {
5318         switch (size) {
5319            case 2:
5320               op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
5321               add = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
5322               sub = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
5323               break;
5324            case 0:
5325            case 1:
5326            case 3:
5327               return False;
5328            default:
5329               vassert(0);
5330         }
5331      } else {
5332         switch (size) {
5333            case 1:
5334               op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
5335               add = Q ? Iop_Add16x8 : Iop_Add16x4;
5336               sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
5337               break;
5338            case 2:
5339               op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
5340               add = Q ? Iop_Add32x4 : Iop_Add32x2;
5341               sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
5342               break;
5343            case 0:
5344            case 3:
5345               return False;
5346            default:
5347               vassert(0);
5348         }
5349      }
5350      op2 = INSN(10,10) ? sub : add;
5351      assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
5352      if (Q)
5353         putQReg(dreg, binop_w_fake_RM(op2, getQReg(dreg), mkexpr(res)),
5354                 condT);
5355      else
5356         putDRegI64(dreg, binop(op2, getDRegI64(dreg), mkexpr(res)),
5357                    condT);
5358      DIP("vml%c.%c%u %c%u, %c%u, d%u[%u]\n", INSN(10,10) ? 's' : 'a',
5359            INSN(8,8) ? 'f' : 'i', 8 << size,
5360            Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, mreg, index);
5361      return True;
5362   }
5363
5364   /* VMLAL, VMLSL (scalar)   */
5365   if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,0)) {
5366      IRTemp res, arg_m, arg_n;
5367      IROp dup, get, op, op2, add, sub;
5368      if (dreg & 1)
5369         return False;
5370      dreg >>= 1;
5371      res = newTemp(Ity_V128);
5372      arg_m = newTemp(Ity_I64);
5373      arg_n = newTemp(Ity_I64);
5374      assign(arg_n, getDRegI64(nreg));
5375      switch(size) {
5376         case 1:
5377            dup = Iop_Dup16x4;
5378            get = Iop_GetElem16x4;
5379            index = mreg >> 3;
5380            mreg &= 7;
5381            break;
5382         case 2:
5383            dup = Iop_Dup32x2;
5384            get = Iop_GetElem32x2;
5385            index = mreg >> 4;
5386            mreg &= 0xf;
5387            break;
5388         case 0:
5389         case 3:
5390            return False;
5391         default:
5392            vassert(0);
5393      }
5394      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5395      switch (size) {
5396         case 1:
5397            op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5398            add = Iop_Add32x4;
5399            sub = Iop_Sub32x4;
5400            break;
5401         case 2:
5402            op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5403            add = Iop_Add64x2;
5404            sub = Iop_Sub64x2;
5405            break;
5406         case 0:
5407         case 3:
5408            return False;
5409         default:
5410            vassert(0);
5411      }
5412      op2 = INSN(10,10) ? sub : add;
5413      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5414      putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
5415      DIP("vml%cl.%c%u q%u, d%u, d%u[%u]\n",
5416          INSN(10,10) ? 's' : 'a', U ? 'u' : 's',
5417          8 << size, dreg, nreg, mreg, index);
5418      return True;
5419   }
5420
5421   /* VQDMLAL, VQDMLSL (scalar)  */
5422   if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,1) && !U) {
5423      IRTemp res, arg_m, arg_n, tmp;
5424      IROp dup, get, op, op2, add, cmp;
5425      UInt P = INSN(10,10);
5426      ULong imm;
5427      if (dreg & 1)
5428         return False;
5429      dreg >>= 1;
5430      res = newTemp(Ity_V128);
5431      arg_m = newTemp(Ity_I64);
5432      arg_n = newTemp(Ity_I64);
5433      assign(arg_n, getDRegI64(nreg));
5434      switch(size) {
5435         case 1:
5436            dup = Iop_Dup16x4;
5437            get = Iop_GetElem16x4;
5438            index = mreg >> 3;
5439            mreg &= 7;
5440            break;
5441         case 2:
5442            dup = Iop_Dup32x2;
5443            get = Iop_GetElem32x2;
5444            index = mreg >> 4;
5445            mreg &= 0xf;
5446            break;
5447         case 0:
5448         case 3:
5449            return False;
5450         default:
5451            vassert(0);
5452      }
5453      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5454      switch (size) {
5455         case 0:
5456         case 3:
5457            return False;
5458         case 1:
5459            op = Iop_QDMull16Sx4;
5460            cmp = Iop_CmpEQ16x4;
5461            add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
5462            op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5463            imm = 1LL << 15;
5464            imm = (imm << 16) | imm;
5465            imm = (imm << 32) | imm;
5466            break;
5467         case 2:
5468            op = Iop_QDMull32Sx2;
5469            cmp = Iop_CmpEQ32x2;
5470            add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
5471            op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5472            imm = 1LL << 31;
5473            imm = (imm << 32) | imm;
5474            break;
5475         default:
5476            vassert(0);
5477      }
5478      res = newTemp(Ity_V128);
5479      tmp = newTemp(Ity_V128);
5480      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5481      assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
5482      setFlag_QC(binop(Iop_And64,
5483                       binop(cmp, mkexpr(arg_n), mkU64(imm)),
5484                       binop(cmp, mkexpr(arg_m), mkU64(imm))),
5485                 mkU64(0),
5486                 False, condT);
5487      setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
5488                 True, condT);
5489      putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
5490      DIP("vqdml%cl.s%u q%u, d%u, d%u[%u]\n", P ? 's' : 'a', 8 << size,
5491          dreg, nreg, mreg, index);
5492      return True;
5493   }
5494
5495   /* VMUL (by scalar)  */
5496   if ((INSN(11,8) & BITS4(1,1,1,0)) == BITS4(1,0,0,0)) {
5497      IRTemp res, arg_m, arg_n;
5498      IROp dup, get, op;
5499      if (Q) {
5500         if ((dreg & 1) || (nreg & 1))
5501            return False;
5502         dreg >>= 1;
5503         nreg >>= 1;
5504         res = newTemp(Ity_V128);
5505         arg_m = newTemp(Ity_V128);
5506         arg_n = newTemp(Ity_V128);
5507         assign(arg_n, getQReg(nreg));
5508         switch(size) {
5509            case 1:
5510               dup = Iop_Dup16x8;
5511               get = Iop_GetElem16x4;
5512               index = mreg >> 3;
5513               mreg &= 7;
5514               break;
5515            case 2:
5516               dup = Iop_Dup32x4;
5517               get = Iop_GetElem32x2;
5518               index = mreg >> 4;
5519               mreg &= 0xf;
5520               break;
5521            case 0:
5522            case 3:
5523               return False;
5524            default:
5525               vassert(0);
5526         }
5527         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5528      } else {
5529         res = newTemp(Ity_I64);
5530         arg_m = newTemp(Ity_I64);
5531         arg_n = newTemp(Ity_I64);
5532         assign(arg_n, getDRegI64(nreg));
5533         switch(size) {
5534            case 1:
5535               dup = Iop_Dup16x4;
5536               get = Iop_GetElem16x4;
5537               index = mreg >> 3;
5538               mreg &= 7;
5539               break;
5540            case 2:
5541               dup = Iop_Dup32x2;
5542               get = Iop_GetElem32x2;
5543               index = mreg >> 4;
5544               mreg &= 0xf;
5545               break;
5546            case 0:
5547            case 3:
5548               return False;
5549            default:
5550               vassert(0);
5551         }
5552         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5553      }
5554      if (INSN(8,8)) {
5555         switch (size) {
5556            case 2:
5557               op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
5558               break;
5559            case 0:
5560            case 1:
5561            case 3:
5562               return False;
5563            default:
5564               vassert(0);
5565         }
5566      } else {
5567         switch (size) {
5568            case 1:
5569               op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
5570               break;
5571            case 2:
5572               op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
5573               break;
5574            case 0:
5575            case 3:
5576               return False;
5577            default:
5578               vassert(0);
5579         }
5580      }
5581      assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
5582      if (Q)
5583         putQReg(dreg, mkexpr(res), condT);
5584      else
5585         putDRegI64(dreg, mkexpr(res), condT);
5586      DIP("vmul.%c%u %c%u, %c%u, d%u[%u]\n", INSN(8,8) ? 'f' : 'i',
5587          8 << size, Q ? 'q' : 'd', dreg,
5588          Q ? 'q' : 'd', nreg, mreg, index);
5589      return True;
5590   }
5591
5592   /* VMULL (scalar) */
5593   if (INSN(11,8) == BITS4(1,0,1,0)) {
5594      IRTemp res, arg_m, arg_n;
5595      IROp dup, get, op;
5596      if (dreg & 1)
5597         return False;
5598      dreg >>= 1;
5599      res = newTemp(Ity_V128);
5600      arg_m = newTemp(Ity_I64);
5601      arg_n = newTemp(Ity_I64);
5602      assign(arg_n, getDRegI64(nreg));
5603      switch(size) {
5604         case 1:
5605            dup = Iop_Dup16x4;
5606            get = Iop_GetElem16x4;
5607            index = mreg >> 3;
5608            mreg &= 7;
5609            break;
5610         case 2:
5611            dup = Iop_Dup32x2;
5612            get = Iop_GetElem32x2;
5613            index = mreg >> 4;
5614            mreg &= 0xf;
5615            break;
5616         case 0:
5617         case 3:
5618            return False;
5619         default:
5620            vassert(0);
5621      }
5622      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5623      switch (size) {
5624         case 1: op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4; break;
5625         case 2: op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2; break;
5626         case 0: case 3: return False;
5627         default: vassert(0);
5628      }
5629      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5630      putQReg(dreg, mkexpr(res), condT);
5631      DIP("vmull.%c%u q%u, d%u, d%u[%u]\n", U ? 'u' : 's', 8 << size, dreg,
5632          nreg, mreg, index);
5633      return True;
5634   }
5635
5636   /* VQDMULL */
5637   if (INSN(11,8) == BITS4(1,0,1,1) && !U) {
5638      IROp op ,op2, dup, get;
5639      ULong imm;
5640      IRTemp arg_m, arg_n;
5641      if (dreg & 1)
5642         return False;
5643      dreg >>= 1;
5644      arg_m = newTemp(Ity_I64);
5645      arg_n = newTemp(Ity_I64);
5646      assign(arg_n, getDRegI64(nreg));
5647      switch(size) {
5648         case 1:
5649            dup = Iop_Dup16x4;
5650            get = Iop_GetElem16x4;
5651            index = mreg >> 3;
5652            mreg &= 7;
5653            break;
5654         case 2:
5655            dup = Iop_Dup32x2;
5656            get = Iop_GetElem32x2;
5657            index = mreg >> 4;
5658            mreg &= 0xf;
5659            break;
5660         case 0:
5661         case 3:
5662            return False;
5663         default:
5664            vassert(0);
5665      }
5666      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5667      switch (size) {
5668         case 0:
5669         case 3:
5670            return False;
5671         case 1:
5672            op = Iop_QDMull16Sx4;
5673            op2 = Iop_CmpEQ16x4;
5674            imm = 1LL << 15;
5675            imm = (imm << 16) | imm;
5676            imm = (imm << 32) | imm;
5677            break;
5678         case 2:
5679            op = Iop_QDMull32Sx2;
5680            op2 = Iop_CmpEQ32x2;
5681            imm = 1LL << 31;
5682            imm = (imm << 32) | imm;
5683            break;
5684         default:
5685            vassert(0);
5686      }
5687      putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5688            condT);
5689      setFlag_QC(binop(Iop_And64,
5690                       binop(op2, mkexpr(arg_n), mkU64(imm)),
5691                       binop(op2, mkexpr(arg_m), mkU64(imm))),
5692                 mkU64(0),
5693                 False, condT);
5694      DIP("vqdmull.s%u q%u, d%u, d%u[%u]\n", 8 << size, dreg, nreg, mreg,
5695          index);
5696      return True;
5697   }
5698
5699   /* VQDMULH */
5700   if (INSN(11,8) == BITS4(1,1,0,0)) {
5701      IROp op ,op2, dup, get;
5702      ULong imm;
5703      IRTemp res, arg_m, arg_n;
5704      if (Q) {
5705         if ((dreg & 1) || (nreg & 1))
5706            return False;
5707         dreg >>= 1;
5708         nreg >>= 1;
5709         res = newTemp(Ity_V128);
5710         arg_m = newTemp(Ity_V128);
5711         arg_n = newTemp(Ity_V128);
5712         assign(arg_n, getQReg(nreg));
5713         switch(size) {
5714            case 1:
5715               dup = Iop_Dup16x8;
5716               get = Iop_GetElem16x4;
5717               index = mreg >> 3;
5718               mreg &= 7;
5719               break;
5720            case 2:
5721               dup = Iop_Dup32x4;
5722               get = Iop_GetElem32x2;
5723               index = mreg >> 4;
5724               mreg &= 0xf;
5725               break;
5726            case 0:
5727            case 3:
5728               return False;
5729            default:
5730               vassert(0);
5731         }
5732         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5733      } else {
5734         res = newTemp(Ity_I64);
5735         arg_m = newTemp(Ity_I64);
5736         arg_n = newTemp(Ity_I64);
5737         assign(arg_n, getDRegI64(nreg));
5738         switch(size) {
5739            case 1:
5740               dup = Iop_Dup16x4;
5741               get = Iop_GetElem16x4;
5742               index = mreg >> 3;
5743               mreg &= 7;
5744               break;
5745            case 2:
5746               dup = Iop_Dup32x2;
5747               get = Iop_GetElem32x2;
5748               index = mreg >> 4;
5749               mreg &= 0xf;
5750               break;
5751            case 0:
5752            case 3:
5753               return False;
5754            default:
5755               vassert(0);
5756         }
5757         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5758      }
5759      switch (size) {
5760         case 0:
5761         case 3:
5762            return False;
5763         case 1:
5764            op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
5765            op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
5766            imm = 1LL << 15;
5767            imm = (imm << 16) | imm;
5768            imm = (imm << 32) | imm;
5769            break;
5770         case 2:
5771            op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
5772            op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
5773            imm = 1LL << 31;
5774            imm = (imm << 32) | imm;
5775            break;
5776         default:
5777            vassert(0);
5778      }
5779      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5780      setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
5781                       binop(op2, mkexpr(arg_n),
5782                                  Q ? mkU128(imm) : mkU64(imm)),
5783                       binop(op2, mkexpr(arg_m),
5784                             Q ? mkU128(imm) : mkU64(imm))),
5785                 Q ? mkU128(0) : mkU64(0),
5786                 Q, condT);
5787      if (Q)
5788         putQReg(dreg, mkexpr(res), condT);
5789      else
5790         putDRegI64(dreg, mkexpr(res), condT);
5791      DIP("vqdmulh.s%u %c%u, %c%u, d%u[%u]\n",
5792          8 << size, Q ? 'q' : 'd', dreg,
5793          Q ? 'q' : 'd', nreg, mreg, index);
5794      return True;
5795   }
5796
5797   /* VQRDMULH (scalar) */
5798   if (INSN(11,8) == BITS4(1,1,0,1)) {
5799      IROp op ,op2, dup, get;
5800      ULong imm;
5801      IRTemp res, arg_m, arg_n;
5802      if (Q) {
5803         if ((dreg & 1) || (nreg & 1))
5804            return False;
5805         dreg >>= 1;
5806         nreg >>= 1;
5807         res = newTemp(Ity_V128);
5808         arg_m = newTemp(Ity_V128);
5809         arg_n = newTemp(Ity_V128);
5810         assign(arg_n, getQReg(nreg));
5811         switch(size) {
5812            case 1:
5813               dup = Iop_Dup16x8;
5814               get = Iop_GetElem16x4;
5815               index = mreg >> 3;
5816               mreg &= 7;
5817               break;
5818            case 2:
5819               dup = Iop_Dup32x4;
5820               get = Iop_GetElem32x2;
5821               index = mreg >> 4;
5822               mreg &= 0xf;
5823               break;
5824            case 0:
5825            case 3:
5826               return False;
5827            default:
5828               vassert(0);
5829         }
5830         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5831      } else {
5832         res = newTemp(Ity_I64);
5833         arg_m = newTemp(Ity_I64);
5834         arg_n = newTemp(Ity_I64);
5835         assign(arg_n, getDRegI64(nreg));
5836         switch(size) {
5837            case 1:
5838               dup = Iop_Dup16x4;
5839               get = Iop_GetElem16x4;
5840               index = mreg >> 3;
5841               mreg &= 7;
5842               break;
5843            case 2:
5844               dup = Iop_Dup32x2;
5845               get = Iop_GetElem32x2;
5846               index = mreg >> 4;
5847               mreg &= 0xf;
5848               break;
5849            case 0:
5850            case 3:
5851               return False;
5852            default:
5853               vassert(0);
5854         }
5855         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5856      }
5857      switch (size) {
5858         case 0:
5859         case 3:
5860            return False;
5861         case 1:
5862            op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
5863            op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
5864            imm = 1LL << 15;
5865            imm = (imm << 16) | imm;
5866            imm = (imm << 32) | imm;
5867            break;
5868         case 2:
5869            op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
5870            op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
5871            imm = 1LL << 31;
5872            imm = (imm << 32) | imm;
5873            break;
5874         default:
5875            vassert(0);
5876      }
5877      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5878      setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
5879                       binop(op2, mkexpr(arg_n),
5880                                  Q ? mkU128(imm) : mkU64(imm)),
5881                       binop(op2, mkexpr(arg_m),
5882                                  Q ? mkU128(imm) : mkU64(imm))),
5883                 Q ? mkU128(0) : mkU64(0),
5884                 Q, condT);
5885      if (Q)
5886         putQReg(dreg, mkexpr(res), condT);
5887      else
5888         putDRegI64(dreg, mkexpr(res), condT);
5889      DIP("vqrdmulh.s%u %c%u, %c%u, d%u[%u]\n",
5890          8 << size, Q ? 'q' : 'd', dreg,
5891          Q ? 'q' : 'd', nreg, mreg, index);
5892      return True;
5893   }
5894
5895   return False;
5896#  undef INSN
5897}
5898
5899/* A7.4.4 Two registers and a shift amount */
5900static
5901Bool dis_neon_data_2reg_and_shift ( UInt theInstr, IRTemp condT )
5902{
5903   UInt A = (theInstr >> 8) & 0xf;
5904   UInt B = (theInstr >> 6) & 1;
5905   UInt L = (theInstr >> 7) & 1;
5906   UInt U = (theInstr >> 24) & 1;
5907   UInt Q = B;
5908   UInt imm6 = (theInstr >> 16) & 0x3f;
5909   UInt shift_imm;
5910   UInt size = 4;
5911   UInt tmp;
5912   UInt mreg = get_neon_m_regno(theInstr);
5913   UInt dreg = get_neon_d_regno(theInstr);
5914   ULong imm = 0;
5915   IROp op, cvt, add = Iop_INVALID, cvt2, op_rev;
5916   IRTemp reg_m, res, mask;
5917
5918   if (L == 0 && ((theInstr >> 19) & 7) == 0)
5919      /* It is one reg and immediate */
5920      return False;
5921
5922   tmp = (L << 6) | imm6;
5923   if (tmp & 0x40) {
5924      size = 3;
5925      shift_imm = 64 - imm6;
5926   } else if (tmp & 0x20) {
5927      size = 2;
5928      shift_imm = 64 - imm6;
5929   } else if (tmp & 0x10) {
5930      size = 1;
5931      shift_imm = 32 - imm6;
5932   } else if (tmp & 0x8) {
5933      size = 0;
5934      shift_imm = 16 - imm6;
5935   } else {
5936      return False;
5937   }
5938
5939   switch (A) {
5940      case 3:
5941      case 2:
5942         /* VRSHR, VRSRA */
5943         if (shift_imm > 0) {
5944            IRExpr *imm_val;
5945            imm = 1L;
5946            switch (size) {
5947               case 0:
5948                  imm = (imm << 8) | imm;
5949                  /* fall through */
5950               case 1:
5951                  imm = (imm << 16) | imm;
5952                  /* fall through */
5953               case 2:
5954                  imm = (imm << 32) | imm;
5955                  /* fall through */
5956               case 3:
5957                  break;
5958               default:
5959                  vassert(0);
5960            }
5961            if (Q) {
5962               reg_m = newTemp(Ity_V128);
5963               res = newTemp(Ity_V128);
5964               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
5965               assign(reg_m, getQReg(mreg));
5966               switch (size) {
5967                  case 0:
5968                     add = Iop_Add8x16;
5969                     op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
5970                     break;
5971                  case 1:
5972                     add = Iop_Add16x8;
5973                     op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
5974                     break;
5975                  case 2:
5976                     add = Iop_Add32x4;
5977                     op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
5978                     break;
5979                  case 3:
5980                     add = Iop_Add64x2;
5981                     op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
5982                     break;
5983                  default:
5984                     vassert(0);
5985               }
5986            } else {
5987               reg_m = newTemp(Ity_I64);
5988               res = newTemp(Ity_I64);
5989               imm_val = mkU64(imm);
5990               assign(reg_m, getDRegI64(mreg));
5991               switch (size) {
5992                  case 0:
5993                     add = Iop_Add8x8;
5994                     op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
5995                     break;
5996                  case 1:
5997                     add = Iop_Add16x4;
5998                     op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
5999                     break;
6000                  case 2:
6001                     add = Iop_Add32x2;
6002                     op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
6003                     break;
6004                  case 3:
6005                     add = Iop_Add64;
6006                     op = U ? Iop_Shr64 : Iop_Sar64;
6007                     break;
6008                  default:
6009                     vassert(0);
6010               }
6011            }
6012            assign(res,
6013                   binop(add,
6014                         binop(op,
6015                               mkexpr(reg_m),
6016                               mkU8(shift_imm)),
6017                         binop(Q ? Iop_AndV128 : Iop_And64,
6018                               binop(op,
6019                                     mkexpr(reg_m),
6020                                     mkU8(shift_imm - 1)),
6021                               imm_val)));
6022         } else {
6023            if (Q) {
6024               res = newTemp(Ity_V128);
6025               assign(res, getQReg(mreg));
6026            } else {
6027               res = newTemp(Ity_I64);
6028               assign(res, getDRegI64(mreg));
6029            }
6030         }
6031         if (A == 3) {
6032            if (Q) {
6033               putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
6034                             condT);
6035            } else {
6036               putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
6037                                condT);
6038            }
6039            DIP("vrsra.%c%u %c%u, %c%u, #%u\n",
6040                U ? 'u' : 's', 8 << size,
6041                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6042         } else {
6043            if (Q) {
6044               putQReg(dreg, mkexpr(res), condT);
6045            } else {
6046               putDRegI64(dreg, mkexpr(res), condT);
6047            }
6048            DIP("vrshr.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6049                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6050         }
6051         return True;
6052      case 1:
6053      case 0:
6054         /* VSHR, VSRA */
6055         if (Q) {
6056            reg_m = newTemp(Ity_V128);
6057            assign(reg_m, getQReg(mreg));
6058            res = newTemp(Ity_V128);
6059         } else {
6060            reg_m = newTemp(Ity_I64);
6061            assign(reg_m, getDRegI64(mreg));
6062            res = newTemp(Ity_I64);
6063         }
6064         if (Q) {
6065            switch (size) {
6066               case 0:
6067                  op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
6068                  add = Iop_Add8x16;
6069                  break;
6070               case 1:
6071                  op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
6072                  add = Iop_Add16x8;
6073                  break;
6074               case 2:
6075                  op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
6076                  add = Iop_Add32x4;
6077                  break;
6078               case 3:
6079                  op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
6080                  add = Iop_Add64x2;
6081                  break;
6082               default:
6083                  vassert(0);
6084            }
6085         } else {
6086            switch (size) {
6087               case 0:
6088                  op =  U ? Iop_ShrN8x8 : Iop_SarN8x8;
6089                  add = Iop_Add8x8;
6090                  break;
6091               case 1:
6092                  op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
6093                  add = Iop_Add16x4;
6094                  break;
6095               case 2:
6096                  op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
6097                  add = Iop_Add32x2;
6098                  break;
6099               case 3:
6100                  op = U ? Iop_Shr64 : Iop_Sar64;
6101                  add = Iop_Add64;
6102                  break;
6103               default:
6104                  vassert(0);
6105            }
6106         }
6107         assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6108         if (A == 1) {
6109            if (Q) {
6110               putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
6111                             condT);
6112            } else {
6113               putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
6114                                condT);
6115            }
6116            DIP("vsra.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6117                  Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6118         } else {
6119            if (Q) {
6120               putQReg(dreg, mkexpr(res), condT);
6121            } else {
6122               putDRegI64(dreg, mkexpr(res), condT);
6123            }
6124            DIP("vshr.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6125                  Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6126         }
6127         return True;
6128      case 4:
6129         /* VSRI */
6130         if (!U)
6131            return False;
6132         if (Q) {
6133            res = newTemp(Ity_V128);
6134            mask = newTemp(Ity_V128);
6135         } else {
6136            res = newTemp(Ity_I64);
6137            mask = newTemp(Ity_I64);
6138         }
6139         switch (size) {
6140            case 0: op = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; break;
6141            case 1: op = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; break;
6142            case 2: op = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; break;
6143            case 3: op = Q ? Iop_ShrN64x2 : Iop_Shr64; break;
6144            default: vassert(0);
6145         }
6146         if (Q) {
6147            assign(mask, binop(op, binop(Iop_64HLtoV128,
6148                                         mkU64(0xFFFFFFFFFFFFFFFFLL),
6149                                         mkU64(0xFFFFFFFFFFFFFFFFLL)),
6150                               mkU8(shift_imm)));
6151            assign(res, binop(Iop_OrV128,
6152                              binop(Iop_AndV128,
6153                                    getQReg(dreg),
6154                                    unop(Iop_NotV128,
6155                                         mkexpr(mask))),
6156                              binop(op,
6157                                    getQReg(mreg),
6158                                    mkU8(shift_imm))));
6159            putQReg(dreg, mkexpr(res), condT);
6160         } else {
6161            assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
6162                               mkU8(shift_imm)));
6163            assign(res, binop(Iop_Or64,
6164                              binop(Iop_And64,
6165                                    getDRegI64(dreg),
6166                                    unop(Iop_Not64,
6167                                         mkexpr(mask))),
6168                              binop(op,
6169                                    getDRegI64(mreg),
6170                                    mkU8(shift_imm))));
6171            putDRegI64(dreg, mkexpr(res), condT);
6172         }
6173         DIP("vsri.%u %c%u, %c%u, #%u\n",
6174             8 << size, Q ? 'q' : 'd', dreg,
6175             Q ? 'q' : 'd', mreg, shift_imm);
6176         return True;
6177      case 5:
6178         if (U) {
6179            /* VSLI */
6180            shift_imm = 8 * (1 << size) - shift_imm;
6181            if (Q) {
6182               res = newTemp(Ity_V128);
6183               mask = newTemp(Ity_V128);
6184            } else {
6185               res = newTemp(Ity_I64);
6186               mask = newTemp(Ity_I64);
6187            }
6188            switch (size) {
6189               case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
6190               case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
6191               case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
6192               case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
6193               default: vassert(0);
6194            }
6195            if (Q) {
6196               assign(mask, binop(op, binop(Iop_64HLtoV128,
6197                                            mkU64(0xFFFFFFFFFFFFFFFFLL),
6198                                            mkU64(0xFFFFFFFFFFFFFFFFLL)),
6199                                  mkU8(shift_imm)));
6200               assign(res, binop(Iop_OrV128,
6201                                 binop(Iop_AndV128,
6202                                       getQReg(dreg),
6203                                       unop(Iop_NotV128,
6204                                            mkexpr(mask))),
6205                                 binop(op,
6206                                       getQReg(mreg),
6207                                       mkU8(shift_imm))));
6208               putQReg(dreg, mkexpr(res), condT);
6209            } else {
6210               assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
6211                                  mkU8(shift_imm)));
6212               assign(res, binop(Iop_Or64,
6213                                 binop(Iop_And64,
6214                                       getDRegI64(dreg),
6215                                       unop(Iop_Not64,
6216                                            mkexpr(mask))),
6217                                 binop(op,
6218                                       getDRegI64(mreg),
6219                                       mkU8(shift_imm))));
6220               putDRegI64(dreg, mkexpr(res), condT);
6221            }
6222            DIP("vsli.%u %c%u, %c%u, #%u\n",
6223                8 << size, Q ? 'q' : 'd', dreg,
6224                Q ? 'q' : 'd', mreg, shift_imm);
6225            return True;
6226         } else {
6227            /* VSHL #imm */
6228            shift_imm = 8 * (1 << size) - shift_imm;
6229            if (Q) {
6230               res = newTemp(Ity_V128);
6231            } else {
6232               res = newTemp(Ity_I64);
6233            }
6234            switch (size) {
6235               case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
6236               case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
6237               case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
6238               case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
6239               default: vassert(0);
6240            }
6241            assign(res, binop(op, Q ? getQReg(mreg) : getDRegI64(mreg),
6242                     mkU8(shift_imm)));
6243            if (Q) {
6244               putQReg(dreg, mkexpr(res), condT);
6245            } else {
6246               putDRegI64(dreg, mkexpr(res), condT);
6247            }
6248            DIP("vshl.i%u %c%u, %c%u, #%u\n",
6249                8 << size, Q ? 'q' : 'd', dreg,
6250                Q ? 'q' : 'd', mreg, shift_imm);
6251            return True;
6252         }
6253         break;
6254      case 6:
6255      case 7:
6256         /* VQSHL, VQSHLU */
6257         shift_imm = 8 * (1 << size) - shift_imm;
6258         if (U) {
6259            if (A & 1) {
6260               switch (size) {
6261                  case 0:
6262                     op = Q ? Iop_QShlNsatUU8x16 : Iop_QShlNsatUU8x8;
6263                     op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
6264                     break;
6265                  case 1:
6266                     op = Q ? Iop_QShlNsatUU16x8 : Iop_QShlNsatUU16x4;
6267                     op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
6268                     break;
6269                  case 2:
6270                     op = Q ? Iop_QShlNsatUU32x4 : Iop_QShlNsatUU32x2;
6271                     op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
6272                     break;
6273                  case 3:
6274                     op = Q ? Iop_QShlNsatUU64x2 : Iop_QShlNsatUU64x1;
6275                     op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
6276                     break;
6277                  default:
6278                     vassert(0);
6279               }
6280               DIP("vqshl.u%u %c%u, %c%u, #%u\n",
6281                   8 << size,
6282                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6283            } else {
6284               switch (size) {
6285                  case 0:
6286                     op = Q ? Iop_QShlNsatSU8x16 : Iop_QShlNsatSU8x8;
6287                     op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
6288                     break;
6289                  case 1:
6290                     op = Q ? Iop_QShlNsatSU16x8 : Iop_QShlNsatSU16x4;
6291                     op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
6292                     break;
6293                  case 2:
6294                     op = Q ? Iop_QShlNsatSU32x4 : Iop_QShlNsatSU32x2;
6295                     op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
6296                     break;
6297                  case 3:
6298                     op = Q ? Iop_QShlNsatSU64x2 : Iop_QShlNsatSU64x1;
6299                     op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
6300                     break;
6301                  default:
6302                     vassert(0);
6303               }
6304               DIP("vqshlu.s%u %c%u, %c%u, #%u\n",
6305                   8 << size,
6306                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6307            }
6308         } else {
6309            if (!(A & 1))
6310               return False;
6311            switch (size) {
6312               case 0:
6313                  op = Q ? Iop_QShlNsatSS8x16 : Iop_QShlNsatSS8x8;
6314                  op_rev = Q ? Iop_SarN8x16 : Iop_SarN8x8;
6315                  break;
6316               case 1:
6317                  op = Q ? Iop_QShlNsatSS16x8 : Iop_QShlNsatSS16x4;
6318                  op_rev = Q ? Iop_SarN16x8 : Iop_SarN16x4;
6319                  break;
6320               case 2:
6321                  op = Q ? Iop_QShlNsatSS32x4 : Iop_QShlNsatSS32x2;
6322                  op_rev = Q ? Iop_SarN32x4 : Iop_SarN32x2;
6323                  break;
6324               case 3:
6325                  op = Q ? Iop_QShlNsatSS64x2 : Iop_QShlNsatSS64x1;
6326                  op_rev = Q ? Iop_SarN64x2 : Iop_Sar64;
6327                  break;
6328               default:
6329                  vassert(0);
6330            }
6331            DIP("vqshl.s%u %c%u, %c%u, #%u\n",
6332                8 << size,
6333                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6334         }
6335         if (Q) {
6336            tmp = newTemp(Ity_V128);
6337            res = newTemp(Ity_V128);
6338            reg_m = newTemp(Ity_V128);
6339            assign(reg_m, getQReg(mreg));
6340         } else {
6341            tmp = newTemp(Ity_I64);
6342            res = newTemp(Ity_I64);
6343            reg_m = newTemp(Ity_I64);
6344            assign(reg_m, getDRegI64(mreg));
6345         }
6346         assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6347         assign(tmp, binop(op_rev, mkexpr(res), mkU8(shift_imm)));
6348         setFlag_QC(mkexpr(tmp), mkexpr(reg_m), Q, condT);
6349         if (Q)
6350            putQReg(dreg, mkexpr(res), condT);
6351         else
6352            putDRegI64(dreg, mkexpr(res), condT);
6353         return True;
6354      case 8:
6355         if (!U) {
6356            if (L == 1)
6357               return False;
6358            size++;
6359            dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
6360            mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
6361            if (mreg & 1)
6362               return False;
6363            mreg >>= 1;
6364            if (!B) {
6365               /* VSHRN*/
6366               IROp narOp;
6367               reg_m = newTemp(Ity_V128);
6368               assign(reg_m, getQReg(mreg));
6369               res = newTemp(Ity_I64);
6370               switch (size) {
6371                  case 1:
6372                     op = Iop_ShrN16x8;
6373                     narOp = Iop_NarrowUn16to8x8;
6374                     break;
6375                  case 2:
6376                     op = Iop_ShrN32x4;
6377                     narOp = Iop_NarrowUn32to16x4;
6378                     break;
6379                  case 3:
6380                     op = Iop_ShrN64x2;
6381                     narOp = Iop_NarrowUn64to32x2;
6382                     break;
6383                  default:
6384                     vassert(0);
6385               }
6386               assign(res, unop(narOp,
6387                                binop(op,
6388                                      mkexpr(reg_m),
6389                                      mkU8(shift_imm))));
6390               putDRegI64(dreg, mkexpr(res), condT);
6391               DIP("vshrn.i%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6392                   shift_imm);
6393               return True;
6394            } else {
6395               /* VRSHRN   */
6396               IROp addOp, shOp, narOp;
6397               IRExpr *imm_val;
6398               reg_m = newTemp(Ity_V128);
6399               assign(reg_m, getQReg(mreg));
6400               res = newTemp(Ity_I64);
6401               imm = 1L;
6402               switch (size) {
6403                  case 0: imm = (imm <<  8) | imm; /* fall through */
6404                  case 1: imm = (imm << 16) | imm; /* fall through */
6405                  case 2: imm = (imm << 32) | imm; /* fall through */
6406                  case 3: break;
6407                  default: vassert(0);
6408               }
6409               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
6410               switch (size) {
6411                  case 1:
6412                     addOp = Iop_Add16x8;
6413                     shOp = Iop_ShrN16x8;
6414                     narOp = Iop_NarrowUn16to8x8;
6415                     break;
6416                  case 2:
6417                     addOp = Iop_Add32x4;
6418                     shOp = Iop_ShrN32x4;
6419                     narOp = Iop_NarrowUn32to16x4;
6420                     break;
6421                  case 3:
6422                     addOp = Iop_Add64x2;
6423                     shOp = Iop_ShrN64x2;
6424                     narOp = Iop_NarrowUn64to32x2;
6425                     break;
6426                  default:
6427                     vassert(0);
6428               }
6429               assign(res, unop(narOp,
6430                                binop(addOp,
6431                                      binop(shOp,
6432                                            mkexpr(reg_m),
6433                                            mkU8(shift_imm)),
6434                                      binop(Iop_AndV128,
6435                                            binop(shOp,
6436                                                  mkexpr(reg_m),
6437                                                  mkU8(shift_imm - 1)),
6438                                            imm_val))));
6439               putDRegI64(dreg, mkexpr(res), condT);
6440               if (shift_imm == 0) {
6441                  DIP("vmov%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6442                      shift_imm);
6443               } else {
6444                  DIP("vrshrn.i%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6445                      shift_imm);
6446               }
6447               return True;
6448            }
6449         } else {
6450            /* fall through */
6451         }
6452      case 9:
6453         dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
6454         mreg = ((theInstr >>  1) & 0x10) | (theInstr & 0xF);
6455         if (mreg & 1)
6456            return False;
6457         mreg >>= 1;
6458         size++;
6459         if ((theInstr >> 8) & 1) {
6460            switch (size) {
6461               case 1:
6462                  op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
6463                  cvt = U ? Iop_QNarrowUn16Uto8Ux8 : Iop_QNarrowUn16Sto8Sx8;
6464                  cvt2 = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
6465                  break;
6466               case 2:
6467                  op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
6468                  cvt = U ? Iop_QNarrowUn32Uto16Ux4 : Iop_QNarrowUn32Sto16Sx4;
6469                  cvt2 = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
6470                  break;
6471               case 3:
6472                  op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
6473                  cvt = U ? Iop_QNarrowUn64Uto32Ux2 : Iop_QNarrowUn64Sto32Sx2;
6474                  cvt2 = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
6475                  break;
6476               default:
6477                  vassert(0);
6478            }
6479            DIP("vq%sshrn.%c%u d%u, q%u, #%u\n", B ? "r" : "",
6480                U ? 'u' : 's', 8 << size, dreg, mreg, shift_imm);
6481         } else {
6482            vassert(U);
6483            switch (size) {
6484               case 1:
6485                  op = Iop_SarN16x8;
6486                  cvt = Iop_QNarrowUn16Sto8Ux8;
6487                  cvt2 = Iop_Widen8Uto16x8;
6488                  break;
6489               case 2:
6490                  op = Iop_SarN32x4;
6491                  cvt = Iop_QNarrowUn32Sto16Ux4;
6492                  cvt2 = Iop_Widen16Uto32x4;
6493                  break;
6494               case 3:
6495                  op = Iop_SarN64x2;
6496                  cvt = Iop_QNarrowUn64Sto32Ux2;
6497                  cvt2 = Iop_Widen32Uto64x2;
6498                  break;
6499               default:
6500                  vassert(0);
6501            }
6502            DIP("vq%sshrun.s%u d%u, q%u, #%u\n", B ? "r" : "",
6503                8 << size, dreg, mreg, shift_imm);
6504         }
6505         if (B) {
6506            if (shift_imm > 0) {
6507               imm = 1;
6508               switch (size) {
6509                  case 1: imm = (imm << 16) | imm; /* fall through */
6510                  case 2: imm = (imm << 32) | imm; /* fall through */
6511                  case 3: break;
6512                  case 0: default: vassert(0);
6513               }
6514               switch (size) {
6515                  case 1: add = Iop_Add16x8; break;
6516                  case 2: add = Iop_Add32x4; break;
6517                  case 3: add = Iop_Add64x2; break;
6518                  case 0: default: vassert(0);
6519               }
6520            }
6521         }
6522         reg_m = newTemp(Ity_V128);
6523         res = newTemp(Ity_V128);
6524         assign(reg_m, getQReg(mreg));
6525         if (B) {
6526            /* VQRSHRN, VQRSHRUN */
6527            assign(res, binop(add,
6528                              binop(op, mkexpr(reg_m), mkU8(shift_imm)),
6529                              binop(Iop_AndV128,
6530                                    binop(op,
6531                                          mkexpr(reg_m),
6532                                          mkU8(shift_imm - 1)),
6533                                    mkU128(imm))));
6534         } else {
6535            /* VQSHRN, VQSHRUN */
6536            assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6537         }
6538         setFlag_QC(unop(cvt2, unop(cvt, mkexpr(res))), mkexpr(res),
6539                    True, condT);
6540         putDRegI64(dreg, unop(cvt, mkexpr(res)), condT);
6541         return True;
6542      case 10:
6543         /* VSHLL
6544            VMOVL ::= VSHLL #0 */
6545         if (B)
6546            return False;
6547         if (dreg & 1)
6548            return False;
6549         dreg >>= 1;
6550         shift_imm = (8 << size) - shift_imm;
6551         res = newTemp(Ity_V128);
6552         switch (size) {
6553            case 0:
6554               op = Iop_ShlN16x8;
6555               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
6556               break;
6557            case 1:
6558               op = Iop_ShlN32x4;
6559               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
6560               break;
6561            case 2:
6562               op = Iop_ShlN64x2;
6563               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
6564               break;
6565            case 3:
6566               return False;
6567            default:
6568               vassert(0);
6569         }
6570         assign(res, binop(op, unop(cvt, getDRegI64(mreg)), mkU8(shift_imm)));
6571         putQReg(dreg, mkexpr(res), condT);
6572         if (shift_imm == 0) {
6573            DIP("vmovl.%c%u q%u, d%u\n", U ? 'u' : 's', 8 << size,
6574                dreg, mreg);
6575         } else {
6576            DIP("vshll.%c%u q%u, d%u, #%u\n", U ? 'u' : 's', 8 << size,
6577                dreg, mreg, shift_imm);
6578         }
6579         return True;
6580      case 14:
6581      case 15:
6582         /* VCVT floating-point <-> fixed-point */
6583         if ((theInstr >> 8) & 1) {
6584            if (U) {
6585               op = Q ? Iop_F32ToFixed32Ux4_RZ : Iop_F32ToFixed32Ux2_RZ;
6586            } else {
6587               op = Q ? Iop_F32ToFixed32Sx4_RZ : Iop_F32ToFixed32Sx2_RZ;
6588            }
6589            DIP("vcvt.%c32.f32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
6590                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
6591                64 - ((theInstr >> 16) & 0x3f));
6592         } else {
6593            if (U) {
6594               op = Q ? Iop_Fixed32UToF32x4_RN : Iop_Fixed32UToF32x2_RN;
6595            } else {
6596               op = Q ? Iop_Fixed32SToF32x4_RN : Iop_Fixed32SToF32x2_RN;
6597            }
6598            DIP("vcvt.f32.%c32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
6599                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
6600                64 - ((theInstr >> 16) & 0x3f));
6601         }
6602         if (((theInstr >> 21) & 1) == 0)
6603            return False;
6604         if (Q) {
6605            putQReg(dreg, binop(op, getQReg(mreg),
6606                     mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
6607         } else {
6608            putDRegI64(dreg, binop(op, getDRegI64(mreg),
6609                       mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
6610         }
6611         return True;
6612      default:
6613         return False;
6614
6615   }
6616   return False;
6617}
6618
6619/* A7.4.5 Two registers, miscellaneous */
6620static
6621Bool dis_neon_data_2reg_misc ( UInt theInstr, IRTemp condT )
6622{
6623   UInt A = (theInstr >> 16) & 3;
6624   UInt B = (theInstr >> 6) & 0x1f;
6625   UInt Q = (theInstr >> 6) & 1;
6626   UInt U = (theInstr >> 24) & 1;
6627   UInt size = (theInstr >> 18) & 3;
6628   UInt dreg = get_neon_d_regno(theInstr);
6629   UInt mreg = get_neon_m_regno(theInstr);
6630   UInt F = (theInstr >> 10) & 1;
6631   IRTemp arg_d = IRTemp_INVALID;
6632   IRTemp arg_m = IRTemp_INVALID;
6633   IRTemp res = IRTemp_INVALID;
6634   switch (A) {
6635      case 0:
6636         if (Q) {
6637            arg_m = newTemp(Ity_V128);
6638            res = newTemp(Ity_V128);
6639            assign(arg_m, getQReg(mreg));
6640         } else {
6641            arg_m = newTemp(Ity_I64);
6642            res = newTemp(Ity_I64);
6643            assign(arg_m, getDRegI64(mreg));
6644         }
6645         switch (B >> 1) {
6646            case 0: {
6647               /* VREV64 */
6648               IROp op;
6649               switch (size) {
6650                  case 0:
6651                     op = Q ? Iop_Reverse8sIn64_x2 : Iop_Reverse8sIn64_x1;
6652                     break;
6653                  case 1:
6654                     op = Q ? Iop_Reverse16sIn64_x2 : Iop_Reverse16sIn64_x1;
6655                     break;
6656                  case 2:
6657                     op = Q ? Iop_Reverse32sIn64_x2 : Iop_Reverse32sIn64_x1;
6658                     break;
6659                  case 3:
6660                     return False;
6661                  default:
6662                     vassert(0);
6663               }
6664               assign(res, unop(op, mkexpr(arg_m)));
6665               DIP("vrev64.%u %c%u, %c%u\n", 8 << size,
6666                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6667               break;
6668            }
6669            case 1: {
6670               /* VREV32 */
6671               IROp op;
6672               switch (size) {
6673                  case 0:
6674                     op = Q ? Iop_Reverse8sIn32_x4 : Iop_Reverse8sIn32_x2;
6675                     break;
6676                  case 1:
6677                     op = Q ? Iop_Reverse16sIn32_x4 : Iop_Reverse16sIn32_x2;
6678                     break;
6679                  case 2:
6680                  case 3:
6681                     return False;
6682                  default:
6683                     vassert(0);
6684               }
6685               assign(res, unop(op, mkexpr(arg_m)));
6686               DIP("vrev32.%u %c%u, %c%u\n", 8 << size,
6687                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6688               break;
6689            }
6690            case 2: {
6691               /* VREV16 */
6692               IROp op;
6693               switch (size) {
6694                  case 0:
6695                     op = Q ? Iop_Reverse8sIn16_x8 : Iop_Reverse8sIn16_x4;
6696                     break;
6697                  case 1:
6698                  case 2:
6699                  case 3:
6700                     return False;
6701                  default:
6702                     vassert(0);
6703               }
6704               assign(res, unop(op, mkexpr(arg_m)));
6705               DIP("vrev16.%u %c%u, %c%u\n", 8 << size,
6706                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6707               break;
6708            }
6709            case 3:
6710               return False;
6711            case 4:
6712            case 5: {
6713               /* VPADDL */
6714               IROp op;
6715               U = (theInstr >> 7) & 1;
6716               if (Q) {
6717                  switch (size) {
6718                     case 0: op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16; break;
6719                     case 1: op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8; break;
6720                     case 2: op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4; break;
6721                     case 3: return False;
6722                     default: vassert(0);
6723                  }
6724               } else {
6725                  switch (size) {
6726                     case 0: op = U ? Iop_PwAddL8Ux8  : Iop_PwAddL8Sx8;  break;
6727                     case 1: op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4; break;
6728                     case 2: op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2; break;
6729                     case 3: return False;
6730                     default: vassert(0);
6731                  }
6732               }
6733               assign(res, unop(op, mkexpr(arg_m)));
6734               DIP("vpaddl.%c%u %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
6735                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6736               break;
6737            }
6738            case 6:
6739            case 7:
6740               return False;
6741            case 8: {
6742               /* VCLS */
6743               IROp op;
6744               switch (size) {
6745                  case 0: op = Q ? Iop_Cls8x16 : Iop_Cls8x8; break;
6746                  case 1: op = Q ? Iop_Cls16x8 : Iop_Cls16x4; break;
6747                  case 2: op = Q ? Iop_Cls32x4 : Iop_Cls32x2; break;
6748                  case 3: return False;
6749                  default: vassert(0);
6750               }
6751               assign(res, unop(op, mkexpr(arg_m)));
6752               DIP("vcls.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6753                   Q ? 'q' : 'd', mreg);
6754               break;
6755            }
6756            case 9: {
6757               /* VCLZ */
6758               IROp op;
6759               switch (size) {
6760                  case 0: op = Q ? Iop_Clz8x16 : Iop_Clz8x8; break;
6761                  case 1: op = Q ? Iop_Clz16x8 : Iop_Clz16x4; break;
6762                  case 2: op = Q ? Iop_Clz32x4 : Iop_Clz32x2; break;
6763                  case 3: return False;
6764                  default: vassert(0);
6765               }
6766               assign(res, unop(op, mkexpr(arg_m)));
6767               DIP("vclz.i%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6768                   Q ? 'q' : 'd', mreg);
6769               break;
6770            }
6771            case 10:
6772               /* VCNT */
6773               assign(res, unop(Q ? Iop_Cnt8x16 : Iop_Cnt8x8, mkexpr(arg_m)));
6774               DIP("vcnt.8 %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
6775                   mreg);
6776               break;
6777            case 11:
6778               /* VMVN */
6779               if (Q)
6780                  assign(res, unop(Iop_NotV128, mkexpr(arg_m)));
6781               else
6782                  assign(res, unop(Iop_Not64, mkexpr(arg_m)));
6783               DIP("vmvn %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
6784                   mreg);
6785               break;
6786            case 12:
6787            case 13: {
6788               /* VPADAL */
6789               IROp op, add_op;
6790               U = (theInstr >> 7) & 1;
6791               if (Q) {
6792                  switch (size) {
6793                     case 0:
6794                        op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16;
6795                        add_op = Iop_Add16x8;
6796                        break;
6797                     case 1:
6798                        op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8;
6799                        add_op = Iop_Add32x4;
6800                        break;
6801                     case 2:
6802                        op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4;
6803                        add_op = Iop_Add64x2;
6804                        break;
6805                     case 3:
6806                        return False;
6807                     default:
6808                        vassert(0);
6809                  }
6810               } else {
6811                  switch (size) {
6812                     case 0:
6813                        op = U ? Iop_PwAddL8Ux8 : Iop_PwAddL8Sx8;
6814                        add_op = Iop_Add16x4;
6815                        break;
6816                     case 1:
6817                        op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4;
6818                        add_op = Iop_Add32x2;
6819                        break;
6820                     case 2:
6821                        op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2;
6822                        add_op = Iop_Add64;
6823                        break;
6824                     case 3:
6825                        return False;
6826                     default:
6827                        vassert(0);
6828                  }
6829               }
6830               if (Q) {
6831                  arg_d = newTemp(Ity_V128);
6832                  assign(arg_d, getQReg(dreg));
6833               } else {
6834                  arg_d = newTemp(Ity_I64);
6835                  assign(arg_d, getDRegI64(dreg));
6836               }
6837               assign(res, binop(add_op, unop(op, mkexpr(arg_m)),
6838                                         mkexpr(arg_d)));
6839               DIP("vpadal.%c%u %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
6840                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6841               break;
6842            }
6843            case 14: {
6844               /* VQABS */
6845               IROp op_sub, op_qsub, op_cmp;
6846               IRTemp mask, tmp;
6847               IRExpr *zero1, *zero2;
6848               IRExpr *neg, *neg2;
6849               if (Q) {
6850                  zero1 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6851                  zero2 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6852                  mask = newTemp(Ity_V128);
6853                  tmp = newTemp(Ity_V128);
6854               } else {
6855                  zero1 = mkU64(0);
6856                  zero2 = mkU64(0);
6857                  mask = newTemp(Ity_I64);
6858                  tmp = newTemp(Ity_I64);
6859               }
6860               switch (size) {
6861                  case 0:
6862                     op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
6863                     op_qsub = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
6864                     op_cmp = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
6865                     break;
6866                  case 1:
6867                     op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
6868                     op_qsub = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
6869                     op_cmp = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4;
6870                     break;
6871                  case 2:
6872                     op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
6873                     op_qsub = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
6874                     op_cmp = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2;
6875                     break;
6876                  case 3:
6877                     return False;
6878                  default:
6879                     vassert(0);
6880               }
6881               assign(mask, binop(op_cmp, mkexpr(arg_m), zero1));
6882               neg = binop(op_qsub, zero2, mkexpr(arg_m));
6883               neg2 = binop(op_sub, zero2, mkexpr(arg_m));
6884               assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
6885                                 binop(Q ? Iop_AndV128 : Iop_And64,
6886                                       mkexpr(mask),
6887                                       mkexpr(arg_m)),
6888                                 binop(Q ? Iop_AndV128 : Iop_And64,
6889                                       unop(Q ? Iop_NotV128 : Iop_Not64,
6890                                            mkexpr(mask)),
6891                                       neg)));
6892               assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
6893                                 binop(Q ? Iop_AndV128 : Iop_And64,
6894                                       mkexpr(mask),
6895                                       mkexpr(arg_m)),
6896                                 binop(Q ? Iop_AndV128 : Iop_And64,
6897                                       unop(Q ? Iop_NotV128 : Iop_Not64,
6898                                            mkexpr(mask)),
6899                                       neg2)));
6900               setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
6901               DIP("vqabs.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6902                   Q ? 'q' : 'd', mreg);
6903               break;
6904            }
6905            case 15: {
6906               /* VQNEG */
6907               IROp op, op2;
6908               IRExpr *zero;
6909               if (Q) {
6910                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6911               } else {
6912                  zero = mkU64(0);
6913               }
6914               switch (size) {
6915                  case 0:
6916                     op = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
6917                     op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
6918                     break;
6919                  case 1:
6920                     op = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
6921                     op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
6922                     break;
6923                  case 2:
6924                     op = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
6925                     op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
6926                     break;
6927                  case 3:
6928                     return False;
6929                  default:
6930                     vassert(0);
6931               }
6932               assign(res, binop(op, zero, mkexpr(arg_m)));
6933               setFlag_QC(mkexpr(res), binop(op2, zero, mkexpr(arg_m)),
6934                          Q, condT);
6935               DIP("vqneg.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6936                   Q ? 'q' : 'd', mreg);
6937               break;
6938            }
6939            default:
6940               vassert(0);
6941         }
6942         if (Q) {
6943            putQReg(dreg, mkexpr(res), condT);
6944         } else {
6945            putDRegI64(dreg, mkexpr(res), condT);
6946         }
6947         return True;
6948      case 1:
6949         if (Q) {
6950            arg_m = newTemp(Ity_V128);
6951            res = newTemp(Ity_V128);
6952            assign(arg_m, getQReg(mreg));
6953         } else {
6954            arg_m = newTemp(Ity_I64);
6955            res = newTemp(Ity_I64);
6956            assign(arg_m, getDRegI64(mreg));
6957         }
6958         switch ((B >> 1) & 0x7) {
6959            case 0: {
6960               /* VCGT #0 */
6961               IRExpr *zero;
6962               IROp op;
6963               if (Q) {
6964                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6965               } else {
6966                  zero = mkU64(0);
6967               }
6968               if (F) {
6969                  switch (size) {
6970                     case 0: case 1: case 3: return False;
6971                     case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
6972                     default: vassert(0);
6973                  }
6974               } else {
6975                  switch (size) {
6976                     case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
6977                     case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
6978                     case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
6979                     case 3: return False;
6980                     default: vassert(0);
6981                  }
6982               }
6983               assign(res, binop(op, mkexpr(arg_m), zero));
6984               DIP("vcgt.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
6985                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6986               break;
6987            }
6988            case 1: {
6989               /* VCGE #0 */
6990               IROp op;
6991               IRExpr *zero;
6992               if (Q) {
6993                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6994               } else {
6995                  zero = mkU64(0);
6996               }
6997               if (F) {
6998                  switch (size) {
6999                     case 0: case 1: case 3: return False;
7000                     case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
7001                     default: vassert(0);
7002                  }
7003                  assign(res, binop(op, mkexpr(arg_m), zero));
7004               } else {
7005                  switch (size) {
7006                     case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7007                     case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7008                     case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7009                     case 3: return False;
7010                     default: vassert(0);
7011                  }
7012                  assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7013                                   binop(op, zero, mkexpr(arg_m))));
7014               }
7015               DIP("vcge.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7016                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7017               break;
7018            }
7019            case 2: {
7020               /* VCEQ #0 */
7021               IROp op;
7022               IRExpr *zero;
7023               if (F) {
7024                  if (Q) {
7025                     zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7026                  } else {
7027                     zero = mkU64(0);
7028                  }
7029                  switch (size) {
7030                     case 0: case 1: case 3: return False;
7031                     case 2: op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2; break;
7032                     default: vassert(0);
7033                  }
7034                  assign(res, binop(op, zero, mkexpr(arg_m)));
7035               } else {
7036                  switch (size) {
7037                     case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
7038                     case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
7039                     case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
7040                     case 3: return False;
7041                     default: vassert(0);
7042                  }
7043                  assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7044                                   unop(op, mkexpr(arg_m))));
7045               }
7046               DIP("vceq.%c%u %c%u, %c%u, #0\n", F ? 'f' : 'i', 8 << size,
7047                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7048               break;
7049            }
7050            case 3: {
7051               /* VCLE #0 */
7052               IRExpr *zero;
7053               IROp op;
7054               if (Q) {
7055                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7056               } else {
7057                  zero = mkU64(0);
7058               }
7059               if (F) {
7060                  switch (size) {
7061                     case 0: case 1: case 3: return False;
7062                     case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
7063                     default: vassert(0);
7064                  }
7065                  assign(res, binop(op, zero, mkexpr(arg_m)));
7066               } else {
7067                  switch (size) {
7068                     case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7069                     case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7070                     case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7071                     case 3: return False;
7072                     default: vassert(0);
7073                  }
7074                  assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7075                                   binop(op, mkexpr(arg_m), zero)));
7076               }
7077               DIP("vcle.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7078                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7079               break;
7080            }
7081            case 4: {
7082               /* VCLT #0 */
7083               IROp op;
7084               IRExpr *zero;
7085               if (Q) {
7086                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7087               } else {
7088                  zero = mkU64(0);
7089               }
7090               if (F) {
7091                  switch (size) {
7092                     case 0: case 1: case 3: return False;
7093                     case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
7094                     default: vassert(0);
7095                  }
7096                  assign(res, binop(op, zero, mkexpr(arg_m)));
7097               } else {
7098                  switch (size) {
7099                     case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7100                     case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7101                     case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7102                     case 3: return False;
7103                     default: vassert(0);
7104                  }
7105                  assign(res, binop(op, zero, mkexpr(arg_m)));
7106               }
7107               DIP("vclt.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7108                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7109               break;
7110            }
7111            case 5:
7112               return False;
7113            case 6: {
7114               /* VABS */
7115               if (!F) {
7116                  IROp op;
7117                  switch(size) {
7118                     case 0: op = Q ? Iop_Abs8x16 : Iop_Abs8x8; break;
7119                     case 1: op = Q ? Iop_Abs16x8 : Iop_Abs16x4; break;
7120                     case 2: op = Q ? Iop_Abs32x4 : Iop_Abs32x2; break;
7121                     case 3: return False;
7122                     default: vassert(0);
7123                  }
7124                  assign(res, unop(op, mkexpr(arg_m)));
7125               } else {
7126                  assign(res, unop(Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2,
7127                                   mkexpr(arg_m)));
7128               }
7129               DIP("vabs.%c%u %c%u, %c%u\n",
7130                   F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
7131                   Q ? 'q' : 'd', mreg);
7132               break;
7133            }
7134            case 7: {
7135               /* VNEG */
7136               IROp op;
7137               IRExpr *zero;
7138               if (F) {
7139                  switch (size) {
7140                     case 0: case 1: case 3: return False;
7141                     case 2: op = Q ? Iop_Neg32Fx4 : Iop_Neg32Fx2; break;
7142                     default: vassert(0);
7143                  }
7144                  assign(res, unop(op, mkexpr(arg_m)));
7145               } else {
7146                  if (Q) {
7147                     zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7148                  } else {
7149                     zero = mkU64(0);
7150                  }
7151                  switch (size) {
7152                     case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
7153                     case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
7154                     case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
7155                     case 3: return False;
7156                     default: vassert(0);
7157                  }
7158                  assign(res, binop(op, zero, mkexpr(arg_m)));
7159               }
7160               DIP("vneg.%c%u %c%u, %c%u\n",
7161                   F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
7162                   Q ? 'q' : 'd', mreg);
7163               break;
7164            }
7165            default:
7166               vassert(0);
7167         }
7168         if (Q) {
7169            putQReg(dreg, mkexpr(res), condT);
7170         } else {
7171            putDRegI64(dreg, mkexpr(res), condT);
7172         }
7173         return True;
7174      case 2:
7175         if ((B >> 1) == 0) {
7176            /* VSWP */
7177            if (Q) {
7178               arg_m = newTemp(Ity_V128);
7179               assign(arg_m, getQReg(mreg));
7180               putQReg(mreg, getQReg(dreg), condT);
7181               putQReg(dreg, mkexpr(arg_m), condT);
7182            } else {
7183               arg_m = newTemp(Ity_I64);
7184               assign(arg_m, getDRegI64(mreg));
7185               putDRegI64(mreg, getDRegI64(dreg), condT);
7186               putDRegI64(dreg, mkexpr(arg_m), condT);
7187            }
7188            DIP("vswp %c%u, %c%u\n",
7189                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7190            return True;
7191         } else if ((B >> 1) == 1) {
7192            /* VTRN */
7193            IROp op_odd = Iop_INVALID, op_even = Iop_INVALID;
7194            IRTemp old_m, old_d, new_d, new_m;
7195            if (Q) {
7196               old_m = newTemp(Ity_V128);
7197               old_d = newTemp(Ity_V128);
7198               new_m = newTemp(Ity_V128);
7199               new_d = newTemp(Ity_V128);
7200               assign(old_m, getQReg(mreg));
7201               assign(old_d, getQReg(dreg));
7202            } else {
7203               old_m = newTemp(Ity_I64);
7204               old_d = newTemp(Ity_I64);
7205               new_m = newTemp(Ity_I64);
7206               new_d = newTemp(Ity_I64);
7207               assign(old_m, getDRegI64(mreg));
7208               assign(old_d, getDRegI64(dreg));
7209            }
7210            if (Q) {
7211               switch (size) {
7212                  case 0:
7213                     op_odd  = Iop_InterleaveOddLanes8x16;
7214                     op_even = Iop_InterleaveEvenLanes8x16;
7215                     break;
7216                  case 1:
7217                     op_odd  = Iop_InterleaveOddLanes16x8;
7218                     op_even = Iop_InterleaveEvenLanes16x8;
7219                     break;
7220                  case 2:
7221                     op_odd  = Iop_InterleaveOddLanes32x4;
7222                     op_even = Iop_InterleaveEvenLanes32x4;
7223                     break;
7224                  case 3:
7225                     return False;
7226                  default:
7227                     vassert(0);
7228               }
7229            } else {
7230               switch (size) {
7231                  case 0:
7232                     op_odd  = Iop_InterleaveOddLanes8x8;
7233                     op_even = Iop_InterleaveEvenLanes8x8;
7234                     break;
7235                  case 1:
7236                     op_odd  = Iop_InterleaveOddLanes16x4;
7237                     op_even = Iop_InterleaveEvenLanes16x4;
7238                     break;
7239                  case 2:
7240                     op_odd  = Iop_InterleaveHI32x2;
7241                     op_even = Iop_InterleaveLO32x2;
7242                     break;
7243                  case 3:
7244                     return False;
7245                  default:
7246                     vassert(0);
7247               }
7248            }
7249            assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
7250            assign(new_m, binop(op_odd, mkexpr(old_m), mkexpr(old_d)));
7251            if (Q) {
7252               putQReg(dreg, mkexpr(new_d), condT);
7253               putQReg(mreg, mkexpr(new_m), condT);
7254            } else {
7255               putDRegI64(dreg, mkexpr(new_d), condT);
7256               putDRegI64(mreg, mkexpr(new_m), condT);
7257            }
7258            DIP("vtrn.%u %c%u, %c%u\n",
7259                8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7260            return True;
7261         } else if ((B >> 1) == 2) {
7262            /* VUZP */
7263            IROp op_even, op_odd;
7264            IRTemp old_m, old_d, new_m, new_d;
7265            if (!Q && size == 2)
7266               return False;
7267            if (Q) {
7268               old_m = newTemp(Ity_V128);
7269               old_d = newTemp(Ity_V128);
7270               new_m = newTemp(Ity_V128);
7271               new_d = newTemp(Ity_V128);
7272               assign(old_m, getQReg(mreg));
7273               assign(old_d, getQReg(dreg));
7274            } else {
7275               old_m = newTemp(Ity_I64);
7276               old_d = newTemp(Ity_I64);
7277               new_m = newTemp(Ity_I64);
7278               new_d = newTemp(Ity_I64);
7279               assign(old_m, getDRegI64(mreg));
7280               assign(old_d, getDRegI64(dreg));
7281            }
7282            switch (size) {
7283               case 0:
7284                  op_odd  = Q ? Iop_CatOddLanes8x16 : Iop_CatOddLanes8x8;
7285                  op_even = Q ? Iop_CatEvenLanes8x16 : Iop_CatEvenLanes8x8;
7286                  break;
7287               case 1:
7288                  op_odd  = Q ? Iop_CatOddLanes16x8 : Iop_CatOddLanes16x4;
7289                  op_even = Q ? Iop_CatEvenLanes16x8 : Iop_CatEvenLanes16x4;
7290                  break;
7291               case 2:
7292                  op_odd  = Iop_CatOddLanes32x4;
7293                  op_even = Iop_CatEvenLanes32x4;
7294                  break;
7295               case 3:
7296                  return False;
7297               default:
7298                  vassert(0);
7299            }
7300            assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
7301            assign(new_m, binop(op_odd,  mkexpr(old_m), mkexpr(old_d)));
7302            if (Q) {
7303               putQReg(dreg, mkexpr(new_d), condT);
7304               putQReg(mreg, mkexpr(new_m), condT);
7305            } else {
7306               putDRegI64(dreg, mkexpr(new_d), condT);
7307               putDRegI64(mreg, mkexpr(new_m), condT);
7308            }
7309            DIP("vuzp.%u %c%u, %c%u\n",
7310                8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7311            return True;
7312         } else if ((B >> 1) == 3) {
7313            /* VZIP */
7314            IROp op_lo, op_hi;
7315            IRTemp old_m, old_d, new_m, new_d;
7316            if (!Q && size == 2)
7317               return False;
7318            if (Q) {
7319               old_m = newTemp(Ity_V128);
7320               old_d = newTemp(Ity_V128);
7321               new_m = newTemp(Ity_V128);
7322               new_d = newTemp(Ity_V128);
7323               assign(old_m, getQReg(mreg));
7324               assign(old_d, getQReg(dreg));
7325            } else {
7326               old_m = newTemp(Ity_I64);
7327               old_d = newTemp(Ity_I64);
7328               new_m = newTemp(Ity_I64);
7329               new_d = newTemp(Ity_I64);
7330               assign(old_m, getDRegI64(mreg));
7331               assign(old_d, getDRegI64(dreg));
7332            }
7333            switch (size) {
7334               case 0:
7335                  op_hi = Q ? Iop_InterleaveHI8x16 : Iop_InterleaveHI8x8;
7336                  op_lo = Q ? Iop_InterleaveLO8x16 : Iop_InterleaveLO8x8;
7337                  break;
7338               case 1:
7339                  op_hi = Q ? Iop_InterleaveHI16x8 : Iop_InterleaveHI16x4;
7340                  op_lo = Q ? Iop_InterleaveLO16x8 : Iop_InterleaveLO16x4;
7341                  break;
7342               case 2:
7343                  op_hi = Iop_InterleaveHI32x4;
7344                  op_lo = Iop_InterleaveLO32x4;
7345                  break;
7346               case 3:
7347                  return False;
7348               default:
7349                  vassert(0);
7350            }
7351            assign(new_d, binop(op_lo, mkexpr(old_m), mkexpr(old_d)));
7352            assign(new_m, binop(op_hi, mkexpr(old_m), mkexpr(old_d)));
7353            if (Q) {
7354               putQReg(dreg, mkexpr(new_d), condT);
7355               putQReg(mreg, mkexpr(new_m), condT);
7356            } else {
7357               putDRegI64(dreg, mkexpr(new_d), condT);
7358               putDRegI64(mreg, mkexpr(new_m), condT);
7359            }
7360            DIP("vzip.%u %c%u, %c%u\n",
7361                8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7362            return True;
7363         } else if (B == 8) {
7364            /* VMOVN */
7365            IROp op;
7366            mreg >>= 1;
7367            switch (size) {
7368               case 0: op = Iop_NarrowUn16to8x8;  break;
7369               case 1: op = Iop_NarrowUn32to16x4; break;
7370               case 2: op = Iop_NarrowUn64to32x2; break;
7371               case 3: return False;
7372               default: vassert(0);
7373            }
7374            putDRegI64(dreg, unop(op, getQReg(mreg)), condT);
7375            DIP("vmovn.i%u d%u, q%u\n", 16 << size, dreg, mreg);
7376            return True;
7377         } else if (B == 9 || (B >> 1) == 5) {
7378            /* VQMOVN, VQMOVUN */
7379            IROp op, op2;
7380            IRTemp tmp;
7381            dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
7382            mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
7383            if (mreg & 1)
7384               return False;
7385            mreg >>= 1;
7386            switch (size) {
7387               case 0: op2 = Iop_NarrowUn16to8x8;  break;
7388               case 1: op2 = Iop_NarrowUn32to16x4; break;
7389               case 2: op2 = Iop_NarrowUn64to32x2; break;
7390               case 3: return False;
7391               default: vassert(0);
7392            }
7393            switch (B & 3) {
7394               case 0:
7395                  vassert(0);
7396               case 1:
7397                  switch (size) {
7398                     case 0: op = Iop_QNarrowUn16Sto8Ux8;  break;
7399                     case 1: op = Iop_QNarrowUn32Sto16Ux4; break;
7400                     case 2: op = Iop_QNarrowUn64Sto32Ux2; break;
7401                     case 3: return False;
7402                     default: vassert(0);
7403                  }
7404                  DIP("vqmovun.s%u d%u, q%u\n", 16 << size, dreg, mreg);
7405                  break;
7406               case 2:
7407                  switch (size) {
7408                     case 0: op = Iop_QNarrowUn16Sto8Sx8;  break;
7409                     case 1: op = Iop_QNarrowUn32Sto16Sx4; break;
7410                     case 2: op = Iop_QNarrowUn64Sto32Sx2; break;
7411                     case 3: return False;
7412                     default: vassert(0);
7413                  }
7414                  DIP("vqmovn.s%u d%u, q%u\n", 16 << size, dreg, mreg);
7415                  break;
7416               case 3:
7417                  switch (size) {
7418                     case 0: op = Iop_QNarrowUn16Uto8Ux8;  break;
7419                     case 1: op = Iop_QNarrowUn32Uto16Ux4; break;
7420                     case 2: op = Iop_QNarrowUn64Uto32Ux2; break;
7421                     case 3: return False;
7422                     default: vassert(0);
7423                  }
7424                  DIP("vqmovn.u%u d%u, q%u\n", 16 << size, dreg, mreg);
7425                  break;
7426               default:
7427                  vassert(0);
7428            }
7429            res = newTemp(Ity_I64);
7430            tmp = newTemp(Ity_I64);
7431            assign(res, unop(op, getQReg(mreg)));
7432            assign(tmp, unop(op2, getQReg(mreg)));
7433            setFlag_QC(mkexpr(res), mkexpr(tmp), False, condT);
7434            putDRegI64(dreg, mkexpr(res), condT);
7435            return True;
7436         } else if (B == 12) {
7437            /* VSHLL (maximum shift) */
7438            IROp op, cvt;
7439            UInt shift_imm;
7440            if (Q)
7441               return False;
7442            if (dreg & 1)
7443               return False;
7444            dreg >>= 1;
7445            shift_imm = 8 << size;
7446            res = newTemp(Ity_V128);
7447            switch (size) {
7448               case 0: op = Iop_ShlN16x8; cvt = Iop_Widen8Uto16x8;  break;
7449               case 1: op = Iop_ShlN32x4; cvt = Iop_Widen16Uto32x4; break;
7450               case 2: op = Iop_ShlN64x2; cvt = Iop_Widen32Uto64x2; break;
7451               case 3: return False;
7452               default: vassert(0);
7453            }
7454            assign(res, binop(op, unop(cvt, getDRegI64(mreg)),
7455                                  mkU8(shift_imm)));
7456            putQReg(dreg, mkexpr(res), condT);
7457            DIP("vshll.i%u q%u, d%u, #%u\n", 8 << size, dreg, mreg, 8 << size);
7458            return True;
7459         } else if ((B >> 3) == 3 && (B & 3) == 0) {
7460            /* VCVT (half<->single) */
7461            /* Half-precision extensions are needed to run this */
7462            vassert(0); // ATC
7463            if (((theInstr >> 18) & 3) != 1)
7464               return False;
7465            if ((theInstr >> 8) & 1) {
7466               if (dreg & 1)
7467                  return False;
7468               dreg >>= 1;
7469               putQReg(dreg, unop(Iop_F16toF32x4, getDRegI64(mreg)),
7470                     condT);
7471               DIP("vcvt.f32.f16 q%u, d%u\n", dreg, mreg);
7472            } else {
7473               if (mreg & 1)
7474                  return False;
7475               mreg >>= 1;
7476               putDRegI64(dreg, unop(Iop_F32toF16x4, getQReg(mreg)),
7477                                condT);
7478               DIP("vcvt.f16.f32 d%u, q%u\n", dreg, mreg);
7479            }
7480            return True;
7481         } else {
7482            return False;
7483         }
7484         vassert(0);
7485         return True;
7486      case 3:
7487         if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,0)) {
7488            /* VRECPE */
7489            IROp op;
7490            F = (theInstr >> 8) & 1;
7491            if (size != 2)
7492               return False;
7493            if (Q) {
7494               op = F ? Iop_RecipEst32Fx4 : Iop_RecipEst32Ux4;
7495               putQReg(dreg, unop(op, getQReg(mreg)), condT);
7496               DIP("vrecpe.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
7497            } else {
7498               op = F ? Iop_RecipEst32Fx2 : Iop_RecipEst32Ux2;
7499               putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7500               DIP("vrecpe.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
7501            }
7502            return True;
7503         } else if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,1)) {
7504            /* VRSQRTE */
7505            IROp op;
7506            F = (B >> 2) & 1;
7507            if (size != 2)
7508               return False;
7509            if (F) {
7510               /* fp */
7511               op = Q ? Iop_RSqrtEst32Fx4 : Iop_RSqrtEst32Fx2;
7512            } else {
7513               /* unsigned int */
7514               op = Q ? Iop_RSqrtEst32Ux4 : Iop_RSqrtEst32Ux2;
7515            }
7516            if (Q) {
7517               putQReg(dreg, unop(op, getQReg(mreg)), condT);
7518               DIP("vrsqrte.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
7519            } else {
7520               putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7521               DIP("vrsqrte.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
7522            }
7523            return True;
7524         } else if ((B >> 3) == 3) {
7525            /* VCVT (fp<->integer) */
7526            IROp op;
7527            if (size != 2)
7528               return False;
7529            switch ((B >> 1) & 3) {
7530               case 0:
7531                  op = Q ? Iop_I32StoFx4 : Iop_I32StoFx2;
7532                  DIP("vcvt.f32.s32 %c%u, %c%u\n",
7533                      Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7534                  break;
7535               case 1:
7536                  op = Q ? Iop_I32UtoFx4 : Iop_I32UtoFx2;
7537                  DIP("vcvt.f32.u32 %c%u, %c%u\n",
7538                      Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7539                  break;
7540               case 2:
7541                  op = Q ? Iop_FtoI32Sx4_RZ : Iop_FtoI32Sx2_RZ;
7542                  DIP("vcvt.s32.f32 %c%u, %c%u\n",
7543                      Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7544                  break;
7545               case 3:
7546                  op = Q ? Iop_FtoI32Ux4_RZ : Iop_FtoI32Ux2_RZ;
7547                  DIP("vcvt.u32.f32 %c%u, %c%u\n",
7548                      Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7549                  break;
7550               default:
7551                  vassert(0);
7552            }
7553            if (Q) {
7554               putQReg(dreg, unop(op, getQReg(mreg)), condT);
7555            } else {
7556               putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7557            }
7558            return True;
7559         } else {
7560            return False;
7561         }
7562         vassert(0);
7563         return True;
7564      default:
7565         vassert(0);
7566   }
7567   return False;
7568}
7569
7570/* A7.4.6 One register and a modified immediate value */
7571static
7572void ppNeonImm(UInt imm, UInt cmode, UInt op)
7573{
7574   int i;
7575   switch (cmode) {
7576      case 0: case 1: case 8: case 9:
7577         vex_printf("0x%x", imm);
7578         break;
7579      case 2: case 3: case 10: case 11:
7580         vex_printf("0x%x00", imm);
7581         break;
7582      case 4: case 5:
7583         vex_printf("0x%x0000", imm);
7584         break;
7585      case 6: case 7:
7586         vex_printf("0x%x000000", imm);
7587         break;
7588      case 12:
7589         vex_printf("0x%xff", imm);
7590         break;
7591      case 13:
7592         vex_printf("0x%xffff", imm);
7593         break;
7594      case 14:
7595         if (op) {
7596            vex_printf("0x");
7597            for (i = 7; i >= 0; i--)
7598               vex_printf("%s", (imm & (1 << i)) ? "ff" : "00");
7599         } else {
7600            vex_printf("0x%x", imm);
7601         }
7602         break;
7603      case 15:
7604         vex_printf("0x%x", imm);
7605         break;
7606   }
7607}
7608
7609static
7610const char *ppNeonImmType(UInt cmode, UInt op)
7611{
7612   switch (cmode) {
7613      case 0 ... 7:
7614      case 12: case 13:
7615         return "i32";
7616      case 8 ... 11:
7617         return "i16";
7618      case 14:
7619         if (op)
7620            return "i64";
7621         else
7622            return "i8";
7623      case 15:
7624         if (op)
7625            vassert(0);
7626         else
7627            return "f32";
7628      default:
7629         vassert(0);
7630   }
7631}
7632
7633static
7634void DIPimm(UInt imm, UInt cmode, UInt op,
7635            const char *instr, UInt Q, UInt dreg)
7636{
7637   if (vex_traceflags & VEX_TRACE_FE) {
7638      vex_printf("%s.%s %c%u, #", instr,
7639                 ppNeonImmType(cmode, op), Q ? 'q' : 'd', dreg);
7640      ppNeonImm(imm, cmode, op);
7641      vex_printf("\n");
7642   }
7643}
7644
7645static
7646Bool dis_neon_data_1reg_and_imm ( UInt theInstr, IRTemp condT )
7647{
7648   UInt dreg = get_neon_d_regno(theInstr);
7649   ULong imm_raw = ((theInstr >> 17) & 0x80) | ((theInstr >> 12) & 0x70) |
7650                  (theInstr & 0xf);
7651   ULong imm_raw_pp = imm_raw;
7652   UInt cmode = (theInstr >> 8) & 0xf;
7653   UInt op_bit = (theInstr >> 5) & 1;
7654   ULong imm = 0;
7655   UInt Q = (theInstr >> 6) & 1;
7656   int i, j;
7657   UInt tmp;
7658   IRExpr *imm_val;
7659   IRExpr *expr;
7660   IRTemp tmp_var;
7661   switch(cmode) {
7662      case 7: case 6:
7663         imm_raw = imm_raw << 8;
7664         /* fallthrough */
7665      case 5: case 4:
7666         imm_raw = imm_raw << 8;
7667         /* fallthrough */
7668      case 3: case 2:
7669         imm_raw = imm_raw << 8;
7670         /* fallthrough */
7671      case 0: case 1:
7672         imm = (imm_raw << 32) | imm_raw;
7673         break;
7674      case 11: case 10:
7675         imm_raw = imm_raw << 8;
7676         /* fallthrough */
7677      case 9: case 8:
7678         imm_raw = (imm_raw << 16) | imm_raw;
7679         imm = (imm_raw << 32) | imm_raw;
7680         break;
7681      case 13:
7682         imm_raw = (imm_raw << 8) | 0xff;
7683         /* fallthrough */
7684      case 12:
7685         imm_raw = (imm_raw << 8) | 0xff;
7686         imm = (imm_raw << 32) | imm_raw;
7687         break;
7688      case 14:
7689         if (! op_bit) {
7690            for(i = 0; i < 8; i++) {
7691               imm = (imm << 8) | imm_raw;
7692            }
7693         } else {
7694            for(i = 7; i >= 0; i--) {
7695               tmp = 0;
7696               for(j = 0; j < 8; j++) {
7697                  tmp = (tmp << 1) | ((imm_raw >> i) & 1);
7698               }
7699               imm = (imm << 8) | tmp;
7700            }
7701         }
7702         break;
7703      case 15:
7704         imm = (imm_raw & 0x80) << 5;
7705         imm |= ((~imm_raw & 0x40) << 5);
7706         for(i = 1; i <= 4; i++)
7707            imm |= (imm_raw & 0x40) << i;
7708         imm |= (imm_raw & 0x7f);
7709         imm = imm << 19;
7710         imm = (imm << 32) | imm;
7711         break;
7712      default:
7713         return False;
7714   }
7715   if (Q) {
7716      imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
7717   } else {
7718      imm_val = mkU64(imm);
7719   }
7720   if (((op_bit == 0) &&
7721      (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 12) == 12))) ||
7722      ((op_bit == 1) && (cmode == 14))) {
7723      /* VMOV (immediate) */
7724      if (Q) {
7725         putQReg(dreg, imm_val, condT);
7726      } else {
7727         putDRegI64(dreg, imm_val, condT);
7728      }
7729      DIPimm(imm_raw_pp, cmode, op_bit, "vmov", Q, dreg);
7730      return True;
7731   }
7732   if ((op_bit == 1) &&
7733      (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 14) == 12))) {
7734      /* VMVN (immediate) */
7735      if (Q) {
7736         putQReg(dreg, unop(Iop_NotV128, imm_val), condT);
7737      } else {
7738         putDRegI64(dreg, unop(Iop_Not64, imm_val), condT);
7739      }
7740      DIPimm(imm_raw_pp, cmode, op_bit, "vmvn", Q, dreg);
7741      return True;
7742   }
7743   if (Q) {
7744      tmp_var = newTemp(Ity_V128);
7745      assign(tmp_var, getQReg(dreg));
7746   } else {
7747      tmp_var = newTemp(Ity_I64);
7748      assign(tmp_var, getDRegI64(dreg));
7749   }
7750   if ((op_bit == 0) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
7751      /* VORR (immediate) */
7752      if (Q)
7753         expr = binop(Iop_OrV128, mkexpr(tmp_var), imm_val);
7754      else
7755         expr = binop(Iop_Or64, mkexpr(tmp_var), imm_val);
7756      DIPimm(imm_raw_pp, cmode, op_bit, "vorr", Q, dreg);
7757   } else if ((op_bit == 1) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
7758      /* VBIC (immediate) */
7759      if (Q)
7760         expr = binop(Iop_AndV128, mkexpr(tmp_var),
7761                                   unop(Iop_NotV128, imm_val));
7762      else
7763         expr = binop(Iop_And64, mkexpr(tmp_var), unop(Iop_Not64, imm_val));
7764      DIPimm(imm_raw_pp, cmode, op_bit, "vbic", Q, dreg);
7765   } else {
7766      return False;
7767   }
7768   if (Q)
7769      putQReg(dreg, expr, condT);
7770   else
7771      putDRegI64(dreg, expr, condT);
7772   return True;
7773}
7774
7775/* A7.4 Advanced SIMD data-processing instructions */
7776static
7777Bool dis_neon_data_processing ( UInt theInstr, IRTemp condT )
7778{
7779   UInt A = (theInstr >> 19) & 0x1F;
7780   UInt B = (theInstr >>  8) & 0xF;
7781   UInt C = (theInstr >>  4) & 0xF;
7782   UInt U = (theInstr >> 24) & 0x1;
7783
7784   if (! (A & 0x10)) {
7785      return dis_neon_data_3same(theInstr, condT);
7786   }
7787   if (((A & 0x17) == 0x10) && ((C & 0x9) == 0x1)) {
7788      return dis_neon_data_1reg_and_imm(theInstr, condT);
7789   }
7790   if ((C & 1) == 1) {
7791      return dis_neon_data_2reg_and_shift(theInstr, condT);
7792   }
7793   if (((C & 5) == 0) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
7794      return dis_neon_data_3diff(theInstr, condT);
7795   }
7796   if (((C & 5) == 4) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
7797      return dis_neon_data_2reg_and_scalar(theInstr, condT);
7798   }
7799   if ((A & 0x16) == 0x16) {
7800      if ((U == 0) && ((C & 1) == 0)) {
7801         return dis_neon_vext(theInstr, condT);
7802      }
7803      if ((U != 1) || ((C & 1) == 1))
7804         return False;
7805      if ((B & 8) == 0) {
7806         return dis_neon_data_2reg_misc(theInstr, condT);
7807      }
7808      if ((B & 12) == 8) {
7809         return dis_neon_vtb(theInstr, condT);
7810      }
7811      if ((B == 12) && ((C & 9) == 0)) {
7812         return dis_neon_vdup(theInstr, condT);
7813      }
7814   }
7815   return False;
7816}
7817
7818
7819/*------------------------------------------------------------*/
7820/*--- NEON loads and stores                                ---*/
7821/*------------------------------------------------------------*/
7822
7823/* For NEON memory operations, we use the standard scheme to handle
7824   conditionalisation: generate a jump around the instruction if the
7825   condition is false.  That's only necessary in Thumb mode, however,
7826   since in ARM mode NEON instructions are unconditional. */
7827
7828/* A helper function for what follows.  It assumes we already went
7829   uncond as per comments at the top of this section. */
7830static
7831void mk_neon_elem_load_to_one_lane( UInt rD, UInt inc, UInt index,
7832                                    UInt N, UInt size, IRTemp addr )
7833{
7834   UInt i;
7835   switch (size) {
7836      case 0:
7837         putDRegI64(rD, triop(Iop_SetElem8x8, getDRegI64(rD), mkU8(index),
7838                    loadLE(Ity_I8, mkexpr(addr))), IRTemp_INVALID);
7839         break;
7840      case 1:
7841         putDRegI64(rD, triop(Iop_SetElem16x4, getDRegI64(rD), mkU8(index),
7842                    loadLE(Ity_I16, mkexpr(addr))), IRTemp_INVALID);
7843         break;
7844      case 2:
7845         putDRegI64(rD, triop(Iop_SetElem32x2, getDRegI64(rD), mkU8(index),
7846                    loadLE(Ity_I32, mkexpr(addr))), IRTemp_INVALID);
7847         break;
7848      default:
7849         vassert(0);
7850   }
7851   for (i = 1; i <= N; i++) {
7852      switch (size) {
7853         case 0:
7854            putDRegI64(rD + i * inc,
7855                       triop(Iop_SetElem8x8,
7856                             getDRegI64(rD + i * inc),
7857                             mkU8(index),
7858                             loadLE(Ity_I8, binop(Iop_Add32,
7859                                                  mkexpr(addr),
7860                                                  mkU32(i * 1)))),
7861                       IRTemp_INVALID);
7862            break;
7863         case 1:
7864            putDRegI64(rD + i * inc,
7865                       triop(Iop_SetElem16x4,
7866                             getDRegI64(rD + i * inc),
7867                             mkU8(index),
7868                             loadLE(Ity_I16, binop(Iop_Add32,
7869                                                   mkexpr(addr),
7870                                                   mkU32(i * 2)))),
7871                       IRTemp_INVALID);
7872            break;
7873         case 2:
7874            putDRegI64(rD + i * inc,
7875                       triop(Iop_SetElem32x2,
7876                             getDRegI64(rD + i * inc),
7877                             mkU8(index),
7878                             loadLE(Ity_I32, binop(Iop_Add32,
7879                                                   mkexpr(addr),
7880                                                   mkU32(i * 4)))),
7881                       IRTemp_INVALID);
7882            break;
7883         default:
7884            vassert(0);
7885      }
7886   }
7887}
7888
7889/* A(nother) helper function for what follows.  It assumes we already
7890   went uncond as per comments at the top of this section. */
7891static
7892void mk_neon_elem_store_from_one_lane( UInt rD, UInt inc, UInt index,
7893                                       UInt N, UInt size, IRTemp addr )
7894{
7895   UInt i;
7896   switch (size) {
7897      case 0:
7898         storeLE(mkexpr(addr),
7899                 binop(Iop_GetElem8x8, getDRegI64(rD), mkU8(index)));
7900         break;
7901      case 1:
7902         storeLE(mkexpr(addr),
7903                 binop(Iop_GetElem16x4, getDRegI64(rD), mkU8(index)));
7904         break;
7905      case 2:
7906         storeLE(mkexpr(addr),
7907                 binop(Iop_GetElem32x2, getDRegI64(rD), mkU8(index)));
7908         break;
7909      default:
7910         vassert(0);
7911   }
7912   for (i = 1; i <= N; i++) {
7913      switch (size) {
7914         case 0:
7915            storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 1)),
7916                    binop(Iop_GetElem8x8, getDRegI64(rD + i * inc),
7917                                          mkU8(index)));
7918            break;
7919         case 1:
7920            storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 2)),
7921                    binop(Iop_GetElem16x4, getDRegI64(rD + i * inc),
7922                                           mkU8(index)));
7923            break;
7924         case 2:
7925            storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 4)),
7926                    binop(Iop_GetElem32x2, getDRegI64(rD + i * inc),
7927                                           mkU8(index)));
7928            break;
7929         default:
7930            vassert(0);
7931      }
7932   }
7933}
7934
7935/* Generate 2x64 -> 2x64 deinterleave code, for VLD2.  Caller must
7936   make *u0 and *u1 be valid IRTemps before the call. */
7937static void math_DEINTERLEAVE_2 (/*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
7938                                 IRTemp i0, IRTemp i1, Int laneszB)
7939{
7940   /* The following assumes that the guest is little endian, and hence
7941      that the memory-side (interleaved) data is stored
7942      little-endianly. */
7943   vassert(u0 && u1);
7944   /* This is pretty easy, since we have primitives directly to
7945      hand. */
7946   if (laneszB == 4) {
7947      // memLE(128 bits) == A0 B0 A1 B1
7948      // i0 == B0 A0, i1 == B1 A1
7949      // u0 == A1 A0, u1 == B1 B0
7950      assign(*u0, binop(Iop_InterleaveLO32x2, mkexpr(i1), mkexpr(i0)));
7951      assign(*u1, binop(Iop_InterleaveHI32x2, mkexpr(i1), mkexpr(i0)));
7952   } else if (laneszB == 2) {
7953      // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
7954      // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
7955      // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
7956      assign(*u0, binop(Iop_CatEvenLanes16x4, mkexpr(i1), mkexpr(i0)));
7957      assign(*u1, binop(Iop_CatOddLanes16x4,  mkexpr(i1), mkexpr(i0)));
7958   } else if (laneszB == 1) {
7959      // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
7960      // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
7961      // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
7962      assign(*u0, binop(Iop_CatEvenLanes8x8, mkexpr(i1), mkexpr(i0)));
7963      assign(*u1, binop(Iop_CatOddLanes8x8,  mkexpr(i1), mkexpr(i0)));
7964   } else {
7965      // Can never happen, since VLD2 only has valid lane widths of 32,
7966      // 16 or 8 bits.
7967      vpanic("math_DEINTERLEAVE_2");
7968   }
7969}
7970
7971/* Generate 2x64 -> 2x64 interleave code, for VST2.  Caller must make
7972   *u0 and *u1 be valid IRTemps before the call. */
7973static void math_INTERLEAVE_2 (/*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
7974                               IRTemp u0, IRTemp u1, Int laneszB)
7975{
7976   /* The following assumes that the guest is little endian, and hence
7977      that the memory-side (interleaved) data is stored
7978      little-endianly. */
7979   vassert(i0 && i1);
7980   /* This is pretty easy, since we have primitives directly to
7981      hand. */
7982   if (laneszB == 4) {
7983      // memLE(128 bits) == A0 B0 A1 B1
7984      // i0 == B0 A0, i1 == B1 A1
7985      // u0 == A1 A0, u1 == B1 B0
7986      assign(*i0, binop(Iop_InterleaveLO32x2, mkexpr(u1), mkexpr(u0)));
7987      assign(*i1, binop(Iop_InterleaveHI32x2, mkexpr(u1), mkexpr(u0)));
7988   } else if (laneszB == 2) {
7989      // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
7990      // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
7991      // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
7992      assign(*i0, binop(Iop_InterleaveLO16x4, mkexpr(u1), mkexpr(u0)));
7993      assign(*i1, binop(Iop_InterleaveHI16x4, mkexpr(u1), mkexpr(u0)));
7994   } else if (laneszB == 1) {
7995      // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
7996      // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
7997      // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
7998      assign(*i0, binop(Iop_InterleaveLO8x8, mkexpr(u1), mkexpr(u0)));
7999      assign(*i1, binop(Iop_InterleaveHI8x8, mkexpr(u1), mkexpr(u0)));
8000   } else {
8001      // Can never happen, since VST2 only has valid lane widths of 32,
8002      // 16 or 8 bits.
8003      vpanic("math_INTERLEAVE_2");
8004   }
8005}
8006
8007// Helper function for generating arbitrary slicing 'n' dicing of
8008// 3 8x8 vectors, as needed for VLD3.8 and VST3.8.
8009static IRExpr* math_PERM_8x8x3(const UChar* desc,
8010                               IRTemp s0, IRTemp s1, IRTemp s2)
8011{
8012   // desc is an array of 8 pairs, encoded as 16 bytes,
8013   // that describe how to assemble the result lanes, starting with
8014   // lane 7.  Each pair is: first component (0..2) says which of
8015   // s0/s1/s2 to use.  Second component (0..7) is the lane number
8016   // in the source to use.
8017   UInt si;
8018   for (si = 0; si < 7; si++) {
8019      vassert(desc[2 * si + 0] <= 2);
8020      vassert(desc[2 * si + 1] <= 7);
8021   }
8022   IRTemp h3 = newTemp(Ity_I64);
8023   IRTemp h2 = newTemp(Ity_I64);
8024   IRTemp h1 = newTemp(Ity_I64);
8025   IRTemp h0 = newTemp(Ity_I64);
8026   IRTemp srcs[3] = {s0, s1, s2};
8027#  define SRC_VEC(_lane)   mkexpr(srcs[desc[2 * (7-(_lane)) + 0]])
8028#  define SRC_SHIFT(_lane) mkU8(56-8*(desc[2 * (7-(_lane)) + 1]))
8029   assign(h3, binop(Iop_InterleaveHI8x8,
8030                    binop(Iop_Shl64, SRC_VEC(7), SRC_SHIFT(7)),
8031                    binop(Iop_Shl64, SRC_VEC(6), SRC_SHIFT(6))));
8032   assign(h2, binop(Iop_InterleaveHI8x8,
8033                    binop(Iop_Shl64, SRC_VEC(5), SRC_SHIFT(5)),
8034                    binop(Iop_Shl64, SRC_VEC(4), SRC_SHIFT(4))));
8035   assign(h1, binop(Iop_InterleaveHI8x8,
8036                    binop(Iop_Shl64, SRC_VEC(3), SRC_SHIFT(3)),
8037                    binop(Iop_Shl64, SRC_VEC(2), SRC_SHIFT(2))));
8038   assign(h0, binop(Iop_InterleaveHI8x8,
8039                    binop(Iop_Shl64, SRC_VEC(1), SRC_SHIFT(1)),
8040                    binop(Iop_Shl64, SRC_VEC(0), SRC_SHIFT(0))));
8041#  undef SRC_VEC
8042#  undef SRC_SHIFT
8043   // Now h3..h0 are 64 bit vectors with useful information only
8044   // in the top 16 bits.  We now concatentate those four 16-bit
8045   // groups so as to produce the final result.
8046   IRTemp w1 = newTemp(Ity_I64);
8047   IRTemp w0 = newTemp(Ity_I64);
8048   assign(w1, binop(Iop_InterleaveHI16x4, mkexpr(h3), mkexpr(h2)));
8049   assign(w0, binop(Iop_InterleaveHI16x4, mkexpr(h1), mkexpr(h0)));
8050   return binop(Iop_InterleaveHI32x2, mkexpr(w1), mkexpr(w0));
8051}
8052
8053/* Generate 3x64 -> 3x64 deinterleave code, for VLD3.  Caller must
8054   make *u0, *u1 and *u2 be valid IRTemps before the call. */
8055static void math_DEINTERLEAVE_3 (
8056               /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1, /*OUT*/IRTemp* u2,
8057               IRTemp i0, IRTemp i1, IRTemp i2, Int laneszB
8058            )
8059{
8060#  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
8061#  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
8062#  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8063   /* The following assumes that the guest is little endian, and hence
8064      that the memory-side (interleaved) data is stored
8065      little-endianly. */
8066   vassert(u0 && u1 && u2);
8067   if (laneszB == 4) {
8068      // memLE(192 bits) == A0 B0 C0 A1 B1 C1
8069      // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
8070      // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
8071      assign(*u0, IHI32x2(SHL64(i1,  0), SHL64(i0, 32)));
8072      assign(*u1, IHI32x2(SHL64(i2, 32), SHL64(i0,  0)));
8073      assign(*u2, IHI32x2(SHL64(i2,  0), SHL64(i1, 32)));
8074   } else if (laneszB == 2) {
8075      // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
8076      // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
8077      // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
8078#     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
8079                IHI32x2(                                      \
8080                   IHI16x4(SHL64((_tmp3),48-16*(_la3)),       \
8081                           SHL64((_tmp2),48-16*(_la2))),      \
8082                   IHI16x4(SHL64((_tmp1),48-16*(_la1)),       \
8083                           SHL64((_tmp0),48-16*(_la0))))
8084      assign(*u0, XXX(i2,1, i1,2, i0,3, i0,0));
8085      assign(*u1, XXX(i2,2, i1,3, i1,0, i0,1));
8086      assign(*u2, XXX(i2,3, i2,0, i1,1, i0,2));
8087#     undef XXX
8088   } else if (laneszB == 1) {
8089      // These describe how the result vectors [7..0] are
8090      // assembled from the source vectors.  Each pair is
8091      // (source vector number, lane number).
8092      static const UChar de0[16] = {2,5, 2,2, 1,7, 1,4, 1,1, 0,6, 0,3, 0,0};
8093      static const UChar de1[16] = {2,6, 2,3, 2,0, 1,5, 1,2, 0,7, 0,4, 0,1};
8094      static const UChar de2[16] = {2,7, 2,4, 2,1, 1,6, 1,3, 1,0, 0,5, 0,2};
8095      assign(*u0, math_PERM_8x8x3(de0, i0, i1, i2));
8096      assign(*u1, math_PERM_8x8x3(de1, i0, i1, i2));
8097      assign(*u2, math_PERM_8x8x3(de2, i0, i1, i2));
8098   } else {
8099      // Can never happen, since VLD3 only has valid lane widths of 32,
8100      // 16 or 8 bits.
8101      vpanic("math_DEINTERLEAVE_3");
8102   }
8103#  undef SHL64
8104#  undef IHI16x4
8105#  undef IHI32x2
8106}
8107
8108/* Generate 3x64 -> 3x64 interleave code, for VST3.  Caller must
8109   make *i0, *i1 and *i2 be valid IRTemps before the call. */
8110static void math_INTERLEAVE_3 (
8111               /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1, /*OUT*/IRTemp* i2,
8112               IRTemp u0, IRTemp u1, IRTemp u2, Int laneszB
8113            )
8114{
8115#  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
8116#  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
8117#  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8118   /* The following assumes that the guest is little endian, and hence
8119      that the memory-side (interleaved) data is stored
8120      little-endianly. */
8121   vassert(i0 && i1 && i2);
8122   if (laneszB == 4) {
8123      // memLE(192 bits) == A0 B0 C0 A1 B1 C1
8124      // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
8125      // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
8126      assign(*i0, IHI32x2(SHL64(u1, 32), SHL64(u0, 32)));
8127      assign(*i1, IHI32x2(SHL64(u0,  0), SHL64(u2, 32)));
8128      assign(*i2, IHI32x2(SHL64(u2,  0), SHL64(u1,  0)));
8129   } else if (laneszB == 2) {
8130      // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
8131      // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
8132      // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
8133#     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
8134                IHI32x2(                                      \
8135                   IHI16x4(SHL64((_tmp3),48-16*(_la3)),       \
8136                           SHL64((_tmp2),48-16*(_la2))),      \
8137                   IHI16x4(SHL64((_tmp1),48-16*(_la1)),       \
8138                           SHL64((_tmp0),48-16*(_la0))))
8139      assign(*i0, XXX(u0,1, u2,0, u1,0, u0,0));
8140      assign(*i1, XXX(u1,2, u0,2, u2,1, u1,1));
8141      assign(*i2, XXX(u2,3, u1,3, u0,3, u2,2));
8142#     undef XXX
8143   } else if (laneszB == 1) {
8144      // These describe how the result vectors [7..0] are
8145      // assembled from the source vectors.  Each pair is
8146      // (source vector number, lane number).
8147      static const UChar in0[16] = {1,2, 0,2, 2,1, 1,1, 0,1, 2,0, 1,0, 0,0};
8148      static const UChar in1[16] = {0,5, 2,4, 1,4, 0,4, 2,3, 1,3, 0,3, 2,2};
8149      static const UChar in2[16] = {2,7, 1,7, 0,7, 2,6, 1,6, 0,6, 2,5, 1,5};
8150      assign(*i0, math_PERM_8x8x3(in0, u0, u1, u2));
8151      assign(*i1, math_PERM_8x8x3(in1, u0, u1, u2));
8152      assign(*i2, math_PERM_8x8x3(in2, u0, u1, u2));
8153   } else {
8154      // Can never happen, since VST3 only has valid lane widths of 32,
8155      // 16 or 8 bits.
8156      vpanic("math_INTERLEAVE_3");
8157   }
8158#  undef SHL64
8159#  undef IHI16x4
8160#  undef IHI32x2
8161}
8162
8163/* Generate 4x64 -> 4x64 deinterleave code, for VLD4.  Caller must
8164   make *u0, *u1, *u2 and *u3 be valid IRTemps before the call. */
8165static void math_DEINTERLEAVE_4 (
8166               /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
8167               /*OUT*/IRTemp* u2, /*OUT*/IRTemp* u3,
8168               IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3, Int laneszB
8169            )
8170{
8171#  define IHI32x2(_t1, _t2) \
8172             binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
8173#  define ILO32x2(_t1, _t2) \
8174             binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
8175#  define IHI16x4(_t1, _t2) \
8176             binop(Iop_InterleaveHI16x4, mkexpr(_t1), mkexpr(_t2))
8177#  define ILO16x4(_t1, _t2) \
8178             binop(Iop_InterleaveLO16x4, mkexpr(_t1), mkexpr(_t2))
8179#  define IHI8x8(_t1, _e2) \
8180             binop(Iop_InterleaveHI8x8, mkexpr(_t1), _e2)
8181#  define SHL64(_tmp, _amt) \
8182             binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8183   /* The following assumes that the guest is little endian, and hence
8184      that the memory-side (interleaved) data is stored
8185      little-endianly. */
8186   vassert(u0 && u1 && u2 && u3);
8187   if (laneszB == 4) {
8188      assign(*u0, ILO32x2(i2, i0));
8189      assign(*u1, IHI32x2(i2, i0));
8190      assign(*u2, ILO32x2(i3, i1));
8191      assign(*u3, IHI32x2(i3, i1));
8192   } else if (laneszB == 2) {
8193      IRTemp b1b0a1a0 = newTemp(Ity_I64);
8194      IRTemp b3b2a3a2 = newTemp(Ity_I64);
8195      IRTemp d1d0c1c0 = newTemp(Ity_I64);
8196      IRTemp d3d2c3c2 = newTemp(Ity_I64);
8197      assign(b1b0a1a0, ILO16x4(i1, i0));
8198      assign(b3b2a3a2, ILO16x4(i3, i2));
8199      assign(d1d0c1c0, IHI16x4(i1, i0));
8200      assign(d3d2c3c2, IHI16x4(i3, i2));
8201      // And now do what we did for the 32-bit case.
8202      assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
8203      assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
8204      assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
8205      assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
8206   } else if (laneszB == 1) {
8207      // Deinterleave into 16-bit chunks, then do as the 16-bit case.
8208      IRTemp i0x = newTemp(Ity_I64);
8209      IRTemp i1x = newTemp(Ity_I64);
8210      IRTemp i2x = newTemp(Ity_I64);
8211      IRTemp i3x = newTemp(Ity_I64);
8212      assign(i0x, IHI8x8(i0, SHL64(i0, 32)));
8213      assign(i1x, IHI8x8(i1, SHL64(i1, 32)));
8214      assign(i2x, IHI8x8(i2, SHL64(i2, 32)));
8215      assign(i3x, IHI8x8(i3, SHL64(i3, 32)));
8216      // From here on is like the 16 bit case.
8217      IRTemp b1b0a1a0 = newTemp(Ity_I64);
8218      IRTemp b3b2a3a2 = newTemp(Ity_I64);
8219      IRTemp d1d0c1c0 = newTemp(Ity_I64);
8220      IRTemp d3d2c3c2 = newTemp(Ity_I64);
8221      assign(b1b0a1a0, ILO16x4(i1x, i0x));
8222      assign(b3b2a3a2, ILO16x4(i3x, i2x));
8223      assign(d1d0c1c0, IHI16x4(i1x, i0x));
8224      assign(d3d2c3c2, IHI16x4(i3x, i2x));
8225      // And now do what we did for the 32-bit case.
8226      assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
8227      assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
8228      assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
8229      assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
8230   } else {
8231      // Can never happen, since VLD4 only has valid lane widths of 32,
8232      // 16 or 8 bits.
8233      vpanic("math_DEINTERLEAVE_4");
8234   }
8235#  undef SHL64
8236#  undef IHI8x8
8237#  undef ILO16x4
8238#  undef IHI16x4
8239#  undef ILO32x2
8240#  undef IHI32x2
8241}
8242
8243/* Generate 4x64 -> 4x64 interleave code, for VST4.  Caller must
8244   make *i0, *i1, *i2 and *i3 be valid IRTemps before the call. */
8245static void math_INTERLEAVE_4 (
8246               /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
8247               /*OUT*/IRTemp* i2, /*OUT*/IRTemp* i3,
8248               IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3, Int laneszB
8249            )
8250{
8251#  define IHI32x2(_t1, _t2) \
8252             binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
8253#  define ILO32x2(_t1, _t2) \
8254             binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
8255#  define CEV16x4(_t1, _t2) \
8256             binop(Iop_CatEvenLanes16x4, mkexpr(_t1), mkexpr(_t2))
8257#  define COD16x4(_t1, _t2) \
8258             binop(Iop_CatOddLanes16x4, mkexpr(_t1), mkexpr(_t2))
8259#  define COD8x8(_t1, _e2) \
8260             binop(Iop_CatOddLanes8x8, mkexpr(_t1), _e2)
8261#  define SHL64(_tmp, _amt) \
8262             binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8263   /* The following assumes that the guest is little endian, and hence
8264      that the memory-side (interleaved) data is stored
8265      little-endianly. */
8266   vassert(u0 && u1 && u2 && u3);
8267   if (laneszB == 4) {
8268      assign(*i0, ILO32x2(u1, u0));
8269      assign(*i1, ILO32x2(u3, u2));
8270      assign(*i2, IHI32x2(u1, u0));
8271      assign(*i3, IHI32x2(u3, u2));
8272   } else if (laneszB == 2) {
8273      // First, interleave at the 32-bit lane size.
8274      IRTemp b1b0a1a0 = newTemp(Ity_I64);
8275      IRTemp b3b2a3a2 = newTemp(Ity_I64);
8276      IRTemp d1d0c1c0 = newTemp(Ity_I64);
8277      IRTemp d3d2c3c2 = newTemp(Ity_I64);
8278      assign(b1b0a1a0, ILO32x2(u1, u0));
8279      assign(b3b2a3a2, IHI32x2(u1, u0));
8280      assign(d1d0c1c0, ILO32x2(u3, u2));
8281      assign(d3d2c3c2, IHI32x2(u3, u2));
8282      // And interleave (cat) at the 16 bit size.
8283      assign(*i0, CEV16x4(d1d0c1c0, b1b0a1a0));
8284      assign(*i1, COD16x4(d1d0c1c0, b1b0a1a0));
8285      assign(*i2, CEV16x4(d3d2c3c2, b3b2a3a2));
8286      assign(*i3, COD16x4(d3d2c3c2, b3b2a3a2));
8287   } else if (laneszB == 1) {
8288      // First, interleave at the 32-bit lane size.
8289      IRTemp b1b0a1a0 = newTemp(Ity_I64);
8290      IRTemp b3b2a3a2 = newTemp(Ity_I64);
8291      IRTemp d1d0c1c0 = newTemp(Ity_I64);
8292      IRTemp d3d2c3c2 = newTemp(Ity_I64);
8293      assign(b1b0a1a0, ILO32x2(u1, u0));
8294      assign(b3b2a3a2, IHI32x2(u1, u0));
8295      assign(d1d0c1c0, ILO32x2(u3, u2));
8296      assign(d3d2c3c2, IHI32x2(u3, u2));
8297      // And interleave (cat) at the 16 bit size.
8298      IRTemp i0x = newTemp(Ity_I64);
8299      IRTemp i1x = newTemp(Ity_I64);
8300      IRTemp i2x = newTemp(Ity_I64);
8301      IRTemp i3x = newTemp(Ity_I64);
8302      assign(i0x, CEV16x4(d1d0c1c0, b1b0a1a0));
8303      assign(i1x, COD16x4(d1d0c1c0, b1b0a1a0));
8304      assign(i2x, CEV16x4(d3d2c3c2, b3b2a3a2));
8305      assign(i3x, COD16x4(d3d2c3c2, b3b2a3a2));
8306      // And rearrange within each word, to get the right 8 bit lanes.
8307      assign(*i0, COD8x8(i0x, SHL64(i0x, 8)));
8308      assign(*i1, COD8x8(i1x, SHL64(i1x, 8)));
8309      assign(*i2, COD8x8(i2x, SHL64(i2x, 8)));
8310      assign(*i3, COD8x8(i3x, SHL64(i3x, 8)));
8311   } else {
8312      // Can never happen, since VLD4 only has valid lane widths of 32,
8313      // 16 or 8 bits.
8314      vpanic("math_DEINTERLEAVE_4");
8315   }
8316#  undef SHL64
8317#  undef COD8x8
8318#  undef COD16x4
8319#  undef CEV16x4
8320#  undef ILO32x2
8321#  undef IHI32x2
8322}
8323
8324/* A7.7 Advanced SIMD element or structure load/store instructions */
8325static
8326Bool dis_neon_load_or_store ( UInt theInstr,
8327                              Bool isT, IRTemp condT )
8328{
8329#  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
8330   UInt bA = INSN(23,23);
8331   UInt fB = INSN(11,8);
8332   UInt bL = INSN(21,21);
8333   UInt rD = (INSN(22,22) << 4) | INSN(15,12);
8334   UInt rN = INSN(19,16);
8335   UInt rM = INSN(3,0);
8336   UInt N, size, i, j;
8337   UInt inc;
8338   UInt regs = 1;
8339
8340   if (isT) {
8341      vassert(condT != IRTemp_INVALID);
8342   } else {
8343      vassert(condT == IRTemp_INVALID);
8344   }
8345   /* So now, if condT is not IRTemp_INVALID, we know we're
8346      dealing with Thumb code. */
8347
8348   if (INSN(20,20) != 0)
8349      return False;
8350
8351   IRTemp initialRn = newTemp(Ity_I32);
8352   assign(initialRn, isT ? getIRegT(rN) : getIRegA(rN));
8353
8354   IRTemp initialRm = newTemp(Ity_I32);
8355   assign(initialRm, isT ? getIRegT(rM) : getIRegA(rM));
8356
8357   /* There are 3 cases:
8358      (1) VSTn / VLDn (n-element structure from/to one lane)
8359      (2) VLDn (single element to all lanes)
8360      (3) VSTn / VLDn (multiple n-element structures)
8361   */
8362   if (bA) {
8363      N = fB & 3;
8364      if ((fB >> 2) < 3) {
8365         /* ------------ Case (1) ------------
8366            VSTn / VLDn (n-element structure from/to one lane) */
8367
8368         size = fB >> 2;
8369
8370         switch (size) {
8371            case 0: i = INSN(7,5); inc = 1; break;
8372            case 1: i = INSN(7,6); inc = INSN(5,5) ? 2 : 1; break;
8373            case 2: i = INSN(7,7); inc = INSN(6,6) ? 2 : 1; break;
8374            case 3: return False;
8375            default: vassert(0);
8376         }
8377
8378         IRTemp addr = newTemp(Ity_I32);
8379         assign(addr, mkexpr(initialRn));
8380
8381         // go uncond
8382         if (condT != IRTemp_INVALID)
8383            mk_skip_over_T32_if_cond_is_false(condT);
8384         // now uncond
8385
8386         if (bL)
8387            mk_neon_elem_load_to_one_lane(rD, inc, i, N, size, addr);
8388         else
8389            mk_neon_elem_store_from_one_lane(rD, inc, i, N, size, addr);
8390         DIP("v%s%u.%u {", bL ? "ld" : "st", N + 1, 8 << size);
8391         for (j = 0; j <= N; j++) {
8392            if (j)
8393               DIP(", ");
8394            DIP("d%u[%u]", rD + j * inc, i);
8395         }
8396         DIP("}, [r%u]", rN);
8397         if (rM != 13 && rM != 15) {
8398            DIP(", r%u\n", rM);
8399         } else {
8400            DIP("%s\n", (rM != 15) ? "!" : "");
8401         }
8402      } else {
8403         /* ------------ Case (2) ------------
8404            VLDn (single element to all lanes) */
8405         UInt r;
8406         if (bL == 0)
8407            return False;
8408
8409         inc = INSN(5,5) + 1;
8410         size = INSN(7,6);
8411
8412         /* size == 3 and size == 2 cases differ in alignment constraints */
8413         if (size == 3 && N == 3 && INSN(4,4) == 1)
8414            size = 2;
8415
8416         if (size == 0 && N == 0 && INSN(4,4) == 1)
8417            return False;
8418         if (N == 2 && INSN(4,4) == 1)
8419            return False;
8420         if (size == 3)
8421            return False;
8422
8423         // go uncond
8424         if (condT != IRTemp_INVALID)
8425            mk_skip_over_T32_if_cond_is_false(condT);
8426         // now uncond
8427
8428         IRTemp addr = newTemp(Ity_I32);
8429         assign(addr, mkexpr(initialRn));
8430
8431         if (N == 0 && INSN(5,5))
8432            regs = 2;
8433
8434         for (r = 0; r < regs; r++) {
8435            switch (size) {
8436               case 0:
8437                  putDRegI64(rD + r, unop(Iop_Dup8x8,
8438                                          loadLE(Ity_I8, mkexpr(addr))),
8439                             IRTemp_INVALID);
8440                  break;
8441               case 1:
8442                  putDRegI64(rD + r, unop(Iop_Dup16x4,
8443                                          loadLE(Ity_I16, mkexpr(addr))),
8444                             IRTemp_INVALID);
8445                  break;
8446               case 2:
8447                  putDRegI64(rD + r, unop(Iop_Dup32x2,
8448                                          loadLE(Ity_I32, mkexpr(addr))),
8449                             IRTemp_INVALID);
8450                  break;
8451               default:
8452                  vassert(0);
8453            }
8454            for (i = 1; i <= N; i++) {
8455               switch (size) {
8456                  case 0:
8457                     putDRegI64(rD + r + i * inc,
8458                                unop(Iop_Dup8x8,
8459                                     loadLE(Ity_I8, binop(Iop_Add32,
8460                                                          mkexpr(addr),
8461                                                          mkU32(i * 1)))),
8462                                IRTemp_INVALID);
8463                     break;
8464                  case 1:
8465                     putDRegI64(rD + r + i * inc,
8466                                unop(Iop_Dup16x4,
8467                                     loadLE(Ity_I16, binop(Iop_Add32,
8468                                                           mkexpr(addr),
8469                                                           mkU32(i * 2)))),
8470                                IRTemp_INVALID);
8471                     break;
8472                  case 2:
8473                     putDRegI64(rD + r + i * inc,
8474                                unop(Iop_Dup32x2,
8475                                     loadLE(Ity_I32, binop(Iop_Add32,
8476                                                           mkexpr(addr),
8477                                                           mkU32(i * 4)))),
8478                                IRTemp_INVALID);
8479                     break;
8480                  default:
8481                     vassert(0);
8482               }
8483            }
8484         }
8485         DIP("vld%u.%u {", N + 1, 8 << size);
8486         for (r = 0; r < regs; r++) {
8487            for (i = 0; i <= N; i++) {
8488               if (i || r)
8489                  DIP(", ");
8490               DIP("d%u[]", rD + r + i * inc);
8491            }
8492         }
8493         DIP("}, [r%u]", rN);
8494         if (rM != 13 && rM != 15) {
8495            DIP(", r%u\n", rM);
8496         } else {
8497            DIP("%s\n", (rM != 15) ? "!" : "");
8498         }
8499      }
8500      /* Writeback.  We're uncond here, so no condT-ing. */
8501      if (rM != 15) {
8502         if (rM == 13) {
8503            IRExpr* e = binop(Iop_Add32,
8504                              mkexpr(initialRn),
8505                              mkU32((1 << size) * (N + 1)));
8506            if (isT)
8507               putIRegT(rN, e, IRTemp_INVALID);
8508            else
8509               putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8510         } else {
8511            IRExpr* e = binop(Iop_Add32,
8512                              mkexpr(initialRn),
8513                              mkexpr(initialRm));
8514            if (isT)
8515               putIRegT(rN, e, IRTemp_INVALID);
8516            else
8517               putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8518         }
8519      }
8520      return True;
8521   } else {
8522      /* ------------ Case (3) ------------
8523         VSTn / VLDn (multiple n-element structures) */
8524      inc = (fB & 1) + 1;
8525
8526      if (fB == BITS4(0,0,1,0)       // Dd, Dd+1, Dd+2, Dd+3  inc = 1  regs = 4
8527          || fB == BITS4(0,1,1,0)    // Dd, Dd+1, Dd+2        inc = 1  regs = 3
8528          || fB == BITS4(0,1,1,1)    // Dd                    inc = 2  regs = 1
8529          || fB == BITS4(1,0,1,0)) { // Dd, Dd+1              inc = 1  regs = 2
8530         N = 0; // VLD1/VST1.  'inc' does not appear to have any
8531                // meaning for the VLD1/VST1 cases.  'regs' is the number of
8532                // registers involved.
8533         if (rD + regs > 32) return False;
8534      }
8535      else
8536      if (fB == BITS4(0,0,1,1)       // Dd, Dd+1, Dd+2, Dd+3  inc=2  regs = 2
8537          || fB == BITS4(1,0,0,0)    // Dd, Dd+1              inc=1  regs = 1
8538          || fB == BITS4(1,0,0,1)) { // Dd, Dd+2              inc=2  regs = 1
8539         N = 1; // VLD2/VST2.  'regs' is the number of register-pairs involved
8540         if (regs == 1 && inc == 1 && rD + 1 >= 32) return False;
8541         if (regs == 1 && inc == 2 && rD + 2 >= 32) return False;
8542         if (regs == 2 && inc == 2 && rD + 3 >= 32) return False;
8543      } else if (fB == BITS4(0,1,0,0) || fB == BITS4(0,1,0,1)) {
8544         N = 2; // VLD3/VST3
8545         if (inc == 1 && rD + 2 >= 32) return False;
8546         if (inc == 2 && rD + 4 >= 32) return False;
8547      } else if (fB == BITS4(0,0,0,0) || fB == BITS4(0,0,0,1)) {
8548         N = 3; // VLD4/VST4
8549         if (inc == 1 && rD + 3 >= 32) return False;
8550         if (inc == 2 && rD + 6 >= 32) return False;
8551      } else {
8552         return False;
8553      }
8554
8555      if (N == 1 && fB == BITS4(0,0,1,1)) {
8556         regs = 2;
8557      } else if (N == 0) {
8558         if (fB == BITS4(1,0,1,0)) {
8559            regs = 2;
8560         } else if (fB == BITS4(0,1,1,0)) {
8561            regs = 3;
8562         } else if (fB == BITS4(0,0,1,0)) {
8563            regs = 4;
8564         }
8565      }
8566
8567      size = INSN(7,6);
8568      if (N == 0 && size == 3)
8569         size = 2;
8570      if (size == 3)
8571         return False;
8572
8573      // go uncond
8574      if (condT != IRTemp_INVALID)
8575         mk_skip_over_T32_if_cond_is_false(condT);
8576      // now uncond
8577
8578      IRTemp addr = newTemp(Ity_I32);
8579      assign(addr, mkexpr(initialRn));
8580
8581      if (N == 0 /* No interleaving -- VLD1/VST1 */) {
8582         UInt r;
8583         vassert(regs == 1 || regs == 2 || regs == 3 || regs == 4);
8584         /* inc has no relevance here */
8585         for (r = 0; r < regs; r++) {
8586            if (bL)
8587               putDRegI64(rD+r, loadLE(Ity_I64, mkexpr(addr)), IRTemp_INVALID);
8588            else
8589               storeLE(mkexpr(addr), getDRegI64(rD+r));
8590            IRTemp tmp = newTemp(Ity_I32);
8591            assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(8)));
8592            addr = tmp;
8593         }
8594      }
8595      else
8596      if (N == 1 /* 2-interleaving -- VLD2/VST2 */) {
8597         vassert( (regs == 1 && (inc == 1 || inc == 2))
8598                   || (regs == 2 && inc == 2) );
8599         // Make 'nregs' be the number of registers and 'regstep'
8600         // equal the actual register-step.  The ARM encoding, using 'regs'
8601         // and 'inc', is bizarre.  After this, we have:
8602         // Dd, Dd+1              regs = 1, inc = 1,   nregs = 2, regstep = 1
8603         // Dd, Dd+2              regs = 1, inc = 2,   nregs = 2, regstep = 2
8604         // Dd, Dd+1, Dd+2, Dd+3  regs = 2, inc = 2,   nregs = 4, regstep = 1
8605         UInt nregs   = 2;
8606         UInt regstep = 1;
8607         if (regs == 1 && inc == 1) {
8608            /* nothing */
8609         } else if (regs == 1 && inc == 2) {
8610            regstep = 2;
8611         } else if (regs == 2 && inc == 2) {
8612            nregs = 4;
8613         } else {
8614            vassert(0);
8615         }
8616         // 'a' is address,
8617         // 'di' is interleaved data, 'du' is uninterleaved data
8618         if (nregs == 2) {
8619            IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8620            IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8621            IRTemp  di0 = newTemp(Ity_I64);
8622            IRTemp  di1 = newTemp(Ity_I64);
8623            IRTemp  du0 = newTemp(Ity_I64);
8624            IRTemp  du1 = newTemp(Ity_I64);
8625            if (bL) {
8626               assign(di0, loadLE(Ity_I64, a0));
8627               assign(di1, loadLE(Ity_I64, a1));
8628               math_DEINTERLEAVE_2(&du0, &du1, di0, di1, 1 << size);
8629               putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
8630               putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
8631            } else {
8632               assign(du0, getDRegI64(rD + 0 * regstep));
8633               assign(du1, getDRegI64(rD + 1 * regstep));
8634               math_INTERLEAVE_2(&di0, &di1, du0, du1, 1 << size);
8635               storeLE(a0, mkexpr(di0));
8636               storeLE(a1, mkexpr(di1));
8637            }
8638            IRTemp tmp = newTemp(Ity_I32);
8639            assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(16)));
8640            addr = tmp;
8641         } else {
8642            vassert(nregs == 4);
8643            vassert(regstep == 1);
8644            IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8645            IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8646            IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
8647            IRExpr* a3  = binop(Iop_Add32, mkexpr(addr), mkU32(24));
8648            IRTemp  di0 = newTemp(Ity_I64);
8649            IRTemp  di1 = newTemp(Ity_I64);
8650            IRTemp  di2 = newTemp(Ity_I64);
8651            IRTemp  di3 = newTemp(Ity_I64);
8652            IRTemp  du0 = newTemp(Ity_I64);
8653            IRTemp  du1 = newTemp(Ity_I64);
8654            IRTemp  du2 = newTemp(Ity_I64);
8655            IRTemp  du3 = newTemp(Ity_I64);
8656            if (bL) {
8657               assign(di0, loadLE(Ity_I64, a0));
8658               assign(di1, loadLE(Ity_I64, a1));
8659               assign(di2, loadLE(Ity_I64, a2));
8660               assign(di3, loadLE(Ity_I64, a3));
8661               // Note spooky interleaving: du0, du2, di0, di1 etc
8662               math_DEINTERLEAVE_2(&du0, &du2, di0, di1, 1 << size);
8663               math_DEINTERLEAVE_2(&du1, &du3, di2, di3, 1 << size);
8664               putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
8665               putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
8666               putDRegI64(rD + 2 * regstep, mkexpr(du2), IRTemp_INVALID);
8667               putDRegI64(rD + 3 * regstep, mkexpr(du3), IRTemp_INVALID);
8668            } else {
8669               assign(du0, getDRegI64(rD + 0 * regstep));
8670               assign(du1, getDRegI64(rD + 1 * regstep));
8671               assign(du2, getDRegI64(rD + 2 * regstep));
8672               assign(du3, getDRegI64(rD + 3 * regstep));
8673               // Note spooky interleaving: du0, du2, di0, di1 etc
8674               math_INTERLEAVE_2(&di0, &di1, du0, du2, 1 << size);
8675               math_INTERLEAVE_2(&di2, &di3, du1, du3, 1 << size);
8676               storeLE(a0, mkexpr(di0));
8677               storeLE(a1, mkexpr(di1));
8678               storeLE(a2, mkexpr(di2));
8679               storeLE(a3, mkexpr(di3));
8680            }
8681
8682            IRTemp tmp = newTemp(Ity_I32);
8683            assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
8684            addr = tmp;
8685         }
8686      }
8687      else
8688      if (N == 2 /* 3-interleaving -- VLD3/VST3 */) {
8689         // Dd, Dd+1, Dd+2   regs = 1, inc = 1
8690         // Dd, Dd+2, Dd+4   regs = 1, inc = 2
8691         vassert(regs == 1 && (inc == 1 || inc == 2));
8692         IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8693         IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8694         IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
8695         IRTemp  di0 = newTemp(Ity_I64);
8696         IRTemp  di1 = newTemp(Ity_I64);
8697         IRTemp  di2 = newTemp(Ity_I64);
8698         IRTemp  du0 = newTemp(Ity_I64);
8699         IRTemp  du1 = newTemp(Ity_I64);
8700         IRTemp  du2 = newTemp(Ity_I64);
8701         if (bL) {
8702            assign(di0, loadLE(Ity_I64, a0));
8703            assign(di1, loadLE(Ity_I64, a1));
8704            assign(di2, loadLE(Ity_I64, a2));
8705            math_DEINTERLEAVE_3(&du0, &du1, &du2, di0, di1, di2, 1 << size);
8706            putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
8707            putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
8708            putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
8709         } else {
8710            assign(du0, getDRegI64(rD + 0 * inc));
8711            assign(du1, getDRegI64(rD + 1 * inc));
8712            assign(du2, getDRegI64(rD + 2 * inc));
8713            math_INTERLEAVE_3(&di0, &di1, &di2, du0, du1, du2, 1 << size);
8714            storeLE(a0, mkexpr(di0));
8715            storeLE(a1, mkexpr(di1));
8716            storeLE(a2, mkexpr(di2));
8717         }
8718         IRTemp tmp = newTemp(Ity_I32);
8719         assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(24)));
8720         addr = tmp;
8721      }
8722      else
8723      if (N == 3 /* 4-interleaving -- VLD4/VST4 */) {
8724         // Dd, Dd+1, Dd+2, Dd+3   regs = 1, inc = 1
8725         // Dd, Dd+2, Dd+4, Dd+6   regs = 1, inc = 2
8726         vassert(regs == 1 && (inc == 1 || inc == 2));
8727         IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8728         IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8729         IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
8730         IRExpr* a3  = binop(Iop_Add32, mkexpr(addr), mkU32(24));
8731         IRTemp  di0 = newTemp(Ity_I64);
8732         IRTemp  di1 = newTemp(Ity_I64);
8733         IRTemp  di2 = newTemp(Ity_I64);
8734         IRTemp  di3 = newTemp(Ity_I64);
8735         IRTemp  du0 = newTemp(Ity_I64);
8736         IRTemp  du1 = newTemp(Ity_I64);
8737         IRTemp  du2 = newTemp(Ity_I64);
8738         IRTemp  du3 = newTemp(Ity_I64);
8739         if (bL) {
8740            assign(di0, loadLE(Ity_I64, a0));
8741            assign(di1, loadLE(Ity_I64, a1));
8742            assign(di2, loadLE(Ity_I64, a2));
8743            assign(di3, loadLE(Ity_I64, a3));
8744            math_DEINTERLEAVE_4(&du0, &du1, &du2, &du3,
8745                                di0, di1, di2, di3, 1 << size);
8746            putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
8747            putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
8748            putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
8749            putDRegI64(rD + 3 * inc, mkexpr(du3), IRTemp_INVALID);
8750         } else {
8751            assign(du0, getDRegI64(rD + 0 * inc));
8752            assign(du1, getDRegI64(rD + 1 * inc));
8753            assign(du2, getDRegI64(rD + 2 * inc));
8754            assign(du3, getDRegI64(rD + 3 * inc));
8755            math_INTERLEAVE_4(&di0, &di1, &di2, &di3,
8756                              du0, du1, du2, du3, 1 << size);
8757            storeLE(a0, mkexpr(di0));
8758            storeLE(a1, mkexpr(di1));
8759            storeLE(a2, mkexpr(di2));
8760            storeLE(a3, mkexpr(di3));
8761         }
8762         IRTemp tmp = newTemp(Ity_I32);
8763         assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
8764         addr = tmp;
8765      }
8766      else {
8767         vassert(0);
8768      }
8769
8770      /* Writeback */
8771      if (rM != 15) {
8772         IRExpr* e;
8773         if (rM == 13) {
8774            e = binop(Iop_Add32, mkexpr(initialRn),
8775                                 mkU32(8 * (N + 1) * regs));
8776         } else {
8777            e = binop(Iop_Add32, mkexpr(initialRn),
8778                                 mkexpr(initialRm));
8779         }
8780         if (isT)
8781            putIRegT(rN, e, IRTemp_INVALID);
8782         else
8783            putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8784      }
8785
8786      DIP("v%s%u.%u {", bL ? "ld" : "st", N + 1, 8 << INSN(7,6));
8787      if ((inc == 1 && regs * (N + 1) > 1)
8788          || (inc == 2 && regs > 1 && N > 0)) {
8789         DIP("d%u-d%u", rD, rD + regs * (N + 1) - 1);
8790      } else {
8791         UInt r;
8792         for (r = 0; r < regs; r++) {
8793            for (i = 0; i <= N; i++) {
8794               if (i || r)
8795                  DIP(", ");
8796               DIP("d%u", rD + r + i * inc);
8797            }
8798         }
8799      }
8800      DIP("}, [r%u]", rN);
8801      if (rM != 13 && rM != 15) {
8802         DIP(", r%u\n", rM);
8803      } else {
8804         DIP("%s\n", (rM != 15) ? "!" : "");
8805      }
8806      return True;
8807   }
8808#  undef INSN
8809}
8810
8811
8812/*------------------------------------------------------------*/
8813/*--- NEON, top level control                              ---*/
8814/*------------------------------------------------------------*/
8815
8816/* Both ARM and Thumb */
8817
8818/* Translate a NEON instruction.    If successful, returns
8819   True and *dres may or may not be updated.  If failure, returns
8820   False and doesn't change *dres nor create any IR.
8821
8822   The Thumb and ARM encodings are similar for the 24 bottom bits, but
8823   the top 8 bits are slightly different.  In both cases, the caller
8824   must pass the entire 32 bits.  Callers may pass any instruction;
8825   this ignores non-NEON ones.
8826
8827   Caller must supply an IRTemp 'condT' holding the gating condition,
8828   or IRTemp_INVALID indicating the insn is always executed.  In ARM
8829   code, this must always be IRTemp_INVALID because NEON insns are
8830   unconditional for ARM.
8831
8832   Finally, the caller must indicate whether this occurs in ARM or in
8833   Thumb code.
8834*/
8835static Bool decode_NEON_instruction (
8836               /*MOD*/DisResult* dres,
8837               UInt              insn32,
8838               IRTemp            condT,
8839               Bool              isT
8840            )
8841{
8842#  define INSN(_bMax,_bMin)  SLICE_UInt(insn32, (_bMax), (_bMin))
8843
8844   /* There are two kinds of instruction to deal with: load/store and
8845      data processing.  In each case, in ARM mode we merely identify
8846      the kind, and pass it on to the relevant sub-handler.  In Thumb
8847      mode we identify the kind, swizzle the bits around to make it
8848      have the same encoding as in ARM, and hand it on to the
8849      sub-handler.
8850   */
8851
8852   /* In ARM mode, NEON instructions can't be conditional. */
8853   if (!isT)
8854      vassert(condT == IRTemp_INVALID);
8855
8856   /* Data processing:
8857      Thumb: 111U 1111 AAAA Axxx xxxx BBBB CCCC xxxx
8858      ARM:   1111 001U AAAA Axxx xxxx BBBB CCCC xxxx
8859   */
8860   if (!isT && INSN(31,25) == BITS7(1,1,1,1,0,0,1)) {
8861      // ARM, DP
8862      return dis_neon_data_processing(INSN(31,0), condT);
8863   }
8864   if (isT && INSN(31,29) == BITS3(1,1,1)
8865       && INSN(27,24) == BITS4(1,1,1,1)) {
8866      // Thumb, DP
8867      UInt reformatted = INSN(23,0);
8868      reformatted |= (INSN(28,28) << 24); // U bit
8869      reformatted |= (BITS7(1,1,1,1,0,0,1) << 25);
8870      return dis_neon_data_processing(reformatted, condT);
8871   }
8872
8873   /* Load/store:
8874      Thumb: 1111 1001 AxL0 xxxx xxxx BBBB xxxx xxxx
8875      ARM:   1111 0100 AxL0 xxxx xxxx BBBB xxxx xxxx
8876   */
8877   if (!isT && INSN(31,24) == BITS8(1,1,1,1,0,1,0,0)) {
8878      // ARM, memory
8879      return dis_neon_load_or_store(INSN(31,0), isT, condT);
8880   }
8881   if (isT && INSN(31,24) == BITS8(1,1,1,1,1,0,0,1)) {
8882      UInt reformatted = INSN(23,0);
8883      reformatted |= (BITS8(1,1,1,1,0,1,0,0) << 24);
8884      return dis_neon_load_or_store(reformatted, isT, condT);
8885   }
8886
8887   /* Doesn't match. */
8888   return False;
8889
8890#  undef INSN
8891}
8892
8893
8894/*------------------------------------------------------------*/
8895/*--- V6 MEDIA instructions                                ---*/
8896/*------------------------------------------------------------*/
8897
8898/* Both ARM and Thumb */
8899
8900/* Translate a V6 media instruction.    If successful, returns
8901   True and *dres may or may not be updated.  If failure, returns
8902   False and doesn't change *dres nor create any IR.
8903
8904   The Thumb and ARM encodings are completely different.  In Thumb
8905   mode, the caller must pass the entire 32 bits.  In ARM mode it must
8906   pass the lower 28 bits.  Apart from that, callers may pass any
8907   instruction; this function ignores anything it doesn't recognise.
8908
8909   Caller must supply an IRTemp 'condT' holding the gating condition,
8910   or IRTemp_INVALID indicating the insn is always executed.
8911
   Caller must also supply an ARMCondcode 'conq'.  This is only used
8913   for debug printing, no other purpose.  For ARM, this is simply the
8914   top 4 bits of the original instruction.  For Thumb, the condition
8915   is not (really) known until run time, and so ARMCondAL should be
8916   passed, only so that printing of these instructions does not show
8917   any condition.
8918
8919   Finally, the caller must indicate whether this occurs in ARM or in
8920   Thumb code.
8921*/
8922static Bool decode_V6MEDIA_instruction (
8923               /*MOD*/DisResult* dres,
8924               UInt              insnv6m,
8925               IRTemp            condT,
8926               ARMCondcode       conq,
8927               Bool              isT
8928            )
8929{
8930#  define INSNA(_bMax,_bMin)   SLICE_UInt(insnv6m, (_bMax), (_bMin))
8931#  define INSNT0(_bMax,_bMin)  SLICE_UInt( ((insnv6m >> 16) & 0xFFFF), \
8932                                           (_bMax), (_bMin) )
8933#  define INSNT1(_bMax,_bMin)  SLICE_UInt( ((insnv6m >> 0)  & 0xFFFF), \
8934                                           (_bMax), (_bMin) )
8935   HChar dis_buf[128];
8936   dis_buf[0] = 0;
8937
8938   if (isT) {
8939      vassert(conq == ARMCondAL);
8940   } else {
8941      vassert(INSNA(31,28) == BITS4(0,0,0,0)); // caller's obligation
8942      vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
8943   }
8944
8945   /* ----------- smulbb, smulbt, smultb, smultt ----------- */
8946   {
8947     UInt regD = 99, regM = 99, regN = 99, bitM = 0, bitN = 0;
8948     Bool gate = False;
8949
8950     if (isT) {
8951        if (INSNT0(15,4) == 0xFB1 && INSNT1(15,12) == BITS4(1,1,1,1)
8952            && INSNT1(7,6) == BITS2(0,0)) {
8953           regD = INSNT1(11,8);
8954           regM = INSNT1(3,0);
8955           regN = INSNT0(3,0);
8956           bitM = INSNT1(4,4);
8957           bitN = INSNT1(5,5);
8958           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
8959              gate = True;
8960        }
8961     } else {
8962        if (BITS8(0,0,0,1,0,1,1,0) == INSNA(27,20) &&
8963            BITS4(0,0,0,0)         == INSNA(15,12) &&
8964            BITS4(1,0,0,0)         == (INSNA(7,4) & BITS4(1,0,0,1)) ) {
8965           regD = INSNA(19,16);
8966           regM = INSNA(11,8);
8967           regN = INSNA(3,0);
8968           bitM = INSNA(6,6);
8969           bitN = INSNA(5,5);
8970           if (regD != 15 && regN != 15 && regM != 15)
8971              gate = True;
8972        }
8973     }
8974
8975     if (gate) {
8976        IRTemp srcN = newTemp(Ity_I32);
8977        IRTemp srcM = newTemp(Ity_I32);
8978        IRTemp res  = newTemp(Ity_I32);
8979
8980        assign( srcN, binop(Iop_Sar32,
8981                            binop(Iop_Shl32,
8982                                  isT ? getIRegT(regN) : getIRegA(regN),
8983                                  mkU8(bitN ? 0 : 16)), mkU8(16)) );
8984        assign( srcM, binop(Iop_Sar32,
8985                            binop(Iop_Shl32,
8986                                  isT ? getIRegT(regM) : getIRegA(regM),
8987                                  mkU8(bitM ? 0 : 16)), mkU8(16)) );
8988        assign( res, binop(Iop_Mul32, mkexpr(srcN), mkexpr(srcM)) );
8989
8990        if (isT)
8991           putIRegT( regD, mkexpr(res), condT );
8992        else
8993           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
8994
8995        DIP( "smul%c%c%s r%u, r%u, r%u\n", bitN ? 't' : 'b', bitM ? 't' : 'b',
8996             nCC(conq), regD, regN, regM );
8997        return True;
8998     }
8999     /* fall through */
9000   }
9001
9002   /* ------------ smulwb<y><c> <Rd>,<Rn>,<Rm> ------------- */
9003   /* ------------ smulwt<y><c> <Rd>,<Rn>,<Rm> ------------- */
9004   {
9005     UInt regD = 99, regN = 99, regM = 99, bitM = 0;
9006     Bool gate = False;
9007
9008     if (isT) {
9009        if (INSNT0(15,4) == 0xFB3 && INSNT1(15,12) == BITS4(1,1,1,1)
9010            && INSNT1(7,5) == BITS3(0,0,0)) {
9011          regN = INSNT0(3,0);
9012          regD = INSNT1(11,8);
9013          regM = INSNT1(3,0);
9014          bitM = INSNT1(4,4);
9015          if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9016             gate = True;
9017        }
9018     } else {
9019        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
9020            INSNA(15,12) == BITS4(0,0,0,0)         &&
9021            (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,1,0)) {
9022           regD = INSNA(19,16);
9023           regN = INSNA(3,0);
9024           regM = INSNA(11,8);
9025           bitM = INSNA(6,6);
9026           if (regD != 15 && regN != 15 && regM != 15)
9027              gate = True;
9028        }
9029     }
9030
9031     if (gate) {
9032        IRTemp irt_prod = newTemp(Ity_I64);
9033
9034        assign( irt_prod,
9035                binop(Iop_MullS32,
9036                      isT ? getIRegT(regN) : getIRegA(regN),
9037                      binop(Iop_Sar32,
9038                            binop(Iop_Shl32,
9039                                  isT ? getIRegT(regM) : getIRegA(regM),
9040                                  mkU8(bitM ? 0 : 16)),
9041                            mkU8(16))) );
9042
9043        IRExpr* ire_result = binop(Iop_Or32,
9044                                   binop( Iop_Shl32,
9045                                          unop(Iop_64HIto32, mkexpr(irt_prod)),
9046                                          mkU8(16) ),
9047                                   binop( Iop_Shr32,
9048                                          unop(Iop_64to32, mkexpr(irt_prod)),
9049                                          mkU8(16) ) );
9050
9051        if (isT)
9052           putIRegT( regD, ire_result, condT );
9053        else
9054           putIRegA( regD, ire_result, condT, Ijk_Boring );
9055
9056        DIP("smulw%c%s r%u, r%u, r%u\n",
9057            bitM ? 't' : 'b', nCC(conq),regD,regN,regM);
9058        return True;
9059     }
9060     /* fall through */
9061   }
9062
9063   /* ------------ pkhbt<c> Rd, Rn, Rm {,LSL #imm} ------------- */
9064   /* ------------ pkhtb<c> Rd, Rn, Rm {,ASR #imm} ------------- */
9065   {
9066     UInt regD = 99, regN = 99, regM = 99, imm5 = 99, shift_type = 99;
9067     Bool tbform = False;
9068     Bool gate = False;
9069
9070     if (isT) {
9071        if (INSNT0(15,4) == 0xEAC
9072            && INSNT1(15,15) == 0 && INSNT1(4,4) == 0) {
9073           regN = INSNT0(3,0);
9074           regD = INSNT1(11,8);
9075           regM = INSNT1(3,0);
9076           imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
9077           shift_type = (INSNT1(5,5) << 1) | 0;
9078           tbform = (INSNT1(5,5) == 0) ? False : True;
9079           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9080              gate = True;
9081        }
9082     } else {
9083        if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
9084            INSNA(5,4)   == BITS2(0,1)             &&
9085            (INSNA(6,6)  == 0 || INSNA(6,6) == 1) ) {
9086           regD = INSNA(15,12);
9087           regN = INSNA(19,16);
9088           regM = INSNA(3,0);
9089           imm5 = INSNA(11,7);
9090           shift_type = (INSNA(6,6) << 1) | 0;
9091           tbform = (INSNA(6,6) == 0) ? False : True;
9092           if (regD != 15 && regN != 15 && regM != 15)
9093              gate = True;
9094        }
9095     }
9096
9097     if (gate) {
9098        IRTemp irt_regM       = newTemp(Ity_I32);
9099        IRTemp irt_regM_shift = newTemp(Ity_I32);
9100        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
9101        compute_result_and_C_after_shift_by_imm5(
9102           dis_buf, &irt_regM_shift, NULL, irt_regM, shift_type, imm5, regM );
9103
9104        UInt mask = (tbform == True) ? 0x0000FFFF : 0xFFFF0000;
9105        IRExpr* ire_result
9106          = binop( Iop_Or32,
9107                   binop(Iop_And32, mkexpr(irt_regM_shift), mkU32(mask)),
9108                   binop(Iop_And32, isT ? getIRegT(regN) : getIRegA(regN),
9109                                    unop(Iop_Not32, mkU32(mask))) );
9110
9111        if (isT)
9112           putIRegT( regD, ire_result, condT );
9113        else
9114           putIRegA( regD, ire_result, condT, Ijk_Boring );
9115
9116        DIP( "pkh%s%s r%u, r%u, r%u %s\n", tbform ? "tb" : "bt",
9117             nCC(conq), regD, regN, regM, dis_buf );
9118
9119        return True;
9120     }
9121     /* fall through */
9122   }
9123
9124   /* ---------- usat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
9125   {
9126     UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
9127     Bool gate = False;
9128
9129     if (isT) {
9130        if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,1,0)
9131            && INSNT0(4,4) == 0
9132            && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
9133           regD       = INSNT1(11,8);
9134           regN       = INSNT0(3,0);
9135           shift_type = (INSNT0(5,5) << 1) | 0;
9136           imm5       = (INSNT1(14,12) << 2) | INSNT1(7,6);
9137           sat_imm    = INSNT1(4,0);
9138           if (!isBadRegT(regD) && !isBadRegT(regN))
9139              gate = True;
9140           if (shift_type == BITS2(1,0) && imm5 == 0)
9141              gate = False;
9142        }
9143     } else {
9144        if (INSNA(27,21) == BITS7(0,1,1,0,1,1,1) &&
9145            INSNA(5,4)   == BITS2(0,1)) {
9146           regD       = INSNA(15,12);
9147           regN       = INSNA(3,0);
9148           shift_type = (INSNA(6,6) << 1) | 0;
9149           imm5       = INSNA(11,7);
9150           sat_imm    = INSNA(20,16);
9151           if (regD != 15 && regN != 15)
9152              gate = True;
9153        }
9154     }
9155
9156     if (gate) {
9157        IRTemp irt_regN       = newTemp(Ity_I32);
9158        IRTemp irt_regN_shift = newTemp(Ity_I32);
9159        IRTemp irt_sat_Q      = newTemp(Ity_I32);
9160        IRTemp irt_result     = newTemp(Ity_I32);
9161
9162        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9163        compute_result_and_C_after_shift_by_imm5(
9164                dis_buf, &irt_regN_shift, NULL,
9165                irt_regN, shift_type, imm5, regN );
9166
9167        armUnsignedSatQ( &irt_result, &irt_sat_Q, irt_regN_shift, sat_imm );
9168        or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
9169
9170        if (isT)
9171           putIRegT( regD, mkexpr(irt_result), condT );
9172        else
9173           putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
9174
9175        DIP("usat%s r%u, #0x%04x, %s\n",
9176            nCC(conq), regD, imm5, dis_buf);
9177        return True;
9178     }
9179     /* fall through */
9180   }
9181
9182  /* ----------- ssat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
9183   {
9184     UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
9185     Bool gate = False;
9186
9187     if (isT) {
9188        if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
9189            && INSNT0(4,4) == 0
9190            && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
9191           regD       = INSNT1(11,8);
9192           regN       = INSNT0(3,0);
9193           shift_type = (INSNT0(5,5) << 1) | 0;
9194           imm5       = (INSNT1(14,12) << 2) | INSNT1(7,6);
9195           sat_imm    = INSNT1(4,0) + 1;
9196           if (!isBadRegT(regD) && !isBadRegT(regN))
9197              gate = True;
9198           if (shift_type == BITS2(1,0) && imm5 == 0)
9199              gate = False;
9200        }
9201     } else {
9202        if (INSNA(27,21) == BITS7(0,1,1,0,1,0,1) &&
9203            INSNA(5,4)   == BITS2(0,1)) {
9204           regD       = INSNA(15,12);
9205           regN       = INSNA(3,0);
9206           shift_type = (INSNA(6,6) << 1) | 0;
9207           imm5       = INSNA(11,7);
9208           sat_imm    = INSNA(20,16) + 1;
9209           if (regD != 15 && regN != 15)
9210              gate = True;
9211        }
9212     }
9213
9214     if (gate) {
9215        IRTemp irt_regN       = newTemp(Ity_I32);
9216        IRTemp irt_regN_shift = newTemp(Ity_I32);
9217        IRTemp irt_sat_Q      = newTemp(Ity_I32);
9218        IRTemp irt_result     = newTemp(Ity_I32);
9219
9220        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9221        compute_result_and_C_after_shift_by_imm5(
9222                dis_buf, &irt_regN_shift, NULL,
9223                irt_regN, shift_type, imm5, regN );
9224
9225        armSignedSatQ( irt_regN_shift, sat_imm, &irt_result, &irt_sat_Q );
9226        or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
9227
9228        if (isT)
9229           putIRegT( regD, mkexpr(irt_result), condT );
9230        else
9231           putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
9232
9233        DIP( "ssat%s r%u, #0x%04x, %s\n",
9234             nCC(conq), regD, imm5, dis_buf);
9235        return True;
9236    }
9237    /* fall through */
9238  }
9239
9240   /* ----------- ssat16<c> <Rd>,#<imm>,<Rn> ----------- */
9241   {
9242     UInt regD = 99, regN = 99, sat_imm = 99;
9243     Bool gate = False;
9244
9245     if (isT) {
9246        if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
9247            && INSNT0(5,4) == BITS2(1,0)
9248            && INSNT1(15,12) == BITS4(0,0,0,0)
9249            && INSNT1(7,4) == BITS4(0,0,0,0)) {
9250           regD       = INSNT1(11,8);
9251           regN       = INSNT0(3,0);
9252           sat_imm    = INSNT1(3,0) + 1;
9253           if (!isBadRegT(regD) && !isBadRegT(regN))
9254              gate = True;
9255        }
9256     } else {
9257        if (INSNA(27,20) == BITS8(0,1,1,0,1,0,1,0) &&
9258            INSNA(11,4)   == BITS8(1,1,1,1,0,0,1,1)) {
9259           regD       = INSNA(15,12);
9260           regN       = INSNA(3,0);
9261           sat_imm    = INSNA(19,16) + 1;
9262           if (regD != 15 && regN != 15)
9263              gate = True;
9264        }
9265     }
9266
9267     if (gate) {
9268        IRTemp irt_regN    = newTemp(Ity_I32);
9269        IRTemp irt_regN_lo = newTemp(Ity_I32);
9270        IRTemp irt_regN_hi = newTemp(Ity_I32);
9271        IRTemp irt_Q_lo    = newTemp(Ity_I32);
9272        IRTemp irt_Q_hi    = newTemp(Ity_I32);
9273        IRTemp irt_res_lo  = newTemp(Ity_I32);
9274        IRTemp irt_res_hi  = newTemp(Ity_I32);
9275
9276        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9277        assign( irt_regN_lo,
9278                binop( Iop_Sar32,
9279                       binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
9280                       mkU8(16)) );
9281        assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
9282
9283        armSignedSatQ( irt_regN_lo, sat_imm, &irt_res_lo, &irt_Q_lo );
9284        or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
9285
9286        armSignedSatQ( irt_regN_hi, sat_imm, &irt_res_hi, &irt_Q_hi );
9287        or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
9288
9289        IRExpr* ire_result
9290           = binop(Iop_Or32,
9291                   binop(Iop_And32, mkexpr(irt_res_lo), mkU32(0xFFFF)),
9292                   binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)));
9293        if (isT)
9294           putIRegT( regD, ire_result, condT );
9295        else
9296           putIRegA( regD, ire_result, condT, Ijk_Boring );
9297
9298        DIP( "ssat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
9299        return True;
9300     }
9301     /* fall through */
9302   }
9303
9304   /* -------------- usat16<c> <Rd>,#<imm4>,<Rn> --------------- */
9305   {
9306     UInt regD = 99, regN = 99, sat_imm = 99;
9307     Bool gate = False;
9308
9309     if (isT) {
9310        if (INSNT0(15,4) == 0xF3A && (INSNT1(15,0) & 0xF0F0) == 0x0000) {
9311           regN = INSNT0(3,0);
9312           regD = INSNT1(11,8);
9313           sat_imm = INSNT1(3,0);
9314           if (!isBadRegT(regD) && !isBadRegT(regN))
9315              gate = True;
9316       }
9317     } else {
9318        if (INSNA(27,20) == BITS8(0,1,1,0,1,1,1,0) &&
9319            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9320            INSNA(7,4)   == BITS4(0,0,1,1)) {
9321           regD    = INSNA(15,12);
9322           regN    = INSNA(3,0);
9323           sat_imm = INSNA(19,16);
9324           if (regD != 15 && regN != 15)
9325              gate = True;
9326        }
9327     }
9328
9329     if (gate) {
9330        IRTemp irt_regN    = newTemp(Ity_I32);
9331        IRTemp irt_regN_lo = newTemp(Ity_I32);
9332        IRTemp irt_regN_hi = newTemp(Ity_I32);
9333        IRTemp irt_Q_lo    = newTemp(Ity_I32);
9334        IRTemp irt_Q_hi    = newTemp(Ity_I32);
9335        IRTemp irt_res_lo  = newTemp(Ity_I32);
9336        IRTemp irt_res_hi  = newTemp(Ity_I32);
9337
9338        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9339        assign( irt_regN_lo, binop( Iop_Sar32,
9340                                    binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
9341                                    mkU8(16)) );
9342        assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
9343
9344        armUnsignedSatQ( &irt_res_lo, &irt_Q_lo, irt_regN_lo, sat_imm );
9345        or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
9346
9347        armUnsignedSatQ( &irt_res_hi, &irt_Q_hi, irt_regN_hi, sat_imm );
9348        or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
9349
9350        IRExpr* ire_result = binop( Iop_Or32,
9351                                    binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)),
9352                                    mkexpr(irt_res_lo) );
9353
9354        if (isT)
9355           putIRegT( regD, ire_result, condT );
9356        else
9357           putIRegA( regD, ire_result, condT, Ijk_Boring );
9358
9359        DIP( "usat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
9360        return True;
9361     }
9362     /* fall through */
9363   }
9364
9365   /* -------------- uadd16<c> <Rd>,<Rn>,<Rm> -------------- */
9366   {
9367     UInt regD = 99, regN = 99, regM = 99;
9368     Bool gate = False;
9369
9370     if (isT) {
9371        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9372           regN = INSNT0(3,0);
9373           regD = INSNT1(11,8);
9374           regM = INSNT1(3,0);
9375           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9376              gate = True;
9377        }
9378     } else {
9379        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9380            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9381            INSNA(7,4)   == BITS4(0,0,0,1)) {
9382           regD = INSNA(15,12);
9383           regN = INSNA(19,16);
9384           regM = INSNA(3,0);
9385           if (regD != 15 && regN != 15 && regM != 15)
9386              gate = True;
9387        }
9388     }
9389
9390     if (gate) {
9391        IRTemp rNt  = newTemp(Ity_I32);
9392        IRTemp rMt  = newTemp(Ity_I32);
9393        IRTemp res  = newTemp(Ity_I32);
9394        IRTemp reso = newTemp(Ity_I32);
9395
9396        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9397        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9398
9399        assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
9400        if (isT)
9401           putIRegT( regD, mkexpr(res), condT );
9402        else
9403           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9404
9405        assign(reso, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
9406        set_GE_32_10_from_bits_31_15(reso, condT);
9407
9408        DIP("uadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9409        return True;
9410     }
9411     /* fall through */
9412   }
9413
9414   /* -------------- sadd16<c> <Rd>,<Rn>,<Rm> -------------- */
9415   {
9416     UInt regD = 99, regN = 99, regM = 99;
9417     Bool gate = False;
9418
9419     if (isT) {
9420        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9421           regN = INSNT0(3,0);
9422           regD = INSNT1(11,8);
9423           regM = INSNT1(3,0);
9424           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9425              gate = True;
9426        }
9427     } else {
9428        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9429            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9430            INSNA(7,4)   == BITS4(0,0,0,1)) {
9431           regD = INSNA(15,12);
9432           regN = INSNA(19,16);
9433           regM = INSNA(3,0);
9434           if (regD != 15 && regN != 15 && regM != 15)
9435              gate = True;
9436        }
9437     }
9438
9439     if (gate) {
9440        IRTemp rNt  = newTemp(Ity_I32);
9441        IRTemp rMt  = newTemp(Ity_I32);
9442        IRTemp res  = newTemp(Ity_I32);
9443        IRTemp reso = newTemp(Ity_I32);
9444
9445        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9446        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9447
9448        assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
9449        if (isT)
9450           putIRegT( regD, mkexpr(res), condT );
9451        else
9452           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9453
9454        assign(reso, unop(Iop_Not32,
9455                          binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt))));
9456        set_GE_32_10_from_bits_31_15(reso, condT);
9457
9458        DIP("sadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9459        return True;
9460     }
9461     /* fall through */
9462   }
9463
9464   /* ---------------- usub16<c> <Rd>,<Rn>,<Rm> ---------------- */
9465   {
9466     UInt regD = 99, regN = 99, regM = 99;
9467     Bool gate = False;
9468
9469     if (isT) {
9470        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9471           regN = INSNT0(3,0);
9472           regD = INSNT1(11,8);
9473           regM = INSNT1(3,0);
9474           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9475              gate = True;
9476        }
9477     } else {
9478        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9479            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9480            INSNA(7,4)   == BITS4(0,1,1,1)) {
9481           regD = INSNA(15,12);
9482           regN = INSNA(19,16);
9483           regM = INSNA(3,0);
9484           if (regD != 15 && regN != 15 && regM != 15)
9485             gate = True;
9486        }
9487     }
9488
9489     if (gate) {
9490        IRTemp rNt  = newTemp(Ity_I32);
9491        IRTemp rMt  = newTemp(Ity_I32);
9492        IRTemp res  = newTemp(Ity_I32);
9493        IRTemp reso = newTemp(Ity_I32);
9494
9495        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9496        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9497
9498        assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
9499        if (isT)
9500           putIRegT( regD, mkexpr(res), condT );
9501        else
9502           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9503
9504        assign(reso, unop(Iop_Not32,
9505                          binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt))));
9506        set_GE_32_10_from_bits_31_15(reso, condT);
9507
9508        DIP("usub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9509        return True;
9510     }
9511     /* fall through */
9512   }
9513
9514   /* -------------- ssub16<c> <Rd>,<Rn>,<Rm> -------------- */
9515   {
9516     UInt regD = 99, regN = 99, regM = 99;
9517     Bool gate = False;
9518
9519     if (isT) {
9520        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9521           regN = INSNT0(3,0);
9522           regD = INSNT1(11,8);
9523           regM = INSNT1(3,0);
9524           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9525              gate = True;
9526        }
9527     } else {
9528        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9529            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9530            INSNA(7,4)   == BITS4(0,1,1,1)) {
9531           regD = INSNA(15,12);
9532           regN = INSNA(19,16);
9533           regM = INSNA(3,0);
9534           if (regD != 15 && regN != 15 && regM != 15)
9535              gate = True;
9536        }
9537     }
9538
9539     if (gate) {
9540        IRTemp rNt  = newTemp(Ity_I32);
9541        IRTemp rMt  = newTemp(Ity_I32);
9542        IRTemp res  = newTemp(Ity_I32);
9543        IRTemp reso = newTemp(Ity_I32);
9544
9545        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9546        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9547
9548        assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
9549        if (isT)
9550           putIRegT( regD, mkexpr(res), condT );
9551        else
9552           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9553
9554        assign(reso, unop(Iop_Not32,
9555                          binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt))));
9556        set_GE_32_10_from_bits_31_15(reso, condT);
9557
9558        DIP("ssub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9559        return True;
9560     }
9561     /* fall through */
9562   }
9563
9564   /* ----------------- uadd8<c> <Rd>,<Rn>,<Rm> ---------------- */
9565   {
9566     UInt regD = 99, regN = 99, regM = 99;
9567     Bool gate = False;
9568
9569     if (isT) {
9570        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9571           regN = INSNT0(3,0);
9572           regD = INSNT1(11,8);
9573           regM = INSNT1(3,0);
9574           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9575              gate = True;
9576        }
9577     } else {
9578        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9579            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9580            (INSNA(7,4)  == BITS4(1,0,0,1))) {
9581           regD = INSNA(15,12);
9582           regN = INSNA(19,16);
9583           regM = INSNA(3,0);
9584           if (regD != 15 && regN != 15 && regM != 15)
9585              gate = True;
9586        }
9587     }
9588
9589     if (gate) {
9590        IRTemp rNt  = newTemp(Ity_I32);
9591        IRTemp rMt  = newTemp(Ity_I32);
9592        IRTemp res  = newTemp(Ity_I32);
9593        IRTemp reso = newTemp(Ity_I32);
9594
9595        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9596        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9597
9598        assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
9599        if (isT)
9600           putIRegT( regD, mkexpr(res), condT );
9601        else
9602           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9603
9604        assign(reso, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9605        set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9606
9607        DIP("uadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9608        return True;
9609     }
9610     /* fall through */
9611   }
9612
9613   /* ------------------- sadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
9614   {
9615     UInt regD = 99, regN = 99, regM = 99;
9616     Bool gate = False;
9617
9618     if (isT) {
9619        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9620           regN = INSNT0(3,0);
9621           regD = INSNT1(11,8);
9622           regM = INSNT1(3,0);
9623           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9624              gate = True;
9625        }
9626     } else {
9627        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9628            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9629            (INSNA(7,4)  == BITS4(1,0,0,1))) {
9630           regD = INSNA(15,12);
9631           regN = INSNA(19,16);
9632           regM = INSNA(3,0);
9633           if (regD != 15 && regN != 15 && regM != 15)
9634              gate = True;
9635        }
9636     }
9637
9638     if (gate) {
9639        IRTemp rNt  = newTemp(Ity_I32);
9640        IRTemp rMt  = newTemp(Ity_I32);
9641        IRTemp res  = newTemp(Ity_I32);
9642        IRTemp reso = newTemp(Ity_I32);
9643
9644        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9645        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9646
9647        assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
9648        if (isT)
9649           putIRegT( regD, mkexpr(res), condT );
9650        else
9651           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9652
9653        assign(reso, unop(Iop_Not32,
9654                          binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt))));
9655        set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9656
9657        DIP("sadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9658        return True;
9659     }
9660     /* fall through */
9661   }
9662
9663   /* ------------------- usub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9664   {
9665     UInt regD = 99, regN = 99, regM = 99;
9666     Bool gate = False;
9667
9668     if (isT) {
9669        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9670           regN = INSNT0(3,0);
9671           regD = INSNT1(11,8);
9672           regM = INSNT1(3,0);
9673           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9674              gate = True;
9675        }
9676     } else {
9677        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9678            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9679            (INSNA(7,4)  == BITS4(1,1,1,1))) {
9680           regD = INSNA(15,12);
9681           regN = INSNA(19,16);
9682           regM = INSNA(3,0);
9683           if (regD != 15 && regN != 15 && regM != 15)
9684             gate = True;
9685        }
9686     }
9687
9688     if (gate) {
9689        IRTemp rNt  = newTemp(Ity_I32);
9690        IRTemp rMt  = newTemp(Ity_I32);
9691        IRTemp res  = newTemp(Ity_I32);
9692        IRTemp reso = newTemp(Ity_I32);
9693
9694        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9695        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9696
9697        assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
9698        if (isT)
9699           putIRegT( regD, mkexpr(res), condT );
9700        else
9701           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9702
9703        assign(reso, unop(Iop_Not32,
9704                          binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt))));
9705        set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9706
9707        DIP("usub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9708        return True;
9709     }
9710     /* fall through */
9711   }
9712
9713   /* ------------------- ssub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9714   {
9715     UInt regD = 99, regN = 99, regM = 99;
9716     Bool gate = False;
9717
9718     if (isT) {
9719        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9720           regN = INSNT0(3,0);
9721           regD = INSNT1(11,8);
9722           regM = INSNT1(3,0);
9723           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9724              gate = True;
9725        }
9726     } else {
9727        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9728            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9729            INSNA(7,4)   == BITS4(1,1,1,1)) {
9730           regD = INSNA(15,12);
9731           regN = INSNA(19,16);
9732           regM = INSNA(3,0);
9733           if (regD != 15 && regN != 15 && regM != 15)
9734              gate = True;
9735        }
9736     }
9737
9738     if (gate) {
9739        IRTemp rNt  = newTemp(Ity_I32);
9740        IRTemp rMt  = newTemp(Ity_I32);
9741        IRTemp res  = newTemp(Ity_I32);
9742        IRTemp reso = newTemp(Ity_I32);
9743
9744        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9745        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9746
9747        assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
9748        if (isT)
9749           putIRegT( regD, mkexpr(res), condT );
9750        else
9751           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9752
9753        assign(reso, unop(Iop_Not32,
9754                          binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt))));
9755        set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9756
9757        DIP("ssub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9758        return True;
9759     }
9760     /* fall through */
9761   }
9762
9763   /* ------------------ qadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
9764   {
9765     UInt regD = 99, regN = 99, regM = 99;
9766     Bool gate = False;
9767
9768     if (isT) {
9769        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9770           regN = INSNT0(3,0);
9771           regD = INSNT1(11,8);
9772           regM = INSNT1(3,0);
9773           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9774              gate = True;
9775        }
9776     } else {
9777        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9778            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9779            INSNA(7,4)   == BITS4(1,0,0,1)) {
9780           regD = INSNA(15,12);
9781           regN = INSNA(19,16);
9782           regM = INSNA(3,0);
9783           if (regD != 15 && regN != 15 && regM != 15)
9784              gate = True;
9785        }
9786     }
9787
9788     if (gate) {
9789        IRTemp rNt   = newTemp(Ity_I32);
9790        IRTemp rMt   = newTemp(Ity_I32);
9791        IRTemp res_q = newTemp(Ity_I32);
9792
9793        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9794        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9795
9796        assign(res_q, binop(Iop_QAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
9797        if (isT)
9798           putIRegT( regD, mkexpr(res_q), condT );
9799        else
9800           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9801
9802        DIP("qadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9803        return True;
9804     }
9805     /* fall through */
9806   }
9807
9808   /* ------------------ qsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
9809   {
9810     UInt regD = 99, regN = 99, regM = 99;
9811     Bool gate = False;
9812
9813     if (isT) {
9814        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9815           regN = INSNT0(3,0);
9816           regD = INSNT1(11,8);
9817           regM = INSNT1(3,0);
9818           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9819              gate = True;
9820        }
9821     } else {
9822        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9823            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9824            INSNA(7,4)   == BITS4(1,1,1,1)) {
9825           regD = INSNA(15,12);
9826           regN = INSNA(19,16);
9827           regM = INSNA(3,0);
9828           if (regD != 15 && regN != 15 && regM != 15)
9829              gate = True;
9830        }
9831     }
9832
9833     if (gate) {
9834        IRTemp rNt   = newTemp(Ity_I32);
9835        IRTemp rMt   = newTemp(Ity_I32);
9836        IRTemp res_q = newTemp(Ity_I32);
9837
9838        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9839        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9840
9841        assign(res_q, binop(Iop_QSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
9842        if (isT)
9843           putIRegT( regD, mkexpr(res_q), condT );
9844        else
9845           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9846
9847        DIP("qsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9848        return True;
9849     }
9850     /* fall through */
9851   }
9852
9853   /* ------------------ uqadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
9854   {
9855     UInt regD = 99, regN = 99, regM = 99;
9856     Bool gate = False;
9857
9858     if (isT) {
9859        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
9860           regN = INSNT0(3,0);
9861           regD = INSNT1(11,8);
9862           regM = INSNT1(3,0);
9863           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9864              gate = True;
9865        }
9866     } else {
9867        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
9868            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9869            (INSNA(7,4)  == BITS4(1,0,0,1))) {
9870           regD = INSNA(15,12);
9871           regN = INSNA(19,16);
9872           regM = INSNA(3,0);
9873           if (regD != 15 && regN != 15 && regM != 15)
9874              gate = True;
9875        }
9876     }
9877
9878     if (gate) {
9879        IRTemp rNt   = newTemp(Ity_I32);
9880        IRTemp rMt   = newTemp(Ity_I32);
9881        IRTemp res_q = newTemp(Ity_I32);
9882
9883        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9884        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9885
9886        assign(res_q, binop(Iop_QAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9887        if (isT)
9888           putIRegT( regD, mkexpr(res_q), condT );
9889        else
9890           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9891
9892        DIP("uqadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9893        return True;
9894     }
9895     /* fall through */
9896   }
9897
9898   /* ------------------ uqsub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9899   {
9900     UInt regD = 99, regN = 99, regM = 99;
9901     Bool gate = False;
9902
9903     if (isT) {
9904        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
9905           regN = INSNT0(3,0);
9906           regD = INSNT1(11,8);
9907           regM = INSNT1(3,0);
9908           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9909              gate = True;
9910        }
9911     } else {
9912        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
9913            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9914            (INSNA(7,4)  == BITS4(1,1,1,1))) {
9915           regD = INSNA(15,12);
9916           regN = INSNA(19,16);
9917           regM = INSNA(3,0);
9918           if (regD != 15 && regN != 15 && regM != 15)
9919             gate = True;
9920        }
9921     }
9922
9923     if (gate) {
9924        IRTemp rNt   = newTemp(Ity_I32);
9925        IRTemp rMt   = newTemp(Ity_I32);
9926        IRTemp res_q = newTemp(Ity_I32);
9927
9928        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9929        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9930
9931        assign(res_q, binop(Iop_QSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
9932        if (isT)
9933           putIRegT( regD, mkexpr(res_q), condT );
9934        else
9935           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9936
9937        DIP("uqsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9938        return True;
9939     }
9940     /* fall through */
9941   }
9942
9943   /* ----------------- uhadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
9944   {
9945     UInt regD = 99, regN = 99, regM = 99;
9946     Bool gate = False;
9947
9948     if (isT) {
9949        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
9950           regN = INSNT0(3,0);
9951           regD = INSNT1(11,8);
9952           regM = INSNT1(3,0);
9953           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9954              gate = True;
9955        }
9956     } else {
9957        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
9958            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9959            INSNA(7,4)   == BITS4(1,0,0,1)) {
9960           regD = INSNA(15,12);
9961           regN = INSNA(19,16);
9962           regM = INSNA(3,0);
9963           if (regD != 15 && regN != 15 && regM != 15)
9964              gate = True;
9965        }
9966     }
9967
9968     if (gate) {
9969        IRTemp rNt   = newTemp(Ity_I32);
9970        IRTemp rMt   = newTemp(Ity_I32);
9971        IRTemp res_q = newTemp(Ity_I32);
9972
9973        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9974        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9975
9976        assign(res_q, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9977        if (isT)
9978           putIRegT( regD, mkexpr(res_q), condT );
9979        else
9980           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9981
9982        DIP("uhadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9983        return True;
9984     }
9985     /* fall through */
9986   }
9987
9988   /* ----------------- uhadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
9989   {
9990     UInt regD = 99, regN = 99, regM = 99;
9991     Bool gate = False;
9992
9993     if (isT) {
9994        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
9995           regN = INSNT0(3,0);
9996           regD = INSNT1(11,8);
9997           regM = INSNT1(3,0);
9998           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9999              gate = True;
10000        }
10001     } else {
10002        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
10003            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10004            INSNA(7,4)   == BITS4(0,0,0,1)) {
10005           regD = INSNA(15,12);
10006           regN = INSNA(19,16);
10007           regM = INSNA(3,0);
10008           if (regD != 15 && regN != 15 && regM != 15)
10009              gate = True;
10010        }
10011     }
10012
10013     if (gate) {
10014        IRTemp rNt   = newTemp(Ity_I32);
10015        IRTemp rMt   = newTemp(Ity_I32);
10016        IRTemp res_q = newTemp(Ity_I32);
10017
10018        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10019        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10020
10021        assign(res_q, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
10022        if (isT)
10023           putIRegT( regD, mkexpr(res_q), condT );
10024        else
10025           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10026
10027        DIP("uhadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10028        return True;
10029     }
10030     /* fall through */
10031   }
10032
10033   /* ----------------- shadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
10034   {
10035     UInt regD = 99, regN = 99, regM = 99;
10036     Bool gate = False;
10037
10038     if (isT) {
10039        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
10040           regN = INSNT0(3,0);
10041           regD = INSNT1(11,8);
10042           regM = INSNT1(3,0);
10043           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10044              gate = True;
10045        }
10046     } else {
10047        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
10048            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10049            INSNA(7,4)   == BITS4(1,0,0,1)) {
10050           regD = INSNA(15,12);
10051           regN = INSNA(19,16);
10052           regM = INSNA(3,0);
10053           if (regD != 15 && regN != 15 && regM != 15)
10054              gate = True;
10055        }
10056     }
10057
10058     if (gate) {
10059        IRTemp rNt   = newTemp(Ity_I32);
10060        IRTemp rMt   = newTemp(Ity_I32);
10061        IRTemp res_q = newTemp(Ity_I32);
10062
10063        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10064        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10065
10066        assign(res_q, binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
10067        if (isT)
10068           putIRegT( regD, mkexpr(res_q), condT );
10069        else
10070           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10071
10072        DIP("shadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10073        return True;
10074     }
10075     /* fall through */
10076   }
10077
10078   /* ------------------ qadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
10079   {
10080     UInt regD = 99, regN = 99, regM = 99;
10081     Bool gate = False;
10082
10083     if (isT) {
10084        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10085           regN = INSNT0(3,0);
10086           regD = INSNT1(11,8);
10087           regM = INSNT1(3,0);
10088           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10089              gate = True;
10090        }
10091     } else {
10092        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10093            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10094            INSNA(7,4)   == BITS4(0,0,0,1)) {
10095           regD = INSNA(15,12);
10096           regN = INSNA(19,16);
10097           regM = INSNA(3,0);
10098           if (regD != 15 && regN != 15 && regM != 15)
10099              gate = True;
10100        }
10101     }
10102
10103     if (gate) {
10104        IRTemp rNt   = newTemp(Ity_I32);
10105        IRTemp rMt   = newTemp(Ity_I32);
10106        IRTemp res_q = newTemp(Ity_I32);
10107
10108        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10109        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10110
10111        assign(res_q, binop(Iop_QAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
10112        if (isT)
10113           putIRegT( regD, mkexpr(res_q), condT );
10114        else
10115           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10116
10117        DIP("qadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10118        return True;
10119     }
10120     /* fall through */
10121   }
10122
10123   /* ------------------ qsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
10124   {
10125     UInt regD = 99, regN = 99, regM = 99;
10126     Bool gate = False;
10127
10128      if (isT) {
10129        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10130           regN = INSNT0(3,0);
10131           regD = INSNT1(11,8);
10132           regM = INSNT1(3,0);
10133           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10134              gate = True;
10135        }
10136     } else {
10137        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10138            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10139            INSNA(7,4)   == BITS4(0,1,1,1)) {
10140           regD = INSNA(15,12);
10141           regN = INSNA(19,16);
10142           regM = INSNA(3,0);
10143           if (regD != 15 && regN != 15 && regM != 15)
10144             gate = True;
10145        }
10146     }
10147
10148     if (gate) {
10149        IRTemp rNt   = newTemp(Ity_I32);
10150        IRTemp rMt   = newTemp(Ity_I32);
10151        IRTemp res_q = newTemp(Ity_I32);
10152
10153        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10154        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10155
10156        assign(res_q, binop(Iop_QSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
10157        if (isT)
10158           putIRegT( regD, mkexpr(res_q), condT );
10159        else
10160           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10161
10162        DIP("qsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10163        return True;
10164     }
10165     /* fall through */
10166   }
10167
10168   /* ------------------- qsax<c> <Rd>,<Rn>,<Rm> ------------------- */
10169   /* note: the hardware seems to construct the result differently
10170      from wot the manual says. */
10171   {
10172     UInt regD = 99, regN = 99, regM = 99;
10173     Bool gate = False;
10174
10175     if (isT) {
10176        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10177           regN = INSNT0(3,0);
10178           regD = INSNT1(11,8);
10179           regM = INSNT1(3,0);
10180           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10181              gate = True;
10182        }
10183     } else {
10184        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10185            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10186            INSNA(7,4)   == BITS4(0,1,0,1)) {
10187           regD = INSNA(15,12);
10188           regN = INSNA(19,16);
10189           regM = INSNA(3,0);
10190           if (regD != 15 && regN != 15 && regM != 15)
10191              gate = True;
10192        }
10193     }
10194
10195     if (gate) {
10196        IRTemp irt_regN     = newTemp(Ity_I32);
10197        IRTemp irt_regM     = newTemp(Ity_I32);
10198        IRTemp irt_sum      = newTemp(Ity_I32);
10199        IRTemp irt_diff     = newTemp(Ity_I32);
10200        IRTemp irt_sum_res  = newTemp(Ity_I32);
10201        IRTemp irt_diff_res = newTemp(Ity_I32);
10202
10203        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10204        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10205
10206        assign( irt_diff,
10207                binop( Iop_Sub32,
10208                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10209                       binop( Iop_Sar32,
10210                              binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
10211                              mkU8(16) ) ) );
10212        armSignedSatQ( irt_diff, 0x10, &irt_diff_res, NULL);
10213
10214        assign( irt_sum,
10215                binop( Iop_Add32,
10216                       binop( Iop_Sar32,
10217                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10218                              mkU8(16) ),
10219                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) )) );
10220        armSignedSatQ( irt_sum, 0x10, &irt_sum_res, NULL );
10221
10222        IRExpr* ire_result = binop( Iop_Or32,
10223                                    binop( Iop_Shl32, mkexpr(irt_diff_res),
10224                                           mkU8(16) ),
10225                                    binop( Iop_And32, mkexpr(irt_sum_res),
10226                                           mkU32(0xFFFF)) );
10227
10228        if (isT)
10229           putIRegT( regD, ire_result, condT );
10230        else
10231           putIRegA( regD, ire_result, condT, Ijk_Boring );
10232
10233        DIP( "qsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10234        return True;
10235     }
10236     /* fall through */
10237   }
10238
10239   /* ------------------- qasx<c> <Rd>,<Rn>,<Rm> ------------------- */
10240   {
10241     UInt regD = 99, regN = 99, regM = 99;
10242     Bool gate = False;
10243
10244     if (isT) {
10245        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10246           regN = INSNT0(3,0);
10247           regD = INSNT1(11,8);
10248           regM = INSNT1(3,0);
10249           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10250              gate = True;
10251        }
10252     } else {
10253        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10254            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10255            INSNA(7,4)   == BITS4(0,0,1,1)) {
10256           regD = INSNA(15,12);
10257           regN = INSNA(19,16);
10258           regM = INSNA(3,0);
10259           if (regD != 15 && regN != 15 && regM != 15)
10260              gate = True;
10261        }
10262     }
10263
10264     if (gate) {
10265        IRTemp irt_regN     = newTemp(Ity_I32);
10266        IRTemp irt_regM     = newTemp(Ity_I32);
10267        IRTemp irt_sum      = newTemp(Ity_I32);
10268        IRTemp irt_diff     = newTemp(Ity_I32);
10269        IRTemp irt_res_sum  = newTemp(Ity_I32);
10270        IRTemp irt_res_diff = newTemp(Ity_I32);
10271
10272        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10273        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10274
10275        assign( irt_diff,
10276                binop( Iop_Sub32,
10277                       binop( Iop_Sar32,
10278                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10279                              mkU8(16) ),
10280                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10281        armSignedSatQ( irt_diff, 0x10, &irt_res_diff, NULL );
10282
10283        assign( irt_sum,
10284                binop( Iop_Add32,
10285                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10286                       binop( Iop_Sar32,
10287                              binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10288                              mkU8(16) ) ) );
10289        armSignedSatQ( irt_sum, 0x10, &irt_res_sum, NULL );
10290
10291        IRExpr* ire_result
10292          = binop( Iop_Or32,
10293                   binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
10294                   binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
10295
10296        if (isT)
10297           putIRegT( regD, ire_result, condT );
10298        else
10299           putIRegA( regD, ire_result, condT, Ijk_Boring );
10300
10301        DIP( "qasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10302        return True;
10303     }
10304     /* fall through */
10305   }
10306
10307   /* ------------------- sasx<c> <Rd>,<Rn>,<Rm> ------------------- */
10308   {
10309     UInt regD = 99, regN = 99, regM = 99;
10310     Bool gate = False;
10311
10312     if (isT) {
10313        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
10314           regN = INSNT0(3,0);
10315           regD = INSNT1(11,8);
10316           regM = INSNT1(3,0);
10317           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10318              gate = True;
10319        }
10320     } else {
10321        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
10322            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10323            INSNA(7,4)   == BITS4(0,0,1,1)) {
10324           regD = INSNA(15,12);
10325           regN = INSNA(19,16);
10326           regM = INSNA(3,0);
10327           if (regD != 15 && regN != 15 && regM != 15)
10328              gate = True;
10329        }
10330     }
10331
10332     if (gate) {
10333        IRTemp irt_regN = newTemp(Ity_I32);
10334        IRTemp irt_regM = newTemp(Ity_I32);
10335        IRTemp irt_sum  = newTemp(Ity_I32);
10336        IRTemp irt_diff = newTemp(Ity_I32);
10337
10338        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10339        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10340
10341        assign( irt_diff,
10342                binop( Iop_Sub32,
10343                       binop( Iop_Sar32,
10344                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10345                              mkU8(16) ),
10346                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10347
10348        assign( irt_sum,
10349                binop( Iop_Add32,
10350                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10351                       binop( Iop_Sar32,
10352                              binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10353                              mkU8(16) ) ) );
10354
10355        IRExpr* ire_result
10356          = binop( Iop_Or32,
10357                   binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
10358                   binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
10359
10360        IRTemp ge10 = newTemp(Ity_I32);
10361        assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
10362        put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
10363        put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
10364
10365        IRTemp ge32 = newTemp(Ity_I32);
10366        assign(ge32, unop(Iop_Not32, mkexpr(irt_sum)));
10367        put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
10368        put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
10369
10370        if (isT)
10371           putIRegT( regD, ire_result, condT );
10372        else
10373           putIRegA( regD, ire_result, condT, Ijk_Boring );
10374
10375        DIP( "sasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10376        return True;
10377     }
10378     /* fall through */
10379   }
10380
10381   /* --------------- smuad, smuadx<c><Rd>,<Rn>,<Rm> --------------- */
   /* --------------- smusd, smusdx<c><Rd>,<Rn>,<Rm> --------------- */
10383   {
10384     UInt regD = 99, regN = 99, regM = 99, bitM = 99;
10385     Bool gate = False, isAD = False;
10386
10387     if (isT) {
10388        if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
10389            && (INSNT1(15,0) & 0xF0E0) == 0xF000) {
10390           regN = INSNT0(3,0);
10391           regD = INSNT1(11,8);
10392           regM = INSNT1(3,0);
10393           bitM = INSNT1(4,4);
10394           isAD = INSNT0(15,4) == 0xFB2;
10395           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10396              gate = True;
10397        }
10398     } else {
10399        if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
10400            INSNA(15,12) == BITS4(1,1,1,1)         &&
10401            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1) ) {
10402           regD = INSNA(19,16);
10403           regN = INSNA(3,0);
10404           regM = INSNA(11,8);
10405           bitM = INSNA(5,5);
10406           isAD = INSNA(6,6) == 0;
10407           if (regD != 15 && regN != 15 && regM != 15)
10408              gate = True;
10409        }
10410     }
10411
10412     if (gate) {
10413        IRTemp irt_regN    = newTemp(Ity_I32);
10414        IRTemp irt_regM    = newTemp(Ity_I32);
10415        IRTemp irt_prod_lo = newTemp(Ity_I32);
10416        IRTemp irt_prod_hi = newTemp(Ity_I32);
10417        IRTemp tmpM        = newTemp(Ity_I32);
10418
10419        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10420
10421        assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
10422        assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
10423
10424        assign( irt_prod_lo,
10425                binop( Iop_Mul32,
10426                       binop( Iop_Sar32,
10427                              binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
10428                              mkU8(16) ),
10429                       binop( Iop_Sar32,
10430                              binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
10431                              mkU8(16) ) ) );
10432        assign( irt_prod_hi, binop(Iop_Mul32,
10433                                   binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)),
10434                                   binop(Iop_Sar32, mkexpr(irt_regM), mkU8(16))) );
10435        IRExpr* ire_result
10436           = binop( isAD ? Iop_Add32 : Iop_Sub32,
10437                    mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) );
10438
10439        if (isT)
10440           putIRegT( regD, ire_result, condT );
10441        else
10442           putIRegA( regD, ire_result, condT, Ijk_Boring );
10443
10444        if (isAD) {
10445           or_into_QFLAG32(
10446              signed_overflow_after_Add32( ire_result,
10447                                           irt_prod_lo, irt_prod_hi ),
10448              condT
10449           );
10450        }
10451
10452        DIP("smu%cd%s%s r%u, r%u, r%u\n",
10453            isAD ? 'a' : 's',
10454            bitM ? "x" : "", nCC(conq), regD, regN, regM);
10455        return True;
10456     }
10457     /* fall through */
10458   }
10459
10460   /* --------------- smlad{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
10461   /* --------------- smlsd{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
10462   {
10463     UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
10464     Bool gate = False, isAD = False;
10465
10466     if (isT) {
10467       if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
10468           && INSNT1(7,5) == BITS3(0,0,0)) {
10469           regN = INSNT0(3,0);
10470           regD = INSNT1(11,8);
10471           regM = INSNT1(3,0);
10472           regA = INSNT1(15,12);
10473           bitM = INSNT1(4,4);
10474           isAD = INSNT0(15,4) == 0xFB2;
10475           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10476               && !isBadRegT(regA))
10477              gate = True;
10478        }
10479     } else {
10480        if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
10481            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
10482           regD = INSNA(19,16);
10483           regA = INSNA(15,12);
10484           regN = INSNA(3,0);
10485           regM = INSNA(11,8);
10486           bitM = INSNA(5,5);
10487           isAD = INSNA(6,6) == 0;
10488           if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10489              gate = True;
10490        }
10491     }
10492
10493     if (gate) {
10494        IRTemp irt_regN    = newTemp(Ity_I32);
10495        IRTemp irt_regM    = newTemp(Ity_I32);
10496        IRTemp irt_regA    = newTemp(Ity_I32);
10497        IRTemp irt_prod_lo = newTemp(Ity_I32);
10498        IRTemp irt_prod_hi = newTemp(Ity_I32);
10499        IRTemp irt_sum     = newTemp(Ity_I32);
10500        IRTemp tmpM        = newTemp(Ity_I32);
10501
10502        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10503        assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10504
10505        assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
10506        assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
10507
10508        assign( irt_prod_lo,
10509                binop(Iop_Mul32,
10510                      binop(Iop_Sar32,
10511                            binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10512                            mkU8(16)),
10513                      binop(Iop_Sar32,
10514                            binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10515                            mkU8(16))) );
10516        assign( irt_prod_hi,
10517                binop( Iop_Mul32,
10518                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10519                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10520        assign( irt_sum, binop( isAD ? Iop_Add32 : Iop_Sub32,
10521                                mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) ) );
10522
10523        IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_sum), mkexpr(irt_regA));
10524
10525        if (isT)
10526           putIRegT( regD, ire_result, condT );
10527        else
10528           putIRegA( regD, ire_result, condT, Ijk_Boring );
10529
10530        if (isAD) {
10531           or_into_QFLAG32(
10532              signed_overflow_after_Add32( mkexpr(irt_sum),
10533                                           irt_prod_lo, irt_prod_hi ),
10534              condT
10535           );
10536        }
10537
10538        or_into_QFLAG32(
10539           signed_overflow_after_Add32( ire_result, irt_sum, irt_regA ),
10540           condT
10541        );
10542
10543        DIP("sml%cd%s%s r%u, r%u, r%u, r%u\n",
10544            isAD ? 'a' : 's',
10545            bitM ? "x" : "", nCC(conq), regD, regN, regM, regA);
10546        return True;
10547     }
10548     /* fall through */
10549   }
10550
10551   /* ----- smlabb, smlabt, smlatb, smlatt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10552   {
10553     UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99, bitN = 99;
10554     Bool gate = False;
10555
10556     if (isT) {
10557        if (INSNT0(15,4) == 0xFB1 && INSNT1(7,6) == BITS2(0,0)) {
10558           regN = INSNT0(3,0);
10559           regD = INSNT1(11,8);
10560           regM = INSNT1(3,0);
10561           regA = INSNT1(15,12);
10562           bitM = INSNT1(4,4);
10563           bitN = INSNT1(5,5);
10564           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10565               && !isBadRegT(regA))
10566              gate = True;
10567        }
10568     } else {
10569        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
10570            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
10571           regD = INSNA(19,16);
10572           regN = INSNA(3,0);
10573           regM = INSNA(11,8);
10574           regA = INSNA(15,12);
10575           bitM = INSNA(6,6);
10576           bitN = INSNA(5,5);
10577           if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10578              gate = True;
10579        }
10580     }
10581
10582     if (gate) {
10583        IRTemp irt_regA = newTemp(Ity_I32);
10584        IRTemp irt_prod = newTemp(Ity_I32);
10585
10586        assign( irt_prod,
10587                binop(Iop_Mul32,
10588                      binop(Iop_Sar32,
10589                            binop(Iop_Shl32,
10590                                  isT ? getIRegT(regN) : getIRegA(regN),
10591                                  mkU8(bitN ? 0 : 16)),
10592                            mkU8(16)),
10593                      binop(Iop_Sar32,
10594                            binop(Iop_Shl32,
10595                                  isT ? getIRegT(regM) : getIRegA(regM),
10596                                  mkU8(bitM ? 0 : 16)),
10597                            mkU8(16))) );
10598
10599        assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10600
10601        IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_prod), mkexpr(irt_regA));
10602
10603        if (isT)
10604           putIRegT( regD, ire_result, condT );
10605        else
10606           putIRegA( regD, ire_result, condT, Ijk_Boring );
10607
10608        or_into_QFLAG32(
10609           signed_overflow_after_Add32( ire_result, irt_prod, irt_regA ),
10610           condT
10611        );
10612
10613        DIP( "smla%c%c%s r%u, r%u, r%u, r%u\n",
10614             bitN ? 't' : 'b', bitM ? 't' : 'b',
10615             nCC(conq), regD, regN, regM, regA );
10616        return True;
10617     }
10618     /* fall through */
10619   }
10620
10621   /* ----- smlalbb, smlalbt, smlaltb, smlaltt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10622   {
10623     UInt regDHi = 99, regN = 99, regM = 99, regDLo = 99, bitM = 99, bitN = 99;
10624     Bool gate = False;
10625
10626     if (isT) {
10627        if (INSNT0(15,4) == 0xFBC && INSNT1(7,6) == BITS2(1,0)) {
10628           regN   = INSNT0(3,0);
10629           regDHi = INSNT1(11,8);
10630           regM   = INSNT1(3,0);
10631           regDLo = INSNT1(15,12);
10632           bitM   = INSNT1(4,4);
10633           bitN   = INSNT1(5,5);
10634           if (!isBadRegT(regDHi) && !isBadRegT(regN) && !isBadRegT(regM)
10635               && !isBadRegT(regDLo) && regDHi != regDLo)
10636              gate = True;
10637        }
10638     } else {
10639        if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
10640            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
10641           regDHi = INSNA(19,16);
10642           regN   = INSNA(3,0);
10643           regM   = INSNA(11,8);
10644           regDLo = INSNA(15,12);
10645           bitM   = INSNA(6,6);
10646           bitN   = INSNA(5,5);
10647           if (regDHi != 15 && regN != 15 && regM != 15 && regDLo != 15 &&
10648               regDHi != regDLo)
10649              gate = True;
10650        }
10651     }
10652
10653     if (gate) {
10654        IRTemp irt_regD  = newTemp(Ity_I64);
10655        IRTemp irt_prod  = newTemp(Ity_I64);
10656        IRTemp irt_res   = newTemp(Ity_I64);
10657        IRTemp irt_resHi = newTemp(Ity_I32);
10658        IRTemp irt_resLo = newTemp(Ity_I32);
10659
10660        assign( irt_prod,
10661                binop(Iop_MullS32,
10662                      binop(Iop_Sar32,
10663                            binop(Iop_Shl32,
10664                                  isT ? getIRegT(regN) : getIRegA(regN),
10665                                  mkU8(bitN ? 0 : 16)),
10666                            mkU8(16)),
10667                      binop(Iop_Sar32,
10668                            binop(Iop_Shl32,
10669                                  isT ? getIRegT(regM) : getIRegA(regM),
10670                                  mkU8(bitM ? 0 : 16)),
10671                            mkU8(16))) );
10672
10673        assign( irt_regD, binop(Iop_32HLto64,
10674                                isT ? getIRegT(regDHi) : getIRegA(regDHi),
10675                                isT ? getIRegT(regDLo) : getIRegA(regDLo)) );
10676        assign( irt_res, binop(Iop_Add64, mkexpr(irt_regD), mkexpr(irt_prod)) );
10677        assign( irt_resHi, unop(Iop_64HIto32, mkexpr(irt_res)) );
10678        assign( irt_resLo, unop(Iop_64to32, mkexpr(irt_res)) );
10679
10680        if (isT) {
10681           putIRegT( regDHi, mkexpr(irt_resHi), condT );
10682           putIRegT( regDLo, mkexpr(irt_resLo), condT );
10683        } else {
10684           putIRegA( regDHi, mkexpr(irt_resHi), condT, Ijk_Boring );
10685           putIRegA( regDLo, mkexpr(irt_resLo), condT, Ijk_Boring );
10686        }
10687
10688        DIP( "smlal%c%c%s r%u, r%u, r%u, r%u\n",
10689             bitN ? 't' : 'b', bitM ? 't' : 'b',
10690             nCC(conq), regDHi, regN, regM, regDLo );
10691        return True;
10692     }
10693     /* fall through */
10694   }
10695
10696   /* ----- smlawb, smlawt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10697   {
10698     UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
10699     Bool gate = False;
10700
10701     if (isT) {
10702        if (INSNT0(15,4) == 0xFB3 && INSNT1(7,5) == BITS3(0,0,0)) {
10703           regN = INSNT0(3,0);
10704           regD = INSNT1(11,8);
10705           regM = INSNT1(3,0);
10706           regA = INSNT1(15,12);
10707           bitM = INSNT1(4,4);
10708           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10709               && !isBadRegT(regA))
10710              gate = True;
10711        }
10712     } else {
10713        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
10714            (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,0,0)) {
10715           regD = INSNA(19,16);
10716           regN = INSNA(3,0);
10717           regM = INSNA(11,8);
10718           regA = INSNA(15,12);
10719           bitM = INSNA(6,6);
10720           if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10721              gate = True;
10722        }
10723     }
10724
10725     if (gate) {
10726        IRTemp irt_regA = newTemp(Ity_I32);
10727        IRTemp irt_prod = newTemp(Ity_I64);
10728
10729        assign( irt_prod,
10730                binop(Iop_MullS32,
10731                      isT ? getIRegT(regN) : getIRegA(regN),
10732                      binop(Iop_Sar32,
10733                            binop(Iop_Shl32,
10734                                  isT ? getIRegT(regM) : getIRegA(regM),
10735                                  mkU8(bitM ? 0 : 16)),
10736                            mkU8(16))) );
10737
10738        assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10739
10740        IRTemp prod32 = newTemp(Ity_I32);
10741        assign(prod32,
10742               binop(Iop_Or32,
10743                     binop(Iop_Shl32, unop(Iop_64HIto32, mkexpr(irt_prod)), mkU8(16)),
10744                     binop(Iop_Shr32, unop(Iop_64to32, mkexpr(irt_prod)), mkU8(16))
10745        ));
10746
10747        IRExpr* ire_result = binop(Iop_Add32, mkexpr(prod32), mkexpr(irt_regA));
10748
10749        if (isT)
10750           putIRegT( regD, ire_result, condT );
10751        else
10752           putIRegA( regD, ire_result, condT, Ijk_Boring );
10753
10754        or_into_QFLAG32(
10755           signed_overflow_after_Add32( ire_result, prod32, irt_regA ),
10756           condT
10757        );
10758
10759        DIP( "smlaw%c%s r%u, r%u, r%u, r%u\n",
10760             bitM ? 't' : 'b',
10761             nCC(conq), regD, regN, regM, regA );
10762        return True;
10763     }
10764     /* fall through */
10765   }
10766
10767   /* ------------------- sel<c> <Rd>,<Rn>,<Rm> -------------------- */
10768   /* fixme: fix up the test in v6media.c so that we can pass the ge
10769      flags as part of the test. */
10770   {
10771     UInt regD = 99, regN = 99, regM = 99;
10772     Bool gate = False;
10773
10774     if (isT) {
10775        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
10776           regN = INSNT0(3,0);
10777           regD = INSNT1(11,8);
10778           regM = INSNT1(3,0);
10779           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10780              gate = True;
10781        }
10782     } else {
10783        if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
10784            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10785            INSNA(7,4)   == BITS4(1,0,1,1)) {
10786           regD = INSNA(15,12);
10787           regN = INSNA(19,16);
10788           regM = INSNA(3,0);
10789           if (regD != 15 && regN != 15 && regM != 15)
10790              gate = True;
10791        }
10792     }
10793
10794     if (gate) {
10795        IRTemp irt_ge_flag0 = newTemp(Ity_I32);
10796        IRTemp irt_ge_flag1 = newTemp(Ity_I32);
10797        IRTemp irt_ge_flag2 = newTemp(Ity_I32);
10798        IRTemp irt_ge_flag3 = newTemp(Ity_I32);
10799
10800        assign( irt_ge_flag0, get_GEFLAG32(0) );
10801        assign( irt_ge_flag1, get_GEFLAG32(1) );
10802        assign( irt_ge_flag2, get_GEFLAG32(2) );
10803        assign( irt_ge_flag3, get_GEFLAG32(3) );
10804
10805        IRExpr* ire_ge_flag0_or
10806          = binop(Iop_Or32, mkexpr(irt_ge_flag0),
10807                  binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag0)));
10808        IRExpr* ire_ge_flag1_or
10809          = binop(Iop_Or32, mkexpr(irt_ge_flag1),
10810                  binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag1)));
10811        IRExpr* ire_ge_flag2_or
10812          = binop(Iop_Or32, mkexpr(irt_ge_flag2),
10813                  binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag2)));
10814        IRExpr* ire_ge_flag3_or
10815          = binop(Iop_Or32, mkexpr(irt_ge_flag3),
10816                  binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag3)));
10817
10818        IRExpr* ire_ge_flags
10819          = binop( Iop_Or32,
10820                   binop(Iop_Or32,
10821                         binop(Iop_And32,
10822                               binop(Iop_Sar32, ire_ge_flag0_or, mkU8(31)),
10823                               mkU32(0x000000ff)),
10824                         binop(Iop_And32,
10825                               binop(Iop_Sar32, ire_ge_flag1_or, mkU8(31)),
10826                               mkU32(0x0000ff00))),
10827                   binop(Iop_Or32,
10828                         binop(Iop_And32,
10829                               binop(Iop_Sar32, ire_ge_flag2_or, mkU8(31)),
10830                               mkU32(0x00ff0000)),
10831                         binop(Iop_And32,
10832                               binop(Iop_Sar32, ire_ge_flag3_or, mkU8(31)),
10833                               mkU32(0xff000000))) );
10834
10835        IRExpr* ire_result
10836          = binop(Iop_Or32,
10837                  binop(Iop_And32,
10838                        isT ? getIRegT(regN) : getIRegA(regN),
10839                        ire_ge_flags ),
10840                  binop(Iop_And32,
10841                        isT ? getIRegT(regM) : getIRegA(regM),
10842                        unop(Iop_Not32, ire_ge_flags)));
10843
10844        if (isT)
10845           putIRegT( regD, ire_result, condT );
10846        else
10847           putIRegA( regD, ire_result, condT, Ijk_Boring );
10848
10849        DIP("sel%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10850        return True;
10851     }
10852     /* fall through */
10853   }
10854
10855   /* ----------------- uxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
10856   {
10857     UInt regD = 99, regN = 99, regM = 99, rotate = 99;
10858     Bool gate = False;
10859
10860     if (isT) {
10861        if (INSNT0(15,4) == 0xFA3 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
10862           regN   = INSNT0(3,0);
10863           regD   = INSNT1(11,8);
10864           regM   = INSNT1(3,0);
10865           rotate = INSNT1(5,4);
10866           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10867              gate = True;
10868        }
10869     } else {
10870        if (INSNA(27,20) == BITS8(0,1,1,0,1,1,0,0) &&
10871            INSNA(9,4)   == BITS6(0,0,0,1,1,1) ) {
10872           regD   = INSNA(15,12);
10873           regN   = INSNA(19,16);
10874           regM   = INSNA(3,0);
10875           rotate = INSNA(11,10);
10876           if (regD != 15 && regN != 15 && regM != 15)
10877             gate = True;
10878        }
10879     }
10880
10881     if (gate) {
10882        IRTemp irt_regN = newTemp(Ity_I32);
10883        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10884
10885        IRTemp irt_regM = newTemp(Ity_I32);
10886        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10887
10888        IRTemp irt_rot = newTemp(Ity_I32);
10889        assign( irt_rot, binop(Iop_And32,
10890                               genROR32(irt_regM, 8 * rotate),
10891                               mkU32(0x00FF00FF)) );
10892
10893        IRExpr* resLo
10894           = binop(Iop_And32,
10895                   binop(Iop_Add32, mkexpr(irt_regN), mkexpr(irt_rot)),
10896                   mkU32(0x0000FFFF));
10897
10898        IRExpr* resHi
10899           = binop(Iop_Add32,
10900                   binop(Iop_And32, mkexpr(irt_regN), mkU32(0xFFFF0000)),
10901                   binop(Iop_And32, mkexpr(irt_rot),  mkU32(0xFFFF0000)));
10902
10903        IRExpr* ire_result
10904           = binop( Iop_Or32, resHi, resLo );
10905
10906        if (isT)
10907           putIRegT( regD, ire_result, condT );
10908        else
10909           putIRegA( regD, ire_result, condT, Ijk_Boring );
10910
10911        DIP( "uxtab16%s r%u, r%u, r%u, ROR #%u\n",
10912             nCC(conq), regD, regN, regM, 8 * rotate );
10913        return True;
10914     }
10915     /* fall through */
10916   }
10917
10918   /* --------------- usad8  Rd,Rn,Rm    ---------------- */
10919   /* --------------- usada8 Rd,Rn,Rm,Ra ---------------- */
10920   {
10921     UInt rD = 99, rN = 99, rM = 99, rA = 99;
10922     Bool gate = False;
10923
10924     if (isT) {
10925       if (INSNT0(15,4) == 0xFB7 && INSNT1(7,4) == BITS4(0,0,0,0)) {
10926           rN = INSNT0(3,0);
10927           rA = INSNT1(15,12);
10928           rD = INSNT1(11,8);
10929           rM = INSNT1(3,0);
10930           if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && rA != 13)
10931              gate = True;
10932        }
10933     } else {
10934        if (INSNA(27,20) == BITS8(0,1,1,1,1,0,0,0) &&
10935            INSNA(7,4)   == BITS4(0,0,0,1) ) {
10936           rD = INSNA(19,16);
10937           rA = INSNA(15,12);
10938           rM = INSNA(11,8);
10939           rN = INSNA(3,0);
10940           if (rD != 15 && rN != 15 && rM != 15 /* but rA can be 15 */)
10941              gate = True;
10942        }
10943     }
10944     /* We allow rA == 15, to denote the usad8 (no accumulator) case. */
10945
10946     if (gate) {
10947        IRExpr* rNe = isT ? getIRegT(rN) : getIRegA(rN);
10948        IRExpr* rMe = isT ? getIRegT(rM) : getIRegA(rM);
10949        IRExpr* rAe = rA == 15 ? mkU32(0)
10950                               : (isT ? getIRegT(rA) : getIRegA(rA));
10951        IRExpr* res = binop(Iop_Add32,
10952                            binop(Iop_Sad8Ux4, rNe, rMe),
10953                            rAe);
10954        if (isT)
10955           putIRegT( rD, res, condT );
10956        else
10957           putIRegA( rD, res, condT, Ijk_Boring );
10958
10959        if (rA == 15) {
10960           DIP( "usad8%s r%u, r%u, r%u\n",
10961                nCC(conq), rD, rN, rM );
10962        } else {
10963           DIP( "usada8%s r%u, r%u, r%u, r%u\n",
10964                nCC(conq), rD, rN, rM, rA );
10965        }
10966        return True;
10967     }
10968     /* fall through */
10969   }
10970
10971   /* ------------------ qadd<c> <Rd>,<Rn>,<Rm> ------------------- */
10972   {
10973     UInt regD = 99, regN = 99, regM = 99;
10974     Bool gate = False;
10975
10976     if (isT) {
10977        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
10978           regN = INSNT0(3,0);
10979           regD = INSNT1(11,8);
10980           regM = INSNT1(3,0);
10981           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10982              gate = True;
10983        }
10984     } else {
10985        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
10986            INSNA(11,8)  == BITS4(0,0,0,0)         &&
10987            INSNA(7,4)   == BITS4(0,1,0,1)) {
10988           regD = INSNA(15,12);
10989           regN = INSNA(19,16);
10990           regM = INSNA(3,0);
10991           if (regD != 15 && regN != 15 && regM != 15)
10992              gate = True;
10993        }
10994     }
10995
10996     if (gate) {
10997        IRTemp rNt   = newTemp(Ity_I32);
10998        IRTemp rMt   = newTemp(Ity_I32);
10999        IRTemp res_q = newTemp(Ity_I32);
11000
11001        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11002        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11003
11004        assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rNt)));
11005        if (isT)
11006           putIRegT( regD, mkexpr(res_q), condT );
11007        else
11008           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11009
11010        or_into_QFLAG32(
11011           signed_overflow_after_Add32(
11012              binop(Iop_Add32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
11013           condT
11014        );
11015
11016        DIP("qadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11017        return True;
11018     }
11019     /* fall through */
11020   }
11021
11022   /* ------------------ qdadd<c> <Rd>,<Rm>,<Rn> ------------------- */
11023   {
11024     UInt regD = 99, regN = 99, regM = 99;
11025     Bool gate = False;
11026
11027     if (isT) {
11028        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF090) {
11029           regN = INSNT0(3,0);
11030           regD = INSNT1(11,8);
11031           regM = INSNT1(3,0);
11032           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11033              gate = True;
11034        }
11035     } else {
11036        if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
11037            INSNA(11,8)  == BITS4(0,0,0,0)         &&
11038            INSNA(7,4)   == BITS4(0,1,0,1)) {
11039           regD = INSNA(15,12);
11040           regN = INSNA(19,16);
11041           regM = INSNA(3,0);
11042           if (regD != 15 && regN != 15 && regM != 15)
11043              gate = True;
11044        }
11045     }
11046
11047     if (gate) {
11048        IRTemp rNt   = newTemp(Ity_I32);
11049        IRTemp rMt   = newTemp(Ity_I32);
11050        IRTemp rN_d  = newTemp(Ity_I32);
11051        IRTemp res_q = newTemp(Ity_I32);
11052
11053        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11054        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11055
11056        or_into_QFLAG32(
11057           signed_overflow_after_Add32(
11058              binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
11059           condT
11060        );
11061
11062        assign(rN_d,  binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
11063        assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rN_d)));
11064        if (isT)
11065           putIRegT( regD, mkexpr(res_q), condT );
11066        else
11067           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11068
11069        or_into_QFLAG32(
11070           signed_overflow_after_Add32(
11071              binop(Iop_Add32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
11072           condT
11073        );
11074
11075        DIP("qdadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11076        return True;
11077     }
11078     /* fall through */
11079   }
11080
11081   /* ------------------ qsub<c> <Rd>,<Rn>,<Rm> ------------------- */
11082   {
11083     UInt regD = 99, regN = 99, regM = 99;
11084     Bool gate = False;
11085
11086     if (isT) {
11087        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0A0) {
11088           regN = INSNT0(3,0);
11089           regD = INSNT1(11,8);
11090           regM = INSNT1(3,0);
11091           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11092              gate = True;
11093        }
11094     } else {
11095        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
11096            INSNA(11,8)  == BITS4(0,0,0,0)         &&
11097            INSNA(7,4)   == BITS4(0,1,0,1)) {
11098           regD = INSNA(15,12);
11099           regN = INSNA(19,16);
11100           regM = INSNA(3,0);
11101           if (regD != 15 && regN != 15 && regM != 15)
11102              gate = True;
11103        }
11104     }
11105
11106     if (gate) {
11107        IRTemp rNt   = newTemp(Ity_I32);
11108        IRTemp rMt   = newTemp(Ity_I32);
11109        IRTemp res_q = newTemp(Ity_I32);
11110
11111        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11112        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11113
11114        assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rNt)));
11115        if (isT)
11116           putIRegT( regD, mkexpr(res_q), condT );
11117        else
11118           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11119
11120        or_into_QFLAG32(
11121           signed_overflow_after_Sub32(
11122              binop(Iop_Sub32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
11123           condT
11124        );
11125
11126        DIP("qsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11127        return True;
11128     }
11129     /* fall through */
11130   }
11131
11132   /* ------------------ qdsub<c> <Rd>,<Rm>,<Rn> ------------------- */
11133   {
11134     UInt regD = 99, regN = 99, regM = 99;
11135     Bool gate = False;
11136
11137     if (isT) {
11138        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0B0) {
11139           regN = INSNT0(3,0);
11140           regD = INSNT1(11,8);
11141           regM = INSNT1(3,0);
11142           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11143              gate = True;
11144        }
11145     } else {
11146        if (INSNA(27,20) == BITS8(0,0,0,1,0,1,1,0) &&
11147            INSNA(11,8)  == BITS4(0,0,0,0)         &&
11148            INSNA(7,4)   == BITS4(0,1,0,1)) {
11149           regD = INSNA(15,12);
11150           regN = INSNA(19,16);
11151           regM = INSNA(3,0);
11152           if (regD != 15 && regN != 15 && regM != 15)
11153              gate = True;
11154        }
11155     }
11156
11157     if (gate) {
11158        IRTemp rNt   = newTemp(Ity_I32);
11159        IRTemp rMt   = newTemp(Ity_I32);
11160        IRTemp rN_d  = newTemp(Ity_I32);
11161        IRTemp res_q = newTemp(Ity_I32);
11162
11163        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11164        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11165
11166        or_into_QFLAG32(
11167           signed_overflow_after_Add32(
11168              binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
11169           condT
11170        );
11171
11172        assign(rN_d,  binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
11173        assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rN_d)));
11174        if (isT)
11175           putIRegT( regD, mkexpr(res_q), condT );
11176        else
11177           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11178
11179        or_into_QFLAG32(
11180           signed_overflow_after_Sub32(
11181              binop(Iop_Sub32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
11182           condT
11183        );
11184
11185        DIP("qdsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11186        return True;
11187     }
11188     /* fall through */
11189   }
11190
11191   /* ------------------ uqsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
11192   {
11193     UInt regD = 99, regN = 99, regM = 99;
11194     Bool gate = False;
11195
11196     if (isT) {
11197        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11198           regN = INSNT0(3,0);
11199           regD = INSNT1(11,8);
11200           regM = INSNT1(3,0);
11201           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11202              gate = True;
11203        }
11204     } else {
11205        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11206            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11207            INSNA(7,4)   == BITS4(0,1,1,1)) {
11208           regD = INSNA(15,12);
11209           regN = INSNA(19,16);
11210           regM = INSNA(3,0);
11211           if (regD != 15 && regN != 15 && regM != 15)
11212             gate = True;
11213        }
11214     }
11215
11216     if (gate) {
11217        IRTemp rNt   = newTemp(Ity_I32);
11218        IRTemp rMt   = newTemp(Ity_I32);
11219        IRTemp res_q = newTemp(Ity_I32);
11220
11221        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11222        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11223
11224        assign(res_q, binop(Iop_QSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
11225        if (isT)
11226           putIRegT( regD, mkexpr(res_q), condT );
11227        else
11228           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11229
11230        DIP("uqsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11231        return True;
11232     }
11233     /* fall through */
11234   }
11235
11236   /* ----------------- shadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
11237   {
11238     UInt regD = 99, regN = 99, regM = 99;
11239     Bool gate = False;
11240
11241     if (isT) {
11242        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11243           regN = INSNT0(3,0);
11244           regD = INSNT1(11,8);
11245           regM = INSNT1(3,0);
11246           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11247              gate = True;
11248        }
11249     } else {
11250        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11251            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11252            INSNA(7,4)   == BITS4(0,0,0,1)) {
11253           regD = INSNA(15,12);
11254           regN = INSNA(19,16);
11255           regM = INSNA(3,0);
11256           if (regD != 15 && regN != 15 && regM != 15)
11257              gate = True;
11258        }
11259     }
11260
11261     if (gate) {
11262        IRTemp rNt   = newTemp(Ity_I32);
11263        IRTemp rMt   = newTemp(Ity_I32);
11264        IRTemp res_q = newTemp(Ity_I32);
11265
11266        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11267        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11268
11269        assign(res_q, binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
11270        if (isT)
11271           putIRegT( regD, mkexpr(res_q), condT );
11272        else
11273           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11274
11275        DIP("shadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11276        return True;
11277     }
11278     /* fall through */
11279   }
11280
11281   /* ----------------- uhsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
11282   {
11283     UInt regD = 99, regN = 99, regM = 99;
11284     Bool gate = False;
11285
11286     if (isT) {
11287        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
11288           regN = INSNT0(3,0);
11289           regD = INSNT1(11,8);
11290           regM = INSNT1(3,0);
11291           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11292              gate = True;
11293        }
11294     } else {
11295        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
11296            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11297            INSNA(7,4)   == BITS4(1,1,1,1)) {
11298           regD = INSNA(15,12);
11299           regN = INSNA(19,16);
11300           regM = INSNA(3,0);
11301           if (regD != 15 && regN != 15 && regM != 15)
11302              gate = True;
11303        }
11304     }
11305
11306     if (gate) {
11307        IRTemp rNt   = newTemp(Ity_I32);
11308        IRTemp rMt   = newTemp(Ity_I32);
11309        IRTemp res_q = newTemp(Ity_I32);
11310
11311        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11312        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11313
11314        assign(res_q, binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
11315        if (isT)
11316           putIRegT( regD, mkexpr(res_q), condT );
11317        else
11318           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11319
11320        DIP("uhsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11321        return True;
11322     }
11323     /* fall through */
11324   }
11325
11326   /* ----------------- uhsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
11327   {
11328     UInt regD = 99, regN = 99, regM = 99;
11329     Bool gate = False;
11330
11331     if (isT) {
11332        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
11333           regN = INSNT0(3,0);
11334           regD = INSNT1(11,8);
11335           regM = INSNT1(3,0);
11336           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11337              gate = True;
11338        }
11339     } else {
11340        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
11341            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11342            INSNA(7,4)   == BITS4(0,1,1,1)) {
11343           regD = INSNA(15,12);
11344           regN = INSNA(19,16);
11345           regM = INSNA(3,0);
11346           if (regD != 15 && regN != 15 && regM != 15)
11347              gate = True;
11348        }
11349     }
11350
11351     if (gate) {
11352        IRTemp rNt   = newTemp(Ity_I32);
11353        IRTemp rMt   = newTemp(Ity_I32);
11354        IRTemp res_q = newTemp(Ity_I32);
11355
11356        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11357        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11358
11359        assign(res_q, binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
11360        if (isT)
11361           putIRegT( regD, mkexpr(res_q), condT );
11362        else
11363           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11364
11365        DIP("uhsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11366        return True;
11367     }
11368     /* fall through */
11369   }
11370
11371   /* ------------------ uqadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
11372   {
11373     UInt regD = 99, regN = 99, regM = 99;
11374     Bool gate = False;
11375
11376     if (isT) {
11377        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11378           regN = INSNT0(3,0);
11379           regD = INSNT1(11,8);
11380           regM = INSNT1(3,0);
11381           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11382              gate = True;
11383        }
11384     } else {
11385        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11386            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11387            INSNA(7,4)   == BITS4(0,0,0,1)) {
11388           regD = INSNA(15,12);
11389           regN = INSNA(19,16);
11390           regM = INSNA(3,0);
11391           if (regD != 15 && regN != 15 && regM != 15)
11392              gate = True;
11393        }
11394     }
11395
11396     if (gate) {
11397        IRTemp rNt   = newTemp(Ity_I32);
11398        IRTemp rMt   = newTemp(Ity_I32);
11399        IRTemp res_q = newTemp(Ity_I32);
11400
11401        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11402        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11403
11404        assign(res_q, binop(Iop_QAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
11405        if (isT)
11406           putIRegT( regD, mkexpr(res_q), condT );
11407        else
11408           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11409
11410        DIP("uqadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11411        return True;
11412     }
11413     /* fall through */
11414   }
11415
11416   /* ------------------- uqsax<c> <Rd>,<Rn>,<Rm> ------------------- */
11417   {
11418     UInt regD = 99, regN = 99, regM = 99;
11419     Bool gate = False;
11420
11421     if (isT) {
11422        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11423           regN = INSNT0(3,0);
11424           regD = INSNT1(11,8);
11425           regM = INSNT1(3,0);
11426           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11427              gate = True;
11428        }
11429     } else {
11430        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11431            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11432            INSNA(7,4)   == BITS4(0,1,0,1)) {
11433           regD = INSNA(15,12);
11434           regN = INSNA(19,16);
11435           regM = INSNA(3,0);
11436           if (regD != 15 && regN != 15 && regM != 15)
11437              gate = True;
11438        }
11439     }
11440
11441     if (gate) {
11442        IRTemp irt_regN     = newTemp(Ity_I32);
11443        IRTemp irt_regM     = newTemp(Ity_I32);
11444        IRTemp irt_sum      = newTemp(Ity_I32);
11445        IRTemp irt_diff     = newTemp(Ity_I32);
11446        IRTemp irt_sum_res  = newTemp(Ity_I32);
11447        IRTemp irt_diff_res = newTemp(Ity_I32);
11448
11449        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11450        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11451
11452        assign( irt_diff,
11453                binop( Iop_Sub32,
11454                       binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11455                       binop( Iop_Shr32,
11456                              binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
11457                              mkU8(16) ) ) );
11458        armUnsignedSatQ( &irt_diff_res, NULL, irt_diff, 0x10);
11459
11460        assign( irt_sum,
11461                binop( Iop_Add32,
11462                       binop( Iop_Shr32,
11463                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11464                              mkU8(16) ),
11465                       binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) )) );
11466        armUnsignedSatQ( &irt_sum_res, NULL, irt_sum, 0x10 );
11467
11468        IRExpr* ire_result = binop( Iop_Or32,
11469                                    binop( Iop_Shl32, mkexpr(irt_diff_res),
11470                                           mkU8(16) ),
11471                                    binop( Iop_And32, mkexpr(irt_sum_res),
11472                                           mkU32(0xFFFF)) );
11473
11474        if (isT)
11475           putIRegT( regD, ire_result, condT );
11476        else
11477           putIRegA( regD, ire_result, condT, Ijk_Boring );
11478
11479        DIP( "uqsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11480        return True;
11481     }
11482     /* fall through */
11483   }
11484
11485   /* ------------------- uqasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11486   {
11487     UInt regD = 99, regN = 99, regM = 99;
11488     Bool gate = False;
11489
11490     if (isT) {
11491        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11492           regN = INSNT0(3,0);
11493           regD = INSNT1(11,8);
11494           regM = INSNT1(3,0);
11495           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11496              gate = True;
11497        }
11498     } else {
11499        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11500            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11501            INSNA(7,4)   == BITS4(0,0,1,1)) {
11502           regD = INSNA(15,12);
11503           regN = INSNA(19,16);
11504           regM = INSNA(3,0);
11505           if (regD != 15 && regN != 15 && regM != 15)
11506              gate = True;
11507        }
11508     }
11509
11510     if (gate) {
11511        IRTemp irt_regN     = newTemp(Ity_I32);
11512        IRTemp irt_regM     = newTemp(Ity_I32);
11513        IRTemp irt_sum      = newTemp(Ity_I32);
11514        IRTemp irt_diff     = newTemp(Ity_I32);
11515        IRTemp irt_res_sum  = newTemp(Ity_I32);
11516        IRTemp irt_res_diff = newTemp(Ity_I32);
11517
11518        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11519        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11520
11521        assign( irt_diff,
11522                binop( Iop_Sub32,
11523                       binop( Iop_Shr32,
11524                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11525                              mkU8(16) ),
11526                       binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11527        armUnsignedSatQ( &irt_res_diff, NULL, irt_diff, 0x10 );
11528
11529        assign( irt_sum,
11530                binop( Iop_Add32,
11531                       binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11532                       binop( Iop_Shr32,
11533                              binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
11534                              mkU8(16) ) ) );
11535        armUnsignedSatQ( &irt_res_sum, NULL, irt_sum, 0x10 );
11536
11537        IRExpr* ire_result
11538          = binop( Iop_Or32,
11539                   binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
11540                   binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
11541
11542        if (isT)
11543           putIRegT( regD, ire_result, condT );
11544        else
11545           putIRegA( regD, ire_result, condT, Ijk_Boring );
11546
11547        DIP( "uqasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11548        return True;
11549     }
11550     /* fall through */
11551   }
11552
11553   /* ------------------- usax<c> <Rd>,<Rn>,<Rm> ------------------- */
11554   {
11555     UInt regD = 99, regN = 99, regM = 99;
11556     Bool gate = False;
11557
11558     if (isT) {
11559        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
11560           regN = INSNT0(3,0);
11561           regD = INSNT1(11,8);
11562           regM = INSNT1(3,0);
11563           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11564              gate = True;
11565        }
11566     } else {
11567        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
11568            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11569            INSNA(7,4)   == BITS4(0,1,0,1)) {
11570           regD = INSNA(15,12);
11571           regN = INSNA(19,16);
11572           regM = INSNA(3,0);
11573           if (regD != 15 && regN != 15 && regM != 15)
11574              gate = True;
11575        }
11576     }
11577
11578     if (gate) {
11579        IRTemp irt_regN = newTemp(Ity_I32);
11580        IRTemp irt_regM = newTemp(Ity_I32);
11581        IRTemp irt_sum  = newTemp(Ity_I32);
11582        IRTemp irt_diff = newTemp(Ity_I32);
11583
11584        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11585        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11586
11587        assign( irt_sum,
11588                binop( Iop_Add32,
11589                       unop( Iop_16Uto32,
11590                             unop( Iop_32to16, mkexpr(irt_regN) )
11591                       ),
11592                       binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11593
11594        assign( irt_diff,
11595                binop( Iop_Sub32,
11596                       binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11597                       unop( Iop_16Uto32,
11598                             unop( Iop_32to16, mkexpr(irt_regM) )
11599                       )
11600                )
11601        );
11602
11603        IRExpr* ire_result
11604          = binop( Iop_Or32,
11605                   binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
11606                   binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
11607
11608        IRTemp ge10 = newTemp(Ity_I32);
11609        assign( ge10, IRExpr_ITE( binop( Iop_CmpLE32U,
11610                                         mkU32(0x10000), mkexpr(irt_sum) ),
11611                                  mkU32(1), mkU32(0) ) );
11612        put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
11613        put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
11614
11615        IRTemp ge32 = newTemp(Ity_I32);
11616        assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
11617        put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
11618        put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
11619
11620        if (isT)
11621           putIRegT( regD, ire_result, condT );
11622        else
11623           putIRegA( regD, ire_result, condT, Ijk_Boring );
11624
11625        DIP( "usax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11626        return True;
11627     }
11628     /* fall through */
11629   }
11630
11631   /* ------------------- uasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11632   {
11633     UInt regD = 99, regN = 99, regM = 99;
11634     Bool gate = False;
11635
11636     if (isT) {
11637        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
11638           regN = INSNT0(3,0);
11639           regD = INSNT1(11,8);
11640           regM = INSNT1(3,0);
11641           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11642              gate = True;
11643        }
11644     } else {
11645        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
11646            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11647            INSNA(7,4)   == BITS4(0,0,1,1)) {
11648           regD = INSNA(15,12);
11649           regN = INSNA(19,16);
11650           regM = INSNA(3,0);
11651           if (regD != 15 && regN != 15 && regM != 15)
11652              gate = True;
11653        }
11654     }
11655
11656     if (gate) {
11657        IRTemp irt_regN = newTemp(Ity_I32);
11658        IRTemp irt_regM = newTemp(Ity_I32);
11659        IRTemp irt_sum  = newTemp(Ity_I32);
11660        IRTemp irt_diff = newTemp(Ity_I32);
11661
11662        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11663        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11664
11665        assign( irt_diff,
11666                binop( Iop_Sub32,
11667                       unop( Iop_16Uto32,
11668                             unop( Iop_32to16, mkexpr(irt_regN) )
11669                       ),
11670                       binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11671
11672        assign( irt_sum,
11673                binop( Iop_Add32,
11674                       binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11675                       unop( Iop_16Uto32,
11676                             unop( Iop_32to16, mkexpr(irt_regM) )
11677                       ) ) );
11678
11679        IRExpr* ire_result
11680          = binop( Iop_Or32,
11681                   binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
11682                   binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
11683
11684        IRTemp ge10 = newTemp(Ity_I32);
11685        assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
11686        put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
11687        put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
11688
11689        IRTemp ge32 = newTemp(Ity_I32);
11690        assign( ge32, IRExpr_ITE( binop( Iop_CmpLE32U,
11691                                         mkU32(0x10000), mkexpr(irt_sum) ),
11692                                  mkU32(1), mkU32(0) ) );
11693        put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
11694        put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
11695
11696        if (isT)
11697           putIRegT( regD, ire_result, condT );
11698        else
11699           putIRegA( regD, ire_result, condT, Ijk_Boring );
11700
11701        DIP( "uasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11702        return True;
11703     }
11704     /* fall through */
11705   }
11706
11707   /* ------------------- ssax<c> <Rd>,<Rn>,<Rm> ------------------- */
11708   {
11709     UInt regD = 99, regN = 99, regM = 99;
11710     Bool gate = False;
11711
11712     if (isT) {
11713        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
11714           regN = INSNT0(3,0);
11715           regD = INSNT1(11,8);
11716           regM = INSNT1(3,0);
11717           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11718              gate = True;
11719        }
11720     } else {
11721        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
11722            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11723            INSNA(7,4)   == BITS4(0,1,0,1)) {
11724           regD = INSNA(15,12);
11725           regN = INSNA(19,16);
11726           regM = INSNA(3,0);
11727           if (regD != 15 && regN != 15 && regM != 15)
11728              gate = True;
11729        }
11730     }
11731
11732     if (gate) {
11733        IRTemp irt_regN = newTemp(Ity_I32);
11734        IRTemp irt_regM = newTemp(Ity_I32);
11735        IRTemp irt_sum  = newTemp(Ity_I32);
11736        IRTemp irt_diff = newTemp(Ity_I32);
11737
11738        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11739        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11740
11741        assign( irt_sum,
11742                binop( Iop_Add32,
11743                       binop( Iop_Sar32,
11744                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11745                              mkU8(16) ),
11746                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
11747
11748        assign( irt_diff,
11749                binop( Iop_Sub32,
11750                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
11751                       binop( Iop_Sar32,
11752                              binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
11753                              mkU8(16) ) ) );
11754
11755        IRExpr* ire_result
11756          = binop( Iop_Or32,
11757                   binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
11758                   binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
11759
11760        IRTemp ge10 = newTemp(Ity_I32);
11761        assign(ge10, unop(Iop_Not32, mkexpr(irt_sum)));
11762        put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
11763        put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
11764
11765        IRTemp ge32 = newTemp(Ity_I32);
11766        assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
11767        put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
11768        put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
11769
11770        if (isT)
11771           putIRegT( regD, ire_result, condT );
11772        else
11773           putIRegA( regD, ire_result, condT, Ijk_Boring );
11774
11775        DIP( "ssax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11776        return True;
11777     }
11778     /* fall through */
11779   }
11780
11781   /* ----------------- shsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
11782   {
11783     UInt regD = 99, regN = 99, regM = 99;
11784     Bool gate = False;
11785
11786     if (isT) {
11787        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11788           regN = INSNT0(3,0);
11789           regD = INSNT1(11,8);
11790           regM = INSNT1(3,0);
11791           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11792              gate = True;
11793        }
11794     } else {
11795        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11796            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11797            INSNA(7,4)   == BITS4(1,1,1,1)) {
11798           regD = INSNA(15,12);
11799           regN = INSNA(19,16);
11800           regM = INSNA(3,0);
11801           if (regD != 15 && regN != 15 && regM != 15)
11802              gate = True;
11803        }
11804     }
11805
11806     if (gate) {
11807        IRTemp rNt   = newTemp(Ity_I32);
11808        IRTemp rMt   = newTemp(Ity_I32);
11809        IRTemp res_q = newTemp(Ity_I32);
11810
11811        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11812        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11813
11814        assign(res_q, binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
11815        if (isT)
11816           putIRegT( regD, mkexpr(res_q), condT );
11817        else
11818           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11819
11820        DIP("shsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11821        return True;
11822     }
11823     /* fall through */
11824   }
11825
11826   /* ----------------- sxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
11827   {
11828     UInt regD = 99, regN = 99, regM = 99, rotate = 99;
11829     Bool gate = False;
11830
11831     if (isT) {
11832        if (INSNT0(15,4) == 0xFA2 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
11833           regN   = INSNT0(3,0);
11834           regD   = INSNT1(11,8);
11835           regM   = INSNT1(3,0);
11836           rotate = INSNT1(5,4);
11837           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11838              gate = True;
11839        }
11840     } else {
11841        if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
11842            INSNA(9,4)   == BITS6(0,0,0,1,1,1) ) {
11843           regD   = INSNA(15,12);
11844           regN   = INSNA(19,16);
11845           regM   = INSNA(3,0);
11846           rotate = INSNA(11,10);
11847           if (regD != 15 && regN != 15 && regM != 15)
11848             gate = True;
11849        }
11850     }
11851
11852     if (gate) {
11853        IRTemp irt_regN = newTemp(Ity_I32);
11854        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11855
11856        IRTemp irt_regM = newTemp(Ity_I32);
11857        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11858
11859        IRTemp irt_rot = newTemp(Ity_I32);
11860        assign( irt_rot, genROR32(irt_regM, 8 * rotate) );
11861
11862        /* FIXME Maybe we can write this arithmetic in shorter form. */
11863        IRExpr* resLo
11864           = binop(Iop_And32,
11865                   binop(Iop_Add32,
11866                         mkexpr(irt_regN),
11867                         unop(Iop_16Uto32,
11868                              unop(Iop_8Sto16,
11869                                   unop(Iop_32to8, mkexpr(irt_rot))))),
11870                   mkU32(0x0000FFFF));
11871
11872        IRExpr* resHi
11873           = binop(Iop_And32,
11874                   binop(Iop_Add32,
11875                         mkexpr(irt_regN),
11876                         binop(Iop_Shl32,
11877                               unop(Iop_16Uto32,
11878                                    unop(Iop_8Sto16,
11879                                         unop(Iop_32to8,
11880                                              binop(Iop_Shr32,
11881                                                    mkexpr(irt_rot),
11882                                                    mkU8(16))))),
11883                               mkU8(16))),
11884                   mkU32(0xFFFF0000));
11885
11886        IRExpr* ire_result
11887           = binop( Iop_Or32, resHi, resLo );
11888
11889        if (isT)
11890           putIRegT( regD, ire_result, condT );
11891        else
11892           putIRegA( regD, ire_result, condT, Ijk_Boring );
11893
11894        DIP( "sxtab16%s r%u, r%u, r%u, ROR #%u\n",
11895             nCC(conq), regD, regN, regM, 8 * rotate );
11896        return True;
11897     }
11898     /* fall through */
11899   }
11900
11901   /* ----------------- shasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11902   {
11903     UInt regD = 99, regN = 99, regM = 99;
11904     Bool gate = False;
11905
11906     if (isT) {
11907        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11908           regN = INSNT0(3,0);
11909           regD = INSNT1(11,8);
11910           regM = INSNT1(3,0);
11911           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11912              gate = True;
11913        }
11914     } else {
11915        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11916            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11917            INSNA(7,4)   == BITS4(0,0,1,1)) {
11918           regD = INSNA(15,12);
11919           regN = INSNA(19,16);
11920           regM = INSNA(3,0);
11921           if (regD != 15 && regN != 15 && regM != 15)
11922              gate = True;
11923        }
11924     }
11925
11926     if (gate) {
11927        IRTemp rNt   = newTemp(Ity_I32);
11928        IRTemp rMt   = newTemp(Ity_I32);
11929        IRTemp irt_diff  = newTemp(Ity_I32);
11930        IRTemp irt_sum   = newTemp(Ity_I32);
11931        IRTemp res_q = newTemp(Ity_I32);
11932
11933        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11934        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11935
11936        assign( irt_diff,
11937                binop(Iop_Sub32,
11938                      unop(Iop_16Sto32,
11939                           unop(Iop_32to16,
11940                                mkexpr(rNt)
11941                           )
11942                      ),
11943                      unop(Iop_16Sto32,
11944                           unop(Iop_32to16,
11945                                binop(Iop_Shr32,
11946                                      mkexpr(rMt), mkU8(16)
11947                                )
11948                           )
11949                      )
11950                )
11951        );
11952
11953        assign( irt_sum,
11954                binop(Iop_Add32,
11955                      unop(Iop_16Sto32,
11956                           unop(Iop_32to16,
11957                                binop(Iop_Shr32,
11958                                      mkexpr(rNt), mkU8(16)
11959                                )
11960                           )
11961                      ),
11962                      unop(Iop_16Sto32,
11963                           unop(Iop_32to16, mkexpr(rMt)
11964                           )
11965                      )
11966                )
11967        );
11968
11969        assign( res_q,
11970                binop(Iop_Or32,
11971                      unop(Iop_16Uto32,
11972                           unop(Iop_32to16,
11973                                binop(Iop_Shr32,
11974                                      mkexpr(irt_diff), mkU8(1)
11975                                )
11976                           )
11977                      ),
11978                      binop(Iop_Shl32,
11979                            binop(Iop_Shr32,
11980                                  mkexpr(irt_sum), mkU8(1)
11981                            ),
11982                            mkU8(16)
11983                     )
11984                )
11985        );
11986
11987        if (isT)
11988           putIRegT( regD, mkexpr(res_q), condT );
11989        else
11990           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11991
11992        DIP("shasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11993        return True;
11994     }
11995     /* fall through */
11996   }
11997
11998   /* ----------------- uhasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11999   {
12000     UInt regD = 99, regN = 99, regM = 99;
12001     Bool gate = False;
12002
12003     if (isT) {
12004        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
12005           regN = INSNT0(3,0);
12006           regD = INSNT1(11,8);
12007           regM = INSNT1(3,0);
12008           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12009              gate = True;
12010        }
12011     } else {
12012        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
12013            INSNA(11,8)  == BITS4(1,1,1,1)         &&
12014            INSNA(7,4)   == BITS4(0,0,1,1)) {
12015           regD = INSNA(15,12);
12016           regN = INSNA(19,16);
12017           regM = INSNA(3,0);
12018           if (regD != 15 && regN != 15 && regM != 15)
12019              gate = True;
12020        }
12021     }
12022
12023     if (gate) {
12024        IRTemp rNt   = newTemp(Ity_I32);
12025        IRTemp rMt   = newTemp(Ity_I32);
12026        IRTemp irt_diff  = newTemp(Ity_I32);
12027        IRTemp irt_sum   = newTemp(Ity_I32);
12028        IRTemp res_q = newTemp(Ity_I32);
12029
12030        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12031        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12032
12033        assign( irt_diff,
12034                binop(Iop_Sub32,
12035                      unop(Iop_16Uto32,
12036                           unop(Iop_32to16,
12037                                mkexpr(rNt)
12038                           )
12039                      ),
12040                      unop(Iop_16Uto32,
12041                           unop(Iop_32to16,
12042                                binop(Iop_Shr32,
12043                                      mkexpr(rMt), mkU8(16)
12044                                )
12045                           )
12046                      )
12047                )
12048        );
12049
12050        assign( irt_sum,
12051                binop(Iop_Add32,
12052                      unop(Iop_16Uto32,
12053                           unop(Iop_32to16,
12054                                binop(Iop_Shr32,
12055                                      mkexpr(rNt), mkU8(16)
12056                                )
12057                           )
12058                      ),
12059                      unop(Iop_16Uto32,
12060                           unop(Iop_32to16, mkexpr(rMt)
12061                           )
12062                      )
12063                )
12064        );
12065
12066        assign( res_q,
12067                binop(Iop_Or32,
12068                      unop(Iop_16Uto32,
12069                           unop(Iop_32to16,
12070                                binop(Iop_Shr32,
12071                                      mkexpr(irt_diff), mkU8(1)
12072                                )
12073                           )
12074                      ),
12075                      binop(Iop_Shl32,
12076                            binop(Iop_Shr32,
12077                                  mkexpr(irt_sum), mkU8(1)
12078                            ),
12079                            mkU8(16)
12080                     )
12081                )
12082        );
12083
12084        if (isT)
12085           putIRegT( regD, mkexpr(res_q), condT );
12086        else
12087           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12088
12089        DIP("uhasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12090        return True;
12091     }
12092     /* fall through */
12093   }
12094
12095   /* ----------------- shsax<c> <Rd>,<Rn>,<Rm> ------------------- */
12096   {
12097     UInt regD = 99, regN = 99, regM = 99;
12098     Bool gate = False;
12099
12100     if (isT) {
12101        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
12102           regN = INSNT0(3,0);
12103           regD = INSNT1(11,8);
12104           regM = INSNT1(3,0);
12105           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12106              gate = True;
12107        }
12108     } else {
12109        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
12110            INSNA(11,8)  == BITS4(1,1,1,1)         &&
12111            INSNA(7,4)   == BITS4(0,1,0,1)) {
12112           regD = INSNA(15,12);
12113           regN = INSNA(19,16);
12114           regM = INSNA(3,0);
12115           if (regD != 15 && regN != 15 && regM != 15)
12116              gate = True;
12117        }
12118     }
12119
12120     if (gate) {
12121        IRTemp rNt   = newTemp(Ity_I32);
12122        IRTemp rMt   = newTemp(Ity_I32);
12123        IRTemp irt_diff  = newTemp(Ity_I32);
12124        IRTemp irt_sum   = newTemp(Ity_I32);
12125        IRTemp res_q = newTemp(Ity_I32);
12126
12127        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12128        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12129
12130        assign( irt_sum,
12131                binop(Iop_Add32,
12132                      unop(Iop_16Sto32,
12133                           unop(Iop_32to16,
12134                                mkexpr(rNt)
12135                           )
12136                      ),
12137                      unop(Iop_16Sto32,
12138                           unop(Iop_32to16,
12139                                binop(Iop_Shr32,
12140                                      mkexpr(rMt), mkU8(16)
12141                                )
12142                           )
12143                      )
12144                )
12145        );
12146
12147        assign( irt_diff,
12148                binop(Iop_Sub32,
12149                      unop(Iop_16Sto32,
12150                           unop(Iop_32to16,
12151                                binop(Iop_Shr32,
12152                                      mkexpr(rNt), mkU8(16)
12153                                )
12154                           )
12155                      ),
12156                      unop(Iop_16Sto32,
12157                           unop(Iop_32to16, mkexpr(rMt)
12158                           )
12159                      )
12160                )
12161        );
12162
12163        assign( res_q,
12164                binop(Iop_Or32,
12165                      unop(Iop_16Uto32,
12166                           unop(Iop_32to16,
12167                                binop(Iop_Shr32,
12168                                      mkexpr(irt_sum), mkU8(1)
12169                                )
12170                           )
12171                      ),
12172                      binop(Iop_Shl32,
12173                            binop(Iop_Shr32,
12174                                  mkexpr(irt_diff), mkU8(1)
12175                            ),
12176                            mkU8(16)
12177                     )
12178                )
12179        );
12180
12181        if (isT)
12182           putIRegT( regD, mkexpr(res_q), condT );
12183        else
12184           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12185
12186        DIP("shsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12187        return True;
12188     }
12189     /* fall through */
12190   }
12191
12192   /* ----------------- uhsax<c> <Rd>,<Rn>,<Rm> ------------------- */
12193   {
12194     UInt regD = 99, regN = 99, regM = 99;
12195     Bool gate = False;
12196
12197     if (isT) {
12198        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
12199           regN = INSNT0(3,0);
12200           regD = INSNT1(11,8);
12201           regM = INSNT1(3,0);
12202           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12203              gate = True;
12204        }
12205     } else {
12206        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
12207            INSNA(11,8)  == BITS4(1,1,1,1)         &&
12208            INSNA(7,4)   == BITS4(0,1,0,1)) {
12209           regD = INSNA(15,12);
12210           regN = INSNA(19,16);
12211           regM = INSNA(3,0);
12212           if (regD != 15 && regN != 15 && regM != 15)
12213              gate = True;
12214        }
12215     }
12216
12217     if (gate) {
12218        IRTemp rNt   = newTemp(Ity_I32);
12219        IRTemp rMt   = newTemp(Ity_I32);
12220        IRTemp irt_diff  = newTemp(Ity_I32);
12221        IRTemp irt_sum   = newTemp(Ity_I32);
12222        IRTemp res_q = newTemp(Ity_I32);
12223
12224        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12225        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12226
12227        assign( irt_sum,
12228                binop(Iop_Add32,
12229                      unop(Iop_16Uto32,
12230                           unop(Iop_32to16,
12231                                mkexpr(rNt)
12232                           )
12233                      ),
12234                      unop(Iop_16Uto32,
12235                           unop(Iop_32to16,
12236                                binop(Iop_Shr32,
12237                                      mkexpr(rMt), mkU8(16)
12238                                )
12239                           )
12240                      )
12241                )
12242        );
12243
12244        assign( irt_diff,
12245                binop(Iop_Sub32,
12246                      unop(Iop_16Uto32,
12247                           unop(Iop_32to16,
12248                                binop(Iop_Shr32,
12249                                      mkexpr(rNt), mkU8(16)
12250                                )
12251                           )
12252                      ),
12253                      unop(Iop_16Uto32,
12254                           unop(Iop_32to16, mkexpr(rMt)
12255                           )
12256                      )
12257                )
12258        );
12259
12260        assign( res_q,
12261                binop(Iop_Or32,
12262                      unop(Iop_16Uto32,
12263                           unop(Iop_32to16,
12264                                binop(Iop_Shr32,
12265                                      mkexpr(irt_sum), mkU8(1)
12266                                )
12267                           )
12268                      ),
12269                      binop(Iop_Shl32,
12270                            binop(Iop_Shr32,
12271                                  mkexpr(irt_diff), mkU8(1)
12272                            ),
12273                            mkU8(16)
12274                     )
12275                )
12276        );
12277
12278        if (isT)
12279           putIRegT( regD, mkexpr(res_q), condT );
12280        else
12281           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12282
12283        DIP("uhsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12284        return True;
12285     }
12286     /* fall through */
12287   }
12288
12289   /* ----------------- shsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
12290   {
12291     UInt regD = 99, regN = 99, regM = 99;
12292     Bool gate = False;
12293
12294     if (isT) {
12295        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
12296           regN = INSNT0(3,0);
12297           regD = INSNT1(11,8);
12298           regM = INSNT1(3,0);
12299           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12300              gate = True;
12301        }
12302     } else {
12303        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
12304            INSNA(11,8)  == BITS4(1,1,1,1)         &&
12305            INSNA(7,4)   == BITS4(0,1,1,1)) {
12306           regD = INSNA(15,12);
12307           regN = INSNA(19,16);
12308           regM = INSNA(3,0);
12309           if (regD != 15 && regN != 15 && regM != 15)
12310              gate = True;
12311        }
12312     }
12313
12314     if (gate) {
12315        IRTemp rNt   = newTemp(Ity_I32);
12316        IRTemp rMt   = newTemp(Ity_I32);
12317        IRTemp res_q = newTemp(Ity_I32);
12318
12319        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12320        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12321
12322        assign(res_q, binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
12323        if (isT)
12324           putIRegT( regD, mkexpr(res_q), condT );
12325        else
12326           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12327
12328        DIP("shsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12329        return True;
12330     }
12331     /* fall through */
12332   }
12333
12334   /* ----------------- smmls{r}<c> <Rd>,<Rn>,<Rm>,<Ra> ------------------- */
12335   {
12336     UInt rD = 99, rN = 99, rM = 99, rA = 99;
12337     Bool round  = False;
12338     Bool gate   = False;
12339
12340     if (isT) {
12341        if (INSNT0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
12342            && INSNT0(6,4) == BITS3(1,1,0)
12343            && INSNT1(7,5) == BITS3(0,0,0)) {
12344           round = INSNT1(4,4);
12345           rA    = INSNT1(15,12);
12346           rD    = INSNT1(11,8);
12347           rM    = INSNT1(3,0);
12348           rN    = INSNT0(3,0);
12349           if (!isBadRegT(rD)
12350               && !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rA))
12351              gate = True;
12352        }
12353     } else {
12354        if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,1)
12355            && INSNA(15,12) != BITS4(1,1,1,1)
12356            && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(1,1,0,1)) {
12357           round = INSNA(5,5);
12358           rD    = INSNA(19,16);
12359           rA    = INSNA(15,12);
12360           rM    = INSNA(11,8);
12361           rN    = INSNA(3,0);
12362           if (rD != 15 && rM != 15 && rN != 15)
12363              gate = True;
12364        }
12365     }
12366     if (gate) {
12367        IRTemp irt_rA   = newTemp(Ity_I32);
12368        IRTemp irt_rN   = newTemp(Ity_I32);
12369        IRTemp irt_rM   = newTemp(Ity_I32);
12370        assign( irt_rA, isT ? getIRegT(rA) : getIRegA(rA) );
12371        assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
12372        assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
12373        IRExpr* res
12374        = unop(Iop_64HIto32,
12375               binop(Iop_Add64,
12376                     binop(Iop_Sub64,
12377                           binop(Iop_32HLto64, mkexpr(irt_rA), mkU32(0)),
12378                           binop(Iop_MullS32, mkexpr(irt_rN), mkexpr(irt_rM))),
12379                     mkU64(round ? 0x80000000ULL : 0ULL)));
12380        if (isT)
12381           putIRegT( rD, res, condT );
12382        else
12383           putIRegA(rD, res, condT, Ijk_Boring);
12384        DIP("smmls%s%s r%u, r%u, r%u, r%u\n",
12385            round ? "r" : "", nCC(conq), rD, rN, rM, rA);
12386        return True;
12387     }
12388     /* fall through */
12389   }
12390
12391   /* -------------- smlald{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
12392   {
12393     UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
12394     Bool m_swap = False;
12395     Bool gate   = False;
12396
12397     if (isT) {
12398        if (INSNT0(15,4) == 0xFBC &&
12399            (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0)) {
12400           rN     = INSNT0(3,0);
12401           rDlo   = INSNT1(15,12);
12402           rDhi   = INSNT1(11,8);
12403           rM     = INSNT1(3,0);
12404           m_swap = (INSNT1(4,4) & 1) == 1;
12405           if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
12406               && !isBadRegT(rM) && rDhi != rDlo)
12407              gate = True;
12408        }
12409     } else {
12410        if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0)
12411            && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
12412           rN     = INSNA(3,0);
12413           rDlo   = INSNA(15,12);
12414           rDhi   = INSNA(19,16);
12415           rM     = INSNA(11,8);
12416           m_swap = ( INSNA(5,5) & 1 ) == 1;
12417           if (rDlo != 15 && rDhi != 15
12418               && rN != 15 && rM != 15 && rDlo != rDhi)
12419              gate = True;
12420        }
12421     }
12422
12423     if (gate) {
12424        IRTemp irt_rM   = newTemp(Ity_I32);
12425        IRTemp irt_rN   = newTemp(Ity_I32);
12426        IRTemp irt_rDhi = newTemp(Ity_I32);
12427        IRTemp irt_rDlo = newTemp(Ity_I32);
12428        IRTemp op_2     = newTemp(Ity_I32);
12429        IRTemp pr_1     = newTemp(Ity_I64);
12430        IRTemp pr_2     = newTemp(Ity_I64);
12431        IRTemp result   = newTemp(Ity_I64);
12432        IRTemp resHi    = newTemp(Ity_I32);
12433        IRTemp resLo    = newTemp(Ity_I32);
12434        assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM));
12435        assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN));
12436        assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi));
12437        assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo));
12438        assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
12439        assign( pr_1, binop(Iop_MullS32,
12440                            unop(Iop_16Sto32,
12441                                 unop(Iop_32to16, mkexpr(irt_rN))
12442                            ),
12443                            unop(Iop_16Sto32,
12444                                 unop(Iop_32to16, mkexpr(op_2))
12445                            )
12446                      )
12447        );
12448        assign( pr_2, binop(Iop_MullS32,
12449                            binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
12450                            binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
12451                      )
12452        );
12453        assign( result, binop(Iop_Add64,
12454                              binop(Iop_Add64,
12455                                    mkexpr(pr_1),
12456                                    mkexpr(pr_2)
12457                              ),
12458                              binop(Iop_32HLto64,
12459                                    mkexpr(irt_rDhi),
12460                                    mkexpr(irt_rDlo)
12461                              )
12462                        )
12463        );
12464        assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
12465        assign( resLo, unop(Iop_64to32, mkexpr(result)) );
12466        if (isT) {
12467           putIRegT( rDhi, mkexpr(resHi), condT );
12468           putIRegT( rDlo, mkexpr(resLo), condT );
12469        } else {
12470           putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
12471           putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
12472        }
12473        DIP("smlald%c%s r%u, r%u, r%u, r%u\n",
12474            m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
12475        return True;
12476     }
12477     /* fall through */
12478   }
12479
12480   /* -------------- smlsld{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
12481   {
12482     UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
12483     Bool m_swap = False;
12484     Bool gate   = False;
12485
12486     if (isT) {
12487        if ((INSNT0(15,4) == 0xFBD &&
12488            (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0))) {
12489           rN     = INSNT0(3,0);
12490           rDlo   = INSNT1(15,12);
12491           rDhi   = INSNT1(11,8);
12492           rM     = INSNT1(3,0);
12493           m_swap = (INSNT1(4,4) & 1) == 1;
12494           if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN) &&
12495               !isBadRegT(rM) && rDhi != rDlo)
12496              gate = True;
12497        }
12498     } else {
12499        if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0) &&
12500            (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,1,0,1)) {
12501           rN     = INSNA(3,0);
12502           rDlo   = INSNA(15,12);
12503           rDhi   = INSNA(19,16);
12504           rM     = INSNA(11,8);
12505           m_swap = (INSNA(5,5) & 1) == 1;
12506           if (rDlo != 15 && rDhi != 15 &&
12507               rN != 15 && rM != 15 && rDlo != rDhi)
12508              gate = True;
12509        }
12510     }
12511     if (gate) {
12512        IRTemp irt_rM   = newTemp(Ity_I32);
12513        IRTemp irt_rN   = newTemp(Ity_I32);
12514        IRTemp irt_rDhi = newTemp(Ity_I32);
12515        IRTemp irt_rDlo = newTemp(Ity_I32);
12516        IRTemp op_2     = newTemp(Ity_I32);
12517        IRTemp pr_1     = newTemp(Ity_I64);
12518        IRTemp pr_2     = newTemp(Ity_I64);
12519        IRTemp result   = newTemp(Ity_I64);
12520        IRTemp resHi    = newTemp(Ity_I32);
12521        IRTemp resLo    = newTemp(Ity_I32);
12522        assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
12523        assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
12524        assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi) );
12525        assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo) );
12526        assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
12527        assign( pr_1, binop(Iop_MullS32,
12528                            unop(Iop_16Sto32,
12529                                 unop(Iop_32to16, mkexpr(irt_rN))
12530                            ),
12531                            unop(Iop_16Sto32,
12532                                 unop(Iop_32to16, mkexpr(op_2))
12533                            )
12534                      )
12535        );
12536        assign( pr_2, binop(Iop_MullS32,
12537                            binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
12538                            binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
12539                      )
12540        );
12541        assign( result, binop(Iop_Add64,
12542                              binop(Iop_Sub64,
12543                                    mkexpr(pr_1),
12544                                    mkexpr(pr_2)
12545                              ),
12546                              binop(Iop_32HLto64,
12547                                    mkexpr(irt_rDhi),
12548                                    mkexpr(irt_rDlo)
12549                              )
12550                        )
12551        );
12552        assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
12553        assign( resLo, unop(Iop_64to32, mkexpr(result)) );
12554        if (isT) {
12555           putIRegT( rDhi, mkexpr(resHi), condT );
12556           putIRegT( rDlo, mkexpr(resLo), condT );
12557        } else {
12558           putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
12559           putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
12560        }
12561        DIP("smlsld%c%s r%u, r%u, r%u, r%u\n",
12562            m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
12563        return True;
12564     }
12565     /* fall through */
12566   }
12567
12568   /* ---------- Doesn't match anything. ---------- */
12569   return False;
12570
12571#  undef INSNA
12572#  undef INSNT0
12573#  undef INSNT1
12574}
12575
12576
12577/*------------------------------------------------------------*/
12578/*--- LDMxx/STMxx helper (both ARM and Thumb32)            ---*/
12579/*------------------------------------------------------------*/
12580
12581/* Generate IR for LDMxx and STMxx.  This is complex.  Assumes it's
12582   unconditional, so the caller must produce a jump-around before
12583   calling this, if the insn is to be conditional.  Caller is
12584   responsible for all validation of parameters.  For LDMxx, if PC is
12585   amongst the values loaded, caller is also responsible for
12586   generating the jump. */
12587static void mk_ldm_stm ( Bool arm,     /* True: ARM, False: Thumb */
12588                         UInt rN,      /* base reg */
12589                         UInt bINC,    /* 1: inc,  0: dec */
12590                         UInt bBEFORE, /* 1: inc/dec before, 0: after */
12591                         UInt bW,      /* 1: writeback to Rn */
12592                         UInt bL,      /* 1: load, 0: store */
12593                         UInt regList )
12594{
12595   Int i, r, m, nRegs;
12596   IRTemp jk = Ijk_Boring;
12597
12598   /* Get hold of the old Rn value.  We might need to write its value
12599      to memory during a store, and if it's also the writeback
12600      register then we need to get its value now.  We can't treat it
12601      exactly like the other registers we're going to transfer,
12602      because for xxMDA and xxMDB writeback forms, the generated IR
12603      updates Rn in the guest state before any transfers take place.
12604      We have to do this as per comments below, in order that if Rn is
12605      the stack pointer then it always has a value is below or equal
12606      to any of the transfer addresses.  Ick. */
12607   IRTemp oldRnT = newTemp(Ity_I32);
12608   assign(oldRnT, arm ? getIRegA(rN) : getIRegT(rN));
12609
12610   IRTemp anchorT = newTemp(Ity_I32);
12611   /* The old (Addison-Wesley) ARM ARM seems to say that LDMxx/STMxx
12612      ignore the bottom two bits of the address.  However, Cortex-A8
12613      doesn't seem to care.  Hence: */
12614   /* No .. don't force alignment .. */
12615   /* assign(anchorT, binop(Iop_And32, mkexpr(oldRnT), mkU32(~3U))); */
12616   /* Instead, use the potentially misaligned address directly. */
12617   assign(anchorT, mkexpr(oldRnT));
12618
12619   IROp opADDorSUB = bINC ? Iop_Add32 : Iop_Sub32;
12620   // bINC == 1:  xxMIA, xxMIB
12621   // bINC == 0:  xxMDA, xxMDB
12622
12623   // For xxMDA and xxMDB, update Rn first if necessary.  We have
12624   // to do this first so that, for the common idiom of the transfers
12625   // faulting because we're pushing stuff onto a stack and the stack
12626   // is growing down onto allocate-on-fault pages (as Valgrind simulates),
12627   // we need to have the SP up-to-date "covering" (pointing below) the
12628   // transfer area.  For the same reason, if we are doing xxMIA or xxMIB,
12629   // do the transfer first, and then update rN afterwards.
12630   nRegs = 0;
12631   for (i = 0; i < 16; i++) {
12632     if ((regList & (1 << i)) != 0)
12633         nRegs++;
12634   }
12635   if (bW == 1 && !bINC) {
12636      IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
12637      if (arm)
12638         putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
12639      else
12640         putIRegT( rN, e, IRTemp_INVALID );
12641   }
12642
12643   // Make up a list of the registers to transfer, and their offsets
12644   // in memory relative to the anchor.  If the base reg (Rn) is part
12645   // of the transfer, then do it last for a load and first for a store.
12646   UInt xReg[16], xOff[16];
12647   Int  nX = 0;
12648   m = 0;
12649   for (i = 0; i < 16; i++) {
12650      r = bINC ? i : (15-i);
12651      if (0 == (regList & (1<<r)))
12652         continue;
12653      if (bBEFORE)
12654         m++;
12655      /* paranoia: check we aren't transferring the writeback
12656         register during a load. Should be assured by decode-point
12657         check above. */
12658      if (bW == 1 && bL == 1)
12659         vassert(r != rN);
12660
12661      xOff[nX] = 4 * m;
12662      xReg[nX] = r;
12663      nX++;
12664
12665      if (!bBEFORE)
12666         m++;
12667   }
12668   vassert(m == nRegs);
12669   vassert(nX == nRegs);
12670   vassert(nX <= 16);
12671
12672   if (bW == 0 && (regList & (1<<rN)) != 0) {
12673      /* Non-writeback, and basereg is to be transferred.  Do its
12674         transfer last for a load and first for a store.  Requires
12675         reordering xOff/xReg. */
12676      if (0) {
12677         vex_printf("\nREG_LIST_PRE: (rN=%d)\n", rN);
12678         for (i = 0; i < nX; i++)
12679            vex_printf("reg %d   off %d\n", xReg[i], xOff[i]);
12680         vex_printf("\n");
12681      }
12682
12683      vassert(nX > 0);
12684      for (i = 0; i < nX; i++) {
12685         if (xReg[i] == rN)
12686             break;
12687      }
12688      vassert(i < nX); /* else we didn't find it! */
12689      UInt tReg = xReg[i];
12690      UInt tOff = xOff[i];
12691      if (bL == 1) {
12692         /* load; make this transfer happen last */
12693         if (i < nX-1) {
12694            for (m = i+1; m < nX; m++) {
12695               xReg[m-1] = xReg[m];
12696               xOff[m-1] = xOff[m];
12697            }
12698            vassert(m == nX);
12699            xReg[m-1] = tReg;
12700            xOff[m-1] = tOff;
12701         }
12702      } else {
12703         /* store; make this transfer happen first */
12704         if (i > 0) {
12705            for (m = i-1; m >= 0; m--) {
12706               xReg[m+1] = xReg[m];
12707               xOff[m+1] = xOff[m];
12708            }
12709            vassert(m == -1);
12710            xReg[0] = tReg;
12711            xOff[0] = tOff;
12712         }
12713      }
12714
12715      if (0) {
12716         vex_printf("REG_LIST_POST:\n");
12717         for (i = 0; i < nX; i++)
12718            vex_printf("reg %d   off %d\n", xReg[i], xOff[i]);
12719         vex_printf("\n");
12720      }
12721   }
12722
12723   /* According to the Cortex A8 TRM Sec. 5.2.1, LDM(1) with r13 as the base
12724       register and PC in the register list is a return for purposes of branch
12725       prediction.
12726      The ARM ARM Sec. C9.10.1 further specifies that writeback must be enabled
12727       to be counted in event 0x0E (Procedure return).*/
12728   if (rN == 13 && bL == 1 && bINC && !bBEFORE && bW == 1) {
12729      jk = Ijk_Ret;
12730   }
12731
12732   /* Actually generate the transfers */
12733   for (i = 0; i < nX; i++) {
12734      r = xReg[i];
12735      if (bL == 1) {
12736         IRExpr* e = loadLE(Ity_I32,
12737                            binop(opADDorSUB, mkexpr(anchorT),
12738                                  mkU32(xOff[i])));
12739         if (arm) {
12740            putIRegA( r, e, IRTemp_INVALID, jk );
12741         } else {
12742            // no: putIRegT( r, e, IRTemp_INVALID );
12743            // putIRegT refuses to write to R15.  But that might happen.
12744            // Since this is uncond, and we need to be able to
12745            // write the PC, just use the low level put:
12746            llPutIReg( r, e );
12747         }
12748      } else {
12749         /* if we're storing Rn, make sure we use the correct
12750            value, as per extensive comments above */
12751         storeLE( binop(opADDorSUB, mkexpr(anchorT), mkU32(xOff[i])),
12752                  r == rN ? mkexpr(oldRnT)
12753                          : (arm ? getIRegA(r) : getIRegT(r) ) );
12754      }
12755   }
12756
12757   // If we are doing xxMIA or xxMIB,
12758   // do the transfer first, and then update rN afterwards.
12759   if (bW == 1 && bINC) {
12760      IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
12761      if (arm)
12762         putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
12763      else
12764         putIRegT( rN, e, IRTemp_INVALID );
12765   }
12766}
12767
12768
12769/*------------------------------------------------------------*/
12770/*--- VFP (CP 10 and 11) instructions                      ---*/
12771/*------------------------------------------------------------*/
12772
12773/* Both ARM and Thumb */
12774
12775/* Translate a CP10 or CP11 instruction.  If successful, returns
12776   True and *dres may or may not be updated.  If failure, returns
12777   False and doesn't change *dres nor create any IR.
12778
12779   The ARM and Thumb encodings are identical for the low 28 bits of
   the insn (yay!) and that's what the caller must supply, iow, insn28
12781   has the top 4 bits masked out.  Caller is responsible for
12782   determining whether the masked-out bits are valid for a CP10/11
12783   insn.  The rules for the top 4 bits are:
12784
12785     ARM: 0000 to 1110 allowed, and this is the gating condition.
12786     1111 (NV) is not allowed.
12787
12788     Thumb: must be 1110.  The gating condition is taken from
12789     ITSTATE in the normal way.
12790
12791   Conditionalisation:
12792
12793   Caller must supply an IRTemp 'condT' holding the gating condition,
12794   or IRTemp_INVALID indicating the insn is always executed.
12795
12796   Caller must also supply an ARMCondcode 'cond'.  This is only used
12797   for debug printing, no other purpose.  For ARM, this is simply the
12798   top 4 bits of the original instruction.  For Thumb, the condition
12799   is not (really) known until run time, and so ARMCondAL should be
12800   passed, only so that printing of these instructions does not show
12801   any condition.
12802
12803   Finally, the caller must indicate whether this occurs in ARM or
12804   Thumb code.
12805*/
12806static Bool decode_CP10_CP11_instruction (
12807               /*MOD*/DisResult* dres,
12808               UInt              insn28,
12809               IRTemp            condT,
12810               ARMCondcode       conq,
12811               Bool              isT
12812            )
12813{
12814#  define INSN(_bMax,_bMin)  SLICE_UInt(insn28, (_bMax), (_bMin))
12815
12816   vassert(INSN(31,28) == BITS4(0,0,0,0)); // caller's obligation
12817
12818   if (isT) {
12819      vassert(conq == ARMCondAL);
12820   } else {
12821      vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
12822   }
12823
12824   /* ----------------------------------------------------------- */
12825   /* -- VFP instructions -- double precision (mostly)         -- */
12826   /* ----------------------------------------------------------- */
12827
12828   /* --------------------- fldmx, fstmx --------------------- */
12829   /*
12830                                 31   27   23   19 15 11   7   0
12831                                         P U WL
12832      C4-100, C5-26  1  FSTMX    cond 1100 1000 Rn Dd 1011 offset
12833      C4-100, C5-28  2  FSTMIAX  cond 1100 1010 Rn Dd 1011 offset
12834      C4-100, C5-30  3  FSTMDBX  cond 1101 0010 Rn Dd 1011 offset
12835
12836      C4-42, C5-26   1  FLDMX    cond 1100 1001 Rn Dd 1011 offset
12837      C4-42, C5-28   2  FLDMIAX  cond 1100 1011 Rn Dd 1011 offset
12838      C4-42, C5-30   3  FLDMDBX  cond 1101 0011 Rn Dd 1011 offset
12839
12840      Regs transferred: Dd .. D(d + (offset-3)/2)
      offset must be odd, must not imply a reg > 31
12842      IA/DB: Rn is changed by (4 + 8 x # regs transferred)
12843
12844      case coding:
12845         1  at-Rn   (access at Rn)
12846         2  ia-Rn   (access at Rn, then Rn += 4+8n)
12847         3  db-Rn   (Rn -= 4+8n,   then access at Rn)
12848   */
12849   if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
12850       && INSN(11,8) == BITS4(1,0,1,1)) {
12851      UInt bP      = (insn28 >> 24) & 1;
12852      UInt bU      = (insn28 >> 23) & 1;
12853      UInt bW      = (insn28 >> 21) & 1;
12854      UInt bL      = (insn28 >> 20) & 1;
12855      UInt offset  = (insn28 >> 0) & 0xFF;
12856      UInt rN      = INSN(19,16);
12857      UInt dD      = (INSN(22,22) << 4) | INSN(15,12);
12858      UInt nRegs   = (offset - 1) / 2;
12859      UInt summary = 0;
12860      Int  i;
12861
12862      /**/ if (bP == 0 && bU == 1 && bW == 0) {
12863         summary = 1;
12864      }
12865      else if (bP == 0 && bU == 1 && bW == 1) {
12866         summary = 2;
12867      }
12868      else if (bP == 1 && bU == 0 && bW == 1) {
12869         summary = 3;
12870      }
12871      else goto after_vfp_fldmx_fstmx;
12872
12873      /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
12874      if (rN == 15 && (summary == 2 || summary == 3 || isT))
12875         goto after_vfp_fldmx_fstmx;
12876
12877      /* offset must be odd, and specify at least one register */
12878      if (0 == (offset & 1) || offset < 3)
12879         goto after_vfp_fldmx_fstmx;
12880
      /* can't transfer regs after D31 */
12882      if (dD + nRegs - 1 >= 32)
12883         goto after_vfp_fldmx_fstmx;
12884
12885      /* Now, we can't do a conditional load or store, since that very
12886         likely will generate an exception.  So we have to take a side
12887         exit at this point if the condition is false. */
12888      if (condT != IRTemp_INVALID) {
12889         if (isT)
12890            mk_skip_over_T32_if_cond_is_false( condT );
12891         else
12892            mk_skip_over_A32_if_cond_is_false( condT );
12893         condT = IRTemp_INVALID;
12894      }
12895      /* Ok, now we're unconditional.  Do the load or store. */
12896
12897      /* get the old Rn value */
12898      IRTemp rnT = newTemp(Ity_I32);
12899      assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
12900                           rN == 15));
12901
12902      /* make a new value for Rn, post-insn */
12903      IRTemp rnTnew = IRTemp_INVALID;
12904      if (summary == 2 || summary == 3) {
12905         rnTnew = newTemp(Ity_I32);
12906         assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
12907                              mkexpr(rnT),
12908                              mkU32(4 + 8 * nRegs)));
12909      }
12910
12911      /* decide on the base transfer address */
12912      IRTemp taT = newTemp(Ity_I32);
12913      assign(taT,  summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
12914
12915      /* update Rn if necessary -- in case 3, we're moving it down, so
12916         update before any memory reference, in order to keep Memcheck
12917         and V's stack-extending logic (on linux) happy */
12918      if (summary == 3) {
12919         if (isT)
12920            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
12921         else
12922            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
12923      }
12924
12925      /* generate the transfers */
12926      for (i = 0; i < nRegs; i++) {
12927         IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
12928         if (bL) {
12929            putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
12930         } else {
12931            storeLE(addr, getDReg(dD + i));
12932         }
12933      }
12934
12935      /* update Rn if necessary -- in case 2, we're moving it up, so
12936         update after any memory reference, in order to keep Memcheck
12937         and V's stack-extending logic (on linux) happy */
12938      if (summary == 2) {
12939         if (isT)
12940            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
12941         else
12942            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
12943      }
12944
12945      const HChar* nm = bL==1 ? "ld" : "st";
12946      switch (summary) {
12947         case 1:  DIP("f%smx%s r%u, {d%u-d%u}\n",
12948                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
12949                  break;
12950         case 2:  DIP("f%smiax%s r%u!, {d%u-d%u}\n",
12951                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
12952                  break;
12953         case 3:  DIP("f%smdbx%s r%u!, {d%u-d%u}\n",
12954                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
12955                  break;
12956         default: vassert(0);
12957      }
12958
12959      goto decode_success_vfp;
12960      /* FIXME alignment constraints? */
12961   }
12962
12963  after_vfp_fldmx_fstmx:
12964
12965   /* --------------------- fldmd, fstmd --------------------- */
12966   /*
12967                                 31   27   23   19 15 11   7   0
12968                                         P U WL
12969      C4-96, C5-26   1  FSTMD    cond 1100 1000 Rn Dd 1011 offset
12970      C4-96, C5-28   2  FSTMDIA  cond 1100 1010 Rn Dd 1011 offset
12971      C4-96, C5-30   3  FSTMDDB  cond 1101 0010 Rn Dd 1011 offset
12972
12973      C4-38, C5-26   1  FLDMD    cond 1100 1001 Rn Dd 1011 offset
12974      C4-38, C5-28   2  FLDMIAD  cond 1100 1011 Rn Dd 1011 offset
12975      C4-38, C5-30   3  FLDMDBD  cond 1101 0011 Rn Dd 1011 offset
12976
12977      Regs transferred: Dd .. D(d + (offset-2)/2)
      offset must be even, must not imply a reg > 31
12979      IA/DB: Rn is changed by (8 x # regs transferred)
12980
12981      case coding:
12982         1  at-Rn   (access at Rn)
12983         2  ia-Rn   (access at Rn, then Rn += 8n)
12984         3  db-Rn   (Rn -= 8n,     then access at Rn)
12985   */
12986   if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
12987       && INSN(11,8) == BITS4(1,0,1,1)) {
12988      UInt bP      = (insn28 >> 24) & 1;
12989      UInt bU      = (insn28 >> 23) & 1;
12990      UInt bW      = (insn28 >> 21) & 1;
12991      UInt bL      = (insn28 >> 20) & 1;
12992      UInt offset  = (insn28 >> 0) & 0xFF;
12993      UInt rN      = INSN(19,16);
12994      UInt dD      = (INSN(22,22) << 4) | INSN(15,12);
12995      UInt nRegs   = offset / 2;
12996      UInt summary = 0;
12997      Int  i;
12998
12999      /**/ if (bP == 0 && bU == 1 && bW == 0) {
13000         summary = 1;
13001      }
13002      else if (bP == 0 && bU == 1 && bW == 1) {
13003         summary = 2;
13004      }
13005      else if (bP == 1 && bU == 0 && bW == 1) {
13006         summary = 3;
13007      }
13008      else goto after_vfp_fldmd_fstmd;
13009
13010      /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
13011      if (rN == 15 && (summary == 2 || summary == 3 || isT))
13012         goto after_vfp_fldmd_fstmd;
13013
13014      /* offset must be even, and specify at least one register */
13015      if (1 == (offset & 1) || offset < 2)
13016         goto after_vfp_fldmd_fstmd;
13017
13018      /* can't transfer regs after D15 */
13019      if (dD + nRegs - 1 >= 32)
13020         goto after_vfp_fldmd_fstmd;
13021
13022      /* Now, we can't do a conditional load or store, since that very
13023         likely will generate an exception.  So we have to take a side
13024         exit at this point if the condition is false. */
13025      if (condT != IRTemp_INVALID) {
13026         if (isT)
13027            mk_skip_over_T32_if_cond_is_false( condT );
13028         else
13029            mk_skip_over_A32_if_cond_is_false( condT );
13030         condT = IRTemp_INVALID;
13031      }
13032      /* Ok, now we're unconditional.  Do the load or store. */
13033
13034      /* get the old Rn value */
13035      IRTemp rnT = newTemp(Ity_I32);
13036      assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
13037                           rN == 15));
13038
13039      /* make a new value for Rn, post-insn */
13040      IRTemp rnTnew = IRTemp_INVALID;
13041      if (summary == 2 || summary == 3) {
13042         rnTnew = newTemp(Ity_I32);
13043         assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
13044                              mkexpr(rnT),
13045                              mkU32(8 * nRegs)));
13046      }
13047
13048      /* decide on the base transfer address */
13049      IRTemp taT = newTemp(Ity_I32);
13050      assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
13051
13052      /* update Rn if necessary -- in case 3, we're moving it down, so
13053         update before any memory reference, in order to keep Memcheck
13054         and V's stack-extending logic (on linux) happy */
13055      if (summary == 3) {
13056         if (isT)
13057            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
13058         else
13059            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
13060      }
13061
13062      /* generate the transfers */
13063      for (i = 0; i < nRegs; i++) {
13064         IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
13065         if (bL) {
13066            putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
13067         } else {
13068            storeLE(addr, getDReg(dD + i));
13069         }
13070      }
13071
13072      /* update Rn if necessary -- in case 2, we're moving it up, so
13073         update after any memory reference, in order to keep Memcheck
13074         and V's stack-extending logic (on linux) happy */
13075      if (summary == 2) {
13076         if (isT)
13077            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
13078         else
13079            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
13080      }
13081
13082      const HChar* nm = bL==1 ? "ld" : "st";
13083      switch (summary) {
13084         case 1:  DIP("f%smd%s r%u, {d%u-d%u}\n",
13085                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
13086                  break;
13087         case 2:  DIP("f%smiad%s r%u!, {d%u-d%u}\n",
13088                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
13089                  break;
13090         case 3:  DIP("f%smdbd%s r%u!, {d%u-d%u}\n",
13091                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
13092                  break;
13093         default: vassert(0);
13094      }
13095
13096      goto decode_success_vfp;
13097      /* FIXME alignment constraints? */
13098   }
13099
13100  after_vfp_fldmd_fstmd:
13101
13102   /* ------------------- fmrx, fmxr ------------------- */
13103   if (BITS8(1,1,1,0,1,1,1,1) == INSN(27,20)
13104       && BITS4(1,0,1,0) == INSN(11,8)
13105       && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
13106      UInt rD  = INSN(15,12);
13107      UInt reg = INSN(19,16);
13108      if (reg == BITS4(0,0,0,1)) {
13109         if (rD == 15) {
13110            IRTemp nzcvT = newTemp(Ity_I32);
13111            /* When rD is 15, we are copying the top 4 bits of FPSCR
13112               into CPSR.  That is, set the flags thunk to COPY and
13113               install FPSCR[31:28] as the value to copy. */
13114            assign(nzcvT, binop(Iop_And32,
13115                                IRExpr_Get(OFFB_FPSCR, Ity_I32),
13116                                mkU32(0xF0000000)));
13117            setFlags_D1(ARMG_CC_OP_COPY, nzcvT, condT);
13118            DIP("fmstat%s\n", nCC(conq));
13119         } else {
13120            /* Otherwise, merely transfer FPSCR to r0 .. r14. */
13121            IRExpr* e = IRExpr_Get(OFFB_FPSCR, Ity_I32);
13122            if (isT)
13123               putIRegT(rD, e, condT);
13124            else
13125               putIRegA(rD, e, condT, Ijk_Boring);
13126            DIP("fmrx%s r%u, fpscr\n", nCC(conq), rD);
13127         }
13128         goto decode_success_vfp;
13129      }
13130      /* fall through */
13131   }
13132
13133   if (BITS8(1,1,1,0,1,1,1,0) == INSN(27,20)
13134       && BITS4(1,0,1,0) == INSN(11,8)
13135       && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
13136      UInt rD  = INSN(15,12);
13137      UInt reg = INSN(19,16);
13138      if (reg == BITS4(0,0,0,1)) {
13139         putMiscReg32(OFFB_FPSCR,
13140                      isT ? getIRegT(rD) : getIRegA(rD), condT);
13141         DIP("fmxr%s fpscr, r%u\n", nCC(conq), rD);
13142         goto decode_success_vfp;
13143      }
13144      /* fall through */
13145   }
13146
13147   /* --------------------- vmov --------------------- */
13148   // VMOV dM, rD, rN
13149   if (0x0C400B10 == (insn28 & 0x0FF00FD0)) {
13150      UInt dM = INSN(3,0) | (INSN(5,5) << 4);
13151      UInt rD = INSN(15,12); /* lo32 */
13152      UInt rN = INSN(19,16); /* hi32 */
13153      if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))) {
13154         /* fall through */
13155      } else {
13156         putDReg(dM,
13157                 unop(Iop_ReinterpI64asF64,
13158                      binop(Iop_32HLto64,
13159                            isT ? getIRegT(rN) : getIRegA(rN),
13160                            isT ? getIRegT(rD) : getIRegA(rD))),
13161                 condT);
13162         DIP("vmov%s d%u, r%u, r%u\n", nCC(conq), dM, rD, rN);
13163         goto decode_success_vfp;
13164      }
13165      /* fall through */
13166   }
13167
13168   // VMOV rD, rN, dM
13169   if (0x0C500B10 == (insn28 & 0x0FF00FD0)) {
13170      UInt dM = INSN(3,0) | (INSN(5,5) << 4);
13171      UInt rD = INSN(15,12); /* lo32 */
13172      UInt rN = INSN(19,16); /* hi32 */
13173      if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))
13174          || rD == rN) {
13175         /* fall through */
13176      } else {
13177         IRTemp i64 = newTemp(Ity_I64);
13178         assign(i64, unop(Iop_ReinterpF64asI64, getDReg(dM)));
13179         IRExpr* hi32 = unop(Iop_64HIto32, mkexpr(i64));
13180         IRExpr* lo32 = unop(Iop_64to32,   mkexpr(i64));
13181         if (isT) {
13182            putIRegT(rN, hi32, condT);
13183            putIRegT(rD, lo32, condT);
13184         } else {
13185            putIRegA(rN, hi32, condT, Ijk_Boring);
13186            putIRegA(rD, lo32, condT, Ijk_Boring);
13187         }
13188         DIP("vmov%s r%u, r%u, d%u\n", nCC(conq), rD, rN, dM);
13189         goto decode_success_vfp;
13190      }
13191      /* fall through */
13192   }
13193
13194   // VMOV sD, sD+1, rN, rM
13195   if (0x0C400A10 == (insn28 & 0x0FF00FD0)) {
13196      UInt sD = (INSN(3,0) << 1) | INSN(5,5);
13197      UInt rN = INSN(15,12);
13198      UInt rM = INSN(19,16);
13199      if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
13200          || sD == 31) {
13201         /* fall through */
13202      } else {
13203         putFReg(sD,
13204                 unop(Iop_ReinterpI32asF32, isT ? getIRegT(rN) : getIRegA(rN)),
13205                 condT);
13206         putFReg(sD+1,
13207                 unop(Iop_ReinterpI32asF32, isT ? getIRegT(rM) : getIRegA(rM)),
13208                 condT);
13209         DIP("vmov%s, s%u, s%u, r%u, r%u\n",
13210              nCC(conq), sD, sD + 1, rN, rM);
13211         goto decode_success_vfp;
13212      }
13213   }
13214
13215   // VMOV rN, rM, sD, sD+1
13216   if (0x0C500A10 == (insn28 & 0x0FF00FD0)) {
13217      UInt sD = (INSN(3,0) << 1) | INSN(5,5);
13218      UInt rN = INSN(15,12);
13219      UInt rM = INSN(19,16);
13220      if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
13221          || sD == 31 || rN == rM) {
13222         /* fall through */
13223      } else {
13224         IRExpr* res0 = unop(Iop_ReinterpF32asI32, getFReg(sD));
13225         IRExpr* res1 = unop(Iop_ReinterpF32asI32, getFReg(sD+1));
13226         if (isT) {
13227            putIRegT(rN, res0, condT);
13228            putIRegT(rM, res1, condT);
13229         } else {
13230            putIRegA(rN, res0, condT, Ijk_Boring);
13231            putIRegA(rM, res1, condT, Ijk_Boring);
13232         }
13233         DIP("vmov%s, r%u, r%u, s%u, s%u\n",
13234             nCC(conq), rN, rM, sD, sD + 1);
13235         goto decode_success_vfp;
13236      }
13237   }
13238
13239   // VMOV rD[x], rT  (ARM core register to scalar)
13240   if (0x0E000B10 == (insn28 & 0x0F900F1F)) {
13241      UInt rD  = (INSN(7,7) << 4) | INSN(19,16);
13242      UInt rT  = INSN(15,12);
13243      UInt opc = (INSN(22,21) << 2) | INSN(6,5);
13244      UInt index;
13245      if (rT == 15 || (isT && rT == 13)) {
13246         /* fall through */
13247      } else {
13248         if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
13249            index = opc & 7;
13250            putDRegI64(rD, triop(Iop_SetElem8x8,
13251                                 getDRegI64(rD),
13252                                 mkU8(index),
13253                                 unop(Iop_32to8,
13254                                      isT ? getIRegT(rT) : getIRegA(rT))),
13255                           condT);
13256            DIP("vmov%s.8 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
13257            goto decode_success_vfp;
13258         }
13259         else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
13260            index = (opc >> 1) & 3;
13261            putDRegI64(rD, triop(Iop_SetElem16x4,
13262                                 getDRegI64(rD),
13263                                 mkU8(index),
13264                                 unop(Iop_32to16,
13265                                      isT ? getIRegT(rT) : getIRegA(rT))),
13266                           condT);
13267            DIP("vmov%s.16 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
13268            goto decode_success_vfp;
13269         }
13270         else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0)) {
13271            index = (opc >> 2) & 1;
13272            putDRegI64(rD, triop(Iop_SetElem32x2,
13273                                 getDRegI64(rD),
13274                                 mkU8(index),
13275                                 isT ? getIRegT(rT) : getIRegA(rT)),
13276                           condT);
13277            DIP("vmov%s.32 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
13278            goto decode_success_vfp;
13279         } else {
13280            /* fall through */
13281         }
13282      }
13283   }
13284
13285   // VMOV (scalar to ARM core register)
13286   // VMOV rT, rD[x]
13287   if (0x0E100B10 == (insn28 & 0x0F100F1F)) {
13288      UInt rN  = (INSN(7,7) << 4) | INSN(19,16);
13289      UInt rT  = INSN(15,12);
13290      UInt U   = INSN(23,23);
13291      UInt opc = (INSN(22,21) << 2) | INSN(6,5);
13292      UInt index;
13293      if (rT == 15 || (isT && rT == 13)) {
13294         /* fall through */
13295      } else {
13296         if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
13297            index = opc & 7;
13298            IRExpr* e = unop(U ? Iop_8Uto32 : Iop_8Sto32,
13299                             binop(Iop_GetElem8x8,
13300                                   getDRegI64(rN),
13301                                   mkU8(index)));
13302            if (isT)
13303               putIRegT(rT, e, condT);
13304            else
13305               putIRegA(rT, e, condT, Ijk_Boring);
13306            DIP("vmov%s.%c8 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
13307                  rT, rN, index);
13308            goto decode_success_vfp;
13309         }
13310         else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
13311            index = (opc >> 1) & 3;
13312            IRExpr* e = unop(U ? Iop_16Uto32 : Iop_16Sto32,
13313                             binop(Iop_GetElem16x4,
13314                                   getDRegI64(rN),
13315                                   mkU8(index)));
13316            if (isT)
13317               putIRegT(rT, e, condT);
13318            else
13319               putIRegA(rT, e, condT, Ijk_Boring);
13320            DIP("vmov%s.%c16 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
13321                  rT, rN, index);
13322            goto decode_success_vfp;
13323         }
13324         else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0) && U == 0) {
13325            index = (opc >> 2) & 1;
13326            IRExpr* e = binop(Iop_GetElem32x2, getDRegI64(rN), mkU8(index));
13327            if (isT)
13328               putIRegT(rT, e, condT);
13329            else
13330               putIRegA(rT, e, condT, Ijk_Boring);
13331            DIP("vmov%s.32 r%u, d%u[%u]\n", nCC(conq), rT, rN, index);
13332            goto decode_success_vfp;
13333         } else {
13334            /* fall through */
13335         }
13336      }
13337   }
13338
13339   // VMOV.F32 sD, #imm
13340   // FCONSTS sD, #imm
13341   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
13342       && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,0)) {
13343      UInt rD   = (INSN(15,12) << 1) | INSN(22,22);
13344      UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
13345      UInt b    = (imm8 >> 6) & 1;
13346      UInt imm;
13347      imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,(imm8 >> 5) & 1) << 8)
13348             | ((imm8 & 0x1f) << 3);
13349      imm <<= 16;
13350      putFReg(rD, unop(Iop_ReinterpI32asF32, mkU32(imm)), condT);
13351      DIP("fconsts%s s%u #%u", nCC(conq), rD, imm8);
13352      goto decode_success_vfp;
13353   }
13354
13355   // VMOV.F64 dD, #imm
13356   // FCONSTD dD, #imm
13357   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
13358       && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,1)) {
13359      UInt rD   = INSN(15,12) | (INSN(22,22) << 4);
13360      UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
13361      UInt b    = (imm8 >> 6) & 1;
13362      ULong imm;
13363      imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,b) << 8)
13364             | BITS8(b,b,0,0,0,0,0,0) | (imm8 & 0x3f);
13365      imm <<= 48;
13366      putDReg(rD, unop(Iop_ReinterpI64asF64, mkU64(imm)), condT);
13367      DIP("fconstd%s d%u #%u", nCC(conq), rD, imm8);
13368      goto decode_success_vfp;
13369   }
13370
13371   /* ---------------------- vdup ------------------------- */
13372   // VDUP dD, rT
13373   // VDUP qD, rT
13374   if (BITS8(1,1,1,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,1))
13375       && BITS4(1,0,1,1) == INSN(11,8) && INSN(6,6) == 0 && INSN(4,4) == 1) {
13376      UInt rD   = (INSN(7,7) << 4) | INSN(19,16);
13377      UInt rT   = INSN(15,12);
13378      UInt Q    = INSN(21,21);
13379      UInt size = (INSN(22,22) << 1) | INSN(5,5);
13380      if (rT == 15 || (isT && rT == 13) || size == 3 || (Q && (rD & 1))) {
13381         /* fall through */
13382      } else {
13383         IRExpr* e = isT ? getIRegT(rT) : getIRegA(rT);
13384         if (Q) {
13385            rD >>= 1;
13386            switch (size) {
13387               case 0:
13388                  putQReg(rD, unop(Iop_Dup32x4, e), condT);
13389                  break;
13390               case 1:
13391                  putQReg(rD, unop(Iop_Dup16x8, unop(Iop_32to16, e)),
13392                              condT);
13393                  break;
13394               case 2:
13395                  putQReg(rD, unop(Iop_Dup8x16, unop(Iop_32to8, e)),
13396                              condT);
13397                  break;
13398               default:
13399                  vassert(0);
13400            }
13401            DIP("vdup.%u q%u, r%u\n", 32 / (1<<size), rD, rT);
13402         } else {
13403            switch (size) {
13404               case 0:
13405                  putDRegI64(rD, unop(Iop_Dup32x2, e), condT);
13406                  break;
13407               case 1:
13408                  putDRegI64(rD, unop(Iop_Dup16x4, unop(Iop_32to16, e)),
13409                               condT);
13410                  break;
13411               case 2:
13412                  putDRegI64(rD, unop(Iop_Dup8x8, unop(Iop_32to8, e)),
13413                               condT);
13414                  break;
13415               default:
13416                  vassert(0);
13417            }
13418            DIP("vdup.%u d%u, r%u\n", 32 / (1<<size), rD, rT);
13419         }
13420         goto decode_success_vfp;
13421      }
13422   }
13423
13424   /* --------------------- f{ld,st}d --------------------- */
13425   // FLDD, FSTD
13426   if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
13427       && BITS4(1,0,1,1) == INSN(11,8)) {
13428      UInt dD     = INSN(15,12) | (INSN(22,22) << 4);
13429      UInt rN     = INSN(19,16);
13430      UInt offset = (insn28 & 0xFF) << 2;
13431      UInt bU     = (insn28 >> 23) & 1; /* 1: +offset  0: -offset */
13432      UInt bL     = (insn28 >> 20) & 1; /* 1: load  0: store */
13433      /* make unconditional */
13434      if (condT != IRTemp_INVALID) {
13435         if (isT)
13436            mk_skip_over_T32_if_cond_is_false( condT );
13437         else
13438            mk_skip_over_A32_if_cond_is_false( condT );
13439         condT = IRTemp_INVALID;
13440      }
13441      IRTemp ea = newTemp(Ity_I32);
13442      assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
13443                       align4if(isT ? getIRegT(rN) : getIRegA(rN),
13444                                rN == 15),
13445                       mkU32(offset)));
13446      if (bL) {
13447         putDReg(dD, loadLE(Ity_F64,mkexpr(ea)), IRTemp_INVALID);
13448      } else {
13449         storeLE(mkexpr(ea), getDReg(dD));
13450      }
13451      DIP("f%sd%s d%u, [r%u, %c#%u]\n",
13452          bL ? "ld" : "st", nCC(conq), dD, rN,
13453          bU ? '+' : '-', offset);
13454      goto decode_success_vfp;
13455   }
13456
13457   /* --------------------- dp insns (D) --------------------- */
13458   if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
13459       && BITS4(1,0,1,1) == INSN(11,8)
13460       && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
13461      UInt    dM  = INSN(3,0)   | (INSN(5,5) << 4);       /* argR */
13462      UInt    dD  = INSN(15,12) | (INSN(22,22) << 4);   /* dst/acc */
13463      UInt    dN  = INSN(19,16) | (INSN(7,7) << 4);     /* argL */
13464      UInt    bP  = (insn28 >> 23) & 1;
13465      UInt    bQ  = (insn28 >> 21) & 1;
13466      UInt    bR  = (insn28 >> 20) & 1;
13467      UInt    bS  = (insn28 >> 6) & 1;
13468      UInt    opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
13469      IRExpr* rm  = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
13470      switch (opc) {
13471         case BITS4(0,0,0,0): /* MAC: d + n * m */
13472            putDReg(dD, triop(Iop_AddF64, rm,
13473                              getDReg(dD),
13474                              triop(Iop_MulF64, rm, getDReg(dN),
13475                                                    getDReg(dM))),
13476                        condT);
13477            DIP("fmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13478            goto decode_success_vfp;
13479         case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
13480            putDReg(dD, triop(Iop_AddF64, rm,
13481                              getDReg(dD),
13482                              unop(Iop_NegF64,
13483                                   triop(Iop_MulF64, rm, getDReg(dN),
13484                                                         getDReg(dM)))),
13485                        condT);
13486            DIP("fnmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13487            goto decode_success_vfp;
13488         case BITS4(0,0,1,0): /* MSC: - d + n * m */
13489            putDReg(dD, triop(Iop_AddF64, rm,
13490                              unop(Iop_NegF64, getDReg(dD)),
13491                              triop(Iop_MulF64, rm, getDReg(dN),
13492                                                    getDReg(dM))),
13493                        condT);
13494            DIP("fmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13495            goto decode_success_vfp;
13496         case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
13497            putDReg(dD, triop(Iop_AddF64, rm,
13498                              unop(Iop_NegF64, getDReg(dD)),
13499                              unop(Iop_NegF64,
13500                                   triop(Iop_MulF64, rm, getDReg(dN),
13501                                                         getDReg(dM)))),
13502                        condT);
13503            DIP("fnmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13504            goto decode_success_vfp;
13505         case BITS4(0,1,0,0): /* MUL: n * m */
13506            putDReg(dD, triop(Iop_MulF64, rm, getDReg(dN), getDReg(dM)),
13507                        condT);
13508            DIP("fmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13509            goto decode_success_vfp;
13510         case BITS4(0,1,0,1): /* NMUL: - n * m */
13511            putDReg(dD, unop(Iop_NegF64,
13512                             triop(Iop_MulF64, rm, getDReg(dN),
13513                                                   getDReg(dM))),
13514                    condT);
13515            DIP("fnmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13516            goto decode_success_vfp;
13517         case BITS4(0,1,1,0): /* ADD: n + m */
13518            putDReg(dD, triop(Iop_AddF64, rm, getDReg(dN), getDReg(dM)),
13519                        condT);
13520            DIP("faddd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13521            goto decode_success_vfp;
13522         case BITS4(0,1,1,1): /* SUB: n - m */
13523            putDReg(dD, triop(Iop_SubF64, rm, getDReg(dN), getDReg(dM)),
13524                        condT);
13525            DIP("fsubd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13526            goto decode_success_vfp;
13527         case BITS4(1,0,0,0): /* DIV: n / m */
13528            putDReg(dD, triop(Iop_DivF64, rm, getDReg(dN), getDReg(dM)),
13529                        condT);
13530            DIP("fdivd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13531            goto decode_success_vfp;
13532         case BITS4(1,0,1,0): /* VNFMS: -(d - n * m) (fused) */
13533            /* XXXROUNDINGFIXME look up ARM reference for fused
13534               multiply-add rounding */
13535            putDReg(dD, triop(Iop_AddF64, rm,
13536                              unop(Iop_NegF64, getDReg(dD)),
13537                              triop(Iop_MulF64, rm,
13538                                                getDReg(dN),
13539                                                getDReg(dM))),
13540                        condT);
13541            DIP("vfnmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13542            goto decode_success_vfp;
13543         case BITS4(1,0,1,1): /* VNFMA: -(d + n * m) (fused) */
13544            /* XXXROUNDINGFIXME look up ARM reference for fused
13545               multiply-add rounding */
13546            putDReg(dD, triop(Iop_AddF64, rm,
13547                              unop(Iop_NegF64, getDReg(dD)),
13548                              triop(Iop_MulF64, rm,
13549                                                unop(Iop_NegF64, getDReg(dN)),
13550                                                getDReg(dM))),
13551                        condT);
13552            DIP("vfnmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13553            goto decode_success_vfp;
13554         case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
13555            /* XXXROUNDINGFIXME look up ARM reference for fused
13556               multiply-add rounding */
13557            putDReg(dD, triop(Iop_AddF64, rm,
13558                              getDReg(dD),
13559                              triop(Iop_MulF64, rm, getDReg(dN),
13560                                                    getDReg(dM))),
13561                        condT);
13562            DIP("vfmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13563            goto decode_success_vfp;
13564         case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
13565            /* XXXROUNDINGFIXME look up ARM reference for fused
13566               multiply-add rounding */
13567            putDReg(dD, triop(Iop_AddF64, rm,
13568                              getDReg(dD),
13569                              triop(Iop_MulF64, rm,
13570                                    unop(Iop_NegF64, getDReg(dN)),
13571                                    getDReg(dM))),
13572                        condT);
13573            DIP("vfmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13574            goto decode_success_vfp;
13575         default:
13576            break;
13577      }
13578   }
13579
13580   /* --------------------- compares (D) --------------------- */
13581   /*          31   27   23   19   15 11   7    3
13582                 28   24   20   16 12    8    4    0
13583      FCMPD    cond 1110 1D11 0100 Dd 1011 0100 Dm
13584      FCMPED   cond 1110 1D11 0100 Dd 1011 1100 Dm
13585      FCMPZD   cond 1110 1D11 0101 Dd 1011 0100 0000
13586      FCMPZED  cond 1110 1D11 0101 Dd 1011 1100 0000
13587                                 Z         N
13588
13589      Z=0 Compare Dd vs Dm     and set FPSCR 31:28 accordingly
13590      Z=1 Compare Dd vs zero
13591
13592      N=1 generates Invalid Operation exn if either arg is any kind of NaN
13593      N=0 generates Invalid Operation exn if either arg is a signalling NaN
13594      (Not that we pay any attention to N here)
13595   */
13596   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
13597       && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
13598       && BITS4(1,0,1,1) == INSN(11,8)
13599       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
13600      UInt bZ = (insn28 >> 16) & 1;
13601      UInt bN = (insn28 >> 7) & 1;
13602      UInt dD = INSN(15,12) | (INSN(22,22) << 4);
13603      UInt dM = INSN(3,0) | (INSN(5,5) << 4);
13604      if (bZ && INSN(3,0) != 0) {
13605         /* does not decode; fall through */
13606      } else {
13607         IRTemp argL = newTemp(Ity_F64);
13608         IRTemp argR = newTemp(Ity_F64);
13609         IRTemp irRes = newTemp(Ity_I32);
13610         assign(argL, getDReg(dD));
13611         assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0)) : getDReg(dM));
13612         assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
13613
13614         IRTemp nzcv     = IRTemp_INVALID;
13615         IRTemp oldFPSCR = newTemp(Ity_I32);
13616         IRTemp newFPSCR = newTemp(Ity_I32);
13617
13618         /* This is where the fun starts.  We have to convert 'irRes'
13619            from an IR-convention return result (IRCmpF64Result) to an
13620            ARM-encoded (N,Z,C,V) group.  The final result is in the
13621            bottom 4 bits of 'nzcv'. */
13622         /* Map compare result from IR to ARM(nzcv) */
13623         /*
13624            FP cmp result | IR   | ARM(nzcv)
13625            --------------------------------
13626            UN              0x45   0011
13627            LT              0x01   1000
13628            GT              0x00   0010
13629            EQ              0x40   0110
13630         */
13631         nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
13632
13633         /* And update FPSCR accordingly */
13634         assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
13635         assign(newFPSCR,
13636                binop(Iop_Or32,
13637                      binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
13638                      binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
13639
13640         putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
13641
13642         if (bZ) {
13643            DIP("fcmpz%sd%s d%u\n", bN ? "e" : "", nCC(conq), dD);
13644         } else {
13645            DIP("fcmp%sd%s d%u, d%u\n", bN ? "e" : "", nCC(conq), dD, dM);
13646         }
13647         goto decode_success_vfp;
13648      }
13649      /* fall through */
13650   }
13651
13652   /* --------------------- unary (D) --------------------- */
13653   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
13654       && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
13655       && BITS4(1,0,1,1) == INSN(11,8)
13656       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
13657      UInt dD  = INSN(15,12) | (INSN(22,22) << 4);
13658      UInt dM  = INSN(3,0) | (INSN(5,5) << 4);
13659      UInt b16 = (insn28 >> 16) & 1;
13660      UInt b7  = (insn28 >> 7) & 1;
13661      /**/ if (b16 == 0 && b7 == 0) {
13662         // FCPYD
13663         putDReg(dD, getDReg(dM), condT);
13664         DIP("fcpyd%s d%u, d%u\n", nCC(conq), dD, dM);
13665         goto decode_success_vfp;
13666      }
13667      else if (b16 == 0 && b7 == 1) {
13668         // FABSD
13669         putDReg(dD, unop(Iop_AbsF64, getDReg(dM)), condT);
13670         DIP("fabsd%s d%u, d%u\n", nCC(conq), dD, dM);
13671         goto decode_success_vfp;
13672      }
13673      else if (b16 == 1 && b7 == 0) {
13674         // FNEGD
13675         putDReg(dD, unop(Iop_NegF64, getDReg(dM)), condT);
13676         DIP("fnegd%s d%u, d%u\n", nCC(conq), dD, dM);
13677         goto decode_success_vfp;
13678      }
13679      else if (b16 == 1 && b7 == 1) {
13680         // FSQRTD
13681         IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
13682         putDReg(dD, binop(Iop_SqrtF64, rm, getDReg(dM)), condT);
13683         DIP("fsqrtd%s d%u, d%u\n", nCC(conq), dD, dM);
13684         goto decode_success_vfp;
13685      }
13686      else
13687         vassert(0);
13688
13689      /* fall through */
13690   }
13691
13692   /* ----------------- I <-> D conversions ----------------- */
13693
13694   // F{S,U}ITOD dD, fM
13695   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
13696       && BITS4(1,0,0,0) == (INSN(19,16) & BITS4(1,1,1,1))
13697       && BITS4(1,0,1,1) == INSN(11,8)
13698       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
13699      UInt bM    = (insn28 >> 5) & 1;
13700      UInt fM    = (INSN(3,0) << 1) | bM;
13701      UInt dD    = INSN(15,12) | (INSN(22,22) << 4);
13702      UInt syned = (insn28 >> 7) & 1;
13703      if (syned) {
13704         // FSITOD
13705         putDReg(dD, unop(Iop_I32StoF64,
13706                          unop(Iop_ReinterpF32asI32, getFReg(fM))),
13707                 condT);
13708         DIP("fsitod%s d%u, s%u\n", nCC(conq), dD, fM);
13709      } else {
13710         // FUITOD
13711         putDReg(dD, unop(Iop_I32UtoF64,
13712                          unop(Iop_ReinterpF32asI32, getFReg(fM))),
13713                 condT);
13714         DIP("fuitod%s d%u, s%u\n", nCC(conq), dD, fM);
13715      }
13716      goto decode_success_vfp;
13717   }
13718
13719   // FTO{S,U}ID fD, dM
13720   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
13721       && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
13722       && BITS4(1,0,1,1) == INSN(11,8)
13723       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
13724      UInt   bD    = (insn28 >> 22) & 1;
13725      UInt   fD    = (INSN(15,12) << 1) | bD;
13726      UInt   dM    = INSN(3,0) | (INSN(5,5) << 4);
13727      UInt   bZ    = (insn28 >> 7) & 1;
13728      UInt   syned = (insn28 >> 16) & 1;
13729      IRTemp rmode = newTemp(Ity_I32);
13730      assign(rmode, bZ ? mkU32(Irrm_ZERO)
13731                       : mkexpr(mk_get_IR_rounding_mode()));
13732      if (syned) {
13733         // FTOSID
13734         putFReg(fD, unop(Iop_ReinterpI32asF32,
13735                          binop(Iop_F64toI32S, mkexpr(rmode),
13736                                getDReg(dM))),
13737                 condT);
13738         DIP("ftosi%sd%s s%u, d%u\n", bZ ? "z" : "",
13739             nCC(conq), fD, dM);
13740      } else {
13741         // FTOUID
13742         putFReg(fD, unop(Iop_ReinterpI32asF32,
13743                          binop(Iop_F64toI32U, mkexpr(rmode),
13744                                getDReg(dM))),
13745                 condT);
13746         DIP("ftoui%sd%s s%u, d%u\n", bZ ? "z" : "",
13747             nCC(conq), fD, dM);
13748      }
13749      goto decode_success_vfp;
13750   }
13751
13752   /* ----------------------------------------------------------- */
13753   /* -- VFP instructions -- single precision                  -- */
13754   /* ----------------------------------------------------------- */
13755
13756   /* --------------------- fldms, fstms --------------------- */
13757   /*
13758                                 31   27   23   19 15 11   7   0
13759                                         P UDWL
      C4-98, C5-26   1  FSTMS    cond 1100 1x00 Rn Fd 1010 offset
      C4-98, C5-28   2  FSTMIAS  cond 1100 1x10 Rn Fd 1010 offset
      C4-98, C5-30   3  FSTMDBS  cond 1101 0x10 Rn Fd 1010 offset

      C4-40, C5-26   1  FLDMS    cond 1100 1x01 Rn Fd 1010 offset
      C4-40, C5-26   2  FLDMIAS  cond 1100 1x11 Rn Fd 1010 offset
      C4-40, C5-26   3  FLDMDBS  cond 1101 0x11 Rn Fd 1010 offset

      Regs transferred: F(Fd:D) .. F(Fd:D + offset - 1)
      offset must not imply a reg > 31
13770      IA/DB: Rn is changed by (4 x # regs transferred)
13771
13772      case coding:
13773         1  at-Rn   (access at Rn)
13774         2  ia-Rn   (access at Rn, then Rn += 4n)
13775         3  db-Rn   (Rn -= 4n,     then access at Rn)
13776   */
   /* Multiple load/store of single-precision registers.  Transfers
      'offset' consecutive S registers starting at S[fD] to or from
      memory at the address derived from Rn, optionally writing the
      adjusted address back to Rn. */
   if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
       && INSN(11,8) == BITS4(1,0,1,0)) {
      UInt bP      = (insn28 >> 24) & 1;
      UInt bU      = (insn28 >> 23) & 1;
      UInt bW      = (insn28 >> 21) & 1;
      UInt bL      = (insn28 >> 20) & 1;   /* 1: load  0: store */
      UInt bD      = (insn28 >> 22) & 1;
      UInt offset  = (insn28 >> 0) & 0xFF;
      UInt rN      = INSN(19,16);
      UInt fD      = (INSN(15,12) << 1) | bD;
      UInt nRegs   = offset;   /* one S register per unit of offset */
      UInt summary = 0;        /* addressing case: 1, 2 or 3, see below */
      Int  i;

      /* Classify the P/U/W combination; anything other than the three
         recognised forms is not handled by this block. */
      /**/ if (bP == 0 && bU == 1 && bW == 0) {
         summary = 1;   /* access at Rn, no writeback */
      }
      else if (bP == 0 && bU == 1 && bW == 1) {
         summary = 2;   /* access at Rn, then Rn += 4 * nRegs */
      }
      else if (bP == 1 && bU == 0 && bW == 1) {
         summary = 3;   /* Rn -= 4 * nRegs, then access at Rn */
      }
      else goto after_vfp_fldms_fstms;

      /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
      if (rN == 15 && (summary == 2 || summary == 3 || isT))
         goto after_vfp_fldms_fstms;

      /* offset must specify at least one register */
      if (offset < 1)
         goto after_vfp_fldms_fstms;

      /* can't transfer regs after S31 */
      if (fD + nRegs - 1 >= 32)
         goto after_vfp_fldms_fstms;

      /* Now, we can't do a conditional load or store, since that very
         likely will generate an exception.  So we have to take a side
         exit at this point if the condition is false. */
      if (condT != IRTemp_INVALID) {
         if (isT)
            mk_skip_over_T32_if_cond_is_false( condT );
         else
            mk_skip_over_A32_if_cond_is_false( condT );
         condT = IRTemp_INVALID;
      }
      /* Ok, now we're unconditional.  Do the load or store. */

      /* get the old Rn value */
      IRTemp rnT = newTemp(Ity_I32);
      assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
                           rN == 15));

      /* make a new value for Rn, post-insn */
      IRTemp rnTnew = IRTemp_INVALID;
      if (summary == 2 || summary == 3) {
         rnTnew = newTemp(Ity_I32);
         assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
                              mkexpr(rnT),
                              mkU32(4 * nRegs)));
      }

      /* decide on the base transfer address */
      IRTemp taT = newTemp(Ity_I32);
      assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));

      /* update Rn if necessary -- in case 3, we're moving it down, so
         update before any memory reference, in order to keep Memcheck
         and V's stack-extending logic (on linux) happy */
      if (summary == 3) {
         if (isT)
            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
         else
            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
      }

      /* generate the transfers: register S[fD + i] lives at taT + 4*i */
      for (i = 0; i < nRegs; i++) {
         IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(4*i));
         if (bL) {
            putFReg(fD + i, loadLE(Ity_F32, addr), IRTemp_INVALID);
         } else {
            storeLE(addr, getFReg(fD + i));
         }
      }

      /* update Rn if necessary -- in case 2, we're moving it up, so
         update after any memory reference, in order to keep Memcheck
         and V's stack-extending logic (on linux) happy */
      if (summary == 2) {
         if (isT)
            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
         else
            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
      }

      /* Trace output; the mnemonic suffix reflects the addressing case. */
      const HChar* nm = bL==1 ? "ld" : "st";
      switch (summary) {
         case 1:  DIP("f%sms%s r%u, {s%u-s%u}\n",
                      nm, nCC(conq), rN, fD, fD + nRegs - 1);
                  break;
         case 2:  DIP("f%smias%s r%u!, {s%u-s%u}\n",
                      nm, nCC(conq), rN, fD, fD + nRegs - 1);
                  break;
         case 3:  DIP("f%smdbs%s r%u!, {s%u-s%u}\n",
                      nm, nCC(conq), rN, fD, fD + nRegs - 1);
                  break;
         default: vassert(0);
      }

      goto decode_success_vfp;
      /* FIXME alignment constraints? */
   }
13891
13892  after_vfp_fldms_fstms:
13893
13894   /* --------------------- fmsr, fmrs --------------------- */
13895   if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
13896       && BITS4(1,0,1,0) == INSN(11,8)
13897       && BITS4(0,0,0,0) == INSN(3,0)
13898       && BITS4(0,0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
13899      UInt rD  = INSN(15,12);
13900      UInt b7  = (insn28 >> 7) & 1;
13901      UInt fN  = (INSN(19,16) << 1) | b7;
13902      UInt b20 = (insn28 >> 20) & 1;
13903      if (rD == 15) {
13904         /* fall through */
13905         /* Let's assume that no sane person would want to do
13906            floating-point transfers to or from the program counter,
13907            and simply decline to decode the instruction.  The ARM ARM
13908            doesn't seem to explicitly disallow this case, though. */
13909      } else {
13910         if (b20) {
13911            IRExpr* res = unop(Iop_ReinterpF32asI32, getFReg(fN));
13912            if (isT)
13913               putIRegT(rD, res, condT);
13914            else
13915               putIRegA(rD, res, condT, Ijk_Boring);
13916            DIP("fmrs%s r%u, s%u\n", nCC(conq), rD, fN);
13917         } else {
13918            putFReg(fN, unop(Iop_ReinterpI32asF32,
13919                             isT ? getIRegT(rD) : getIRegA(rD)),
13920                        condT);
13921            DIP("fmsr%s s%u, r%u\n", nCC(conq), fN, rD);
13922         }
13923         goto decode_success_vfp;
13924      }
13925      /* fall through */
13926   }
13927
13928   /* --------------------- f{ld,st}s --------------------- */
13929   // FLDS, FSTS
13930   if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
13931       && BITS4(1,0,1,0) == INSN(11,8)) {
13932      UInt bD     = (insn28 >> 22) & 1;
13933      UInt fD     = (INSN(15,12) << 1) | bD;
13934      UInt rN     = INSN(19,16);
13935      UInt offset = (insn28 & 0xFF) << 2;
13936      UInt bU     = (insn28 >> 23) & 1; /* 1: +offset  0: -offset */
13937      UInt bL     = (insn28 >> 20) & 1; /* 1: load  0: store */
13938      /* make unconditional */
13939      if (condT != IRTemp_INVALID) {
13940         if (isT)
13941            mk_skip_over_T32_if_cond_is_false( condT );
13942         else
13943            mk_skip_over_A32_if_cond_is_false( condT );
13944         condT = IRTemp_INVALID;
13945      }
13946      IRTemp ea = newTemp(Ity_I32);
13947      assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
13948                       align4if(isT ? getIRegT(rN) : getIRegA(rN),
13949                                rN == 15),
13950                       mkU32(offset)));
13951      if (bL) {
13952         putFReg(fD, loadLE(Ity_F32,mkexpr(ea)), IRTemp_INVALID);
13953      } else {
13954         storeLE(mkexpr(ea), getFReg(fD));
13955      }
13956      DIP("f%ss%s s%u, [r%u, %c#%u]\n",
13957          bL ? "ld" : "st", nCC(conq), fD, rN,
13958          bU ? '+' : '-', offset);
13959      goto decode_success_vfp;
13960   }
13961
13962   /* --------------------- dp insns (F) --------------------- */
   /* Single-precision data-processing instructions: S[fD] is computed
      from S[fN], S[fM] and, for the accumulating forms, the old value
      of S[fD].  The operation is chosen by a 4-bit code built from
      instruction bits 23, 21, 20 and 6. */
   if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
       && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
       && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
      UInt    bM  = (insn28 >> 5) & 1;
      UInt    bD  = (insn28 >> 22) & 1;
      UInt    bN  = (insn28 >> 7) & 1;
      UInt    fM  = (INSN(3,0) << 1) | bM;   /* argR */
      UInt    fD  = (INSN(15,12) << 1) | bD; /* dst/acc */
      UInt    fN  = (INSN(19,16) << 1) | bN; /* argL */
      UInt    bP  = (insn28 >> 23) & 1;
      UInt    bQ  = (insn28 >> 21) & 1;
      UInt    bR  = (insn28 >> 20) & 1;
      UInt    bS  = (insn28 >> 6) & 1;
      /* opcode selector, most significant bit first: P Q R S */
      UInt    opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
      IRExpr* rm  = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
      switch (opc) {
         case BITS4(0,0,0,0): /* MAC: d + n * m */
            putFReg(fD, triop(Iop_AddF32, rm,
                              getFReg(fD),
                              triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
                        condT);
            DIP("fmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
            goto decode_success_vfp;
         case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
            putFReg(fD, triop(Iop_AddF32, rm,
                              getFReg(fD),
                              unop(Iop_NegF32,
                                   triop(Iop_MulF32, rm, getFReg(fN),
                                                         getFReg(fM)))),
                        condT);
            DIP("fnmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
            goto decode_success_vfp;
         case BITS4(0,0,1,0): /* MSC: - d + n * m */
            putFReg(fD, triop(Iop_AddF32, rm,
                              unop(Iop_NegF32, getFReg(fD)),
                              triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
                        condT);
            DIP("fmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
            goto decode_success_vfp;
         case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
            putFReg(fD, triop(Iop_AddF32, rm,
                              unop(Iop_NegF32, getFReg(fD)),
                              unop(Iop_NegF32,
                                   triop(Iop_MulF32, rm,
                                                     getFReg(fN),
                                                    getFReg(fM)))),
                        condT);
            DIP("fnmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
            goto decode_success_vfp;
         case BITS4(0,1,0,0): /* MUL: n * m */
            putFReg(fD, triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM)),
                        condT);
            DIP("fmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
            goto decode_success_vfp;
         case BITS4(0,1,0,1): /* NMUL: - n * m */
            putFReg(fD, unop(Iop_NegF32,
                             triop(Iop_MulF32, rm, getFReg(fN),
                                                   getFReg(fM))),
                    condT);
            DIP("fnmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
            goto decode_success_vfp;
         case BITS4(0,1,1,0): /* ADD: n + m */
            putFReg(fD, triop(Iop_AddF32, rm, getFReg(fN), getFReg(fM)),
                        condT);
            DIP("fadds%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
            goto decode_success_vfp;
         case BITS4(0,1,1,1): /* SUB: n - m */
            putFReg(fD, triop(Iop_SubF32, rm, getFReg(fN), getFReg(fM)),
                        condT);
            DIP("fsubs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
            goto decode_success_vfp;
         case BITS4(1,0,0,0): /* DIV: n / m */
            putFReg(fD, triop(Iop_DivF32, rm, getFReg(fN), getFReg(fM)),
                        condT);
            DIP("fdivs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
            goto decode_success_vfp;
         /* The four "fused" cases below are built from a separate
            MulF32 followed by an AddF32, so the product passes through
            its own rounding step before the add. */
         case BITS4(1,0,1,0): /* VNFMS: -(d - n * m) (fused) */
            /* XXXROUNDINGFIXME look up ARM reference for fused
               multiply-add rounding */
            putFReg(fD, triop(Iop_AddF32, rm,
                              unop(Iop_NegF32, getFReg(fD)),
                              triop(Iop_MulF32, rm,
                                                getFReg(fN),
                                                getFReg(fM))),
                        condT);
            DIP("vfnmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
            goto decode_success_vfp;
         case BITS4(1,0,1,1): /* VNFMA: -(d + n * m) (fused) */
            /* XXXROUNDINGFIXME look up ARM reference for fused
               multiply-add rounding */
            putFReg(fD, triop(Iop_AddF32, rm,
                              unop(Iop_NegF32, getFReg(fD)),
                              triop(Iop_MulF32, rm,
                                                unop(Iop_NegF32, getFReg(fN)),
                                                getFReg(fM))),
                        condT);
            DIP("vfnmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
            goto decode_success_vfp;
         case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
            /* XXXROUNDINGFIXME look up ARM reference for fused
               multiply-add rounding */
            putFReg(fD, triop(Iop_AddF32, rm,
                              getFReg(fD),
                              triop(Iop_MulF32, rm, getFReg(fN),
                                                    getFReg(fM))),
                        condT);
            DIP("vfmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
            goto decode_success_vfp;
         case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
            /* XXXROUNDINGFIXME look up ARM reference for fused
               multiply-add rounding */
            putFReg(fD, triop(Iop_AddF32, rm,
                              getFReg(fD),
                              triop(Iop_MulF32, rm,
                                    unop(Iop_NegF32, getFReg(fN)),
                                    getFReg(fM))),
                        condT);
            DIP("vfmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
            goto decode_success_vfp;
         default:
            /* opc values with no case above are not handled here;
               control leaves this block and decoding continues. */
            break;
      }
   }
14086
14087   /* --------------------- compares (S) --------------------- */
14088   /*          31   27   23   19   15 11   7    3
14089                 28   24   20   16 12    8    4    0
14090      FCMPS    cond 1110 1D11 0100 Fd 1010 01M0 Fm
14091      FCMPES   cond 1110 1D11 0100 Fd 1010 11M0 Fm
14092      FCMPZS   cond 1110 1D11 0101 Fd 1010 0100 0000
      FCMPZES  cond 1110 1D11 0101 Fd 1010 1100 0000
14094                                 Z         N
14095
14096      Z=0 Compare Fd:D vs Fm:M     and set FPSCR 31:28 accordingly
14097      Z=1 Compare Fd:D vs zero
14098
14099      N=1 generates Invalid Operation exn if either arg is any kind of NaN
14100      N=0 generates Invalid Operation exn if either arg is a signalling NaN
14101      (Not that we pay any attention to N here)
14102   */
14103   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14104       && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
14105       && BITS4(1,0,1,0) == INSN(11,8)
14106       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
14107      UInt bZ = (insn28 >> 16) & 1;
14108      UInt bN = (insn28 >> 7) & 1;
14109      UInt bD = (insn28 >> 22) & 1;
14110      UInt bM = (insn28 >> 5) & 1;
14111      UInt fD = (INSN(15,12) << 1) | bD;
14112      UInt fM = (INSN(3,0) << 1) | bM;
14113      if (bZ && (INSN(3,0) != 0 || (INSN(7,4) & 3) != 0)) {
14114         /* does not decode; fall through */
14115      } else {
14116         IRTemp argL = newTemp(Ity_F64);
14117         IRTemp argR = newTemp(Ity_F64);
14118         IRTemp irRes = newTemp(Ity_I32);
14119
14120         assign(argL, unop(Iop_F32toF64, getFReg(fD)));
14121         assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0))
14122                         : unop(Iop_F32toF64, getFReg(fM)));
14123         assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
14124
14125         IRTemp nzcv     = IRTemp_INVALID;
14126         IRTemp oldFPSCR = newTemp(Ity_I32);
14127         IRTemp newFPSCR = newTemp(Ity_I32);
14128
14129         /* This is where the fun starts.  We have to convert 'irRes'
14130            from an IR-convention return result (IRCmpF64Result) to an
14131            ARM-encoded (N,Z,C,V) group.  The final result is in the
14132            bottom 4 bits of 'nzcv'. */
14133         /* Map compare result from IR to ARM(nzcv) */
14134         /*
14135            FP cmp result | IR   | ARM(nzcv)
14136            --------------------------------
14137            UN              0x45   0011
14138            LT              0x01   1000
14139            GT              0x00   0010
14140            EQ              0x40   0110
14141         */
14142         nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
14143
14144         /* And update FPSCR accordingly */
14145         assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
14146         assign(newFPSCR,
14147                binop(Iop_Or32,
14148                      binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
14149                      binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
14150
14151         putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
14152
14153         if (bZ) {
14154            DIP("fcmpz%ss%s s%u\n", bN ? "e" : "", nCC(conq), fD);
14155         } else {
14156            DIP("fcmp%ss%s s%u, s%u\n", bN ? "e" : "",
14157                nCC(conq), fD, fM);
14158         }
14159         goto decode_success_vfp;
14160      }
14161      /* fall through */
14162   }
14163
14164   /* --------------------- unary (S) --------------------- */
14165   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14166       && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
14167       && BITS4(1,0,1,0) == INSN(11,8)
14168       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
14169      UInt bD = (insn28 >> 22) & 1;
14170      UInt bM = (insn28 >> 5) & 1;
14171      UInt fD  = (INSN(15,12) << 1) | bD;
14172      UInt fM  = (INSN(3,0) << 1) | bM;
14173      UInt b16 = (insn28 >> 16) & 1;
14174      UInt b7  = (insn28 >> 7) & 1;
14175      /**/ if (b16 == 0 && b7 == 0) {
14176         // FCPYS
14177         putFReg(fD, getFReg(fM), condT);
14178         DIP("fcpys%s s%u, s%u\n", nCC(conq), fD, fM);
14179         goto decode_success_vfp;
14180      }
14181      else if (b16 == 0 && b7 == 1) {
14182         // FABSS
14183         putFReg(fD, unop(Iop_AbsF32, getFReg(fM)), condT);
14184         DIP("fabss%s s%u, s%u\n", nCC(conq), fD, fM);
14185         goto decode_success_vfp;
14186      }
14187      else if (b16 == 1 && b7 == 0) {
14188         // FNEGS
14189         putFReg(fD, unop(Iop_NegF32, getFReg(fM)), condT);
14190         DIP("fnegs%s s%u, s%u\n", nCC(conq), fD, fM);
14191         goto decode_success_vfp;
14192      }
14193      else if (b16 == 1 && b7 == 1) {
14194         // FSQRTS
14195         IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
14196         putFReg(fD, binop(Iop_SqrtF32, rm, getFReg(fM)), condT);
14197         DIP("fsqrts%s s%u, s%u\n", nCC(conq), fD, fM);
14198         goto decode_success_vfp;
14199      }
14200      else
14201         vassert(0);
14202
14203      /* fall through */
14204   }
14205
14206   /* ----------------- I <-> S conversions ----------------- */
14207
14208   // F{S,U}ITOS fD, fM
14209   /* These are more complex than FSITOD/FUITOD.  In the D cases, a 32
14210      bit int will always fit within the 53 bit mantissa, so there's
14211      no possibility of a loss of precision, but that's obviously not
14212      the case here.  Hence this case possibly requires rounding, and
14213      so it drags in the current rounding mode. */
14214   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14215       && BITS4(1,0,0,0) == INSN(19,16)
14216       && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
14217       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
14218      UInt bM    = (insn28 >> 5) & 1;
14219      UInt bD    = (insn28 >> 22) & 1;
14220      UInt fM    = (INSN(3,0) << 1) | bM;
14221      UInt fD    = (INSN(15,12) << 1) | bD;
14222      UInt syned = (insn28 >> 7) & 1;
14223      IRTemp rmode = newTemp(Ity_I32);
14224      assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
14225      if (syned) {
14226         // FSITOS
14227         putFReg(fD, binop(Iop_F64toF32,
14228                           mkexpr(rmode),
14229                           unop(Iop_I32StoF64,
14230                                unop(Iop_ReinterpF32asI32, getFReg(fM)))),
14231                 condT);
14232         DIP("fsitos%s s%u, s%u\n", nCC(conq), fD, fM);
14233      } else {
14234         // FUITOS
14235         putFReg(fD, binop(Iop_F64toF32,
14236                           mkexpr(rmode),
14237                           unop(Iop_I32UtoF64,
14238                                unop(Iop_ReinterpF32asI32, getFReg(fM)))),
14239                 condT);
14240         DIP("fuitos%s s%u, s%u\n", nCC(conq), fD, fM);
14241      }
14242      goto decode_success_vfp;
14243   }
14244
14245   // FTO{S,U}IS fD, fM
14246   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14247       && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
14248       && BITS4(1,0,1,0) == INSN(11,8)
14249       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
14250      UInt   bM    = (insn28 >> 5) & 1;
14251      UInt   bD    = (insn28 >> 22) & 1;
14252      UInt   fD    = (INSN(15,12) << 1) | bD;
14253      UInt   fM    = (INSN(3,0) << 1) | bM;
14254      UInt   bZ    = (insn28 >> 7) & 1;
14255      UInt   syned = (insn28 >> 16) & 1;
14256      IRTemp rmode = newTemp(Ity_I32);
14257      assign(rmode, bZ ? mkU32(Irrm_ZERO)
14258                       : mkexpr(mk_get_IR_rounding_mode()));
14259      if (syned) {
14260         // FTOSIS
14261         putFReg(fD, unop(Iop_ReinterpI32asF32,
14262                          binop(Iop_F64toI32S, mkexpr(rmode),
14263                                unop(Iop_F32toF64, getFReg(fM)))),
14264                 condT);
14265         DIP("ftosi%ss%s s%u, d%u\n", bZ ? "z" : "",
14266             nCC(conq), fD, fM);
14267         goto decode_success_vfp;
14268      } else {
14269         // FTOUIS
14270         putFReg(fD, unop(Iop_ReinterpI32asF32,
14271                          binop(Iop_F64toI32U, mkexpr(rmode),
14272                                unop(Iop_F32toF64, getFReg(fM)))),
14273                 condT);
14274         DIP("ftoui%ss%s s%u, d%u\n", bZ ? "z" : "",
14275             nCC(conq), fD, fM);
14276         goto decode_success_vfp;
14277      }
14278   }
14279
14280   /* ----------------- S <-> D conversions ----------------- */
14281
14282   // FCVTDS
14283   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14284       && BITS4(0,1,1,1) == INSN(19,16)
14285       && BITS4(1,0,1,0) == INSN(11,8)
14286       && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
14287      UInt dD = INSN(15,12) | (INSN(22,22) << 4);
14288      UInt bM = (insn28 >> 5) & 1;
14289      UInt fM = (INSN(3,0) << 1) | bM;
14290      putDReg(dD, unop(Iop_F32toF64, getFReg(fM)), condT);
14291      DIP("fcvtds%s d%u, s%u\n", nCC(conq), dD, fM);
14292      goto decode_success_vfp;
14293   }
14294
14295   // FCVTSD
14296   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14297       && BITS4(0,1,1,1) == INSN(19,16)
14298       && BITS4(1,0,1,1) == INSN(11,8)
14299       && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
14300      UInt   bD    = (insn28 >> 22) & 1;
14301      UInt   fD    = (INSN(15,12) << 1) | bD;
14302      UInt   dM    = INSN(3,0) | (INSN(5,5) << 4);
14303      IRTemp rmode = newTemp(Ity_I32);
14304      assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
14305      putFReg(fD, binop(Iop_F64toF32, mkexpr(rmode), getDReg(dM)),
14306                  condT);
14307      DIP("fcvtsd%s s%u, d%u\n", nCC(conq), fD, dM);
14308      goto decode_success_vfp;
14309   }
14310
14311   /* --------------- VCVT fixed<->floating, VFP --------------- */
14312   /*          31   27   23   19   15 11   7    3
14313                 28   24   20   16 12    8    4    0
14314
14315               cond 1110 1D11 1p1U Vd 101f x1i0 imm4
14316
14317      VCVT<c>.<Td>.F64 <Dd>, <Dd>, #fbits
14318      VCVT<c>.<Td>.F32 <Dd>, <Dd>, #fbits
14319      VCVT<c>.F64.<Td> <Dd>, <Dd>, #fbits
14320      VCVT<c>.F32.<Td> <Dd>, <Dd>, #fbits
14321      are of this form.  We only handle a subset of the cases though.
14322   */
14323   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14324       && BITS4(1,0,1,0) == (INSN(19,16) & BITS4(1,0,1,0))
14325       && BITS3(1,0,1) == INSN(11,9)
14326       && BITS3(1,0,0) == (INSN(6,4) & BITS3(1,0,1))) {
14327      UInt bD        = INSN(22,22);
14328      UInt bOP       = INSN(18,18);
14329      UInt bU        = INSN(16,16);
14330      UInt Vd        = INSN(15,12);
14331      UInt bSF       = INSN(8,8);
14332      UInt bSX       = INSN(7,7);
14333      UInt bI        = INSN(5,5);
14334      UInt imm4      = INSN(3,0);
14335      Bool to_fixed  = bOP == 1;
14336      Bool dp_op     = bSF == 1;
14337      Bool unsyned   = bU == 1;
14338      UInt size      = bSX == 0 ? 16 : 32;
14339      Int  frac_bits = size - ((imm4 << 1) | bI);
14340      UInt d         = dp_op  ? ((bD << 4) | Vd)  : ((Vd << 1) | bD);
14341      if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && !dp_op
14342                                            && size == 32) {
14343         /* VCVT.F32.{S,U}32 S[d], S[d], #frac_bits */
14344         /* This generates really horrible code.  We could potentially
14345            do much better. */
14346         IRTemp rmode = newTemp(Ity_I32);
14347         assign(rmode, mkU32(Irrm_NEAREST)); // per the spec
14348         IRTemp src32 = newTemp(Ity_I32);
14349         assign(src32,  unop(Iop_ReinterpF32asI32, getFReg(d)));
14350         IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
14351                                mkexpr(src32 ) );
14352         IRTemp scale = newTemp(Ity_F64);
14353         assign(scale, unop(Iop_I32UtoF64, mkU32( 1 << (frac_bits-1) )));
14354         IRExpr* rm     = mkU32(Irrm_NEAREST);
14355         IRExpr* resF64 = triop(Iop_DivF64,
14356                                rm, as_F64,
14357                                triop(Iop_AddF64, rm, mkexpr(scale),
14358                                                      mkexpr(scale)));
14359         IRExpr* resF32 = binop(Iop_F64toF32, mkexpr(rmode), resF64);
14360         putFReg(d, resF32, condT);
14361         DIP("vcvt.f32.%c32, s%u, s%u, #%d\n",
14362             unsyned ? 'u' : 's', d, d, frac_bits);
14363         goto decode_success_vfp;
14364      }
14365      if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && dp_op
14366                                            && size == 32) {
14367         /* VCVT.F64.{S,U}32 D[d], D[d], #frac_bits */
14368         /* This generates really horrible code.  We could potentially
14369            do much better. */
14370         IRTemp src32 = newTemp(Ity_I32);
14371         assign(src32, unop(Iop_64to32, getDRegI64(d)));
14372         IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
14373                                mkexpr(src32 ) );
14374         IRTemp scale = newTemp(Ity_F64);
14375         assign(scale, unop(Iop_I32UtoF64, mkU32( 1 << (frac_bits-1) )));
14376         IRExpr* rm     = mkU32(Irrm_NEAREST);
14377         IRExpr* resF64 = triop(Iop_DivF64,
14378                                rm, as_F64,
14379                                triop(Iop_AddF64, rm, mkexpr(scale),
14380                                                      mkexpr(scale)));
14381         putDReg(d, resF64, condT);
14382         DIP("vcvt.f64.%c32, d%u, d%u, #%d\n",
14383             unsyned ? 'u' : 's', d, d, frac_bits);
14384         goto decode_success_vfp;
14385      }
14386      if (frac_bits >= 1 && frac_bits <= 32 && to_fixed && dp_op
14387                                            && size == 32) {
14388         /* VCVT.{S,U}32.F64 D[d], D[d], #frac_bits */
14389         IRTemp srcF64 = newTemp(Ity_F64);
14390         assign(srcF64, getDReg(d));
14391         IRTemp scale = newTemp(Ity_F64);
14392         assign(scale, unop(Iop_I32UtoF64, mkU32( 1 << (frac_bits-1) )));
14393         IRTemp scaledF64 = newTemp(Ity_F64);
14394         IRExpr* rm = mkU32(Irrm_NEAREST);
14395         assign(scaledF64, triop(Iop_MulF64,
14396                                 rm, mkexpr(srcF64),
14397                                 triop(Iop_AddF64, rm, mkexpr(scale),
14398                                                       mkexpr(scale))));
14399         IRTemp rmode = newTemp(Ity_I32);
14400         assign(rmode, mkU32(Irrm_ZERO)); // as per the spec
14401         IRTemp asI32 = newTemp(Ity_I32);
14402         assign(asI32, binop(unsyned ? Iop_F64toI32U : Iop_F64toI32S,
14403                             mkexpr(rmode), mkexpr(scaledF64)));
14404         putDRegI64(d, unop(unsyned ? Iop_32Uto64 : Iop_32Sto64,
14405                            mkexpr(asI32)), condT);
14406         goto decode_success_vfp;
14407      }
14408      /* fall through */
14409   }
14410
14411   /* FAILURE */
14412   return False;
14413
14414  decode_success_vfp:
14415   /* Check that any accepted insn really is a CP10 or CP11 insn, iow,
14416      assert that we aren't accepting, in this fn, insns that actually
14417      should be handled somewhere else. */
14418   vassert(INSN(11,9) == BITS3(1,0,1)); // 11:8 = 1010 or 1011
14419   return True;
14420
14421#  undef INSN
14422}
14423
14424
14425/*------------------------------------------------------------*/
14426/*--- Instructions in NV (never) space                     ---*/
14427/*------------------------------------------------------------*/
14428
14429/* ARM only */
14430/* Translate a NV space instruction.  If successful, returns True and
14431   *dres may or may not be updated.  If failure, returns False and
14432   doesn't change *dres nor create any IR.
14433
14434   Note that all NEON instructions (in ARM mode) are handled through
14435   here, since they are all in NV space.
14436*/
14437static Bool decode_NV_instruction ( /*MOD*/DisResult* dres,
14438                                    const VexArchInfo* archinfo,
14439                                    UInt insn )
14440{
14441#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
14442#  define INSN_COND          SLICE_UInt(insn, 31, 28)
14443
14444   HChar dis_buf[128];
14445
14446   // Should only be called for NV instructions
14447   vassert(BITS4(1,1,1,1) == INSN_COND);
14448
14449   /* ------------------------ pld{w} ------------------------ */
14450   if (BITS8(0,1,0,1, 0,0, 0,1) == (INSN(27,20) & BITS8(1,1,1,1, 0,0, 1,1))
14451       && BITS4(1,1,1,1) == INSN(15,12)) {
14452      UInt rN    = INSN(19,16);
14453      UInt imm12 = INSN(11,0);
14454      UInt bU    = INSN(23,23);
14455      UInt bR    = INSN(22,22);
14456      DIP("pld%c [r%u, #%c%u]\n", bR ? ' ' : 'w', rN, bU ? '+' : '-', imm12);
14457      return True;
14458   }
14459
14460   if (BITS8(0,1,1,1, 0,0, 0,1) == (INSN(27,20) & BITS8(1,1,1,1, 0,0, 1,1))
14461       && BITS4(1,1,1,1) == INSN(15,12)
14462       && 0 == INSN(4,4)) {
14463      UInt rN   = INSN(19,16);
14464      UInt rM   = INSN(3,0);
14465      UInt imm5 = INSN(11,7);
14466      UInt sh2  = INSN(6,5);
14467      UInt bU   = INSN(23,23);
14468      UInt bR   = INSN(22,22);
14469      if (rM != 15 && (rN != 15 || bR)) {
14470         IRExpr* eaE = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
14471                                                       sh2, imm5, dis_buf);
14472         IRTemp eaT = newTemp(Ity_I32);
14473         /* Bind eaE to a temp merely for debugging-vex purposes, so we
14474            can check it's a plausible decoding.  It will get removed
14475            by iropt a little later on. */
14476         vassert(eaE);
14477         assign(eaT, eaE);
14478         DIP("pld%c %s\n", bR ? ' ' : 'w', dis_buf);
14479         return True;
14480      }
14481      /* fall through */
14482   }
14483
14484   /* ------------------------ pli ------------------------ */
14485   if (BITS8(0,1,0,0, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
14486       && BITS4(1,1,1,1) == INSN(15,12)) {
14487      UInt rN    = INSN(19,16);
14488      UInt imm12 = INSN(11,0);
14489      UInt bU    = INSN(23,23);
14490      DIP("pli [r%u, #%c%u]\n", rN, bU ? '+' : '-', imm12);
14491      return True;
14492   }
14493
14494   /* --------------------- Interworking branches --------------------- */
14495
14496   // BLX (1), viz, unconditional branch and link to R15+simm24
14497   // and set CPSR.T = 1, that is, switch to Thumb mode
14498   if (INSN(31,25) == BITS7(1,1,1,1,1,0,1)) {
14499      UInt bitH   = INSN(24,24);
14500      Int  uimm24 = INSN(23,0);
14501      Int  simm24 = (((uimm24 << 8) >> 8) << 2) + (bitH << 1);
14502      /* Now this is a bit tricky.  Since we're decoding an ARM insn,
14503         it is implies that CPSR.T == 0.  Hence the current insn's
14504         address is guaranteed to be of the form X--(30)--X00.  So, no
14505         need to mask any bits off it.  But need to set the lowest bit
14506         to 1 to denote we're in Thumb mode after this, since
14507         guest_R15T has CPSR.T as the lowest bit.  And we can't chase
14508         into the call, so end the block at this point. */
14509      UInt dst = guest_R15_curr_instr_notENC + 8 + (simm24 | 1);
14510      putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
14511                    IRTemp_INVALID/*because AL*/, Ijk_Boring );
14512      llPutIReg(15, mkU32(dst));
14513      dres->jk_StopHere = Ijk_Call;
14514      dres->whatNext    = Dis_StopHere;
14515      DIP("blx 0x%x (and switch to Thumb mode)\n", dst - 1);
14516      return True;
14517   }
14518
14519   /* ------------------- v7 barrier insns ------------------- */
14520   switch (insn) {
14521      case 0xF57FF06F: /* ISB */
14522         stmt( IRStmt_MBE(Imbe_Fence) );
14523         DIP("ISB\n");
14524         return True;
14525      case 0xF57FF04F: /* DSB sy */
14526      case 0xF57FF04E: /* DSB st */
14527      case 0xF57FF04B: /* DSB ish */
14528      case 0xF57FF04A: /* DSB ishst */
14529      case 0xF57FF047: /* DSB nsh */
14530      case 0xF57FF046: /* DSB nshst */
14531      case 0xF57FF043: /* DSB osh */
14532      case 0xF57FF042: /* DSB oshst */
14533         stmt( IRStmt_MBE(Imbe_Fence) );
14534         DIP("DSB\n");
14535         return True;
14536      case 0xF57FF05F: /* DMB sy */
14537      case 0xF57FF05E: /* DMB st */
14538      case 0xF57FF05B: /* DMB ish */
14539      case 0xF57FF05A: /* DMB ishst */
14540      case 0xF57FF057: /* DMB nsh */
14541      case 0xF57FF056: /* DMB nshst */
14542      case 0xF57FF053: /* DMB osh */
14543      case 0xF57FF052: /* DMB oshst */
14544         stmt( IRStmt_MBE(Imbe_Fence) );
14545         DIP("DMB\n");
14546         return True;
14547      default:
14548         break;
14549   }
14550
14551   /* ------------------- CLREX ------------------ */
14552   if (insn == 0xF57FF01F) {
14553      /* AFAICS, this simply cancels a (all?) reservations made by a
14554         (any?) preceding LDREX(es).  Arrange to hand it through to
14555         the back end. */
14556      stmt( IRStmt_MBE(Imbe_CancelReservation) );
14557      DIP("clrex\n");
14558      return True;
14559   }
14560
14561   /* ------------------- NEON ------------------- */
14562   if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
14563      Bool ok_neon = decode_NEON_instruction(
14564                        dres, insn, IRTemp_INVALID/*unconditional*/,
14565                        False/*!isT*/
14566                     );
14567      if (ok_neon)
14568         return True;
14569   }
14570
14571   // unrecognised
14572   return False;
14573
14574#  undef INSN_COND
14575#  undef INSN
14576}
14577
14578
14579/*------------------------------------------------------------*/
14580/*--- Disassemble a single ARM instruction                 ---*/
14581/*------------------------------------------------------------*/
14582
14583/* Disassemble a single ARM instruction into IR.  The instruction is
14584   located in host memory at guest_instr, and has (decoded) guest IP
14585   of guest_R15_curr_instr_notENC, which will have been set before the
14586   call here. */
14587
14588static
14589DisResult disInstr_ARM_WRK (
14590             Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
14591             Bool         resteerCisOk,
14592             void*        callback_opaque,
14593             const UChar* guest_instr,
14594             const VexArchInfo* archinfo,
14595             const VexAbiInfo*  abiinfo,
14596             Bool         sigill_diag
14597          )
14598{
14599   // A macro to fish bits out of 'insn'.
14600#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
14601#  define INSN_COND          SLICE_UInt(insn, 31, 28)
14602
14603   DisResult dres;
14604   UInt      insn;
14605   //Bool      allow_VFP = False;
14606   //UInt      hwcaps = archinfo->hwcaps;
14607   IRTemp    condT; /* :: Ity_I32 */
14608   UInt      summary;
14609   HChar     dis_buf[128];  // big enough to hold LDMIA etc text
14610
14611   /* What insn variants are we supporting today? */
14612   //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
14613   // etc etc
14614
14615   /* Set result defaults. */
14616   dres.whatNext    = Dis_Continue;
14617   dres.len         = 4;
14618   dres.continueAt  = 0;
14619   dres.jk_StopHere = Ijk_INVALID;
14620
14621   /* Set default actions for post-insn handling of writes to r15, if
14622      required. */
14623   r15written = False;
14624   r15guard   = IRTemp_INVALID; /* unconditional */
14625   r15kind    = Ijk_Boring;
14626
14627   /* At least this is simple on ARM: insns are all 4 bytes long, and
14628      4-aligned.  So just fish the whole thing out of memory right now
14629      and have done. */
14630   insn = getUIntLittleEndianly( guest_instr );
14631
14632   if (0) vex_printf("insn: 0x%x\n", insn);
14633
14634   DIP("\t(arm) 0x%x:  ", (UInt)guest_R15_curr_instr_notENC);
14635
14636   vassert(0 == (guest_R15_curr_instr_notENC & 3));
14637
14638   /* ----------------------------------------------------------- */
14639
14640   /* Spot "Special" instructions (see comment at top of file). */
14641   {
14642      const UChar* code = guest_instr;
14643      /* Spot the 16-byte preamble:
14644
14645         e1a0c1ec  mov r12, r12, ROR #3
14646         e1a0c6ec  mov r12, r12, ROR #13
14647         e1a0ceec  mov r12, r12, ROR #29
14648         e1a0c9ec  mov r12, r12, ROR #19
14649      */
14650      UInt word1 = 0xE1A0C1EC;
14651      UInt word2 = 0xE1A0C6EC;
14652      UInt word3 = 0xE1A0CEEC;
14653      UInt word4 = 0xE1A0C9EC;
14654      if (getUIntLittleEndianly(code+ 0) == word1 &&
14655          getUIntLittleEndianly(code+ 4) == word2 &&
14656          getUIntLittleEndianly(code+ 8) == word3 &&
14657          getUIntLittleEndianly(code+12) == word4) {
14658         /* Got a "Special" instruction preamble.  Which one is it? */
14659         if (getUIntLittleEndianly(code+16) == 0xE18AA00A
14660                                               /* orr r10,r10,r10 */) {
14661            /* R3 = client_request ( R4 ) */
14662            DIP("r3 = client_request ( %%r4 )\n");
14663            llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
14664            dres.jk_StopHere = Ijk_ClientReq;
14665            dres.whatNext    = Dis_StopHere;
14666            goto decode_success;
14667         }
14668         else
14669         if (getUIntLittleEndianly(code+16) == 0xE18BB00B
14670                                               /* orr r11,r11,r11 */) {
14671            /* R3 = guest_NRADDR */
14672            DIP("r3 = guest_NRADDR\n");
14673            dres.len = 20;
14674            llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
14675            goto decode_success;
14676         }
14677         else
14678         if (getUIntLittleEndianly(code+16) == 0xE18CC00C
14679                                               /* orr r12,r12,r12 */) {
14680            /*  branch-and-link-to-noredir R4 */
14681            DIP("branch-and-link-to-noredir r4\n");
14682            llPutIReg(14, mkU32( guest_R15_curr_instr_notENC + 20) );
14683            llPutIReg(15, llGetIReg(4));
14684            dres.jk_StopHere = Ijk_NoRedir;
14685            dres.whatNext    = Dis_StopHere;
14686            goto decode_success;
14687         }
14688         else
14689         if (getUIntLittleEndianly(code+16) == 0xE1899009
14690                                               /* orr r9,r9,r9 */) {
14691            /* IR injection */
14692            DIP("IR injection\n");
14693            vex_inject_ir(irsb, Iend_LE);
14694            // Invalidate the current insn. The reason is that the IRop we're
14695            // injecting here can change. In which case the translation has to
14696            // be redone. For ease of handling, we simply invalidate all the
14697            // time.
14698            stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
14699            stmt(IRStmt_Put(OFFB_CMLEN,   mkU32(20)));
14700            llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
14701            dres.whatNext    = Dis_StopHere;
14702            dres.jk_StopHere = Ijk_InvalICache;
14703            goto decode_success;
14704         }
         /* We don't know what it is.  Set insn so decode_failure
            can print the insn following the Special-insn preamble. */
14707         insn = getUIntLittleEndianly(code+16);
14708         goto decode_failure;
14709         /*NOTREACHED*/
14710      }
14711
14712   }
14713
14714   /* ----------------------------------------------------------- */
14715
14716   /* Main ARM instruction decoder starts here. */
14717
14718   /* Deal with the condition.  Strategy is to merely generate a
14719      condition temporary at this point (or IRTemp_INVALID, meaning
14720      unconditional).  We leave it to lower-level instruction decoders
14721      to decide whether they can generate straight-line code, or
14722      whether they must generate a side exit before the instruction.
14723      condT :: Ity_I32 and is always either zero or one. */
14724   condT = IRTemp_INVALID;
14725   switch ( (ARMCondcode)INSN_COND ) {
14726      case ARMCondNV: {
14727         // Illegal instruction prior to v5 (see ARM ARM A3-5), but
14728         // some cases are acceptable
14729         Bool ok = decode_NV_instruction(&dres, archinfo, insn);
14730         if (ok)
14731            goto decode_success;
14732         else
14733            goto decode_failure;
14734      }
14735      case ARMCondAL: // Always executed
14736         break;
14737      case ARMCondEQ: case ARMCondNE: case ARMCondHS: case ARMCondLO:
14738      case ARMCondMI: case ARMCondPL: case ARMCondVS: case ARMCondVC:
14739      case ARMCondHI: case ARMCondLS: case ARMCondGE: case ARMCondLT:
14740      case ARMCondGT: case ARMCondLE:
14741         condT = newTemp(Ity_I32);
14742         assign( condT, mk_armg_calculate_condition( INSN_COND ));
14743         break;
14744   }
14745
14746   /* ----------------------------------------------------------- */
14747   /* -- ARMv5 integer instructions                            -- */
14748   /* ----------------------------------------------------------- */
14749
14750   /* ---------------- Data processing ops ------------------- */
14751
14752   if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0))
14753       && !(INSN(25,25) == 0 && INSN(7,7) == 1 && INSN(4,4) == 1)) {
14754      IRTemp  shop = IRTemp_INVALID; /* shifter operand */
14755      IRTemp  shco = IRTemp_INVALID; /* shifter carry out */
14756      UInt    rD   = (insn >> 12) & 0xF; /* 15:12 */
14757      UInt    rN   = (insn >> 16) & 0xF; /* 19:16 */
14758      UInt    bitS = (insn >> 20) & 1; /* 20:20 */
14759      IRTemp  rNt  = IRTemp_INVALID;
14760      IRTemp  res  = IRTemp_INVALID;
14761      IRTemp  oldV = IRTemp_INVALID;
14762      IRTemp  oldC = IRTemp_INVALID;
14763      const HChar*  name = NULL;
14764      IROp    op   = Iop_INVALID;
14765      Bool    ok;
14766
14767      switch (INSN(24,21)) {
14768
14769         /* --------- ADD, SUB, AND, OR --------- */
14770         case BITS4(0,1,0,0): /* ADD:  Rd = Rn + shifter_operand */
14771            name = "add"; op = Iop_Add32; goto rd_eq_rn_op_SO;
14772         case BITS4(0,0,1,0): /* SUB:  Rd = Rn - shifter_operand */
14773            name = "sub"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
14774         case BITS4(0,0,1,1): /* RSB:  Rd = shifter_operand - Rn */
14775            name = "rsb"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
14776         case BITS4(0,0,0,0): /* AND:  Rd = Rn & shifter_operand */
14777            name = "and"; op = Iop_And32; goto rd_eq_rn_op_SO;
14778         case BITS4(1,1,0,0): /* OR:   Rd = Rn | shifter_operand */
14779            name = "orr"; op = Iop_Or32; goto rd_eq_rn_op_SO;
14780         case BITS4(0,0,0,1): /* EOR:  Rd = Rn ^ shifter_operand */
14781            name = "eor"; op = Iop_Xor32; goto rd_eq_rn_op_SO;
14782         case BITS4(1,1,1,0): /* BIC:  Rd = Rn & ~shifter_operand */
14783            name = "bic"; op = Iop_And32; goto rd_eq_rn_op_SO;
14784         rd_eq_rn_op_SO: {
14785            Bool isRSB = False;
14786            Bool isBIC = False;
14787            switch (INSN(24,21)) {
14788               case BITS4(0,0,1,1):
14789                  vassert(op == Iop_Sub32); isRSB = True; break;
14790               case BITS4(1,1,1,0):
14791                  vassert(op == Iop_And32); isBIC = True; break;
14792               default:
14793                  break;
14794            }
14795            rNt = newTemp(Ity_I32);
14796            assign(rNt, getIRegA(rN));
14797            ok = mk_shifter_operand(
14798                    INSN(25,25), INSN(11,0),
14799                    &shop, bitS ? &shco : NULL, dis_buf
14800                 );
14801            if (!ok)
14802               break;
14803            res = newTemp(Ity_I32);
14804            // compute the main result
14805            if (isRSB) {
14806               // reverse-subtract: shifter_operand - Rn
14807               vassert(op == Iop_Sub32);
14808               assign(res, binop(op, mkexpr(shop), mkexpr(rNt)) );
14809            } else if (isBIC) {
               // andn: Rn & ~shifter_operand
14811               vassert(op == Iop_And32);
14812               assign(res, binop(op, mkexpr(rNt),
14813                                     unop(Iop_Not32, mkexpr(shop))) );
14814            } else {
14815               // normal: Rn op shifter_operand
14816               assign(res, binop(op, mkexpr(rNt), mkexpr(shop)) );
14817            }
14818            // but don't commit it until after we've finished
14819            // all necessary reads from the guest state
14820            if (bitS
14821                && (op == Iop_And32 || op == Iop_Or32 || op == Iop_Xor32)) {
14822               oldV = newTemp(Ity_I32);
14823               assign( oldV, mk_armg_calculate_flag_v() );
14824            }
14825            // can't safely read guest state after here
14826            // now safe to put the main result
14827            putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
14828            // XXXX!! not safe to read any guest state after
14829            // this point (I think the code below doesn't do that).
14830            if (!bitS)
14831               vassert(shco == IRTemp_INVALID);
14832            /* Update the flags thunk if necessary */
14833            if (bitS) {
14834               vassert(shco != IRTemp_INVALID);
14835               switch (op) {
14836                  case Iop_Add32:
14837                     setFlags_D1_D2( ARMG_CC_OP_ADD, rNt, shop, condT );
14838                     break;
14839                  case Iop_Sub32:
14840                     if (isRSB) {
14841                        setFlags_D1_D2( ARMG_CC_OP_SUB, shop, rNt, condT );
14842                     } else {
14843                        setFlags_D1_D2( ARMG_CC_OP_SUB, rNt, shop, condT );
14844                     }
14845                     break;
14846                  case Iop_And32: /* BIC and AND set the flags the same */
14847                  case Iop_Or32:
14848                  case Iop_Xor32:
14849                     // oldV has been read just above
14850                     setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
14851                                        res, shco, oldV, condT );
14852                     break;
14853                  default:
14854                     vassert(0);
14855               }
14856            }
14857            DIP("%s%s%s r%u, r%u, %s\n",
14858                name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
14859            goto decode_success;
14860         }
14861
14862         /* --------- MOV, MVN --------- */
14863         case BITS4(1,1,0,1):   /* MOV: Rd = shifter_operand */
14864         case BITS4(1,1,1,1): { /* MVN: Rd = not(shifter_operand) */
14865            Bool isMVN = INSN(24,21) == BITS4(1,1,1,1);
14866            IRTemp jk = Ijk_Boring;
14867            if (rN != 0)
14868               break; /* rN must be zero */
14869            ok = mk_shifter_operand(
14870                    INSN(25,25), INSN(11,0),
14871                    &shop, bitS ? &shco : NULL, dis_buf
14872                 );
14873            if (!ok)
14874               break;
14875            res = newTemp(Ity_I32);
14876            assign( res, isMVN ? unop(Iop_Not32, mkexpr(shop))
14877                               : mkexpr(shop) );
14878            if (bitS) {
14879               vassert(shco != IRTemp_INVALID);
14880               oldV = newTemp(Ity_I32);
14881               assign( oldV, mk_armg_calculate_flag_v() );
14882            } else {
14883               vassert(shco == IRTemp_INVALID);
14884            }
14885            /* According to the Cortex A8 TRM Sec. 5.2.1, MOV PC, r14 is a
14886                return for purposes of branch prediction. */
14887            if (!isMVN && INSN(11,0) == 14) {
14888              jk = Ijk_Ret;
14889            }
14890            // can't safely read guest state after here
14891            putIRegA( rD, mkexpr(res), condT, jk );
14892            /* Update the flags thunk if necessary */
14893            if (bitS) {
14894               setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
14895                                  res, shco, oldV, condT );
14896            }
14897            DIP("%s%s%s r%u, %s\n",
14898                isMVN ? "mvn" : "mov",
14899                nCC(INSN_COND), bitS ? "s" : "", rD, dis_buf );
14900            goto decode_success;
14901         }
14902
14903         /* --------- CMP --------- */
14904         case BITS4(1,0,1,0):   /* CMP:  (void) Rn - shifter_operand */
14905         case BITS4(1,0,1,1): { /* CMN:  (void) Rn + shifter_operand */
14906            Bool isCMN = INSN(24,21) == BITS4(1,0,1,1);
14907            if (rD != 0)
14908               break; /* rD must be zero */
14909            if (bitS == 0)
14910               break; /* if S (bit 20) is not set, it's not CMP/CMN */
14911            rNt = newTemp(Ity_I32);
14912            assign(rNt, getIRegA(rN));
14913            ok = mk_shifter_operand(
14914                    INSN(25,25), INSN(11,0),
14915                    &shop, NULL, dis_buf
14916                 );
14917            if (!ok)
14918               break;
14919            // can't safely read guest state after here
14920            /* Update the flags thunk. */
14921            setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
14922                            rNt, shop, condT );
14923            DIP("%s%s r%u, %s\n",
14924                isCMN ? "cmn" : "cmp",
14925                nCC(INSN_COND), rN, dis_buf );
14926            goto decode_success;
14927         }
14928
14929         /* --------- TST --------- */
14930         case BITS4(1,0,0,0):   /* TST:  (void) Rn & shifter_operand */
14931         case BITS4(1,0,0,1): { /* TEQ:  (void) Rn ^ shifter_operand */
14932            Bool isTEQ = INSN(24,21) == BITS4(1,0,0,1);
14933            if (rD != 0)
14934               break; /* rD must be zero */
14935            if (bitS == 0)
14936               break; /* if S (bit 20) is not set, it's not TST/TEQ */
14937            rNt = newTemp(Ity_I32);
14938            assign(rNt, getIRegA(rN));
14939            ok = mk_shifter_operand(
14940                    INSN(25,25), INSN(11,0),
14941                    &shop, &shco, dis_buf
14942                 );
14943            if (!ok)
14944               break;
14945            /* Update the flags thunk. */
14946            res = newTemp(Ity_I32);
14947            assign( res, binop(isTEQ ? Iop_Xor32 : Iop_And32,
14948                               mkexpr(rNt), mkexpr(shop)) );
14949            oldV = newTemp(Ity_I32);
14950            assign( oldV, mk_armg_calculate_flag_v() );
14951            // can't safely read guest state after here
14952            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
14953                               res, shco, oldV, condT );
14954            DIP("%s%s r%u, %s\n",
14955                isTEQ ? "teq" : "tst",
14956                nCC(INSN_COND), rN, dis_buf );
14957            goto decode_success;
14958         }
14959
14960         /* --------- ADC, SBC, RSC --------- */
14961         case BITS4(0,1,0,1): /* ADC:  Rd = Rn + shifter_operand + oldC */
14962            name = "adc"; goto rd_eq_rn_op_SO_op_oldC;
14963         case BITS4(0,1,1,0): /* SBC:  Rd = Rn - shifter_operand - (oldC ^ 1) */
14964            name = "sbc"; goto rd_eq_rn_op_SO_op_oldC;
14965         case BITS4(0,1,1,1): /* RSC:  Rd = shifter_operand - Rn - (oldC ^ 1) */
14966            name = "rsc"; goto rd_eq_rn_op_SO_op_oldC;
14967         rd_eq_rn_op_SO_op_oldC: {
14968            // FIXME: shco isn't used for anything.  Get rid of it.
14969            rNt = newTemp(Ity_I32);
14970            assign(rNt, getIRegA(rN));
14971            ok = mk_shifter_operand(
14972                    INSN(25,25), INSN(11,0),
14973                    &shop, bitS ? &shco : NULL, dis_buf
14974                 );
14975            if (!ok)
14976               break;
14977            oldC = newTemp(Ity_I32);
14978            assign( oldC, mk_armg_calculate_flag_c() );
14979            res = newTemp(Ity_I32);
14980            // compute the main result
14981            switch (INSN(24,21)) {
14982               case BITS4(0,1,0,1): /* ADC */
14983                  assign(res,
14984                         binop(Iop_Add32,
14985                               binop(Iop_Add32, mkexpr(rNt), mkexpr(shop)),
14986                               mkexpr(oldC) ));
14987                  break;
14988               case BITS4(0,1,1,0): /* SBC */
14989                  assign(res,
14990                         binop(Iop_Sub32,
14991                               binop(Iop_Sub32, mkexpr(rNt), mkexpr(shop)),
14992                               binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
14993                  break;
14994               case BITS4(0,1,1,1): /* RSC */
14995                  assign(res,
14996                         binop(Iop_Sub32,
14997                               binop(Iop_Sub32, mkexpr(shop), mkexpr(rNt)),
14998                               binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
14999                  break;
15000               default:
15001                  vassert(0);
15002            }
15003            // but don't commit it until after we've finished
15004            // all necessary reads from the guest state
15005            // now safe to put the main result
15006            putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
15007            // XXXX!! not safe to read any guest state after
15008            // this point (I think the code below doesn't do that).
15009            if (!bitS)
15010               vassert(shco == IRTemp_INVALID);
15011            /* Update the flags thunk if necessary */
15012            if (bitS) {
15013               vassert(shco != IRTemp_INVALID);
15014               switch (INSN(24,21)) {
15015                  case BITS4(0,1,0,1): /* ADC */
15016                     setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
15017                                        rNt, shop, oldC, condT );
15018                     break;
15019                  case BITS4(0,1,1,0): /* SBC */
15020                     setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
15021                                        rNt, shop, oldC, condT );
15022                     break;
15023                  case BITS4(0,1,1,1): /* RSC */
15024                     setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
15025                                        shop, rNt, oldC, condT );
15026                     break;
15027                  default:
15028                     vassert(0);
15029               }
15030            }
15031            DIP("%s%s%s r%u, r%u, %s\n",
15032                name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
15033            goto decode_success;
15034         }
15035
15036         default:
15037            vassert(0);
15038      }
15039   } /* if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0)) */
15040
15041   /* --------------------- Load/store (ubyte & word) -------- */
15042   // LDR STR LDRB STRB
15043   /*                 31   27   23   19 15 11    6   4 3  # highest bit
15044                        28   24   20 16 12
15045      A5-20   1 | 16  cond 0101 UB0L Rn Rd imm12
      A5-22   1 | 32  cond 0111 UB0L Rn Rd imm5  sh2 0 Rm
15047      A5-24   2 | 16  cond 0101 UB1L Rn Rd imm12
15048      A5-26   2 | 32  cond 0111 UB1L Rn Rd imm5  sh2 0 Rm
15049      A5-28   3 | 16  cond 0100 UB0L Rn Rd imm12
15050      A5-32   3 | 32  cond 0110 UB0L Rn Rd imm5  sh2 0 Rm
15051   */
15052   /* case coding:
15053             1   at-ea               (access at ea)
15054             2   at-ea-then-upd      (access at ea, then Rn = ea)
15055             3   at-Rn-then-upd      (access at Rn, then Rn = ea)
15056      ea coding
15057             16  Rn +/- imm12
15058             32  Rn +/- Rm sh2 imm5
15059   */
15060   /* Quickly skip over all of this for hopefully most instructions */
15061   if ((INSN(27,24) & BITS4(1,1,0,0)) != BITS4(0,1,0,0))
15062      goto after_load_store_ubyte_or_word;
15063
15064   summary = 0;
15065
15066   /**/ if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 0) {
15067      summary = 1 | 16;
15068   }
15069   else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 0
15070                                          && INSN(4,4) == 0) {
15071      summary = 1 | 32;
15072   }
15073   else if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 1) {
15074      summary = 2 | 16;
15075   }
15076   else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 1
15077                                          && INSN(4,4) == 0) {
15078      summary = 2 | 32;
15079   }
15080   else if (INSN(27,24) == BITS4(0,1,0,0) && INSN(21,21) == 0) {
15081      summary = 3 | 16;
15082   }
15083   else if (INSN(27,24) == BITS4(0,1,1,0) && INSN(21,21) == 0
15084                                          && INSN(4,4) == 0) {
15085      summary = 3 | 32;
15086   }
15087   else goto after_load_store_ubyte_or_word;
15088
15089   { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
15090     UInt rD = (insn >> 12) & 0xF; /* 15:12 */
15091     UInt rM = (insn >> 0)  & 0xF; /*  3:0  */
15092     UInt bU = (insn >> 23) & 1;      /* 23 */
15093     UInt bB = (insn >> 22) & 1;      /* 22 */
15094     UInt bL = (insn >> 20) & 1;      /* 20 */
15095     UInt imm12 = (insn >> 0) & 0xFFF; /* 11:0 */
15096     UInt imm5  = (insn >> 7) & 0x1F;  /* 11:7 */
15097     UInt sh2   = (insn >> 5) & 3;     /* 6:5 */
15098
15099     /* Skip some invalid cases, which would lead to two competing
15100        updates to the same register, or which are otherwise
15101        disallowed by the spec. */
15102     switch (summary) {
15103        case 1 | 16:
15104           break;
15105        case 1 | 32:
15106           if (rM == 15) goto after_load_store_ubyte_or_word;
15107           break;
15108        case 2 | 16: case 3 | 16:
15109           if (rN == 15) goto after_load_store_ubyte_or_word;
15110           if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
15111           break;
15112        case 2 | 32: case 3 | 32:
15113           if (rM == 15) goto after_load_store_ubyte_or_word;
15114           if (rN == 15) goto after_load_store_ubyte_or_word;
15115           if (rN == rM) goto after_load_store_ubyte_or_word;
15116           if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
15117           break;
15118        default:
15119           vassert(0);
15120     }
15121
15122     /* compute the effective address.  Bind it to a tmp since we
15123        may need to use it twice. */
15124     IRExpr* eaE = NULL;
15125     switch (summary & 0xF0) {
15126        case 16:
15127           eaE = mk_EA_reg_plusminus_imm12( rN, bU, imm12, dis_buf );
15128           break;
15129        case 32:
15130           eaE = mk_EA_reg_plusminus_shifted_reg( rN, bU, rM, sh2, imm5,
15131                                                  dis_buf );
15132           break;
15133     }
15134     vassert(eaE);
15135     IRTemp eaT = newTemp(Ity_I32);
15136     assign(eaT, eaE);
15137
15138     /* get the old Rn value */
15139     IRTemp rnT = newTemp(Ity_I32);
15140     assign(rnT, getIRegA(rN));
15141
15142     /* decide on the transfer address */
15143     IRTemp taT = IRTemp_INVALID;
15144     switch (summary & 0x0F) {
15145        case 1: case 2: taT = eaT; break;
15146        case 3:         taT = rnT; break;
15147     }
15148     vassert(taT != IRTemp_INVALID);
15149
15150     if (bL == 0) {
15151       /* Store.  If necessary, update the base register before the
15152          store itself, so that the common idiom of "str rX, [sp,
15153          #-4]!" (store rX at sp-4, then do new sp = sp-4, a.k.a "push
15154          rX") doesn't cause Memcheck to complain that the access is
15155          below the stack pointer.  Also, not updating sp before the
15156          store confuses Valgrind's dynamic stack-extending logic.  So
15157          do it before the store.  Hence we need to snarf the store
15158          data before doing the basereg update. */
15159
15160        /* get hold of the data to be stored */
15161        IRTemp rDt = newTemp(Ity_I32);
15162        assign(rDt, getIRegA(rD));
15163
15164        /* Update Rn if necessary. */
15165        switch (summary & 0x0F) {
15166           case 2: case 3:
15167              putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
15168              break;
15169        }
15170
15171        /* generate the transfer */
15172        if (bB == 0) { // word store
15173           storeGuardedLE( mkexpr(taT), mkexpr(rDt), condT );
15174        } else { // byte store
15175           vassert(bB == 1);
15176           storeGuardedLE( mkexpr(taT), unop(Iop_32to8, mkexpr(rDt)), condT );
15177        }
15178
15179     } else {
15180        /* Load */
15181        vassert(bL == 1);
15182
15183        /* generate the transfer */
15184        if (bB == 0) { // word load
15185           IRTemp jk = Ijk_Boring;
15186           /* According to the Cortex A8 TRM Sec. 5.2.1, LDR(1) with r13 as the
15187               base register and PC as the destination register is a return for
15188               purposes of branch prediction.
15189              The ARM ARM Sec. C9.10.1 further specifies that it must use a
15190               post-increment by immediate addressing mode to be counted in
15191               event 0x0E (Procedure return).*/
15192           if (rN == 13 && summary == (3 | 16) && bB == 0) {
15193              jk = Ijk_Ret;
15194           }
15195           IRTemp tD = newTemp(Ity_I32);
15196           loadGuardedLE( tD, ILGop_Ident32,
15197                          mkexpr(taT), llGetIReg(rD), condT );
15198           /* "rD == 15 ? condT : IRTemp_INVALID": simply
15199              IRTemp_INVALID would be correct in all cases here, and
15200              for the non-r15 case it generates better code, by
15201              avoiding two tests of the cond (since it is already
15202              tested by loadGuardedLE).  However, the logic at the end
15203              of this function, that deals with writes to r15, has an
15204              optimisation which depends on seeing whether or not the
15205              write is conditional.  Hence in this particular case we
15206              let it "see" the guard condition. */
15207           putIRegA( rD, mkexpr(tD),
15208                     rD == 15 ? condT : IRTemp_INVALID, jk );
15209        } else { // byte load
15210           vassert(bB == 1);
15211           IRTemp tD = newTemp(Ity_I32);
15212           loadGuardedLE( tD, ILGop_8Uto32, mkexpr(taT), llGetIReg(rD), condT );
15213           /* No point in similar 3rd arg complexity here, since we
15214              can't sanely write anything to r15 like this. */
15215           putIRegA( rD, mkexpr(tD), IRTemp_INVALID, Ijk_Boring );
15216        }
15217
15218        /* Update Rn if necessary. */
15219        switch (summary & 0x0F) {
15220           case 2: case 3:
15221              // should be assured by logic above:
15222              if (bL == 1)
15223                 vassert(rD != rN); /* since we just wrote rD */
15224              putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
15225              break;
15226        }
15227     }
15228
15229     switch (summary & 0x0F) {
15230        case 1:  DIP("%sr%s%s r%u, %s\n",
15231                     bL == 0 ? "st" : "ld",
15232                     bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
15233                 break;
15234        case 2:  DIP("%sr%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
15235                     bL == 0 ? "st" : "ld",
15236                     bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
15237                 break;
15238        case 3:  DIP("%sr%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
15239                     bL == 0 ? "st" : "ld",
15240                     bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
15241                 break;
15242        default: vassert(0);
15243     }
15244
15245     /* XXX deal with alignment constraints */
15246
15247     goto decode_success;
15248
15249     /* Complications:
15250
15251        For all loads: if the Amode specifies base register
15252        writeback, and the same register is specified for Rd and Rn,
15253        the results are UNPREDICTABLE.
15254
15255        For all loads and stores: if R15 is written, branch to
15256        that address afterwards.
15257
15258        STRB: straightforward
15259        LDRB: loaded data is zero extended
15260        STR:  lowest 2 bits of address are ignored
15261        LDR:  if the lowest 2 bits of the address are nonzero
15262              then the loaded value is rotated right by 8 * the lowest 2 bits
15263     */
15264   }
15265
15266  after_load_store_ubyte_or_word:
15267
15268   /* --------------------- Load/store (sbyte & hword) -------- */
15269   // LDRH LDRSH STRH LDRSB
15270   /*                 31   27   23   19 15 11   7    3     # highest bit
15271                        28   24   20 16 12    8    4    0
15272      A5-36   1 | 16  cond 0001 U10L Rn Rd im4h 1SH1 im4l
15273      A5-38   1 | 32  cond 0001 U00L Rn Rd 0000 1SH1 Rm
15274      A5-40   2 | 16  cond 0001 U11L Rn Rd im4h 1SH1 im4l
15275      A5-42   2 | 32  cond 0001 U01L Rn Rd 0000 1SH1 Rm
15276      A5-44   3 | 16  cond 0000 U10L Rn Rd im4h 1SH1 im4l
15277      A5-46   3 | 32  cond 0000 U00L Rn Rd 0000 1SH1 Rm
15278   */
15279   /* case coding:
15280             1   at-ea               (access at ea)
15281             2   at-ea-then-upd      (access at ea, then Rn = ea)
15282             3   at-Rn-then-upd      (access at Rn, then Rn = ea)
15283      ea coding
15284             16  Rn +/- imm8
15285             32  Rn +/- Rm
15286   */
15287   /* Quickly skip over all of this for hopefully most instructions */
15288   if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
15289      goto after_load_store_sbyte_or_hword;
15290
15291   /* Check the "1SH1" thing. */
15292   if ((INSN(7,4) & BITS4(1,0,0,1)) != BITS4(1,0,0,1))
15293      goto after_load_store_sbyte_or_hword;
15294
15295   summary = 0;
15296
15297   /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,0)) {
15298      summary = 1 | 16;
15299   }
15300   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,0)) {
15301      summary = 1 | 32;
15302   }
15303   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,1)) {
15304      summary = 2 | 16;
15305   }
15306   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,1)) {
15307      summary = 2 | 32;
15308   }
15309   else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(1,0)) {
15310      summary = 3 | 16;
15311   }
15312   else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(0,0)) {
15313      summary = 3 | 32;
15314   }
15315   else goto after_load_store_sbyte_or_hword;
15316
15317   { UInt rN   = (insn >> 16) & 0xF; /* 19:16 */
15318     UInt rD   = (insn >> 12) & 0xF; /* 15:12 */
15319     UInt rM   = (insn >> 0)  & 0xF; /*  3:0  */
15320     UInt bU   = (insn >> 23) & 1;   /* 23 U=1 offset+, U=0 offset- */
15321     UInt bL   = (insn >> 20) & 1;   /* 20 L=1 load, L=0 store */
15322     UInt bH   = (insn >> 5) & 1;    /* H=1 halfword, H=0 byte */
15323     UInt bS   = (insn >> 6) & 1;    /* S=1 signed, S=0 unsigned */
15324     UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
15325
15326     /* Skip combinations that are either meaningless or already
15327        handled by main word-or-unsigned-byte load-store
15328        instructions. */
15329     if (bS == 0 && bH == 0) /* "unsigned byte" */
15330        goto after_load_store_sbyte_or_hword;
15331     if (bS == 1 && bL == 0) /* "signed store" */
15332        goto after_load_store_sbyte_or_hword;
15333
15334     /* Require 11:8 == 0 for Rn +/- Rm cases */
15335     if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
15336        goto after_load_store_sbyte_or_hword;
15337
15338     /* Skip some invalid cases, which would lead to two competing
15339        updates to the same register, or which are otherwise
15340        disallowed by the spec. */
15341     switch (summary) {
15342        case 1 | 16:
15343           break;
15344        case 1 | 32:
15345           if (rM == 15) goto after_load_store_sbyte_or_hword;
15346           break;
15347        case 2 | 16: case 3 | 16:
15348           if (rN == 15) goto after_load_store_sbyte_or_hword;
15349           if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
15350           break;
15351        case 2 | 32: case 3 | 32:
15352           if (rM == 15) goto after_load_store_sbyte_or_hword;
15353           if (rN == 15) goto after_load_store_sbyte_or_hword;
15354           if (rN == rM) goto after_load_store_sbyte_or_hword;
15355           if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
15356           break;
15357        default:
15358           vassert(0);
15359     }
15360
15361     /* If this is a branch, make it unconditional at this point.
15362        Doing conditional branches in-line is too complex (for now).
15363        Note that you'd have to be insane to use any of these loads to
15364        do a branch, since they only load 16 bits at most, but we
15365        handle it just in case. */
15366     if (bL == 1 && rD == 15 && condT != IRTemp_INVALID) {
15367        // go uncond
15368        mk_skip_over_A32_if_cond_is_false( condT );
15369        condT = IRTemp_INVALID;
15370        // now uncond
15371     }
15372
15373     /* compute the effective address.  Bind it to a tmp since we
15374        may need to use it twice. */
15375     IRExpr* eaE = NULL;
15376     switch (summary & 0xF0) {
15377        case 16:
15378           eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
15379           break;
15380        case 32:
15381           eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
15382           break;
15383     }
15384     vassert(eaE);
15385     IRTemp eaT = newTemp(Ity_I32);
15386     assign(eaT, eaE);
15387
15388     /* get the old Rn value */
15389     IRTemp rnT = newTemp(Ity_I32);
15390     assign(rnT, getIRegA(rN));
15391
15392     /* decide on the transfer address */
15393     IRTemp taT = IRTemp_INVALID;
15394     switch (summary & 0x0F) {
15395        case 1: case 2: taT = eaT; break;
15396        case 3:         taT = rnT; break;
15397     }
15398     vassert(taT != IRTemp_INVALID);
15399
15400     /* ll previous value of rD, for dealing with conditional loads */
15401     IRTemp llOldRd = newTemp(Ity_I32);
15402     assign(llOldRd, llGetIReg(rD));
15403
15404     /* halfword store  H 1  L 0  S 0
15405        uhalf load      H 1  L 1  S 0
15406        shalf load      H 1  L 1  S 1
15407        sbyte load      H 0  L 1  S 1
15408     */
15409     const HChar* name = NULL;
15410     /* generate the transfer */
15411     /**/ if (bH == 1 && bL == 0 && bS == 0) { // halfword store
15412        storeGuardedLE( mkexpr(taT),
15413                        unop(Iop_32to16, getIRegA(rD)), condT );
15414        name = "strh";
15415     }
15416     else if (bH == 1 && bL == 1 && bS == 0) { // uhalf load
15417        IRTemp newRd = newTemp(Ity_I32);
15418        loadGuardedLE( newRd, ILGop_16Uto32,
15419                       mkexpr(taT), mkexpr(llOldRd), condT );
15420        putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
15421        name = "ldrh";
15422     }
15423     else if (bH == 1 && bL == 1 && bS == 1) { // shalf load
15424        IRTemp newRd = newTemp(Ity_I32);
15425        loadGuardedLE( newRd, ILGop_16Sto32,
15426                       mkexpr(taT), mkexpr(llOldRd), condT );
15427        putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
15428        name = "ldrsh";
15429     }
15430     else if (bH == 0 && bL == 1 && bS == 1) { // sbyte load
15431        IRTemp newRd = newTemp(Ity_I32);
15432        loadGuardedLE( newRd, ILGop_8Sto32,
15433                       mkexpr(taT), mkexpr(llOldRd), condT );
15434        putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
15435        name = "ldrsb";
15436     }
15437     else
15438        vassert(0); // should be assured by logic above
15439
15440     /* Update Rn if necessary. */
15441     switch (summary & 0x0F) {
15442        case 2: case 3:
15443           // should be assured by logic above:
15444           if (bL == 1)
15445              vassert(rD != rN); /* since we just wrote rD */
15446           putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
15447           break;
15448     }
15449
15450     switch (summary & 0x0F) {
15451        case 1:  DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
15452                 break;
15453        case 2:  DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
15454                     name, nCC(INSN_COND), rD, dis_buf);
15455                 break;
15456        case 3:  DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
15457                     name, nCC(INSN_COND), rD, dis_buf);
15458                 break;
15459        default: vassert(0);
15460     }
15461
15462     /* XXX deal with alignment constraints */
15463
15464     goto decode_success;
15465
15466     /* Complications:
15467
15468        For all loads: if the Amode specifies base register
15469        writeback, and the same register is specified for Rd and Rn,
15470        the results are UNPREDICTABLE.
15471
15472        For all loads and stores: if R15 is written, branch to
15473        that address afterwards.
15474
15475        Misaligned halfword stores => Unpredictable
15476        Misaligned halfword loads  => Unpredictable
15477     */
15478   }
15479
15480  after_load_store_sbyte_or_hword:
15481
15482   /* --------------------- Load/store multiple -------------- */
15483   // LD/STMIA LD/STMIB LD/STMDA LD/STMDB
15484   // Remarkably complex and difficult to get right
15485   // match 27:20 as 100XX0WL
15486   if (BITS8(1,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,1,0,0))) {
15487      // A5-50 LD/STMIA  cond 1000 10WL Rn RegList
15488      // A5-51 LD/STMIB  cond 1001 10WL Rn RegList
15489      // A5-53 LD/STMDA  cond 1000 00WL Rn RegList
15490      // A5-53 LD/STMDB  cond 1001 00WL Rn RegList
15491      //                   28   24   20 16       0
15492
15493      UInt bINC    = (insn >> 23) & 1;
15494      UInt bBEFORE = (insn >> 24) & 1;
15495
15496      UInt bL      = (insn >> 20) & 1;  /* load=1, store=0 */
15497      UInt bW      = (insn >> 21) & 1;  /* Rn wback=1, no wback=0 */
15498      UInt rN      = (insn >> 16) & 0xF;
15499      UInt regList = insn & 0xFFFF;
15500      /* Skip some invalid cases, which would lead to two competing
15501         updates to the same register, or which are otherwise
15502         disallowed by the spec.  Note the test above has required
15503         that S == 0, since that looks like a kernel-mode only thing.
15504         Done by forcing the real pattern, viz 100XXSWL to actually be
15505         100XX0WL. */
15506      if (rN == 15) goto after_load_store_multiple;
15507      // reglist can't be empty
15508      if (regList == 0) goto after_load_store_multiple;
15509      // if requested to writeback Rn, and this is a load instruction,
15510      // then Rn can't appear in RegList, since we'd have two competing
15511      // new values for Rn.  We do however accept this case for store
15512      // instructions.
15513      if (bW == 1 && bL == 1 && ((1 << rN) & regList) > 0)
15514         goto after_load_store_multiple;
15515
15516      /* Now, we can't do a conditional load or store, since that very
15517         likely will generate an exception.  So we have to take a side
15518         exit at this point if the condition is false. */
15519      if (condT != IRTemp_INVALID) {
15520         mk_skip_over_A32_if_cond_is_false( condT );
15521         condT = IRTemp_INVALID;
15522      }
15523
15524      /* Ok, now we're unconditional.  Generate the IR. */
15525      mk_ldm_stm( True/*arm*/, rN, bINC, bBEFORE, bW, bL, regList );
15526
15527      DIP("%sm%c%c%s r%u%s, {0x%04x}\n",
15528          bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
15529          nCC(INSN_COND),
15530          rN, bW ? "!" : "", regList);
15531
15532      goto decode_success;
15533   }
15534
15535  after_load_store_multiple:
15536
15537   /* --------------------- Control flow --------------------- */
15538   // B, BL (Branch, or Branch-and-Link, to immediate offset)
15539   //
15540   if (BITS8(1,0,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))) {
15541      UInt link   = (insn >> 24) & 1;
15542      UInt uimm24 = insn & ((1<<24)-1);
15543      Int  simm24 = (Int)uimm24;
15544      UInt dst    = guest_R15_curr_instr_notENC + 8
15545                    + (((simm24 << 8) >> 8) << 2);
15546      IRJumpKind jk = link ? Ijk_Call : Ijk_Boring;
15547      if (link) {
15548         putIRegA(14, mkU32(guest_R15_curr_instr_notENC + 4),
15549                      condT, Ijk_Boring);
15550      }
15551      if (condT == IRTemp_INVALID) {
15552         /* unconditional transfer to 'dst'.  See if we can simply
15553            continue tracing at the destination. */
15554         if (resteerOkFn( callback_opaque, dst )) {
15555            /* yes */
15556            dres.whatNext   = Dis_ResteerU;
15557            dres.continueAt = dst;
15558         } else {
15559            /* no; terminate the SB at this point. */
15560            llPutIReg(15, mkU32(dst));
15561            dres.jk_StopHere = jk;
15562            dres.whatNext    = Dis_StopHere;
15563         }
15564         DIP("b%s 0x%x\n", link ? "l" : "", dst);
15565      } else {
15566         /* conditional transfer to 'dst' */
15567         const HChar* comment = "";
15568
15569         /* First see if we can do some speculative chasing into one
15570            arm or the other.  Be conservative and only chase if
15571            !link, that is, this is a normal conditional branch to a
15572            known destination. */
15573         if (!link
15574             && resteerCisOk
15575             && vex_control.guest_chase_cond
15576             && dst < guest_R15_curr_instr_notENC
15577             && resteerOkFn( callback_opaque, dst) ) {
15578            /* Speculation: assume this backward branch is taken.  So
15579               we need to emit a side-exit to the insn following this
15580               one, on the negation of the condition, and continue at
15581               the branch target address (dst). */
15582            stmt( IRStmt_Exit( unop(Iop_Not1,
15583                                    unop(Iop_32to1, mkexpr(condT))),
15584                               Ijk_Boring,
15585                               IRConst_U32(guest_R15_curr_instr_notENC+4),
15586                               OFFB_R15T ));
15587            dres.whatNext   = Dis_ResteerC;
15588            dres.continueAt = (Addr32)dst;
15589            comment = "(assumed taken)";
15590         }
15591         else
15592         if (!link
15593             && resteerCisOk
15594             && vex_control.guest_chase_cond
15595             && dst >= guest_R15_curr_instr_notENC
15596             && resteerOkFn( callback_opaque,
15597                             guest_R15_curr_instr_notENC+4) ) {
15598            /* Speculation: assume this forward branch is not taken.
15599               So we need to emit a side-exit to dst (the dest) and
15600               continue disassembling at the insn immediately
15601               following this one. */
15602            stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
15603                               Ijk_Boring,
15604                               IRConst_U32(dst),
15605                               OFFB_R15T ));
15606            dres.whatNext   = Dis_ResteerC;
15607            dres.continueAt = guest_R15_curr_instr_notENC+4;
15608            comment = "(assumed not taken)";
15609         }
15610         else {
15611            /* Conservative default translation - end the block at
15612               this point. */
15613            stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
15614                               jk, IRConst_U32(dst), OFFB_R15T ));
15615            llPutIReg(15, mkU32(guest_R15_curr_instr_notENC + 4));
15616            dres.jk_StopHere = Ijk_Boring;
15617            dres.whatNext    = Dis_StopHere;
15618         }
15619         DIP("b%s%s 0x%x %s\n", link ? "l" : "", nCC(INSN_COND),
15620             dst, comment);
15621      }
15622      goto decode_success;
15623   }
15624
15625   // B, BL (Branch, or Branch-and-Link, to a register)
15626   // NB: interworking branch
15627   if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
15628       && INSN(19,12) == BITS8(1,1,1,1,1,1,1,1)
15629       && (INSN(11,4) == BITS8(1,1,1,1,0,0,1,1)
15630           || INSN(11,4) == BITS8(1,1,1,1,0,0,0,1))) {
15631      IRTemp  dst = newTemp(Ity_I32);
15632      UInt    link = (INSN(11,4) >> 1) & 1;
15633      UInt    rM   = INSN(3,0);
15634      // we don't decode the case (link && rM == 15), as that's
15635      // Unpredictable.
15636      if (!(link && rM == 15)) {
15637         if (condT != IRTemp_INVALID) {
15638            mk_skip_over_A32_if_cond_is_false( condT );
15639         }
15640         // rM contains an interworking address exactly as we require
15641         // (with continuation CPSR.T in bit 0), so we can use it
15642         // as-is, with no masking.
15643         assign( dst, getIRegA(rM) );
15644         if (link) {
15645            putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
15646                      IRTemp_INVALID/*because AL*/, Ijk_Boring );
15647         }
15648         llPutIReg(15, mkexpr(dst));
15649         dres.jk_StopHere = link ? Ijk_Call
15650                                 : (rM == 14 ? Ijk_Ret : Ijk_Boring);
15651         dres.whatNext    = Dis_StopHere;
15652         if (condT == IRTemp_INVALID) {
15653            DIP("b%sx r%u\n", link ? "l" : "", rM);
15654         } else {
15655            DIP("b%sx%s r%u\n", link ? "l" : "", nCC(INSN_COND), rM);
15656         }
15657         goto decode_success;
15658      }
15659      /* else: (link && rM == 15): just fall through */
15660   }
15661
15662   /* --- NB: ARM interworking branches are in NV space, hence
15663      are handled elsewhere by decode_NV_instruction.
15664      ---
15665   */
15666
15667   /* --------------------- Clz --------------------- */
15668   // CLZ
15669   if (INSN(27,20) == BITS8(0,0,0,1,0,1,1,0)
15670       && INSN(19,16) == BITS4(1,1,1,1)
15671       && INSN(11,4) == BITS8(1,1,1,1,0,0,0,1)) {
15672      UInt rD = INSN(15,12);
15673      UInt rM = INSN(3,0);
15674      IRTemp arg = newTemp(Ity_I32);
15675      IRTemp res = newTemp(Ity_I32);
15676      assign(arg, getIRegA(rM));
15677      assign(res, IRExpr_ITE(
15678                     binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
15679                     mkU32(32),
15680                     unop(Iop_Clz32, mkexpr(arg))
15681            ));
15682      putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
15683      DIP("clz%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
15684      goto decode_success;
15685   }
15686
15687   /* --------------------- Mul etc --------------------- */
15688   // MUL
15689   if (BITS8(0,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
15690       && INSN(15,12) == BITS4(0,0,0,0)
15691       && INSN(7,4) == BITS4(1,0,0,1)) {
15692      UInt bitS = (insn >> 20) & 1; /* 20:20 */
15693      UInt rD = INSN(19,16);
15694      UInt rS = INSN(11,8);
15695      UInt rM = INSN(3,0);
15696      if (rD == 15 || rM == 15 || rS == 15) {
15697         /* Unpredictable; don't decode; fall through */
15698      } else {
15699         IRTemp argL = newTemp(Ity_I32);
15700         IRTemp argR = newTemp(Ity_I32);
15701         IRTemp res  = newTemp(Ity_I32);
15702         IRTemp oldC = IRTemp_INVALID;
15703         IRTemp oldV = IRTemp_INVALID;
15704         assign( argL, getIRegA(rM));
15705         assign( argR, getIRegA(rS));
15706         assign( res, binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) );
15707         if (bitS) {
15708            oldC = newTemp(Ity_I32);
15709            assign(oldC, mk_armg_calculate_flag_c());
15710            oldV = newTemp(Ity_I32);
15711            assign(oldV, mk_armg_calculate_flag_v());
15712         }
15713         // now update guest state
15714         putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
15715         if (bitS) {
15716            IRTemp pair = newTemp(Ity_I32);
15717            assign( pair, binop(Iop_Or32,
15718                                binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
15719                                mkexpr(oldV)) );
15720            setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
15721         }
15722         DIP("mul%c%s r%u, r%u, r%u\n",
15723             bitS ? 's' : ' ', nCC(INSN_COND), rD, rM, rS);
15724         goto decode_success;
15725      }
15726      /* fall through */
15727   }
15728
15729   /* --------------------- Integer Divides --------------------- */
15730   // SDIV
15731   if (BITS8(0,1,1,1,0,0,0,1) == INSN(27,20)
15732       && INSN(15,12) == BITS4(1,1,1,1)
15733       && INSN(7,4) == BITS4(0,0,0,1)) {
15734      UInt rD = INSN(19,16);
15735      UInt rM = INSN(11,8);
15736      UInt rN = INSN(3,0);
15737      if (rD == 15 || rM == 15 || rN == 15) {
15738         /* Unpredictable; don't decode; fall through */
15739      } else {
15740         IRTemp res  = newTemp(Ity_I32);
15741         IRTemp argL = newTemp(Ity_I32);
15742         IRTemp argR = newTemp(Ity_I32);
15743         assign(argL, getIRegA(rN));
15744         assign(argR, getIRegA(rM));
15745         assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
15746         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
15747         DIP("sdiv r%u, r%u, r%u\n", rD, rN, rM);
15748         goto decode_success;
15749      }
15750    }
15751
15752   // UDIV
15753   if (BITS8(0,1,1,1,0,0,1,1) == INSN(27,20)
15754       && INSN(15,12) == BITS4(1,1,1,1)
15755       && INSN(7,4) == BITS4(0,0,0,1)) {
15756      UInt rD = INSN(19,16);
15757      UInt rM = INSN(11,8);
15758      UInt rN = INSN(3,0);
15759      if (rD == 15 || rM == 15 || rN == 15) {
15760         /* Unpredictable; don't decode; fall through */
15761      } else {
15762         IRTemp res  = newTemp(Ity_I32);
15763         IRTemp argL = newTemp(Ity_I32);
15764         IRTemp argR = newTemp(Ity_I32);
15765         assign(argL, getIRegA(rN));
15766         assign(argR, getIRegA(rM));
15767         assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
15768         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
15769         DIP("udiv r%u, r%u, r%u\n", rD, rN, rM);
15770         goto decode_success;
15771      }
15772   }
15773
15774   // MLA, MLS
        // Multiply-accumulate / multiply-subtract.  Bit 22 selects MLS,
        // bit 20 is the flag-setting (S) bit.  The value written to rD is
        //    MLA:  rN + (rM * rS)
        //    MLS:  rN - (rM * rS)
        // using 32-bit Add/Sub/Mul IR ops.
15775   if (BITS8(0,0,0,0,0,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
15776       && INSN(7,4) == BITS4(1,0,0,1)) {
15777      UInt bitS  = (insn >> 20) & 1; /* 20:20 */
15778      UInt isMLS = (insn >> 22) & 1; /* 22:22 */
15779      UInt rD = INSN(19,16);
15780      UInt rN = INSN(15,12);
15781      UInt rS = INSN(11,8);
15782      UInt rM = INSN(3,0);
15783      if (bitS == 1 && isMLS == 1) {
15784         /* This isn't allowed (MLS that sets flags).  don't decode;
15785            fall through */
15786      }
15787      else
15788      if (rD == 15 || rM == 15 || rS == 15 || rN == 15) {
15789         /* Unpredictable; don't decode; fall through */
15790      } else {
15791         IRTemp argL = newTemp(Ity_I32);
15792         IRTemp argR = newTemp(Ity_I32);
15793         IRTemp argP = newTemp(Ity_I32);
15794         IRTemp res  = newTemp(Ity_I32);
15795         IRTemp oldC = IRTemp_INVALID;
15796         IRTemp oldV = IRTemp_INVALID;
              /* argL, argR are the multiplicands; argP is the addend
                 (or minuend for MLS). */
15797         assign( argL, getIRegA(rM));
15798         assign( argR, getIRegA(rS));
15799         assign( argP, getIRegA(rN));
15800         assign( res, binop(isMLS ? Iop_Sub32 : Iop_Add32,
15801                            mkexpr(argP),
15802                            binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) ));
              /* For the flag-setting form, capture the current C and V
                 flag values before any guest state is updated below. */
15803         if (bitS) {
15804            vassert(!isMLS); // guaranteed above
15805            oldC = newTemp(Ity_I32);
15806            assign(oldC, mk_armg_calculate_flag_c());
15807            oldV = newTemp(Ity_I32);
15808            assign(oldV, mk_armg_calculate_flag_v());
15809         }
15810         // now update guest state
15811         putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
15812         if (bitS) {
                 /* pair = (oldC << 1) | oldV, passed alongside the result
                    to the ARMG_CC_OP_MUL flags thunk. */
15813            IRTemp pair = newTemp(Ity_I32);
15814            assign( pair, binop(Iop_Or32,
15815                                binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
15816                                mkexpr(oldV)) );
15817            setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
15818         }
              /* Trace text: "mla"/"mls", then 's' or a space for the S bit,
                 then the condition name. */
15819         DIP("ml%c%c%s r%u, r%u, r%u, r%u\n",
15820             isMLS ? 's' : 'a', bitS ? 's' : ' ',
15821             nCC(INSN_COND), rD, rM, rS, rN);
15822         goto decode_success;
15823      }
15824      /* fall through */
15825   }
15826
15827   // SMULL, UMULL
        // 32x32 -> 64 bit multiply.  Bit 22 selects the signed variant,
        // bit 20 is the flag-setting (S) bit.  The 64-bit product of rM
        // and rS is split into rDhi (upper 32 bits) and rDlo (lower 32).
15828   if (BITS8(0,0,0,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
15829       && INSN(7,4) == BITS4(1,0,0,1)) {
15830      UInt bitS = (insn >> 20) & 1; /* 20:20 */
15831      UInt rDhi = INSN(19,16);
15832      UInt rDlo = INSN(15,12);
15833      UInt rS   = INSN(11,8);
15834      UInt rM   = INSN(3,0);
15835      UInt isS  = (INSN(27,20) >> 2) & 1; /* 22:22 */
           /* Reject any use of r15 and the case where both destination
              halves name the same register. */
15836      if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo)  {
15837         /* Unpredictable; don't decode; fall through */
15838      } else {
15839         IRTemp argL  = newTemp(Ity_I32);
15840         IRTemp argR  = newTemp(Ity_I32);
15841         IRTemp res   = newTemp(Ity_I64);
15842         IRTemp resHi = newTemp(Ity_I32);
15843         IRTemp resLo = newTemp(Ity_I32);
15844         IRTemp oldC  = IRTemp_INVALID;
15845         IRTemp oldV  = IRTemp_INVALID;
15846         IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
15847         assign( argL, getIRegA(rM));
15848         assign( argR, getIRegA(rS));
15849         assign( res, binop(mulOp, mkexpr(argL), mkexpr(argR)) );
15850         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
15851         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
              /* For the flag-setting form, capture the current C and V
                 flag values before any guest state is updated below. */
15852         if (bitS) {
15853            oldC = newTemp(Ity_I32);
15854            assign(oldC, mk_armg_calculate_flag_c());
15855            oldV = newTemp(Ity_I32);
15856            assign(oldV, mk_armg_calculate_flag_v());
15857         }
15858         // now update guest state
15859         putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
15860         putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
15861         if (bitS) {
                 /* pair = (oldC << 1) | oldV, passed with both result
                    halves to the ARMG_CC_OP_MULL flags thunk. */
15862            IRTemp pair = newTemp(Ity_I32);
15863            assign( pair, binop(Iop_Or32,
15864                                binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
15865                                mkexpr(oldV)) );
15866            setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
15867         }
15868         DIP("%cmull%c%s r%u, r%u, r%u, r%u\n",
15869             isS ? 's' : 'u', bitS ? 's' : ' ',
15870             nCC(INSN_COND), rDlo, rDhi, rM, rS);
15871         goto decode_success;
15872      }
15873      /* fall through */
15874   }
15875
15876   // SMLAL, UMLAL
        // 32x32 -> 64 bit multiply with 64-bit accumulate.  Bit 22 selects
        // the signed variant, bit 20 is the flag-setting (S) bit.  The
        // value written back to rDhi:rDlo is
        //    (rDhi:rDlo) + (rM * rS)
        // computed with a 64-bit add.
15877   if (BITS8(0,0,0,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
15878       && INSN(7,4) == BITS4(1,0,0,1)) {
15879      UInt bitS = (insn >> 20) & 1; /* 20:20 */
15880      UInt rDhi = INSN(19,16);
15881      UInt rDlo = INSN(15,12);
15882      UInt rS   = INSN(11,8);
15883      UInt rM   = INSN(3,0);
15884      UInt isS  = (INSN(27,20) >> 2) & 1; /* 22:22 */
           /* Reject any use of r15 and the case where both destination
              halves name the same register. */
15885      if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo)  {
15886         /* Unpredictable; don't decode; fall through */
15887      } else {
15888         IRTemp argL  = newTemp(Ity_I32);
15889         IRTemp argR  = newTemp(Ity_I32);
15890         IRTemp old   = newTemp(Ity_I64);
15891         IRTemp res   = newTemp(Ity_I64);
15892         IRTemp resHi = newTemp(Ity_I32);
15893         IRTemp resLo = newTemp(Ity_I32);
15894         IRTemp oldC  = IRTemp_INVALID;
15895         IRTemp oldV  = IRTemp_INVALID;
15896         IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
15897         assign( argL, getIRegA(rM));
15898         assign( argR, getIRegA(rS));
              /* old is the existing 64-bit accumulator, rDhi in the upper
                 half and rDlo in the lower half. */
15899         assign( old, binop(Iop_32HLto64, getIRegA(rDhi), getIRegA(rDlo)) );
15900         assign( res, binop(Iop_Add64,
15901                            mkexpr(old),
15902                            binop(mulOp, mkexpr(argL), mkexpr(argR))) );
15903         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
15904         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
              /* For the flag-setting form, capture the current C and V
                 flag values before any guest state is updated below. */
15905         if (bitS) {
15906            oldC = newTemp(Ity_I32);
15907            assign(oldC, mk_armg_calculate_flag_c());
15908            oldV = newTemp(Ity_I32);
15909            assign(oldV, mk_armg_calculate_flag_v());
15910         }
15911         // now update guest state
15912         putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
15913         putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
15914         if (bitS) {
                 /* pair = (oldC << 1) | oldV, passed with both result
                    halves to the ARMG_CC_OP_MULL flags thunk. */
15915            IRTemp pair = newTemp(Ity_I32);
15916            assign( pair, binop(Iop_Or32,
15917                                binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
15918                                mkexpr(oldV)) );
15919            setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
15920         }
15921         DIP("%cmlal%c%s r%u, r%u, r%u, r%u\n",
15922             isS ? 's' : 'u', bitS ? 's' : ' ', nCC(INSN_COND),
15923             rDlo, rDhi, rM, rS);
15924         goto decode_success;
15925      }
15926      /* fall through */
15927   }
15928
15929   // UMAAL
        // Unsigned multiply with two 32-bit accumulates.  The 64-bit value
        //    (rN * rM) + zero_extend(rDhi) + zero_extend(rDlo)
        // is computed and written back to rDhi (upper 32 bits) and rDlo
        // (lower 32 bits).  No flags are touched in this case.
15930   if (BITS8(0,0,0,0,0,1,0,0) == INSN(27,20) && INSN(7,4) == BITS4(1,0,0,1)) {
15931      UInt rDhi = INSN(19,16);
15932      UInt rDlo = INSN(15,12);
15933      UInt rM   = INSN(11,8);
15934      UInt rN   = INSN(3,0);
           /* Reject any use of r15 and the case where both destination
              halves name the same register. */
15935      if (rDlo == 15 || rDhi == 15 || rN == 15 || rM == 15 || rDhi == rDlo)  {
15936         /* Unpredictable; don't decode; fall through */
15937      } else {
15938         IRTemp argN   = newTemp(Ity_I32);
15939         IRTemp argM   = newTemp(Ity_I32);
15940         IRTemp argDhi = newTemp(Ity_I32);
15941         IRTemp argDlo = newTemp(Ity_I32);
15942         IRTemp res    = newTemp(Ity_I64);
15943         IRTemp resHi  = newTemp(Ity_I32);
15944         IRTemp resLo  = newTemp(Ity_I32);
              /* All four source registers are read into temps before any
                 register is written. */
15945         assign( argN,   getIRegA(rN) );
15946         assign( argM,   getIRegA(rM) );
15947         assign( argDhi, getIRegA(rDhi) );
15948         assign( argDlo, getIRegA(rDlo) );
15949         assign( res,
15950                 binop(Iop_Add64,
15951                       binop(Iop_Add64,
15952                             binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
15953                             unop(Iop_32Uto64, mkexpr(argDhi))),
15954                       unop(Iop_32Uto64, mkexpr(argDlo))) );
15955         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
15956         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
15957         // now update guest state
15958         putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
15959         putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
15960         DIP("umaal %s r%u, r%u, r%u, r%u\n",
15961             nCC(INSN_COND), rDlo, rDhi, rN, rM);
15962         goto decode_success;
15963      }
15964      /* fall through */
15965   }
15966
15967   /* --------------------- Msr etc --------------------- */
15968
15969   // MSR apsr, #imm
        // Write selected APSR fields from an immediate.  Bit 19 selects the
        // NZCVQ group and bit 18 the GE group; if neither is selected the
        // instruction is not decoded here.  The immediate is the low 8
        // bits of insn[11:0] rotated right by twice the 4-bit value in
        // insn[11:8].
15970   if (INSN(27,20) == BITS8(0,0,1,1,0,0,1,0)
15971       && INSN(17,12) == BITS6(0,0,1,1,1,1)) {
15972      UInt write_ge    = INSN(18,18);
15973      UInt write_nzcvq = INSN(19,19);
15974      if (write_nzcvq || write_ge) {
15975         UInt   imm = (INSN(11,0) >> 0) & 0xFF;
15976         UInt   rot = 2 * ((INSN(11,0) >> 8) & 0xF);
15977         IRTemp immT = newTemp(Ity_I32);
              /* rot is 2 * (0..15), hence an even value no larger than 30. */
15978         vassert(rot <= 30);
15979         imm = ROR32(imm, rot);
15980         assign(immT, mkU32(imm));
15981         desynthesise_APSR( write_nzcvq, write_ge, immT, condT );
              /* Trace text uses the same "cpsr_<fields>" spelling as the
                 register form of MSR: "f" when NZCVQ is written, "g" when
                 GE is written. */
15982         DIP("msr%s cpsr_%s%s, #0x%08x\n", nCC(INSN_COND),
15983             write_nzcvq ? "f" : "", write_ge ? "g" : "", imm);
15984         goto decode_success;
15985      }
15986      /* fall through */
15987   }
15988
15989   // MSR apsr, reg
        // Write selected APSR fields from register rN (insn[3:0]).  Bit 19
        // selects the NZCVQ group and bit 18 the GE group.  Not decoded
        // when rN is r15 or when neither group is selected.
15990   if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
15991       && INSN(17,12) == BITS6(0,0,1,1,1,1)
15992       && INSN(11,4) == BITS8(0,0,0,0,0,0,0,0)) {
15993      UInt rN          = INSN(3,0);
15994      UInt write_ge    = INSN(18,18);
15995      UInt write_nzcvq = INSN(19,19);
15996      if (rN != 15 && (write_nzcvq || write_ge)) {
              /* Snapshot rN into a temp and hand it to the APSR writer
                 together with the field selectors and condT. */
15997         IRTemp rNt = newTemp(Ity_I32);
15998         assign(rNt, getIRegA(rN));
15999         desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
16000         DIP("msr%s cpsr_%s%s, r%u\n", nCC(INSN_COND),
16001             write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
16002         goto decode_success;
16003      }
16004      /* fall through */
16005   }
16006
16007   // MRS rD, cpsr
        // Read the synthesised APSR value into rD (insn[15:12]).  The mask
        // 0x0FFF0FFF ignores the condition field and the rD field; all
        // other bits must equal 0x010F0000.  Not decoded when rD is r15.
16008   if ((insn & 0x0FFF0FFF) == 0x010F0000) {
16009      UInt rD   = INSN(15,12);
16010      if (rD != 15) {
16011         IRTemp apsr = synthesise_APSR();
16012         putIRegA( rD, mkexpr(apsr), condT, Ijk_Boring );
16013         DIP("mrs%s r%u, cpsr\n", nCC(INSN_COND), rD);
16014         goto decode_success;
16015      }
16016      /* fall through */
16017   }
16018
16019   /* --------------------- Svc --------------------- */
        /* Supervisor call.  Matches insn[27:24] == 1111.  Only the form
           with a zero 24-bit immediate is decoded; any other immediate
           falls through to later cases. */
16020   if (BITS8(1,1,1,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))) {
16021      UInt imm24 = (insn >> 0) & 0xFFFFFF;
16022      if (imm24 == 0) {
16023         /* A syscall.  We can't do this conditionally, hence: */
16024         if (condT != IRTemp_INVALID) {
16025            mk_skip_over_A32_if_cond_is_false( condT );
16026         }
16027         // AL after here
              /* Set r15 to the address of the next instruction (current
                 instruction address + 4) and end the block here with a
                 syscall jump kind. */
16028         llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 4 ));
16029         dres.jk_StopHere = Ijk_Sys_syscall;
16030         dres.whatNext    = Dis_StopHere;
16031         DIP("svc%s #0x%08x\n", nCC(INSN_COND), imm24);
16032         goto decode_success;
16033      }
16034      /* fall through */
16035   }
16036
16037   /* ------------------------ swp ------------------------ */
16038
16039   // SWP, SWPB
        // Swap a word (or, when bit 22 is set, a byte) between register
        // and memory: the old memory value at [rN] goes to rD and the
        // value of rM is stored to [rN].  Implemented as a
        // load-linked / store-conditional pair; if the store-conditional
        // does not succeed, control exits back to the address of this
        // same instruction.
16040   if (BITS8(0,0,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
16041       && BITS4(0,0,0,0) == INSN(11,8)
16042       && BITS4(1,0,0,1) == INSN(7,4)) {
16043      UInt   rN   = INSN(19,16);
16044      UInt   rD   = INSN(15,12);
16045      UInt   rM   = INSN(3,0);
16046      IRTemp tRn  = newTemp(Ity_I32);
16047      IRTemp tNew = newTemp(Ity_I32);
16048      IRTemp tOld = IRTemp_INVALID;
16049      IRTemp tSC1 = newTemp(Ity_I1);
16050      UInt   isB  = (insn >> 22) & 1;
16051
           /* Note the temps above are allocated even when the checks below
              reject the instruction. */
16052      if (rD == 15 || rN == 15 || rM == 15 || rN == rM || rN == rD) {
16053         /* undecodable; fall through */
16054      } else {
16055         /* make unconditional */
16056         if (condT != IRTemp_INVALID) {
16057            mk_skip_over_A32_if_cond_is_false( condT );
16058            condT = IRTemp_INVALID;
16059         }
16060         /* Ok, now we're unconditional.  Generate a LL-SC loop. */
              /* tRn is the address, tNew the value to be stored. */
16061         assign(tRn, getIRegA(rN));
16062         assign(tNew, getIRegA(rM));
16063         if (isB) {
16064            /* swpb */
                 /* 8-bit load-linked into tOld, then store-conditional of
                    the low 8 bits of tNew; tSC1 receives the SC outcome. */
16065            tOld = newTemp(Ity_I8);
16066            stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
16067                              NULL/*=>isLL*/) );
16068            stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
16069                              unop(Iop_32to8, mkexpr(tNew))) );
16070         } else {
16071            /* swp */
                 /* 32-bit load-linked into tOld, then store-conditional of
                    tNew; tSC1 receives the SC outcome. */
16072            tOld = newTemp(Ity_I32);
16073            stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
16074                              NULL/*=>isLL*/) );
16075            stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
16076                              mkexpr(tNew)) );
16077         }
              /* If tSC1 is false, exit to the address of the current
                 instruction. */
16078         stmt( IRStmt_Exit(unop(Iop_Not1, mkexpr(tSC1)),
16079                           /*Ijk_NoRedir*/Ijk_Boring,
16080                           IRConst_U32(guest_R15_curr_instr_notENC),
16081                           OFFB_R15T ));
              /* Write the old memory value to rD, zero-extended from 8 bits
                 in the byte case. */
16082         putIRegA(rD, isB ? unop(Iop_8Uto32, mkexpr(tOld)) : mkexpr(tOld),
16083                      IRTemp_INVALID, Ijk_Boring);
16084         DIP("swp%s%s r%u, r%u, [r%u]\n",
16085             isB ? "b" : "", nCC(INSN_COND), rD, rM, rN);
16086         goto decode_success;
16087      }
16088      /* fall through */
16089   }
16090
16091   /* ----------------------------------------------------------- */
16092   /* -- ARMv6 instructions                                    -- */
16093   /* ----------------------------------------------------------- */
16094
16095   /* ------------------- {ldr,str}ex{,b,h,d} ------------------- */
16096
16097   // LDREXD, LDREX, LDREXH, LDREXB
        // Load-exclusive family.  insn[22:21] selects the access size:
        //    0 -> 32 bit, 1 -> 64 bit (register pair rT, rT+1),
        //    2 -> 8 bit,  3 -> 16 bit.
        // Sub-word results are zero-extended to 32 bits before being
        // written to rT.
16098   if (0x01900F9F == (insn & 0x0F900FFF)) {
16099      UInt   rT    = INSN(15,12);
16100      UInt   rN    = INSN(19,16);
16101      IRType ty    = Ity_INVALID;
16102      IROp   widen = Iop_INVALID;
16103      const HChar* nm = NULL;
16104      Bool   valid = True;
16105      switch (INSN(22,21)) {
16106         case 0: nm = "";  ty = Ity_I32; break;
16107         case 1: nm = "d"; ty = Ity_I64; break;
16108         case 2: nm = "b"; ty = Ity_I8;  widen = Iop_8Uto32; break;
16109         case 3: nm = "h"; ty = Ity_I16; widen = Iop_16Uto32; break;
16110         default: vassert(0);
16111      }
           /* Register validity: no r15 for the single-register forms; the
              doubleword form additionally needs an even rT other than r14. */
16112      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
16113         if (rT == 15 || rN == 15)
16114            valid = False;
16115      } else {
16116         vassert(ty == Ity_I64);
16117         if ((rT & 1) == 1 || rT == 14 || rN == 15)
16118            valid = False;
16119      }
16120      if (valid) {
16121         IRTemp res;
16122         /* make unconditional */
16123         if (condT != IRTemp_INVALID) {
16124           mk_skip_over_A32_if_cond_is_false( condT );
16125           condT = IRTemp_INVALID;
16126         }
16127         /* Ok, now we're unconditional.  Do the load. */
16128         res = newTemp(ty);
16129         // FIXME: assumes little-endian guest
16130         stmt( IRStmt_LLSC(Iend_LE, res, getIRegA(rN),
16131                           NULL/*this is a load*/) );
16132         if (ty == Ity_I64) {
16133            // FIXME: assumes little-endian guest
                 /* Low 32 bits go to rT, high 32 bits to rT+1. */
16134            putIRegA(rT+0, unop(Iop_64to32, mkexpr(res)),
16135                           IRTemp_INVALID, Ijk_Boring);
16136            putIRegA(rT+1, unop(Iop_64HIto32, mkexpr(res)),
16137                           IRTemp_INVALID, Ijk_Boring);
16138            DIP("ldrex%s%s r%u, r%u, [r%u]\n",
16139                nm, nCC(INSN_COND), rT+0, rT+1, rN);
16140         } else {
                 /* widen is Iop_INVALID only for the 32-bit case, where the
                    loaded value is used as is. */
16141            putIRegA(rT, widen == Iop_INVALID
16142                            ? mkexpr(res) : unop(widen, mkexpr(res)),
16143                     IRTemp_INVALID, Ijk_Boring);
16144            DIP("ldrex%s%s r%u, [r%u]\n", nm, nCC(INSN_COND), rT, rN);
16145         }
16146         goto decode_success;
16147      }
16148      /* undecodable; fall through */
16149   }
16150
16151   // STREXD, STREX, STREXH, STREXB
        // Store-exclusive family.  insn[22:21] selects the access size:
        //    0 -> 32 bit, 1 -> 64 bit (register pair rT, rT+1),
        //    2 -> 8 bit,  3 -> 16 bit.
        // rD receives the status: 0 if the store succeeded, 1 if not.
16152   if (0x01800F90 == (insn & 0x0F900FF0)) {
16153      UInt   rT     = INSN(3,0);
16154      UInt   rN     = INSN(19,16);
16155      UInt   rD     = INSN(15,12);
16156      IRType ty     = Ity_INVALID;
16157      IROp   narrow = Iop_INVALID;
16158      const HChar* nm = NULL;
16159      Bool   valid  = True;
16160      switch (INSN(22,21)) {
16161         case 0: nm = "";  ty = Ity_I32; break;
16162         case 1: nm = "d"; ty = Ity_I64; break;
16163         case 2: nm = "b"; ty = Ity_I8;  narrow = Iop_32to8; break;
16164         case 3: nm = "h"; ty = Ity_I16; narrow = Iop_32to16; break;
16165         default: vassert(0);
16166      }
           /* Register validity: no r15 anywhere, and the status register rD
              must differ from the base and from every data register.  The
              doubleword form additionally needs an even rT other than r14. */
16167      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
16168         if (rD == 15 || rN == 15 || rT == 15
16169             || rD == rN || rD == rT)
16170            valid = False;
16171      } else {
16172         vassert(ty == Ity_I64);
16173         if (rD == 15 || (rT & 1) == 1 || rT == 14 || rN == 15
16174             || rD == rN || rD == rT || rD == rT+1)
16175            valid = False;
16176      }
16177      if (valid) {
16178         IRTemp resSC1, resSC32, data;
16179         /* make unconditional */
16180         if (condT != IRTemp_INVALID) {
16181            mk_skip_over_A32_if_cond_is_false( condT );
16182            condT = IRTemp_INVALID;
16183         }
16184         /* Ok, now we're unconditional.  Do the store. */
              /* data is rT+1:rT for the 64-bit form, rT itself for the
                 32-bit form, and rT narrowed to 8 or 16 bits otherwise. */
16185         data = newTemp(ty);
16186         assign(data,
16187                ty == Ity_I64
16188                   // FIXME: assumes little-endian guest
16189                   ? binop(Iop_32HLto64, getIRegA(rT+1), getIRegA(rT+0))
16190                   : narrow == Iop_INVALID
16191                      ? getIRegA(rT)
16192                      : unop(narrow, getIRegA(rT)));
16193         resSC1 = newTemp(Ity_I1);
16194         // FIXME: assumes little-endian guest
16195         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegA(rN), mkexpr(data)) );
16196
16197         /* Set rD to 1 on failure, 0 on success.  Currently we have
16198            resSC1 == 0 on failure, 1 on success. */
16199         resSC32 = newTemp(Ity_I32);
16200         assign(resSC32,
16201                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
16202
16203         putIRegA(rD, mkexpr(resSC32),
16204                      IRTemp_INVALID, Ijk_Boring);
16205         if (ty == Ity_I64) {
16206            DIP("strex%s%s r%u, r%u, r%u, [r%u]\n",
16207                nm, nCC(INSN_COND), rD, rT, rT+1, rN);
16208         } else {
16209            DIP("strex%s%s r%u, r%u, [r%u]\n",
16210                nm, nCC(INSN_COND), rD, rT, rN);
16211         }
16212         goto decode_success;
16213      }
16214      /* fall through */
16215   }
16216
16217   /* --------------------- movw, movt --------------------- */
        /* Move 16-bit immediate.  Bit 22 selects MOVT.  The immediate is
           assembled from insn[19:16] (upper 4 bits) and insn[11:0] (lower
           12 bits).
              MOVW: rD = imm16
              MOVT: rD = (rD & 0xFFFF) | (imm16 << 16)
           Not decoded when rD is r15. */
16218   if (0x03000000 == (insn & 0x0FF00000)
16219       || 0x03400000 == (insn & 0x0FF00000)) /* pray for CSE */ {
16220      UInt rD    = INSN(15,12);
16221      UInt imm16 = (insn & 0xFFF) | ((insn >> 4) & 0x0000F000);
16222      UInt isT   = (insn >> 22) & 1;
16223      if (rD == 15) {
16224         /* forget it */
16225      } else {
16226         if (isT) {
                 /* Keep the low half of rD, replace the high half. */
16227            putIRegA(rD,
16228                     binop(Iop_Or32,
16229                           binop(Iop_And32, getIRegA(rD), mkU32(0xFFFF)),
16230                           mkU32(imm16 << 16)),
16231                     condT, Ijk_Boring);
16232            DIP("movt%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
16233            goto decode_success;
16234         } else {
16235            putIRegA(rD, mkU32(imm16), condT, Ijk_Boring);
16236            DIP("movw%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
16237            goto decode_success;
16238         }
16239      }
16240      /* fall through */
16241   }
16242
16243   /* ----------- uxtb, sxtb, uxth, sxth, uxtb16, sxtb16 ----------- */
16244   /* FIXME: this is an exact duplicate of the Thumb version.  They
16245      should be commoned up. */
        /* Extend-with-rotate family.  The source register rM is first
           rotated right by 0, 8, 16 or 24 bits (insn[11:10] * 8), then
           the low 3 bits of insn[27:20] select which extension is applied.
           Sub-opcodes 001 and 101 are not handled here and fall through. */
16246   if (BITS8(0,1,1,0,1, 0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,0))
16247       && BITS4(1,1,1,1) == INSN(19,16)
16248       && BITS4(0,1,1,1) == INSN(7,4)
16249       && BITS4(0,0, 0,0) == (INSN(11,8) & BITS4(0,0,1,1))) {
16250      UInt subopc = INSN(27,20) & BITS8(0,0,0,0,0, 1,1,1);
16251      if (subopc != BITS4(0,0,0,1) && subopc != BITS4(0,1,0,1)) {
16252         Int    rot  = (INSN(11,8) >> 2) & 3;
16253         UInt   rM   = INSN(3,0);
16254         UInt   rD   = INSN(15,12);
16255         IRTemp srcT = newTemp(Ity_I32);
16256         IRTemp rotT = newTemp(Ity_I32);
16257         IRTemp dstT = newTemp(Ity_I32);
16258         const HChar* nm = "???";
16259         assign(srcT, getIRegA(rM));
16260         assign(rotT, genROR32(srcT, 8 * rot)); /* 0, 8, 16 or 24 only */
16261         switch (subopc) {
16262            case BITS4(0,1,1,0): // UXTB
                    /* zero-extend bits 7:0 of the rotated value */
16263               assign(dstT, unop(Iop_8Uto32, unop(Iop_32to8, mkexpr(rotT))));
16264               nm = "uxtb";
16265               break;
16266            case BITS4(0,0,1,0): // SXTB
                    /* sign-extend bits 7:0 of the rotated value */
16267               assign(dstT, unop(Iop_8Sto32, unop(Iop_32to8, mkexpr(rotT))));
16268               nm = "sxtb";
16269               break;
16270            case BITS4(0,1,1,1): // UXTH
                    /* zero-extend bits 15:0 of the rotated value */
16271               assign(dstT, unop(Iop_16Uto32, unop(Iop_32to16, mkexpr(rotT))));
16272               nm = "uxth";
16273               break;
16274            case BITS4(0,0,1,1): // SXTH
                    /* sign-extend bits 15:0 of the rotated value */
16275               assign(dstT, unop(Iop_16Sto32, unop(Iop_32to16, mkexpr(rotT))));
16276               nm = "sxth";
16277               break;
16278            case BITS4(0,1,0,0): // UXTB16
                    /* keep bytes 0 and 2, clearing bytes 1 and 3 */
16279               assign(dstT, binop(Iop_And32, mkexpr(rotT), mkU32(0x00FF00FF)));
16280               nm = "uxtb16";
16281               break;
16282            case BITS4(0,0,0,0): { // SXTB16
                    /* lo32 carries byte 0; hi32 carries byte 2 in its low
                       8 bits.  Each is sign-extended from 8 bits; the low
                       result is truncated to 16 bits and the high result
                       shifted into the upper halfword. */
16283               IRTemp lo32 = newTemp(Ity_I32);
16284               IRTemp hi32 = newTemp(Ity_I32);
16285               assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
16286               assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
16287               assign(
16288                  dstT,
16289                  binop(Iop_Or32,
16290                        binop(Iop_And32,
16291                              unop(Iop_8Sto32,
16292                                   unop(Iop_32to8, mkexpr(lo32))),
16293                              mkU32(0xFFFF)),
16294                        binop(Iop_Shl32,
16295                              unop(Iop_8Sto32,
16296                                   unop(Iop_32to8, mkexpr(hi32))),
16297                              mkU8(16))
16298               ));
16299               nm = "sxtb16";
16300               break;
16301            }
16302            default:
16303               vassert(0); // guarded by "if" above
16304         }
16305         putIRegA(rD, mkexpr(dstT), condT, Ijk_Boring);
              /* NOTE(review): rot is declared Int but printed with %u, and
                 the value shown is the 2-bit field, not the bit count
                 (8 * rot) -- confirm whether that is intended. */
16306         DIP("%s%s r%u, r%u, ROR #%u\n", nm, nCC(INSN_COND), rD, rM, rot);
16307         goto decode_success;
16308      }
16309      /* fall through */
16310   }
16311
16312   /* ------------------- bfi, bfc ------------------- */
        /* Bit-field insert / clear.  Bits lsb..msb of rD are replaced by
           the low (msb - lsb + 1) bits of rN; all other bits of rD are
           kept.  When the rN field is 15 the inserted value is zero, which
           is the BFC form.  Not decoded when rD is r15 or msb < lsb. */
16313   if (BITS8(0,1,1,1,1,1,0, 0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
16314       && BITS4(0, 0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
16315      UInt rD  = INSN(15,12);
16316      UInt rN  = INSN(3,0);
16317      UInt msb = (insn >> 16) & 0x1F; /* 20:16 */
16318      UInt lsb = (insn >> 7) & 0x1F;  /* 11:7 */
16319      if (rD == 15 || msb < lsb) {
16320         /* undecodable; fall through */
16321      } else {
16322         IRTemp src    = newTemp(Ity_I32);
16323         IRTemp olddst = newTemp(Ity_I32);
16324         IRTemp newdst = newTemp(Ity_I32);
              /* Build a mask of (msb - lsb + 1) one bits.  The shift count
                 can be as large as 31, so the shifted constant must be
                 unsigned: shifting a signed 1 into the sign bit is
                 undefined behaviour in C. */
16325         UInt   mask = 1U << (msb - lsb);
16326         mask = (mask - 1) + mask;
16327         vassert(mask != 0); // msb >= lsb here, so at least one bit is set
              /* Move the mask up to the field position. */
16328         mask <<= lsb;
16329
16330         assign(src, rN == 15 ? mkU32(0) : getIRegA(rN));
16331         assign(olddst, getIRegA(rD));
              /* newdst = ((src << lsb) & mask) | (olddst & ~mask) */
16332         assign(newdst,
16333                binop(Iop_Or32,
16334                   binop(Iop_And32,
16335                         binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
16336                         mkU32(mask)),
16337                   binop(Iop_And32,
16338                         mkexpr(olddst),
16339                         mkU32(~mask)))
16340               );
16341
16342         putIRegA(rD, mkexpr(newdst), condT, Ijk_Boring);
16343
              /* The trace shows the field as #lsb, #width. */
16344         if (rN == 15) {
16345            DIP("bfc%s r%u, #%u, #%u\n",
16346                nCC(INSN_COND), rD, lsb, msb-lsb+1);
16347         } else {
16348            DIP("bfi%s r%u, r%u, #%u, #%u\n",
16349                nCC(INSN_COND), rD, rN, lsb, msb-lsb+1);
16350         }
16351         goto decode_success;
16352      }
16353      /* fall through */
16354   }
16355
16356   /* ------------------- {u,s}bfx ------------------- */
        /* Bit-field extract.  Extracts (wm1 + 1) bits of rN starting at
           bit lsb and writes them to rD, zero-extended when bit 22 is set
           (ubfx) and sign-extended otherwise (sbfx).  Not decoded when rD
           or rN is r15, or when the field would extend past bit 31. */
16357   if (BITS8(0,1,1,1,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
16358       && BITS4(0,1,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
16359      UInt rD  = INSN(15,12);
16360      UInt rN  = INSN(3,0);
16361      UInt wm1 = (insn >> 16) & 0x1F; /* 20:16 */
16362      UInt lsb = (insn >> 7) & 0x1F;  /* 11:7 */
16363      UInt msb = lsb + wm1;
16364      UInt isU = (insn >> 22) & 1;    /* 22:22 */
16365      if (rD == 15 || rN == 15 || msb >= 32) {
16366         /* undecodable; fall through */
16367      } else {
16368         IRTemp src  = newTemp(Ity_I32);
16369         IRTemp tmp  = newTemp(Ity_I32);
16370         IRTemp res  = newTemp(Ity_I32);
              /* Mask of (wm1 + 1) one bits.  wm1 can be 31, so the shifted
                 constant must be unsigned: a signed "1 << 31" followed by
                 "- 1" is undefined behaviour in C. */
16371         UInt   mask = ((1U << wm1) - 1) + (1U << wm1);
              /* msb is unsigned, so only the upper bound needs checking. */
16372         vassert(msb <= 31);
16373         vassert(mask != 0); // at least bit 0 is always set
16374
16375         assign(src, getIRegA(rN));
              /* tmp = the extracted field, right-justified. */
16376         assign(tmp, binop(Iop_And32,
16377                           binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
16378                           mkU32(mask)));
              /* Shift the field up so its top bit sits at bit 31, then
                 shift back down: logically for ubfx, arithmetically for
                 sbfx so that the field's top bit is replicated. */
16379         assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
16380                           binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
16381                           mkU8(31-wm1)));
16382
16383         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
16384
              /* The trace shows the field as #lsb, #width. */
16385         DIP("%s%s r%u, r%u, #%u, #%u\n",
16386             isU ? "ubfx" : "sbfx",
16387             nCC(INSN_COND), rD, rN, lsb, wm1 + 1);
16388         goto decode_success;
16389      }
16390      /* fall through */
16391   }
16392
16393   /* --------------------- Load/store doubleword ------------- */
16394   // LDRD STRD
16395   /*                 31   27   23   19 15 11   7    3     # highest bit
16396                        28   24   20 16 12    8    4    0
16397      A5-36   1 | 16  cond 0001 U100 Rn Rd im4h 11S1 im4l
16398      A5-38   1 | 32  cond 0001 U000 Rn Rd 0000 11S1 Rm
16399      A5-40   2 | 16  cond 0001 U110 Rn Rd im4h 11S1 im4l
16400      A5-42   2 | 32  cond 0001 U010 Rn Rd 0000 11S1 Rm
16401      A5-44   3 | 16  cond 0000 U100 Rn Rd im4h 11S1 im4l
16402      A5-46   3 | 32  cond 0000 U000 Rn Rd 0000 11S1 Rm
16403   */
16404   /* case coding:
16405             1   at-ea               (access at ea)
16406             2   at-ea-then-upd      (access at ea, then Rn = ea)
16407             3   at-Rn-then-upd      (access at Rn, then Rn = ea)
16408      ea coding
16409             16  Rn +/- imm8
16410             32  Rn +/- Rm
16411   */
16412   /* Quickly skip over all of this for hopefully most instructions */
16413   if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
16414      goto after_load_store_doubleword;
16415
16416   /* Check the "11S1" thing. */
16417   if ((INSN(7,4) & BITS4(1,1,0,1)) != BITS4(1,1,0,1))
16418      goto after_load_store_doubleword;
16419
16420   summary = 0;
16421
16422   /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,0,0)) {
16423      summary = 1 | 16;
16424   }
16425   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,0,0)) {
16426      summary = 1 | 32;
16427   }
16428   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,1,0)) {
16429      summary = 2 | 16;
16430   }
16431   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,1,0)) {
16432      summary = 2 | 32;
16433   }
16434   else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(1,0,0)) {
16435      summary = 3 | 16;
16436   }
16437   else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(0,0,0)) {
16438      summary = 3 | 32;
16439   }
16440   else goto after_load_store_doubleword;
16441
16442   { UInt rN   = (insn >> 16) & 0xF; /* 19:16 */
16443     UInt rD   = (insn >> 12) & 0xF; /* 15:12 */
16444     UInt rM   = (insn >> 0)  & 0xF; /*  3:0  */
16445     UInt bU   = (insn >> 23) & 1;   /* 23 U=1 offset+, U=0 offset- */
16446     UInt bS   = (insn >> 5) & 1;    /* S=1 store, S=0 load */
16447     UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
16448
16449     /* Require rD to be an even numbered register */
16450     if ((rD & 1) != 0)
16451        goto after_load_store_doubleword;
16452
16453     /* Require 11:8 == 0 for Rn +/- Rm cases */
16454     if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
16455        goto after_load_store_doubleword;
16456
16457     /* Skip some invalid cases, which would lead to two competing
16458        updates to the same register, or which are otherwise
16459        disallowed by the spec. */
16460     switch (summary) {
16461        case 1 | 16:
16462           break;
16463        case 1 | 32:
16464           if (rM == 15) goto after_load_store_doubleword;
16465           break;
16466        case 2 | 16: case 3 | 16:
16467           if (rN == 15) goto after_load_store_doubleword;
16468           if (bS == 0 && (rN == rD || rN == rD+1))
16469              goto after_load_store_doubleword;
16470           break;
16471        case 2 | 32: case 3 | 32:
16472           if (rM == 15) goto after_load_store_doubleword;
16473           if (rN == 15) goto after_load_store_doubleword;
16474           if (rN == rM) goto after_load_store_doubleword;
16475           if (bS == 0 && (rN == rD || rN == rD+1))
16476              goto after_load_store_doubleword;
16477           break;
16478        default:
16479           vassert(0);
16480     }
16481
16482     /* If this is a branch, make it unconditional at this point.
16483        Doing conditional branches in-line is too complex (for
16484        now). */
16485     vassert((rD & 1) == 0); /* from tests above */
16486     if (bS == 0 && rD+1 == 15 && condT != IRTemp_INVALID) {
16487        // go uncond
16488        mk_skip_over_A32_if_cond_is_false( condT );
16489        condT = IRTemp_INVALID;
16490        // now uncond
16491     }
16492
16493     /* compute the effective address.  Bind it to a tmp since we
16494        may need to use it twice. */
16495     IRExpr* eaE = NULL;
16496     switch (summary & 0xF0) {
16497        case 16:
16498           eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
16499           break;
16500        case 32:
16501           eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
16502           break;
16503     }
16504     vassert(eaE);
16505     IRTemp eaT = newTemp(Ity_I32);
16506     assign(eaT, eaE);
16507
16508     /* get the old Rn value */
16509     IRTemp rnT = newTemp(Ity_I32);
16510     assign(rnT, getIRegA(rN));
16511
16512     /* decide on the transfer address */
16513     IRTemp taT = IRTemp_INVALID;
16514     switch (summary & 0x0F) {
16515        case 1: case 2: taT = eaT; break;
16516        case 3:         taT = rnT; break;
16517     }
16518     vassert(taT != IRTemp_INVALID);
16519
16520     /* XXX deal with alignment constraints */
16521     /* XXX: but the A8 doesn't seem to trap for misaligned loads, so,
16522        ignore alignment issues for the time being. */
16523
16524     /* For almost all cases, we do the writeback after the transfers.
16525        However, that leaves the stack "uncovered" in this case:
16526           strd    rD, [sp, #-8]
16527        In which case, do the writeback to SP now, instead of later.
16528        This is bad in that it makes the insn non-restartable if the
16529        accesses fault, but at least keeps Memcheck happy. */
16530     Bool writeback_already_done = False;
16531     if (bS == 1 /*store*/ && summary == (2 | 16)
16532         && rN == 13 && rN != rD && rN != rD+1
16533         && bU == 0/*minus*/) {
16534        putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
16535        writeback_already_done = True;
16536     }
16537
16538     /* doubleword store  S 1
16539        doubleword load   S 0
16540     */
16541     const HChar* name = NULL;
16542     /* generate the transfers */
16543     if (bS == 1) { // doubleword store
16544        storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(0)),
16545                        getIRegA(rD+0), condT );
16546        storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(4)),
16547                        getIRegA(rD+1), condT );
16548        name = "strd";
16549     } else { // doubleword load
16550        IRTemp oldRd0 = newTemp(Ity_I32);
16551        IRTemp oldRd1 = newTemp(Ity_I32);
16552        assign(oldRd0, llGetIReg(rD+0));
16553        assign(oldRd1, llGetIReg(rD+1));
16554        IRTemp newRd0 = newTemp(Ity_I32);
16555        IRTemp newRd1 = newTemp(Ity_I32);
16556        loadGuardedLE( newRd0, ILGop_Ident32,
16557                       binop(Iop_Add32, mkexpr(taT), mkU32(0)),
16558                       mkexpr(oldRd0), condT );
16559        putIRegA( rD+0, mkexpr(newRd0), IRTemp_INVALID, Ijk_Boring );
16560        loadGuardedLE( newRd1, ILGop_Ident32,
16561                       binop(Iop_Add32, mkexpr(taT), mkU32(4)),
16562                       mkexpr(oldRd1), condT );
16563        putIRegA( rD+1, mkexpr(newRd1), IRTemp_INVALID, Ijk_Boring );
16564        name = "ldrd";
16565     }
16566
16567     /* Update Rn if necessary. */
16568     switch (summary & 0x0F) {
16569        case 2: case 3:
16570           // should be assured by logic above:
16571           vassert(rN != 15); /* from checks above */
16572           if (bS == 0) {
16573              vassert(rD+0 != rN); /* since we just wrote rD+0 */
16574              vassert(rD+1 != rN); /* since we just wrote rD+1 */
16575           }
16576           if (!writeback_already_done)
16577              putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
16578           break;
16579     }
16580
16581     switch (summary & 0x0F) {
16582        case 1:  DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
16583                 break;
16584        case 2:  DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
16585                     name, nCC(INSN_COND), rD, dis_buf);
16586                 break;
16587        case 3:  DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
16588                     name, nCC(INSN_COND), rD, dis_buf);
16589                 break;
16590        default: vassert(0);
16591     }
16592
16593     goto decode_success;
16594   }
16595
16596  after_load_store_doubleword:
16597
16598   /* ------------------- {s,u}xtab: Rd = Rn + ext8(Rm ror 8*rot) ------------- */
16599   if (BITS8(0,1,1,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
16600       && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
16601       && BITS4(0,1,1,1) == INSN(7,4)) {
16602      UInt rN  = INSN(19,16);
16603      UInt rD  = INSN(15,12);
16604      UInt rM  = INSN(3,0);
16605      UInt rot = (insn >> 10) & 3; /* raw 2-bit field; Rm is rotated right by 8*rot bits */
16606      UInt isU = INSN(22,22);      /* 1 = zero-extend (uxtab), 0 = sign-extend (sxtab) */
16607      if (rN == 15/*it's {S,U}XTB*/ || rD == 15 || rM == 15) {
16608         /* undecodable; fall through */
16609      } else {
16610         IRTemp srcL = newTemp(Ity_I32);
16611         IRTemp srcR = newTemp(Ity_I32);
16612         IRTemp res  = newTemp(Ity_I32);
16613         assign(srcR, getIRegA(rM));
16614         assign(srcL, getIRegA(rN));
16615         assign(res,  binop(Iop_Add32,
16616                            mkexpr(srcL),
16617                            unop(isU ? Iop_8Uto32 : Iop_8Sto32,
16618                                 unop(Iop_32to8,
16619                                      genROR32(srcR, 8 * rot)))));
16620         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
16621         DIP("%cxtab%s r%u, r%u, r%u, ror #%u\n", /* rotation is printed in bits (0/8/16/24) */
16622             isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, 8 * rot);
16623         goto decode_success;
16624      }
16625      /* fall through */
16626   }
16627
16628   /* ------------------- {s,u}xtah: Rd = Rn + ext16(Rm ror 8*rot) ------------- */
16629   if (BITS8(0,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
16630       && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
16631       && BITS4(0,1,1,1) == INSN(7,4)) {
16632      UInt rN  = INSN(19,16);
16633      UInt rD  = INSN(15,12);
16634      UInt rM  = INSN(3,0);
16635      UInt rot = (insn >> 10) & 3; /* raw 2-bit field; Rm is rotated right by 8*rot bits */
16636      UInt isU = INSN(22,22);      /* 1 = zero-extend (uxtah), 0 = sign-extend (sxtah) */
16637      if (rN == 15/*it's {S,U}XTH*/ || rD == 15 || rM == 15) {
16638         /* undecodable; fall through */
16639      } else {
16640         IRTemp srcL = newTemp(Ity_I32);
16641         IRTemp srcR = newTemp(Ity_I32);
16642         IRTemp res  = newTemp(Ity_I32);
16643         assign(srcR, getIRegA(rM));
16644         assign(srcL, getIRegA(rN));
16645         assign(res,  binop(Iop_Add32,
16646                            mkexpr(srcL),
16647                            unop(isU ? Iop_16Uto32 : Iop_16Sto32,
16648                                 unop(Iop_32to16,
16649                                      genROR32(srcR, 8 * rot)))));
16650         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
16651         /* The rotation is printed in bits (0/8/16/24), i.e. as it is applied. */
16652         DIP("%cxtah%s r%u, r%u, r%u, ror #%u\n",
16653             isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, 8 * rot);
16654         goto decode_success;
16655      }
16656      /* fall through */
16657   }
16658
16659   /* ------------------- rev16, rev: Rd = byte-reversed form of Rm ------------------ */
16660   if (INSN(27,16) == 0x6BF
16661       && (INSN(11,4) == 0xFB/*rev16*/ || INSN(11,4) == 0xF3/*rev*/)) {
16662      Bool isREV = INSN(11,4) == 0xF3; /* False means rev16 */
16663      UInt rM    = INSN(3,0);
16664      UInt rD    = INSN(15,12);
16665      if (rM != 15 && rD != 15) { /* r15 operands are not handled; such encodings drop out to the later decoders */
16666         IRTemp rMt = newTemp(Ity_I32);
16667         assign(rMt, getIRegA(rM));
16668         IRTemp res = isREV ? gen_REV(rMt) : gen_REV16(rMt); /* helper builds the reversed value in a new temp */
16669         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
16670         DIP("rev%s%s r%u, r%u\n", isREV ? "" : "16",
16671             nCC(INSN_COND), rD, rM);
16672         goto decode_success;
16673      }
16674   }
16675
16676   /* ------------------- revsh: swap the low two bytes of Rm, sign-extend to 32 bits ----------------------- */
16677   if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xFB) {
16678      UInt rM = INSN(3,0);
16679      UInt rD = INSN(15,12);
16680      if (rM != 15 && rD != 15) { /* r15 operands are not handled; such encodings drop out to the later decoders */
16681         IRTemp irt_rM  = newTemp(Ity_I32);
16682         IRTemp irt_hi  = newTemp(Ity_I32);
16683         IRTemp irt_low = newTemp(Ity_I32);
16684         IRTemp irt_res = newTemp(Ity_I32);
16685         assign(irt_rM, getIRegA(rM));
16686         assign(irt_hi, /* (Rm << 24) >>signed 16: byte 0 of Rm lands in bits 15:8, its sign fills bits 31:16 */
16687                binop(Iop_Sar32,
16688                      binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
16689                      mkU8(16)
16690                )
16691         );
16692         assign(irt_low, /* (Rm >> 8) & 0xFF: byte 1 of Rm moved down to bits 7:0 */
16693                binop(Iop_And32,
16694                      binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
16695                      mkU32(0xFF)
16696                )
16697         );
16698         assign(irt_res, /* the two pieces occupy disjoint bits, so OR merges them */
16699                binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
16700         );
16701         putIRegA(rD, mkexpr(irt_res), condT, Ijk_Boring);
16702         DIP("revsh%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
16703         goto decode_success;
16704      }
16705   }
16706
16707   /* ------------------- rbit: Rd = bit-reversed Rm (via gen_BITREV) ------------------ */
16708   if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xF3) {
16709      UInt rD = INSN(15,12);
16710      UInt rM = INSN(3,0);
16711      if (rD != 15 && rM != 15) { /* r15 operands are not handled; such encodings drop out to the later decoders */
16712         IRTemp arg = newTemp(Ity_I32);
16713         assign(arg, getIRegA(rM));
16714         IRTemp res = gen_BITREV(arg);
16715         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
16716         DIP("rbit%s r%u, r%u\n", nCC(INSN_COND), rD, rM); /* include the condition suffix, as the sibling cases do */
16717         goto decode_success;
16718      }
16719   }
16720
16721   /* ------------------- smmul{r}: Rd = high word of signed Rn*Rm ------------------ */
16722   if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
16723       && INSN(15,12) == BITS4(1,1,1,1)
16724       && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
16725      UInt bitR = INSN(5,5); /* 1 = add 0x80000000 to the 64-bit product before taking the high word */
16726      UInt rD = INSN(19,16);
16727      UInt rM = INSN(11,8);
16728      UInt rN = INSN(3,0);
16729      if (rD != 15 && rM != 15 && rN != 15) { /* r15 operands are not handled; drop out to the later decoders */
16730         IRExpr* res
16731         = unop(Iop_64HIto32,
16732                binop(Iop_Add64,
16733                      binop(Iop_MullS32, getIRegA(rN), getIRegA(rM)),
16734                      mkU64(bitR ? 0x80000000ULL : 0ULL)));
16735         putIRegA(rD, res, condT, Ijk_Boring);
16736         DIP("smmul%s%s r%u, r%u, r%u\n", /* mnemonic order is smmul{r}<cond> */
16737             bitR ? "r" : "", nCC(INSN_COND), rD, rN, rM);
16738         goto decode_success;
16739      }
16740   }
16741
16742   /* ------------------- smmla{r}: Rd = high word of ((Ra:0) + signed Rn*Rm) ------------------ */
16743   if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
16744       && INSN(15,12) != BITS4(1,1,1,1)
16745       && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
16746      UInt bitR = INSN(5,5); /* 1 = add 0x80000000 to the 64-bit sum before taking the high word */
16747      UInt rD = INSN(19,16);
16748      UInt rA = INSN(15,12); /* cannot be 15 here: the match condition above excludes it */
16749      UInt rM = INSN(11,8);
16750      UInt rN = INSN(3,0);
16751      if (rD != 15 && rM != 15 && rN != 15) { /* r15 operands are not handled; drop out to the later decoders */
16752         IRExpr* res
16753         = unop(Iop_64HIto32,
16754                binop(Iop_Add64,
16755                      binop(Iop_Add64,
16756                            binop(Iop_32HLto64, getIRegA(rA), mkU32(0)),
16757                            binop(Iop_MullS32, getIRegA(rN), getIRegA(rM))),
16758                      mkU64(bitR ? 0x80000000ULL : 0ULL)));
16759         putIRegA(rD, res, condT, Ijk_Boring);
16760         DIP("smmla%s%s r%u, r%u, r%u, r%u\n", /* mnemonic order is smmla{r}<cond> */
16761             bitR ? "r" : "", nCC(INSN_COND), rD, rN, rM, rA);
16762         goto decode_success;
16763      }
16764   }
16765
16766   /* ------------------- NOP: matched for any condition field; no IR is emitted ------------------ */
16767   if (0x0320F000 == (insn & 0x0FFFFFFF)) { /* the mask drops bits 31:28 (the condition) */
16768      DIP("nop%s\n", nCC(INSN_COND));
16769      goto decode_success;
16770   }
16771
16772   /* -------------- (A1) LDRT reg+/-#imm12 -------------- */
16773   /* Load Register Unprivileged:
16774      ldrt<c> Rt, [Rn] {, #+/-imm12}
16775      Emitted as a 32-bit load from [Rn] into Rt, then Rn = Rn +/- imm12. */
16776   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,1) ) { /* mask leaves bit 23 (U) free */
16777      UInt rT     = INSN(15,12);
16778      UInt rN     = INSN(19,16);
16779      UInt imm12  = INSN(11,0);
16780      UInt bU     = INSN(23,23); /* 1 = add the offset, 0 = subtract it */
16781      Bool valid  = True;
16782      if (rT == 15 || rN == 15 || rN == rT) valid = False; /* rejected forms drop out to the later decoders */
16783      if (valid) {
16784         IRTemp newRt = newTemp(Ity_I32);
16785         loadGuardedLE( newRt, /* NOTE(review): current Rt is presumably the result when condT is false -- confirm */
16786                        ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
16787         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring); /* unguarded write of the guarded-load result */
16788         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16789                             getIRegA(rN), mkU32(imm12));
16790         putIRegA(rN, erN, condT, Ijk_Boring); /* base-register writeback, after the load */
16791         DIP("ldrt%s r%u, [r%u], #%c%u\n",
16792             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
16793         goto decode_success;
16794      }
16795   }
16796
16797   /* -------------- (A2) LDRT reg+/-reg with shift -------------- */
16798   /* Load Register Unprivileged:
16799      ldrt<c> Rt, [Rn], +/-Rm{, shift}
16800      Emitted as a 32-bit load from [Rn] into Rt, then Rn = Rn +/- shifted Rm. */
16801   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,1)
16802        && INSN(4,4) == 0 ) {
16803      UInt rT     = INSN(15,12);
16804      UInt rN     = INSN(19,16);
16805      UInt rM     = INSN(3,0);
16806      UInt imm5   = INSN(11,7);
16807      UInt bU     = INSN(23,23); /* passed to the EA helper together with type/imm5 */
16808      UInt type   = INSN(6,5);
16809      Bool valid  = True;
16810      if (rT == 15 || rN == 15 || rN == rT || rM == 15
16811          /* || (ArchVersion() < 6 && rM == rN) */)
16812         valid = False; /* rejected forms drop out to the later decoders */
16813      if (valid) {
16814         IRTemp newRt = newTemp(Ity_I32);
16815         loadGuardedLE( newRt, /* NOTE(review): current Rt is presumably the result when condT is false -- confirm */
16816                        ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
16817         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring); /* unguarded write of the guarded-load result */
16818         // dis_buf generated is slightly bogus, in fact.
16819         IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
16820                                                       type, imm5, dis_buf);
16821         putIRegA(rN, erN, condT, Ijk_Boring); /* base-register writeback, after the load */
16822         DIP("ldrt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
16823         goto decode_success;
16824      }
16825   }
16826
16827   /* -------------- (A1) LDRBT reg+/-#imm12 -------------- */
16828   /* Load Register Byte Unprivileged:
16829      ldrbt<c> Rt, [Rn], #+/-imm12
16830      Emitted as a zero-extended byte load from [Rn] into Rt, then Rn = Rn +/- imm12. */
16831   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,1) ) { /* mask leaves bit 23 (U) free */
16832      UInt rT     = INSN(15,12);
16833      UInt rN     = INSN(19,16);
16834      UInt imm12  = INSN(11,0);
16835      UInt bU     = INSN(23,23); /* 1 = add the offset, 0 = subtract it */
16836      Bool valid  = True;
16837      if (rT == 15 || rN == 15 || rN == rT) valid = False; /* rejected forms drop out to the later decoders */
16838      if (valid) {
16839         IRTemp newRt = newTemp(Ity_I32);
16840         loadGuardedLE( newRt, /* NOTE(review): current Rt is presumably the result when condT is false -- confirm */
16841                        ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
16842         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring); /* unguarded write of the guarded-load result */
16843         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16844                             getIRegA(rN), mkU32(imm12));
16845         putIRegA(rN, erN, condT, Ijk_Boring); /* base-register writeback, after the load */
16846         DIP("ldrbt%s r%u, [r%u], #%c%u\n",
16847             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
16848         goto decode_success;
16849      }
16850   }
16851
16852   /* -------------- (A2) LDRBT reg+/-reg with shift -------------- */
16853   /* Load Register Byte Unprivileged:
16854      ldrbt<c> Rt, [Rn], +/-Rm{, shift}
16855      Emitted as a zero-extended byte load from [Rn] into Rt, then Rn = Rn +/- shifted Rm. */
16856   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,1)
16857        && INSN(4,4) == 0 ) {
16858      UInt rT     = INSN(15,12);
16859      UInt rN     = INSN(19,16);
16860      UInt rM     = INSN(3,0);
16861      UInt imm5   = INSN(11,7);
16862      UInt bU     = INSN(23,23); /* passed to the EA helper together with type/imm5 */
16863      UInt type   = INSN(6,5);
16864      Bool valid  = True;
16865      if (rT == 15 || rN == 15 || rN == rT || rM == 15
16866          /* || (ArchVersion() < 6 && rM == rN) */)
16867         valid = False; /* rejected forms drop out to the later decoders */
16868      if (valid) {
16869         IRTemp newRt = newTemp(Ity_I32);
16870         loadGuardedLE( newRt, /* NOTE(review): current Rt is presumably the result when condT is false -- confirm */
16871                        ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
16872         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring); /* unguarded write of the guarded-load result */
16873         // dis_buf generated is slightly bogus, in fact.
16874         IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
16875                                                       type, imm5, dis_buf);
16876         putIRegA(rN, erN, condT, Ijk_Boring); /* base-register writeback, after the load */
16877         DIP("ldrbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
16878         goto decode_success;
16879      }
16880   }
16881
16882   /* -------------- (A1) LDRHT reg+#imm8 -------------- */
16883   /* Load Register Halfword Unprivileged:
16884      ldrht<c> Rt, [Rn] {, #+/-imm8}
16885      Emitted as a zero-extended halfword load from [Rn] into Rt, then Rn = Rn +/- imm8. */
16886   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
16887       && INSN(7,4) == BITS4(1,0,1,1) ) {
16888      UInt rT    = INSN(15,12);
16889      UInt rN    = INSN(19,16);
16890      UInt bU    = INSN(23,23); /* 1 = add the offset, 0 = subtract it */
16891      UInt imm4H = INSN(11,8);
16892      UInt imm4L = INSN(3,0);
16893      UInt imm8  = (imm4H << 4) | imm4L; /* the immediate is split across bits 11:8 and 3:0 */
16894      Bool valid = True;
16895      if (rT == 15 || rN == 15 || rN == rT)
16896         valid = False; /* rejected forms drop out to the later decoders */
16897      if (valid) {
16898         IRTemp newRt = newTemp(Ity_I32);
16899         loadGuardedLE( newRt, /* NOTE(review): current Rt is presumably the result when condT is false -- confirm */
16900                        ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
16901         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring); /* unguarded write of the guarded-load result */
16902         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16903                             getIRegA(rN), mkU32(imm8));
16904         putIRegA(rN, erN, condT, Ijk_Boring); /* base-register writeback, after the load */
16905         DIP("ldrht%s r%u, [r%u], #%c%u\n",
16906             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
16907         goto decode_success;
16908      }
16909   }
16910
16911   /* -------------- (A2) LDRHT reg+/-reg -------------- */
16912   /* Load Register Halfword Unprivileged:
16913      ldrht<c> Rt, [Rn], +/-Rm
16914      Emitted as a zero-extended halfword load from [Rn] into Rt, then Rn = Rn +/- Rm. */
16915   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
16916       && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
16917      UInt rT    = INSN(15,12);
16918      UInt rN    = INSN(19,16);
16919      UInt rM    = INSN(3,0);
16920      UInt bU    = INSN(23,23); /* 1 = add Rm, 0 = subtract it */
16921      Bool valid = True;
16922      if (rT == 15 || rN == 15 || rN == rT || rM == 15)
16923         valid = False; /* rejected forms drop out to the later decoders */
16924      if (valid) {
16925         IRTemp newRt = newTemp(Ity_I32);
16926         loadGuardedLE( newRt, /* NOTE(review): current Rt is presumably the result when condT is false -- confirm */
16927                        ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
16928         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring); /* unguarded write of the guarded-load result */
16929         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16930                             getIRegA(rN), getIRegA(rM));
16931         putIRegA(rN, erN, condT, Ijk_Boring); /* base-register writeback, after the load */
16932         DIP("ldrht%s r%u, [r%u], %cr%u\n",
16933             nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
16934         goto decode_success;
16935      }
16936   }
16937
16938   /* -------------- (A1) LDRSHT reg+#imm8 -------------- */
16939   /* Load Register Signed Halfword Unprivileged:
16940      ldrsht<c> Rt, [Rn] {, #+/-imm8}
16941      Emitted as a sign-extended halfword load from [Rn] into Rt, then Rn = Rn +/- imm8. */
16942   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
16943       && INSN(7,4) == BITS4(1,1,1,1)) {
16944      UInt rT    = INSN(15,12);
16945      UInt rN    = INSN(19,16);
16946      UInt bU    = INSN(23,23); /* 1 = add the offset, 0 = subtract it */
16947      UInt imm4H = INSN(11,8);
16948      UInt imm4L = INSN(3,0);
16949      UInt imm8  = (imm4H << 4) | imm4L; /* the immediate is split across bits 11:8 and 3:0 */
16950      Bool valid = True;
16951      if (rN == 15 || rT == 15 || rN == rT)
16952         valid = False; /* rejected forms drop out to the later decoders */
16953      if (valid) {
16954         IRTemp newRt = newTemp(Ity_I32);
16955         loadGuardedLE( newRt, /* NOTE(review): current Rt is presumably the result when condT is false -- confirm */
16956                        ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
16957         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring); /* unguarded write of the guarded-load result */
16958         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16959                             getIRegA(rN), mkU32(imm8));
16960         putIRegA(rN, erN, condT, Ijk_Boring); /* base-register writeback, after the load */
16961         DIP("ldrsht%s r%u, [r%u], #%c%u\n",
16962             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
16963         goto decode_success;
16964      }
16965   }
16966
16967   /* -------------- (A2) LDRSHT reg+/-reg -------------- */
16968   /* Load Register Signed Halfword Unprivileged:
16969      ldrsht<c> Rt, [Rn], +/-Rm
16970      Emitted as a sign-extended halfword load from [Rn] into Rt, then Rn = Rn +/- Rm. */
16971   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
16972       && INSN(11,4) == BITS8(0,0,0,0,1,1,1,1)) {
16973      UInt rT    = INSN(15,12);
16974      UInt rN    = INSN(19,16);
16975      UInt rM    = INSN(3,0);
16976      UInt bU    = INSN(23,23); /* 1 = add Rm, 0 = subtract it */
16977      Bool valid = True;
16978      if (rN == 15 || rT == 15 || rN == rT || rM == 15)
16979         valid = False; /* rejected forms drop out to the later decoders */
16980      if (valid) {
16981         IRTemp newRt = newTemp(Ity_I32);
16982         loadGuardedLE( newRt, /* NOTE(review): current Rt is presumably the result when condT is false -- confirm */
16983                        ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
16984         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring); /* unguarded write of the guarded-load result */
16985         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16986                             getIRegA(rN), getIRegA(rM));
16987         putIRegA(rN, erN, condT, Ijk_Boring); /* base-register writeback, after the load */
16988         DIP("ldrsht%s r%u, [r%u], %cr%u\n",
16989             nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
16990         goto decode_success;
16991      }
16992   }
16993
16994   /* -------------- (A1) LDRSBT reg+#imm8 -------------- */
16995   /* Load Register Signed Byte Unprivileged:
16996      ldrsbt<c> Rt, [Rn] {, #+/-imm8}
16997      Emitted as a sign-extended byte load from [Rn] into Rt, then Rn = Rn +/- imm8. */
16998   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
16999       && INSN(7,4) == BITS4(1,1,0,1)) {
17000      UInt rT    = INSN(15,12);
17001      UInt rN    = INSN(19,16);
17002      UInt bU    = INSN(23,23); /* 1 = add the offset, 0 = subtract it */
17003      UInt imm4H = INSN(11,8);
17004      UInt imm4L = INSN(3,0);
17005      UInt imm8  = (imm4H << 4) | imm4L; /* the immediate is split across bits 11:8 and 3:0 */
17006      Bool valid = True;
17007      if (rT == 15 || rN == 15 || rN == rT)
17008         valid = False; /* rejected forms drop out to the later decoders */
17009      if (valid) {
17010         IRTemp newRt = newTemp(Ity_I32);
17011         loadGuardedLE( newRt, /* NOTE(review): current Rt is presumably the result when condT is false -- confirm */
17012                        ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
17013         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring); /* unguarded write of the guarded-load result */
17014         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
17015                             getIRegA(rN), mkU32(imm8));
17016         putIRegA(rN, erN, condT, Ijk_Boring); /* base-register writeback, after the load */
17017         DIP("ldrsbt%s r%u, [r%u], #%c%u\n",
17018             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
17019         goto decode_success;
17020      }
17021   }
17022
17023   /* -------------- (A2) LDRSBT reg+/-reg -------------- */
17024   /* Load Register Signed Byte Unprivileged:
17025      ldrsbt<c> Rt, [Rn], +/-Rm
17026      Emitted as a sign-extended byte load from [Rn] into Rt, then Rn = Rn +/- Rm. */
17027   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
17028       && INSN(11,4) == BITS8(0,0,0,0,1,1,0,1)) {
17029      UInt rT    = INSN(15,12);
17030      UInt rN    = INSN(19,16);
17031      UInt bU    = INSN(23,23); /* 1 = add Rm, 0 = subtract it */
17032      UInt rM    = INSN(3,0);
17033      Bool valid = True;
17034      if (rT == 15 || rN == 15 || rN == rT || rM == 15)
17035         valid = False; /* rejected forms drop out to the later decoders */
17036      if (valid) {
17037         IRTemp newRt = newTemp(Ity_I32);
17038         loadGuardedLE( newRt, /* NOTE(review): current Rt is presumably the result when condT is false -- confirm */
17039                        ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
17040         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring); /* unguarded write of the guarded-load result */
17041         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
17042                             getIRegA(rN), getIRegA(rM));
17043         putIRegA(rN, erN, condT, Ijk_Boring); /* base-register writeback, after the load */
17044         DIP("ldrsbt%s r%u, [r%u], %cr%u\n",
17045             nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
17046         goto decode_success;
17047      }
17048   }
17049
17050   /* -------------- (A1) STRBT reg+#imm12 -------------- */
17051   /* Store Register Byte Unprivileged:
17052      strbt<c> Rt, [Rn], #+/-imm12
17053      Emitted as a store of the low byte of Rt to [Rn], then Rn = Rn +/- imm12. */
17054   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,0) ) { /* mask leaves bit 23 (U) free */
17055      UInt rT     = INSN(15,12);
17056      UInt rN     = INSN(19,16);
17057      UInt imm12  = INSN(11,0);
17058      UInt bU     = INSN(23,23); /* 1 = add the offset, 0 = subtract it */
17059      Bool valid = True;
17060      if (rT == 15 || rN == 15 || rN == rT) valid = False; /* rejected forms drop out to the later decoders */
17061      if (valid) {
17062         IRExpr* address = getIRegA(rN); /* the store uses the un-updated base */
17063         IRExpr* data = unop(Iop_32to8, getIRegA(rT));
17064         storeGuardedLE( address, data, condT);
17065         IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
17066                               getIRegA(rN), mkU32(imm12));
17067         putIRegA(rN, newRn, condT, Ijk_Boring); /* base-register writeback, after the store */
17068         DIP("strbt%s r%u, [r%u], #%c%u\n",
17069             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
17070         goto decode_success;
17071      }
17072   }
17073
17074   /* -------------- (A2) STRBT reg+/-reg -------------- */
17075   /* Store Register Byte Unprivileged:
17076      strbt<c> Rt, [Rn], +/-Rm{, shift}
17077      Emitted as a store of the low byte of Rt to [Rn], then Rn = Rn +/- shifted Rm. */
17078   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,0)
17079       && INSN(4,4) == 0) {
17080      UInt rT     = INSN(15,12);
17081      UInt rN     = INSN(19,16);
17082      UInt imm5   = INSN(11,7);
17083      UInt type   = INSN(6,5);
17084      UInt rM     = INSN(3,0);
17085      UInt bU     = INSN(23,23); /* passed to the EA helper together with type/imm5 */
17086      Bool valid  = True;
17087      if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False; /* rejected forms drop out to the later decoders */
17088      if (valid) {
17089         IRExpr* address = getIRegA(rN); /* the store uses the un-updated base */
17090         IRExpr* data = unop(Iop_32to8, getIRegA(rT));
17091         storeGuardedLE( address, data, condT);
17092         // dis_buf generated is slightly bogus, in fact.
17093         IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
17094                                                       type, imm5, dis_buf);
17095         putIRegA(rN, erN, condT, Ijk_Boring); /* base-register writeback, after the store */
17096         DIP("strbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
17097         goto decode_success;
17098      }
17099   }
17100
17101   /* -------------- (A1) STRHT reg+#imm8 -------------- */
17102   /* Store Register Halfword Unprivileged:
17103      strht<c> Rt, [Rn], #+/-imm8
17104      Emitted as a store of the low halfword of Rt to [Rn], then Rn = Rn +/- imm8. */
17105   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,0)
17106       && INSN(7,4) == BITS4(1,0,1,1) ) {
17107      UInt rT    = INSN(15,12);
17108      UInt rN    = INSN(19,16);
17109      UInt imm4H = INSN(11,8);
17110      UInt imm4L = INSN(3,0);
17111      UInt imm8  = (imm4H << 4) | imm4L; /* the immediate is split across bits 11:8 and 3:0 */
17112      UInt bU    = INSN(23,23); /* 1 = add the offset, 0 = subtract it */
17113      Bool valid = True;
17114      if (rT == 15 || rN == 15 || rN == rT) valid = False; /* rejected forms drop out to the later decoders */
17115      if (valid) {
17116         IRExpr* address = getIRegA(rN); /* the store uses the un-updated base */
17117         IRExpr* data = unop(Iop_32to16, getIRegA(rT));
17118         storeGuardedLE( address, data, condT);
17119         IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
17120                               getIRegA(rN), mkU32(imm8));
17121         putIRegA(rN, newRn, condT, Ijk_Boring); /* base-register writeback, after the store */
17122         DIP("strht%s r%u, [r%u], #%c%u\n",
17123             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
17124         goto decode_success;
17125      }
17126   }
17127
17128   /* -------------- (A2) STRHT reg+reg -------------- */
17129   /* Store Register Halfword Unprivileged:
17130      strht<c> Rt, [Rn], +/-Rm
17131      Emitted as a store of the low halfword of Rt to [Rn], then Rn = Rn +/- Rm. */
17132   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,0)
17133       && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
17134      UInt rT    = INSN(15,12);
17135      UInt rN    = INSN(19,16);
17136      UInt rM    = INSN(3,0);
17137      UInt bU    = INSN(23,23); /* 1 = add Rm, 0 = subtract it */
17138      Bool valid = True;
17139      if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False; /* rejected forms drop out to the later decoders */
17140      if (valid) {
17141         IRExpr* address = getIRegA(rN); /* the store uses the un-updated base */
17142         IRExpr* data = unop(Iop_32to16, getIRegA(rT));
17143         storeGuardedLE( address, data, condT);
17144         IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
17145                               getIRegA(rN), getIRegA(rM));
17146         putIRegA(rN, newRn, condT, Ijk_Boring); /* base-register writeback, after the store */
17147         DIP("strht%s r%u, [r%u], %cr%u\n",
17148             nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
17149         goto decode_success;
17150      }
17151   }
17152
17153   /* -------------- (A1) STRT reg+imm12 -------------- */
17154   /* Store Register Unprivileged:
17155      strt<c> Rt, [Rn], #+/-imm12
17156      Emitted as a 32-bit store of Rt to [Rn], then Rn = Rn +/- imm12. */
17157   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,0) ) { /* mask leaves bit 23 (U) free */
17158      UInt rT    = INSN(15,12);
17159      UInt rN    = INSN(19,16);
17160      UInt imm12 = INSN(11,0);
17161      UInt bU    = INSN(23,23); /* 1 = add the offset, 0 = subtract it */
17162      Bool valid = True;
17163      if (rN == 15 || rN == rT) valid = False; /* note: rT == 15 is not rejected here; other forms drop out */
17164      if (valid) {
17165         IRExpr* address = getIRegA(rN); /* the store uses the un-updated base */
17166         storeGuardedLE( address, getIRegA(rT), condT);
17167         IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
17168                               getIRegA(rN), mkU32(imm12));
17169         putIRegA(rN, newRn, condT, Ijk_Boring); /* base-register writeback, after the store */
17170         DIP("strt%s r%u, [r%u], #%c%u\n", /* '#' before the immediate, as in the sibling immediate forms */
17171             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
17172         goto decode_success;
17173      }
17174   }
17175
17176   /* -------------- (A2) STRT reg+reg -------------- */
17177   /* Store Register Unprivileged:
17178      strt<c> Rt, [Rn], +/-Rm{, shift}
17179      Emitted as a 32-bit store of Rt to [Rn], then Rn = Rn +/- shifted Rm. */
17180   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,0)
17181       && INSN(4,4) == 0 ) {
17182      UInt rT    = INSN(15,12);
17183      UInt rN    = INSN(19,16);
17184      UInt rM    = INSN(3,0);
17185      UInt type  = INSN(6,5);
17186      UInt imm5  = INSN(11,7);
17187      UInt bU    = INSN(23,23); /* passed to the EA helper together with type/imm5 */
17188      Bool valid = True;
17189      if (rN == 15 || rN == rT || rM == 15) valid = False; /* note: rT == 15 is not rejected here */
17190      /* FIXME We didn't do:
17191         if ArchVersion() < 6 && rM == rN then UNPREDICTABLE */
17192      if (valid) {
17193         storeGuardedLE( getIRegA(rN), getIRegA(rT), condT); /* the store uses the un-updated base */
17194         // dis_buf generated is slightly bogus, in fact.
17195         IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
17196                                                       type, imm5, dis_buf);
17197         putIRegA(rN, erN, condT, Ijk_Boring); /* base-register writeback, after the store */
17198         DIP("strt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
17199         goto decode_success;
17200      }
17201   }
17202
17203   /* ----------------------------------------------------------- */
17204   /* -- ARMv7 instructions                                    -- */
17205   /* ----------------------------------------------------------- */
17206
17207   /* -------------- read CP15 TPIDRURO register ------------- */
17208   /* mrc     p15, 0, r0, c13, c0, 3  up to
17209      mrc     p15, 0, r14, c13, c0, 3
17210   */
17211   /* I don't know whether this is really v7-only.  But anyway, we
17212      have to support it since arm-linux uses TPIDRURO as a thread
17213      state register. */
17214   if (0x0E1D0F70 == (insn & 0x0FFF0FFF)) { /* the mask ignores bits 31:28 (condition) and 15:12 (rD) */
17215      UInt rD = INSN(15,12);
17216      if (rD <= 14) {
17217         /* skip r15, that's too stupid to handle */
17218         putIRegA(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32), /* rD = 32-bit guest-state read at OFFB_TPIDRURO */
17219                      condT, Ijk_Boring);
17220         DIP("mrc%s p15,0, r%u, c13, c0, 3\n", nCC(INSN_COND), rD);
17221         goto decode_success;
17222      }
17223      /* fall through */
17224   }
17225
17226   /* Handle various kinds of barriers.  This is rather indiscriminate
17227      in the sense that they are all turned into an IR Fence, which
17228      means we don't know which they are, so the back end has to
17229      re-emit them all when it comes across an IR Fence.
17230   */
17231   /* v6 */ /* mcr 15, 0, rT, c7, c10, 5 */
17232   if (0xEE070FBA == (insn & 0xFFFF0FFF)) { /* only bits 15:12 (rT) are ignored, so bits 31:28 must be 0xE */
17233      UInt rT = INSN(15,12);
17234      if (rT <= 14) { /* rT == 15 is not handled and drops out to the later decoders */
17235         /* mcr 15, 0, rT, c7, c10, 5 (v6) equiv to DMB (v7).  Data
17236            Memory Barrier -- ensures ordering of memory accesses. */
17237         stmt( IRStmt_MBE(Imbe_Fence) ); /* emitted unconditionally; condT is not consulted */
17238         DIP("mcr 15, 0, r%u, c7, c10, 5 (data memory barrier)\n", rT);
17239         goto decode_success;
17240      }
17241      /* fall through */
17242   }
17243   /* other flavours of barrier; these match the whole instruction word exactly */
17244   switch (insn) {
17245      case 0xEE070F9A: /* v6 */
17246         /* mcr 15, 0, r0, c7, c10, 4 (v6) equiv to DSB (v7).  Data
17247            Synch Barrier -- ensures completion of memory accesses. */
17248         stmt( IRStmt_MBE(Imbe_Fence) ); /* same IR as the other barrier cases */
17249         DIP("mcr 15, 0, r0, c7, c10, 4 (data synch barrier)\n");
17250         goto decode_success;
17251      case 0xEE070F95: /* v6 */
17252         /* mcr 15, 0, r0, c7, c5, 4 (v6) equiv to ISB (v7).
17253            Instruction Synchronisation Barrier (or Flush Prefetch
17254            Buffer) -- a pipe flush, I think.  I suspect we could
17255            ignore those, but to be on the safe side emit a fence
17256            anyway. */
17257         stmt( IRStmt_MBE(Imbe_Fence) );
17258         DIP("mcr 15, 0, r0, c7, c5, 4 (insn synch barrier)\n");
17259         goto decode_success;
17260      default:
17261         break; /* not one of these barriers; carry on with the later decoders */
17262   }
17263
17264   /* ----------------------------------------------------------- */
17265   /* -- VFP (CP 10, CP 11) instructions (in ARM mode)         -- */
17266   /* ----------------------------------------------------------- */
17267
17268   if (INSN_COND != ARMCondNV) {
17269      Bool ok_vfp = decode_CP10_CP11_instruction (
17270                       &dres, INSN(27,0), condT, INSN_COND,
17271                       False/*!isT*/
17272                    );
17273      if (ok_vfp)
17274         goto decode_success;
17275   }
17276
17277   /* ----------------------------------------------------------- */
17278   /* -- NEON instructions (in ARM mode)                       -- */
17279   /* ----------------------------------------------------------- */
17280
17281   /* These are all in NV space, and so are taken care of (far) above,
17282      by a call from this function to decode_NV_instruction(). */
17283
17284   /* ----------------------------------------------------------- */
17285   /* -- v6 media instructions (in ARM mode)                   -- */
17286   /* ----------------------------------------------------------- */
17287
17288   { Bool ok_v6m = decode_V6MEDIA_instruction(
17289                       &dres, INSN(27,0), condT, INSN_COND,
17290                       False/*!isT*/
17291                   );
17292     if (ok_v6m)
17293        goto decode_success;
17294   }
17295
17296   /* ----------------------------------------------------------- */
17297   /* -- Undecodable                                           -- */
17298   /* ----------------------------------------------------------- */
17299
17300   goto decode_failure;
17301   /*NOTREACHED*/
17302
17303  decode_failure:
17304   /* All decode failures end up here. */
17305   if (sigill_diag) {
17306      vex_printf("disInstr(arm): unhandled instruction: "
17307                 "0x%x\n", insn);
17308      vex_printf("                 cond=%d(0x%x) 27:20=%u(0x%02x) "
17309                                   "4:4=%d "
17310                                   "3:0=%u(0x%x)\n",
17311                 (Int)INSN_COND, (UInt)INSN_COND,
17312                 (Int)INSN(27,20), (UInt)INSN(27,20),
17313                 (Int)INSN(4,4),
17314                 (Int)INSN(3,0), (UInt)INSN(3,0) );
17315   }
17316
17317   /* Tell the dispatcher that this insn cannot be decoded, and so has
17318      not been executed, and (is currently) the next to be executed.
17319      R15 should be up-to-date since it is made so at the start of each
17320      insn, but nevertheless be paranoid and update it again right
17321      now. */
17322   vassert(0 == (guest_R15_curr_instr_notENC & 3));
17323   llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) );
17324   dres.len         = 0;
17325   dres.whatNext    = Dis_StopHere;
17326   dres.jk_StopHere = Ijk_NoDecode;
17327   dres.continueAt  = 0;
17328   return dres;
17329
17330  decode_success:
17331   /* All decode successes end up here. */
17332   DIP("\n");
17333
17334   vassert(dres.len == 4 || dres.len == 20);
17335
17336   /* Now then.  Do we have an implicit jump to r15 to deal with? */
17337   if (r15written) {
17338      /* If we get a jump to deal with, we assume that there's been no
17339         other competing branch stuff previously generated for this
17340         insn.  That's reasonable, in the sense that the ARM insn set
17341         appears to declare as "Unpredictable" any instruction which
17342         generates more than one possible new value for r15.  Hence
17343         just assert.  The decoders themselves should check against
17344         all such instructions which are thusly Unpredictable, and
17345         decline to decode them.  Hence we should never get here if we
17346         have competing new values for r15, and hence it is safe to
17347         assert here. */
17348      vassert(dres.whatNext == Dis_Continue);
17349      vassert(irsb->next == NULL);
17350      vassert(irsb->jumpkind == Ijk_Boring);
17351      /* If r15 is unconditionally written, terminate the block by
17352         jumping to it.  If it's conditionally written, still
17353         terminate the block (a shame, but we can't do side exits to
17354         arbitrary destinations), but first jump to the next
17355         instruction if the condition doesn't hold. */
17356      /* We can't use getIReg(15) to get the destination, since that
17357         will produce r15+8, which isn't what we want.  Must use
17358         llGetIReg(15) instead. */
17359      if (r15guard == IRTemp_INVALID) {
17360         /* unconditional */
17361      } else {
17362         /* conditional */
17363         stmt( IRStmt_Exit(
17364                  unop(Iop_32to1,
17365                       binop(Iop_Xor32,
17366                             mkexpr(r15guard), mkU32(1))),
17367                  r15kind,
17368                  IRConst_U32(guest_R15_curr_instr_notENC + 4),
17369                  OFFB_R15T
17370         ));
17371      }
17372      /* This seems crazy, but we're required to finish the insn with
17373         a write to the guest PC.  As usual we rely on ir_opt to tidy
17374         up later. */
17375      llPutIReg(15, llGetIReg(15));
17376      dres.whatNext    = Dis_StopHere;
17377      dres.jk_StopHere = r15kind;
17378   } else {
17379      /* Set up the end-state in the normal way. */
17380      switch (dres.whatNext) {
17381         case Dis_Continue:
17382            llPutIReg(15, mkU32(dres.len + guest_R15_curr_instr_notENC));
17383            break;
17384         case Dis_ResteerU:
17385         case Dis_ResteerC:
17386            llPutIReg(15, mkU32(dres.continueAt));
17387            break;
17388         case Dis_StopHere:
17389            break;
17390         default:
17391            vassert(0);
17392      }
17393   }
17394
17395   return dres;
17396
17397#  undef INSN_COND
17398#  undef INSN
17399}
17400
17401
17402/*------------------------------------------------------------*/
17403/*--- Disassemble a single Thumb2 instruction              ---*/
17404/*------------------------------------------------------------*/
17405
17406static const UChar it_length_table[256]; /* fwd decl; indexed by the low byte of a candidate IT insn, entries are expected to be the number of guarded insns (1 .. 4) */
17407
17408/* NB: in Thumb mode we do fetches of regs with getIRegT, which
17409   automagically adds 4 to fetches of r15.  However, writes to regs
17410   are done with putIRegT, which disallows writes to r15.  Hence any
17411   r15 writes and associated jumps have to be done "by hand". */
17412
17413/* Disassemble a single Thumb instruction into IR.  The instruction is
17414   located in host memory at guest_instr, and has (decoded) guest IP
17415   of guest_R15_curr_instr_notENC, which will have been set before the
17416   call here. */
17417
17418static
17419DisResult disInstr_THUMB_WRK (
17420             Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
17421             Bool         resteerCisOk,
17422             void*        callback_opaque,
17423             const UChar* guest_instr,
17424             const VexArchInfo* archinfo,
17425             const VexAbiInfo*  abiinfo,
17426             Bool         sigill_diag
17427          )
17428{
17429   /* A macro to fish bits out of insn0.  There's also INSN1, to fish
17430      bits out of insn1, but that's defined only after the end of the
17431      16-bit insn decoder, so as to stop it mistakenly being used
17432      therein. */
17433#  define INSN0(_bMax,_bMin)  SLICE_UInt(((UInt)insn0), (_bMax), (_bMin))
17434
17435   DisResult dres;
17436   UShort    insn0; /*  first 16 bits of the insn */
17437   UShort    insn1; /* second 16 bits of the insn */
17438   //Bool      allow_VFP = False;
17439   //UInt      hwcaps = archinfo->hwcaps;
17440   HChar     dis_buf[128];  // big enough to hold LDMIA etc text
17441
17442   /* Summary result of the ITxxx backwards analysis: False == safe
17443      but suboptimal. */
17444   Bool guaranteedUnconditional = False;
17445
17446   /* What insn variants are we supporting today? */
17447   //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
17448   // etc etc
17449
17450   /* Set result defaults. */
17451   dres.whatNext    = Dis_Continue;
17452   dres.len         = 2;
17453   dres.continueAt  = 0;
17454   dres.jk_StopHere = Ijk_INVALID;
17455
17456   /* Set default actions for post-insn handling of writes to r15, if
17457      required. */
17458   r15written = False;
17459   r15guard   = IRTemp_INVALID; /* unconditional */
17460   r15kind    = Ijk_Boring;
17461
17462   /* Insns could be 2 or 4 bytes long.  Just get the first 16 bits at
17463      this point.  If we need the second 16, get them later.  We can't
17464      get them both out immediately because it risks a fault (very
17465      unlikely, but ..) if the second 16 bits aren't actually
17466      necessary. */
17467   insn0 = getUShortLittleEndianly( guest_instr );
17468   insn1 = 0; /* We'll get it later, once we know we need it. */
17469
17470   /* Similarly, will set this later. */
17471   IRTemp old_itstate = IRTemp_INVALID;
17472
17473   if (0) vex_printf("insn: 0x%x\n", insn0);
17474
17475   DIP("\t(thumb) 0x%x:  ", (UInt)guest_R15_curr_instr_notENC);
17476
17477   vassert(0 == (guest_R15_curr_instr_notENC & 1));
17478
17479   /* ----------------------------------------------------------- */
17480   /* Spot "Special" instructions (see comment at top of file). */
17481   {
17482      const UChar* code = guest_instr;
17483      /* Spot the 16-byte preamble:
17484
17485         ea4f 0cfc  mov.w   ip, ip, ror #3
17486         ea4f 3c7c  mov.w   ip, ip, ror #13
17487         ea4f 7c7c  mov.w   ip, ip, ror #29
17488         ea4f 4cfc  mov.w   ip, ip, ror #19
17489      */
17490      UInt word1 = 0x0CFCEA4F;
17491      UInt word2 = 0x3C7CEA4F;
17492      UInt word3 = 0x7C7CEA4F;
17493      UInt word4 = 0x4CFCEA4F;
17494      if (getUIntLittleEndianly(code+ 0) == word1 &&
17495          getUIntLittleEndianly(code+ 4) == word2 &&
17496          getUIntLittleEndianly(code+ 8) == word3 &&
17497          getUIntLittleEndianly(code+12) == word4) {
17498         /* Got a "Special" instruction preamble.  Which one is it? */
17499         // 0x 0A 0A EA 4A
17500         if (getUIntLittleEndianly(code+16) == 0x0A0AEA4A
17501                                               /* orr.w r10,r10,r10 */) {
17502            /* R3 = client_request ( R4 ) */
17503            DIP("r3 = client_request ( %%r4 )\n");
17504            llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
17505            dres.jk_StopHere = Ijk_ClientReq;
17506            dres.whatNext    = Dis_StopHere;
17507            goto decode_success;
17508         }
17509         else
17510         // 0x 0B 0B EA 4B
17511         if (getUIntLittleEndianly(code+16) == 0x0B0BEA4B
17512                                               /* orr r11,r11,r11 */) {
17513            /* R3 = guest_NRADDR */
17514            DIP("r3 = guest_NRADDR\n");
17515            dres.len = 20;
17516            llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
17517            goto decode_success;
17518         }
17519         else
17520         // 0x 0C 0C EA 4C
17521         if (getUIntLittleEndianly(code+16) == 0x0C0CEA4C
17522                                               /* orr r12,r12,r12 */) {
17523            /*  branch-and-link-to-noredir R4 */
17524            DIP("branch-and-link-to-noredir r4\n");
17525            llPutIReg(14, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
17526            llPutIReg(15, getIRegT(4));
17527            dres.jk_StopHere = Ijk_NoRedir;
17528            dres.whatNext    = Dis_StopHere;
17529            goto decode_success;
17530         }
17531         else
17532         // 0x 09 09 EA 49
17533         if (getUIntLittleEndianly(code+16) == 0x0909EA49
17534                                               /* orr r9,r9,r9 */) {
17535            /* IR injection */
17536            DIP("IR injection\n");
17537            vex_inject_ir(irsb, Iend_LE);
17538            // Invalidate the current insn. The reason is that the IRop we're
17539            // injecting here can change. In which case the translation has to
17540            // be redone. For ease of handling, we simply invalidate all the
17541            // time.
17542            stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
17543            stmt(IRStmt_Put(OFFB_CMLEN,   mkU32(20)));
17544            llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
17545            dres.whatNext    = Dis_StopHere;
17546            dres.jk_StopHere = Ijk_InvalICache;
17547            goto decode_success;
17548         }
17549         /* We don't know what it is.  Set insn0 so decode_failure
17550            can print the insn following the Special-insn preamble. */
17551         insn0 = getUShortLittleEndianly(code+16);
17552         goto decode_failure;
17553         /*NOTREACHED*/
17554      }
17555
17556   }
17557
17558   /* ----------------------------------------------------------- */
17559
17560   /* Main Thumb instruction decoder starts here.  It's a series of
17561      switches which examine ever longer bit sequences at the MSB of
17562      the instruction word, first for 16-bit insns, then for 32-bit
17563      insns. */
17564
17565   /* --- BEGIN ITxxx optimisation analysis --- */
17566   /* This is a crucial optimisation for the ITState boilerplate that
17567      follows.  Examine the 9 halfwords preceding this instruction,
17568      and if we are absolutely sure that none of them constitute an
17569      'it' instruction, then we can be sure that this instruction is
17570      not under the control of any 'it' instruction, and so
17571      guest_ITSTATE must be zero.  So write zero into ITSTATE right
17572      now, so that iropt can fold out almost all of the resulting
17573      junk.
17574
17575      If we aren't sure, we can always safely skip this step.  So be a
17576      bit conservative about it: only poke around in the same page as
17577      this instruction, lest we get a fault from the previous page
17578      that would not otherwise have happened.  The saving grace is
17579      that such skipping is pretty rare -- it only happens,
17580      statistically, 18/4096ths of the time, so is judged unlikely to
17581      be a performance problem.
17582
17583      FIXME: do better.  Take into account the number of insns covered
17584      by any IT insns we find, to rule out cases where an IT clearly
17585      cannot cover this instruction.  This would improve behaviour for
17586      branch targets immediately following an IT-guarded group that is
17587      not of full length.  Eg, (and completely ignoring issues of 16-
17588      vs 32-bit insn length):
17589
17590             ite cond
17591             insn1
17592             insn2
17593      label: insn3
17594             insn4
17595
17596      The 'it' only conditionalises insn1 and insn2.  However, the
17597      current analysis is conservative and considers insn3 and insn4
17598      also possibly guarded.  Hence if 'label:' is the start of a hot
17599      loop we will get a big performance hit.
17600   */
17601   {
17602      /* Summary result of this analysis: False == safe but
17603         suboptimal. */
17604      vassert(guaranteedUnconditional == False);
17605
17606      UInt pc = guest_R15_curr_instr_notENC;
17607      vassert(0 == (pc & 1));
17608
17609      UInt pageoff = pc & 0xFFF;
17610      if (pageoff >= 18) {
17611         /* It's safe to poke about in the 9 halfwords preceding this
17612            insn.  So, have a look at them. */
17613         guaranteedUnconditional = True; /* assume no 'it' insn found,
17614                                            till we do */
17615         UShort* hwp = (UShort*)(HWord)pc;
17616         Int i;
17617         for (i = -1; i >= -9; i--) {
17618            /* We're in the same page.  (True, but commented out due
17619               to expense.) */
17620            /*
17621            vassert( ( ((UInt)(&hwp[i])) & 0xFFFFF000 )
17622                      == ( pc & 0xFFFFF000 ) );
17623            */
17624            /* All valid IT instructions must have the form 0xBFxy,
17625               where x can be anything, but y must be nonzero.  Find
17626               the number of insns covered by it (1 .. 4) and check to
17627               see if it can possibly reach up to the instruction in
17628               question.  Some (x,y) combinations mean UNPREDICTABLE,
17629               and the table is constructed to be conservative by
17630               returning 4 for those cases, so the analysis is safe
17631               even if the code uses unpredictable IT instructions (in
17632               which case its authors are nuts, but hey.)  */
17633            UShort hwp_i = hwp[i];
17634            if (UNLIKELY((hwp_i & 0xFF00) == 0xBF00 && (hwp_i & 0xF) != 0)) {
17635               /* might be an 'it' insn. */
17636               /* # guarded insns */
17637               Int n_guarded = (Int)it_length_table[hwp_i & 0xFF];
17638               vassert(n_guarded >= 1 && n_guarded <= 4);
17639               if (n_guarded * 2 /* # guarded HWs, worst case */
17640                   > (-(i+1)))   /* -(i+1): # remaining HWs after the IT */
17641                   /* -(i+0) also seems to work, even though I think
17642                      it's wrong.  I don't understand that. */
17643                  guaranteedUnconditional = False;
17644               break;
17645            }
17646         }
17647      }
17648   }
17649   /* --- END ITxxx optimisation analysis --- */
17650
17651   /* Generate the guarding condition for this insn, by examining
17652      ITSTATE.  Assign it to condT.  Also, generate new
17653      values for ITSTATE ready for stuffing back into the
17654      guest state, but don't actually do the Put yet, since it will
17655      need to be stuffed back in only after the instruction gets to a
17656      point where it is sure to complete.  Mostly we let the code at
17657      decode_success handle this, but in cases where the insn contains
17658      a side exit, we have to update them before the exit. */
17659
17660   /* If the ITxxx optimisation analysis above could not prove that
17661      this instruction is guaranteed unconditional, we insert a
17662      lengthy IR preamble to compute the guarding condition at
17663      runtime.  If it can prove it (which obviously we hope is the
17664      normal case) then we insert a minimal preamble, which is
17665      equivalent to setting guest_ITSTATE to zero and then folding
17666      that through the full preamble (which completely disappears). */
17667
17668   IRTemp condT              = IRTemp_INVALID;
17669   IRTemp cond_AND_notInIT_T = IRTemp_INVALID;
17670
17671   IRTemp new_itstate        = IRTemp_INVALID;
17672   vassert(old_itstate == IRTemp_INVALID);
17673
17674   if (guaranteedUnconditional) {
17675      /* BEGIN "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
17676
17677      // ITSTATE = 0 :: I32
17678      IRTemp z32 = newTemp(Ity_I32);
17679      assign(z32, mkU32(0));
17680      put_ITSTATE(z32);
17681
17682      // old_itstate = 0 :: I32
17683      //
17684      // old_itstate = get_ITSTATE();
17685      old_itstate = z32; /* 0 :: I32 */
17686
17687      // new_itstate = old_itstate >> 8
17688      //             = 0 >> 8
17689      //             = 0 :: I32
17690      //
17691      // new_itstate = newTemp(Ity_I32);
17692      // assign(new_itstate,
17693      //        binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
17694      new_itstate = z32;
17695
17696      // ITSTATE = 0 :: I32(again)
17697      //
17698      // put_ITSTATE(new_itstate);
17699
17700      // condT1 = calc_cond_dyn( xor(and(old_itstate,0xF0), 0xE0) )
17701      //        = calc_cond_dyn( xor(0,0xE0) )
17702      //        = calc_cond_dyn ( 0xE0 )
17703      //        = 1 :: I32
17704      // Not that this matters, since the computed value is not used:
17705      // see condT folding below
17706      //
17707      // IRTemp condT1 = newTemp(Ity_I32);
17708      // assign(condT1,
17709      //        mk_armg_calculate_condition_dyn(
17710      //           binop(Iop_Xor32,
17711      //                 binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
17712      //                 mkU32(0xE0))
17713      //       )
17714      // );
17715
17716      // condT = 32to8(and32(old_itstate,0xF0)) == 0  ? 1  : condT1
17717      //       = 32to8(and32(0,0xF0)) == 0  ? 1  : condT1
17718      //       = 32to8(0) == 0  ? 1  : condT1
17719      //       = 0 == 0  ? 1  : condT1
17720      //       = 1
17721      //
17722      // condT = newTemp(Ity_I32);
17723      // assign(condT, IRExpr_ITE(
17724      //                  unop(Iop_32to8, binop(Iop_And32,
17725      //                                        mkexpr(old_itstate),
17726      //                                        mkU32(0xF0))),
17727      //                  mkexpr(condT1),
17728      //                  mkU32(1))
17729      //       ));
17730      condT = newTemp(Ity_I32);
17731      assign(condT, mkU32(1));
17732
17733      // notInITt = xor32(and32(old_itstate, 1), 1)
17734      //          = xor32(and32(0, 1), 1)
17735      //          = xor32(0, 1)
17736      //          = 1 :: I32
17737      //
17738      // IRTemp notInITt = newTemp(Ity_I32);
17739      // assign(notInITt,
17740      //        binop(Iop_Xor32,
17741      //              binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
17742      //              mkU32(1)));
17743
17744      // cond_AND_notInIT_T = and32(notInITt, condT)
17745      //                    = and32(1, 1)
17746      //                    = 1
17747      //
17748      // cond_AND_notInIT_T = newTemp(Ity_I32);
17749      // assign(cond_AND_notInIT_T,
17750      //        binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
17751      cond_AND_notInIT_T = condT; /* 1 :: I32 */
17752
17753      /* END "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
17754   } else {
17755      /* BEGIN { STANDARD PREAMBLE; } */
17756
17757      old_itstate = get_ITSTATE();
17758
17759      new_itstate = newTemp(Ity_I32);
17760      assign(new_itstate,
17761             binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
17762
17763      put_ITSTATE(new_itstate);
17764
17765      /* Same strategy as for ARM insns: generate a condition
17766         temporary at this point (or IRTemp_INVALID, meaning
17767         unconditional).  We leave it to lower-level instruction
17768         decoders to decide whether they can generate straight-line
17769         code, or whether they must generate a side exit before the
17770         instruction.  condT :: Ity_I32 and is always either zero or
17771         one. */
17772      IRTemp condT1 = newTemp(Ity_I32);
17773      assign(condT1,
17774             mk_armg_calculate_condition_dyn(
17775                binop(Iop_Xor32,
17776                      binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
17777                      mkU32(0xE0))
17778            )
17779      );
17780
17781      /* This is a bit complex, but needed to make Memcheck understand
17782         that, if the condition in old_itstate[7:4] denotes AL (that
17783         is, if this instruction is to be executed unconditionally),
17784         then condT does not depend on the results of calling the
17785         helper.
17786
17787         We test explicitly for old_itstate[7:4] == AL ^ 0xE, and in
17788         that case set condT directly to 1.  Else we use the results
17789         of the helper.  Since old_itstate is always defined and
17790         because Memcheck does lazy V-bit propagation through ITE,
17791         this will cause condT to always be a defined 1 if the
17792         condition is 'AL'.  From an execution semantics point of view
17793         this is irrelevant since we're merely duplicating part of the
17794         behaviour of the helper.  But it makes it clear to Memcheck,
17795         in this case, that condT does not in fact depend on the
17796         contents of the condition code thunk.  Without it, we get
17797         quite a lot of false errors.
17798
17799         So, just to clarify: from a straight semantics point of view,
17800         we can simply do "assign(condT, mkexpr(condT1))", and the
17801         simulator still runs fine.  It's just that we get loads of
17802         false errors from Memcheck. */
17803      condT = newTemp(Ity_I32);
17804      assign(condT, IRExpr_ITE(
17805                       binop(Iop_CmpNE32, binop(Iop_And32,
17806                                                mkexpr(old_itstate),
17807                                                mkU32(0xF0)),
17808                                          mkU32(0)),
17809                       mkexpr(condT1),
17810                       mkU32(1)
17811            ));
17812
17813      /* Something we don't have in ARM: generate a 0 or 1 value
17814         indicating whether or not we are in an IT block (NB: 0 = in
17815         IT block, 1 = not in IT block).  This is used to gate
17816         condition code updates in 16-bit Thumb instructions. */
17817      IRTemp notInITt = newTemp(Ity_I32);
17818      assign(notInITt,
17819             binop(Iop_Xor32,
17820                   binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
17821                   mkU32(1)));
17822
17823      /* Compute 'condT && notInITt' -- that is, the instruction is
17824         going to execute, and we're not in an IT block.  This is the
17825         gating condition for updating condition codes in 16-bit Thumb
17826         instructions, except for CMP, CMN and TST. */
17827      cond_AND_notInIT_T = newTemp(Ity_I32);
17828      assign(cond_AND_notInIT_T,
17829             binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
17830      /* END { STANDARD PREAMBLE; } */
17831   }
17832
17833
17834   /* At this point:
17835      * ITSTATE has been updated
17836      * condT holds the guarding condition for this instruction (0 or 1),
17837      * notInITt is 1 if we're in "normal" code, 0 if in an IT block
17838      * cond_AND_notInIT_T is the AND of the above two.
17839
17840      If the instruction proper can't trap, then there's nothing else
17841      to do w.r.t. ITSTATE -- just go and generate IR for the
17842      insn, taking into account the guarding condition.
17843
17844      If, however, the instruction might trap, then we must back up
17845      ITSTATE to the old value, and re-update it after the potentially
17846      trapping IR section.  A trap can happen either via a memory
17847      reference or because we need to throw SIGILL.
17848
17849      If an instruction has a side exit, we need to be sure that any
17850      ITSTATE backup is re-updated before the side exit.
17851   */
17852
17853   /* ----------------------------------------------------------- */
17854   /* --                                                       -- */
17855   /* -- Thumb 16-bit integer instructions                     -- */
17856   /* --                                                       -- */
17857   /* -- IMPORTANT: references to insn1 or INSN1 are           -- */
17858   /* --            not allowed in this section                -- */
17859   /* --                                                       -- */
17860   /* ----------------------------------------------------------- */
17861
17862   /* 16-bit instructions inside an IT block, apart from CMP, CMN and
17863      TST, do not set the condition codes.  Hence we must dynamically
17864      test for this case for every condition code update. */
17865
17866   IROp   anOp   = Iop_INVALID;
17867   const HChar* anOpNm = NULL;
17868
17869   /* ================ 16-bit 15:6 cases ================ */
17870
17871   switch (INSN0(15,6)) {
17872
17873   case 0x10a:   // CMP
17874   case 0x10b: { // CMN
17875      /* ---------------- CMP Rn, Rm ---------------- */
17876      Bool   isCMN = INSN0(15,6) == 0x10b;
17877      UInt   rN    = INSN0(2,0);
17878      UInt   rM    = INSN0(5,3);
17879      IRTemp argL  = newTemp(Ity_I32);
17880      IRTemp argR  = newTemp(Ity_I32);
17881      assign( argL, getIRegT(rN) );
17882      assign( argR, getIRegT(rM) );
17883      /* Update flags regardless of whether in an IT block or not. */
17884      setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
17885                      argL, argR, condT );
17886      DIP("%s r%u, r%u\n", isCMN ? "cmn" : "cmp", rN, rM);
17887      goto decode_success;
17888   }
17889
17890   case 0x108: {
17891      /* ---------------- TST Rn, Rm ---------------- */
17892      UInt   rN   = INSN0(2,0);
17893      UInt   rM   = INSN0(5,3);
17894      IRTemp oldC = newTemp(Ity_I32);
17895      IRTemp oldV = newTemp(Ity_I32);
17896      IRTemp res  = newTemp(Ity_I32);
17897      assign( oldC, mk_armg_calculate_flag_c() );
17898      assign( oldV, mk_armg_calculate_flag_v() );
17899      assign( res,  binop(Iop_And32, getIRegT(rN), getIRegT(rM)) );
17900      /* Update flags regardless of whether in an IT block or not. */
17901      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
17902      DIP("tst r%u, r%u\n", rN, rM);
17903      goto decode_success;
17904   }
17905
17906   case 0x109: {
17907      /* ---------------- NEGS Rd, Rm ---------------- */
17908      /* Rd = -Rm */
17909      UInt   rM   = INSN0(5,3);
17910      UInt   rD   = INSN0(2,0);
17911      IRTemp arg  = newTemp(Ity_I32);
17912      IRTemp zero = newTemp(Ity_I32);
17913      assign(arg, getIRegT(rM));
17914      assign(zero, mkU32(0));
17915      // rD can never be r15
17916      putIRegT(rD, binop(Iop_Sub32, mkexpr(zero), mkexpr(arg)), condT);
17917      setFlags_D1_D2( ARMG_CC_OP_SUB, zero, arg, cond_AND_notInIT_T);
17918      DIP("negs r%u, r%u\n", rD, rM);
17919      goto decode_success;
17920   }
17921
17922   case 0x10F: {
17923      /* ---------------- MVNS Rd, Rm ---------------- */
17924      /* Rd = ~Rm */
17925      UInt   rM   = INSN0(5,3);
17926      UInt   rD   = INSN0(2,0);
17927      IRTemp oldV = newTemp(Ity_I32);
17928      IRTemp oldC = newTemp(Ity_I32);
17929      IRTemp res  = newTemp(Ity_I32);
17930      assign( oldV, mk_armg_calculate_flag_v() );
17931      assign( oldC, mk_armg_calculate_flag_c() );
17932      assign(res, unop(Iop_Not32, getIRegT(rM)));
17933      // rD can never be r15
17934      putIRegT(rD, mkexpr(res), condT);
17935      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
17936                         cond_AND_notInIT_T );
17937      DIP("mvns r%u, r%u\n", rD, rM);
17938      goto decode_success;
17939   }
17940
17941   case 0x10C:
17942      /* ---------------- ORRS Rd, Rm ---------------- */
17943      anOp = Iop_Or32; anOpNm = "orr"; goto and_orr_eor_mul;
17944   case 0x100:
17945      /* ---------------- ANDS Rd, Rm ---------------- */
17946      anOp = Iop_And32; anOpNm = "and"; goto and_orr_eor_mul;
17947   case 0x101:
17948      /* ---------------- EORS Rd, Rm ---------------- */
17949      anOp = Iop_Xor32; anOpNm = "eor"; goto and_orr_eor_mul;
17950   case 0x10d:
17951      /* ---------------- MULS Rd, Rm ---------------- */
17952      anOp = Iop_Mul32; anOpNm = "mul"; goto and_orr_eor_mul;
17953   and_orr_eor_mul: { /* shared tail: anOp/anOpNm were set by the case above */
17954      /* Rd = Rd `op` Rm, where `op` is the IR binop held in anOp */
17955      UInt   rM   = INSN0(5,3);
17956      UInt   rD   = INSN0(2,0);
17957      IRTemp res  = newTemp(Ity_I32);
17958      IRTemp oldV = newTemp(Ity_I32);
17959      IRTemp oldC = newTemp(Ity_I32);
17960      assign( oldV, mk_armg_calculate_flag_v() ); // taken before rD is written
17961      assign( oldC, mk_armg_calculate_flag_c() ); // taken before rD is written
17962      assign( res, binop(anOp, getIRegT(rD), getIRegT(rM) ));
17963      // not safe to read guest state after here
17964      // rD can never be r15 (it is the 3-bit field INSN0(2,0))
17965      putIRegT(rD, mkexpr(res), condT);
17966      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
17967                         cond_AND_notInIT_T ); // flags gated separately from rD
17968      DIP("%s r%u, r%u\n", anOpNm, rD, rM); // NOTE(review): no "s" suffix printed
17969      goto decode_success;
17970   }
17971
17972   case 0x10E: {
17973      /* ---------------- BICS Rd, Rm ---------------- */
17974      /* Rd = Rd & ~Rm; flags are set with the LOGIC thunk op below */
17975      UInt   rM   = INSN0(5,3);
17976      UInt   rD   = INSN0(2,0);
17977      IRTemp res  = newTemp(Ity_I32);
17978      IRTemp oldV = newTemp(Ity_I32);
17979      IRTemp oldC = newTemp(Ity_I32);
17980      assign( oldV, mk_armg_calculate_flag_v() ); // taken before rD is written
17981      assign( oldC, mk_armg_calculate_flag_c() ); // taken before rD is written
17982      assign( res, binop(Iop_And32, getIRegT(rD),
17983                                    unop(Iop_Not32, getIRegT(rM) )));
17984      // not safe to read guest state after here
17985      // rD can never be r15 (it is the 3-bit field INSN0(2,0))
17986      putIRegT(rD, mkexpr(res), condT);
17987      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
17988                         cond_AND_notInIT_T ); // flags gated separately from rD
17989      DIP("bics r%u, r%u\n", rD, rM);
17990      goto decode_success;
17991   }
17992
17993   case 0x105: {
17994      /* ---------------- ADCS Rd, Rm ---------------- */
17995      /* Rd = Rd + Rm + oldC */
17996      UInt   rM   = INSN0(5,3);
17997      UInt   rD   = INSN0(2,0);
17998      IRTemp argL = newTemp(Ity_I32);
17999      IRTemp argR = newTemp(Ity_I32);
18000      IRTemp oldC = newTemp(Ity_I32);
18001      IRTemp res  = newTemp(Ity_I32);
18002      assign(argL, getIRegT(rD)); // operands are snapshotted into temps so
18003      assign(argR, getIRegT(rM)); // setFlags below sees the pre-write values
18004      assign(oldC, mk_armg_calculate_flag_c());
18005      assign(res, binop(Iop_Add32,
18006                        binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
18007                        mkexpr(oldC)));
18008      // rD can never be r15 (it is the 3-bit field INSN0(2,0))
18009      putIRegT(rD, mkexpr(res), condT);
18010      setFlags_D1_D2_ND( ARMG_CC_OP_ADC, argL, argR, oldC,
18011                         cond_AND_notInIT_T ); // inputs + old carry, not res
18012      DIP("adcs r%u, r%u\n", rD, rM);
18013      goto decode_success;
18014   }
18015
18016   case 0x106: {
18017      /* ---------------- SBCS Rd, Rm ---------------- */
18018      /* Rd = Rd - Rm - (oldC ^ 1); assumes oldC is 0 or 1 -- TODO confirm */
18019      UInt   rM   = INSN0(5,3);
18020      UInt   rD   = INSN0(2,0);
18021      IRTemp argL = newTemp(Ity_I32);
18022      IRTemp argR = newTemp(Ity_I32);
18023      IRTemp oldC = newTemp(Ity_I32);
18024      IRTemp res  = newTemp(Ity_I32);
18025      assign(argL, getIRegT(rD)); // operands are snapshotted into temps so
18026      assign(argR, getIRegT(rM)); // setFlags below sees the pre-write values
18027      assign(oldC, mk_armg_calculate_flag_c());
18028      assign(res, binop(Iop_Sub32,
18029                        binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
18030                        binop(Iop_Xor32, mkexpr(oldC), mkU32(1)))); // borrow
18031      // rD can never be r15 (it is the 3-bit field INSN0(2,0))
18032      putIRegT(rD, mkexpr(res), condT);
18033      setFlags_D1_D2_ND( ARMG_CC_OP_SBB, argL, argR, oldC,
18034                         cond_AND_notInIT_T ); // inputs + old carry, not res
18035      DIP("sbcs r%u, r%u\n", rD, rM);
18036      goto decode_success;
18037   }
18038
18039   case 0x2CB: {
18040      /* ---------------- UXTB Rd, Rm ---------------- */
18041      /* Rd = 8Uto32(Rm), implemented as Rm & 0xFF.  No flags are touched. */
18042      UInt rM = INSN0(5,3);
18043      UInt rD = INSN0(2,0);
18044      putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFF)),
18045                   condT);
18046      DIP("uxtb r%u, r%u\n", rD, rM);
18047      goto decode_success;
18048   }
18049
18050   case 0x2C9: {
18051      /* ---------------- SXTB Rd, Rm ---------------- */
18052      /* Rd = 8Sto32(Rm), implemented as (Rm << 24) >>s 24.  No flags touched. */
18053      UInt rM = INSN0(5,3);
18054      UInt rD = INSN0(2,0);
18055      putIRegT(rD, binop(Iop_Sar32,
18056                         binop(Iop_Shl32, getIRegT(rM), mkU8(24)),
18057                         mkU8(24)),
18058                   condT);
18059      DIP("sxtb r%u, r%u\n", rD, rM);
18060      goto decode_success;
18061   }
18062
18063   case 0x2CA: {
18064      /* ---------------- UXTH Rd, Rm ---------------- */
18065      /* Rd = 16Uto32(Rm), implemented as Rm & 0xFFFF.  No flags are touched. */
18066      UInt rM = INSN0(5,3);
18067      UInt rD = INSN0(2,0);
18068      putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFFFF)),
18069                   condT);
18070      DIP("uxth r%u, r%u\n", rD, rM);
18071      goto decode_success;
18072   }
18073
18074   case 0x2C8: {
18075      /* ---------------- SXTH Rd, Rm ---------------- */
18076      /* Rd = 16Sto32(Rm), implemented as (Rm << 16) >>s 16.  No flags touched. */
18077      UInt rM = INSN0(5,3);
18078      UInt rD = INSN0(2,0);
18079      putIRegT(rD, binop(Iop_Sar32,
18080                         binop(Iop_Shl32, getIRegT(rM), mkU8(16)),
18081                         mkU8(16)),
18082                   condT);
18083      DIP("sxth r%u, r%u\n", rD, rM);
18084      goto decode_success;
18085   }
18086
18087   case 0x102:   // LSLS
18088   case 0x103:   // LSRS
18089   case 0x104:   // ASRS
18090   case 0x107: { // RORS
18091      /* ---------------- LSLS Rd, Rs ---------------- */
18092      /* ---------------- LSRS Rd, Rs ---------------- */
18093      /* ---------------- ASRS Rd, Rs ---------------- */
18094      /* ---------------- RORS Rd, Rs ---------------- */
18095      /* Rd = Rd `op` Rs, and set flags.  Rd is both source and destination. */
18096      UInt   rS   = INSN0(5,3);
18097      UInt   rD   = INSN0(2,0);
18098      IRTemp oldV = newTemp(Ity_I32);
18099      IRTemp rDt  = newTemp(Ity_I32);
18100      IRTemp rSt  = newTemp(Ity_I32);
18101      IRTemp res  = newTemp(Ity_I32);
18102      IRTemp resC = newTemp(Ity_I32);
18103      const HChar* wot  = "???";
18104      assign(rSt, getIRegT(rS)); // shift amount, read before rD is written
18105      assign(rDt, getIRegT(rD)); // value to shift, read before rD is written
18106      assign(oldV, mk_armg_calculate_flag_v());
18107      /* Does not appear to be the standard 'how' encoding. */
18108      switch (INSN0(15,6)) { // same selector as the enclosing case labels
18109         case 0x102:
18110            compute_result_and_C_after_LSL_by_reg(
18111               dis_buf, &res, &resC, rDt, rSt, rD, rS
18112            );
18113            wot = "lsl";
18114            break;
18115         case 0x103:
18116            compute_result_and_C_after_LSR_by_reg(
18117               dis_buf, &res, &resC, rDt, rSt, rD, rS
18118            );
18119            wot = "lsr";
18120            break;
18121         case 0x104:
18122            compute_result_and_C_after_ASR_by_reg(
18123               dis_buf, &res, &resC, rDt, rSt, rD, rS
18124            );
18125            wot = "asr";
18126            break;
18127         case 0x107:
18128            compute_result_and_C_after_ROR_by_reg(
18129               dis_buf, &res, &resC, rDt, rSt, rD, rS
18130            );
18131            wot = "ror";
18132            break;
18133         default:
18134            /*NOTREACHED*/vassert(0);
18135      }
18136      // not safe to read guest state after this point
18137      putIRegT(rD, mkexpr(res), condT);
18138      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
18139                         cond_AND_notInIT_T ); // C from the helper, V kept
18140      DIP("%ss r%u, r%u\n", wot, rD, rS); // destination first, like siblings
18141      goto decode_success;
18142   }
18143
18144   case 0x2E8:   // REV
18145   case 0x2E9: { // REV16
18146      /* ---------------- REV   Rd, Rm ---------------- */
18147      /* ---------------- REV16 Rd, Rm ---------------- */
18148      UInt rM = INSN0(5,3);
18149      UInt rD = INSN0(2,0);
18150      Bool isREV = INSN0(15,6) == 0x2E8; // False means REV16 (0x2E9)
18151      IRTemp arg = newTemp(Ity_I32);
18152      assign(arg, getIRegT(rM));
18153      IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg); // helper does the swap
18154      putIRegT(rD, mkexpr(res), condT);
18155      DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM);
18156      goto decode_success;
18157   }
18158
18159   case 0x2EB: { // REVSH
18160      /* ---------------- REVSH Rd, Rm ---------------- */
18161      UInt rM = INSN0(5,3);
18162      UInt rD = INSN0(2,0);
18163      IRTemp irt_rM  = newTemp(Ity_I32);
18164      IRTemp irt_hi  = newTemp(Ity_I32);
18165      IRTemp irt_low = newTemp(Ity_I32);
18166      IRTemp irt_res = newTemp(Ity_I32);
18167      assign(irt_rM, getIRegT(rM));
18168      assign(irt_hi, /* (rM << 24) >>s 16: byte 0 sign-extended into 31:8 */
18169             binop(Iop_Sar32,
18170                   binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
18171                   mkU8(16)
18172             )
18173      );
18174      assign(irt_low, /* (rM >> 8) & 0xFF: byte 1 moved down to bits 7:0 */
18175             binop(Iop_And32,
18176                   binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
18177                   mkU32(0xFF)
18178             )
18179      );
18180      assign(irt_res, /* the two pieces occupy disjoint bits, so OR merges them */
18181             binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
18182      );
18183      putIRegT(rD, mkexpr(irt_res), condT);
18184      DIP("revsh r%u, r%u\n", rD, rM);
18185      goto decode_success;
18186   }
18187
18188   default:
18189      break; /* examine the next shortest prefix */
18190
18191   }
18192
18193
18194   /* ================ 16-bit 15:7 cases ================ */
18195
18196   switch (INSN0(15,7)) {
18197
18198   case BITS9(1,0,1,1,0,0,0,0,0): {
18199      /* ------------ ADD SP, #imm7 * 4 ------------ */
18200      UInt uimm7 = INSN0(6,0); // word count; scaled by 4 to a byte offset below
18201      putIRegT(13, binop(Iop_Add32, getIRegT(13), mkU32(uimm7 * 4)),
18202                   condT);
18203      DIP("add sp, #%u\n", uimm7 * 4); // prints the scaled byte offset
18204      goto decode_success;
18205   }
18206
18207   case BITS9(1,0,1,1,0,0,0,0,1): {
18208      /* ------------ SUB SP, #imm7 * 4 ------------ */
18209      UInt uimm7 = INSN0(6,0); // word count; scaled by 4 to a byte offset below
18210      putIRegT(13, binop(Iop_Sub32, getIRegT(13), mkU32(uimm7 * 4)),
18211                   condT);
18212      DIP("sub sp, #%u\n", uimm7 * 4); // prints the scaled byte offset
18213      goto decode_success;
18214   }
18215
18216   case BITS9(0,1,0,0,0,1,1,1,0): {
18217      /* ---------------- BX rM ---------------- */
18218      /* Branch to reg, and optionally switch modes.  Reg contains a
18219         suitably encoded address therefore (w CPSR.T at the bottom).
18220         Have to special-case r15, as usual.  Only decoded when bits 2:0
18221         of the insn are zero; otherwise we break out to the next switch. */
18221      UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
18222      if (BITS3(0,0,0) == INSN0(2,0)) {
18223         IRTemp dst = newTemp(Ity_I32);
18224         gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
18225         mk_skip_over_T16_if_cond_is_false(condT);
18226         condT = IRTemp_INVALID;
18227         // now uncond
18228         if (rM <= 14) {
18229            assign( dst, getIRegT(rM) );
18230         } else {
18231            vassert(rM == 15);
18232            assign( dst, mkU32(guest_R15_curr_instr_notENC + 4) );
18233         }
18234         llPutIReg(15, mkexpr(dst));
18235         dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
18236         dres.whatNext    = Dis_StopHere;
18237         DIP("bx r%u (possibly switch to ARM mode)\n", rM);
18238         goto decode_success;
18239      }
18240      break;
18241   }
18242
18243   /* ---------------- BLX rM ---------------- */
18244   /* Branch and link to interworking address in rM.  Only rM <= 14 with
18244      insn bits 2:0 equal to zero is decoded; anything else breaks out. */
18245   case BITS9(0,1,0,0,0,1,1,1,1): {
18246      if (BITS3(0,0,0) == INSN0(2,0)) {
18247         UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
18248         IRTemp dst = newTemp(Ity_I32);
18249         if (rM <= 14) {
18250            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
18251            mk_skip_over_T16_if_cond_is_false(condT);
18252            condT = IRTemp_INVALID;
18253            // now uncond
18254            /* We're returning to Thumb code, hence "| 1" */
18255            assign( dst, getIRegT(rM) ); // read rM before r14 is overwritten
18256            putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ),
18257                          IRTemp_INVALID );
18258            llPutIReg(15, mkexpr(dst));
18259            dres.jk_StopHere = Ijk_Call;
18260            dres.whatNext    = Dis_StopHere;
18261            DIP("blx r%u (possibly switch to ARM mode)\n", rM);
18262            goto decode_success;
18263         }
18264         /* else unpredictable, fall through */
18265      }
18266      break;
18267   }
18268
18269   default:
18270      break; /* examine the next shortest prefix */
18271
18272   }
18273
18274
18275   /* ================ 16-bit 15:8 cases ================ */
18276
18277   switch (INSN0(15,8)) {
18278
18279   case BITS8(1,1,0,1,1,1,1,1): {
18280      /* ---------------- SVC ---------------- */
18281      UInt imm8 = INSN0(7,0); // only imm8 == 0 is decoded here
18282      if (imm8 == 0) {
18283         /* A syscall.  We can't do this conditionally, hence: */
18284         mk_skip_over_T16_if_cond_is_false( condT );
18285         // FIXME: what if we have to back up and restart this insn?
18286         // then ITSTATE will be wrong (we'll have it as "used")
18287         // when it isn't.  Correct is to save ITSTATE in a
18288         // stash pseudo-reg, and back up from that if we have to
18289         // restart.
18290         // uncond after here
18291         llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ));
18292         dres.jk_StopHere = Ijk_Sys_syscall;
18293         dres.whatNext    = Dis_StopHere;
18294         DIP("svc #0x%08x\n", imm8);
18295         goto decode_success;
18296      }
18297      /* else fall through: nonzero imm8 is left to the later decode stages */
18298      break;
18299   }
18300
18301   case BITS8(0,1,0,0,0,1,0,0): {
18302      /* ---------------- ADD(HI) Rd, Rm ---------------- */
18303      UInt h1 = INSN0(7,7); // top bit of the 4-bit rD number
18304      UInt h2 = INSN0(6,6); // top bit of the 4-bit rM number
18305      UInt rM = (h2 << 3) | INSN0(5,3);
18306      UInt rD = (h1 << 3) | INSN0(2,0);
18307      //if (h1 == 0 && h2 == 0) { // Original T1 was more restrictive
18308      if (rD == 15 && rM == 15) {
18309         // then it's invalid
18310      } else {
18311         IRTemp res = newTemp(Ity_I32);
18312         assign( res, binop(Iop_Add32, getIRegT(rD), getIRegT(rM) ));
18313         if (rD != 15) {
18314            putIRegT( rD, mkexpr(res), condT );
18315         } else {
18316            /* Only allowed outside or last-in IT block; SIGILL if not so. */
18317            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
18318            /* jump over insn if not selected */
18319            mk_skip_over_T16_if_cond_is_false(condT);
18320            condT = IRTemp_INVALID;
18321            // now uncond
18322            /* non-interworking branch: bit 0 is forced to 1 on the new r15 */
18323            llPutIReg(15, binop(Iop_Or32, mkexpr(res), mkU32(1)));
18324            dres.jk_StopHere = Ijk_Boring;
18325            dres.whatNext    = Dis_StopHere;
18326         }
18327         DIP("add(hi) r%u, r%u\n", rD, rM);
18328         goto decode_success;
18329      }
18330      break;
18331   }
18332
18333   case BITS8(0,1,0,0,0,1,0,1): {
18334      /* ---------------- CMP(HI) Rn, Rm ---------------- */
18335      UInt h1 = INSN0(7,7); // top bit of the 4-bit rN number
18336      UInt h2 = INSN0(6,6); // top bit of the 4-bit rM number
18337      UInt rM = (h2 << 3) | INSN0(5,3);
18338      UInt rN = (h1 << 3) | INSN0(2,0);
18339      if (h1 != 0 || h2 != 0) { // both-low form is not decoded by this case
18340         IRTemp argL  = newTemp(Ity_I32);
18341         IRTemp argR  = newTemp(Ity_I32);
18342         assign( argL, getIRegT(rN) );
18343         assign( argR, getIRegT(rM) );
18344         /* Update flags regardless of whether in an IT block or not. */
18345         setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
18346         DIP("cmphi r%u, r%u\n", rN, rM);
18347         goto decode_success;
18348      }
18349      break;
18350   }
18351
18352   case BITS8(0,1,0,0,0,1,1,0): {
18353      /* ---------------- MOV(HI) Rd, Rm ---------------- */
18354      UInt h1 = INSN0(7,7); // top bit of the 4-bit rD number
18355      UInt h2 = INSN0(6,6); // top bit of the 4-bit rM number
18356      UInt rM = (h2 << 3) | INSN0(5,3);
18357      UInt rD = (h1 << 3) | INSN0(2,0);
18358      /* The old ARM ARM seems to disallow the case where both Rd and
18359         Rm are "low" registers, but newer versions allow it. */
18360      if (1 /*h1 != 0 || h2 != 0*/) {
18361         IRTemp val = newTemp(Ity_I32);
18362         assign( val, getIRegT(rM) );
18363         if (rD != 15) {
18364            putIRegT( rD, mkexpr(val), condT );
18365         } else {
18366            /* Only allowed outside or last-in IT block; SIGILL if not so. */
18367            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
18368            /* jump over insn if not selected */
18369            mk_skip_over_T16_if_cond_is_false(condT);
18370            condT = IRTemp_INVALID;
18371            // now uncond
18372            /* non-interworking branch: bit 0 is forced to 1 on the new r15 */
18373            llPutIReg(15, binop(Iop_Or32, mkexpr(val), mkU32(1)));
18374            dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
18375            dres.whatNext    = Dis_StopHere;
18376         }
18377         DIP("mov r%u, r%u\n", rD, rM);
18378         goto decode_success;
18379      }
18380      break;
18381   }
18382
18383   case BITS8(1,0,1,1,1,1,1,1): {
18384      /* ---------------- IT (if-then) ---------------- */
18385      UInt firstcond = INSN0(7,4);
18386      UInt mask = INSN0(3,0);
18387      UInt newITSTATE = 0;
18388      /* This is the ITSTATE represented as described in
18389         libvex_guest_arm.h.  It is not the ARM ARM representation. */
18390      HChar c1 = '.'; // c1..c3 are filled in by compute_ITSTATE and are only
18391      HChar c2 = '.'; // used for the "it%c%c%c" trace output below
18392      HChar c3 = '.';
18393      Bool valid = compute_ITSTATE( &newITSTATE, &c1, &c2, &c3,
18394                                    firstcond, mask );
18395      if (valid && firstcond != 0xF/*NV*/) {
18396         /* Not allowed in an IT block; SIGILL if so. */
18397         gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
18398
18399         IRTemp t = newTemp(Ity_I32);
18400         assign(t, mkU32(newITSTATE)); // put_ITSTATE takes a temp, not a const
18401         put_ITSTATE(t);
18402
18403         DIP("it%c%c%c %s\n", c1, c2, c3, nCC(firstcond));
18404         goto decode_success;
18405      }
18406      break; // invalid mask or NV condition: not decoded here
18407   }
18408
18409   case BITS8(1,0,1,1,0,0,0,1):
18410   case BITS8(1,0,1,1,0,0,1,1):
18411   case BITS8(1,0,1,1,1,0,0,1):
18412   case BITS8(1,0,1,1,1,0,1,1): {
18413      /* ---------------- CB{N}Z ---------------- */
18414      UInt rN    = INSN0(2,0);
18415      UInt bOP   = INSN0(11,11); // 1 = CBNZ, 0 = CBZ (see kond and DIP below)
18416      UInt imm32 = (INSN0(9,9) << 6) | (INSN0(7,3) << 1); // even byte offset
18417      gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
18418      /* It's a conditional branch forward. */
18419      IRTemp kond = newTemp(Ity_I1);
18420      assign( kond, binop(bOP ? Iop_CmpNE32 : Iop_CmpEQ32,
18421                          getIRegT(rN), mkU32(0)) );
18422
18423      vassert(0 == (guest_R15_curr_instr_notENC & 1));
18424      /* Looks like the nearest insn we can branch to is the one after
18425         next.  That makes sense, as there's no point in being able to
18426         encode a conditional branch to the next instruction. */
18427      UInt dst = (guest_R15_curr_instr_notENC + 4 + imm32) | 1;
18428      stmt(IRStmt_Exit( mkexpr(kond),
18429                        Ijk_Boring,
18430                        IRConst_U32(toUInt(dst)),
18431                        OFFB_R15T ));
18432      DIP("cb%s r%u, 0x%x\n", bOP ? "nz" : "z", rN, dst - 1); // drop the |1
18433      goto decode_success;
18434   }
18435
18436   default:
18437      break; /* examine the next shortest prefix */
18438
18439   }
18440
18441
18442   /* ================ 16-bit 15:9 cases ================ */
18443
18444   switch (INSN0(15,9)) {
18445
18446   case BITS7(1,0,1,1,0,1,0): {
18447      /* ---------------- PUSH ---------------- */
18448      /* This is a bit like STMxx, but way simpler. Complications we
18449         don't have to deal with:
18450         * SP being one of the transferred registers
18451         * direction (increment vs decrement)
18452         * before-vs-after-ness
18453      */
18454      Int  i, nRegs;
18455      UInt bitR    = INSN0(8,8); // when set, r14 (LR) is pushed as well
18456      UInt regList = INSN0(7,0); // one bit per register r0..r7
18457      if (bitR) regList |= (1 << 14);
18458
18459      /* At least one register must be transferred, else result is
18460         UNPREDICTABLE. */
18461      if (regList != 0) {
18462         /* Since we can't generate a guaranteed non-trapping IR
18463            sequence, (1) jump over the insn if it is gated false, and
18464            (2) back out the ITSTATE update. */
18465         mk_skip_over_T16_if_cond_is_false(condT);
18466         condT = IRTemp_INVALID;
18467         put_ITSTATE(old_itstate);
18468         // now uncond
18469
18470         nRegs = 0; // first pass: just count the registers to transfer
18471         for (i = 0; i < 16; i++) {
18472            if ((regList & (1 << i)) != 0)
18473               nRegs++;
18474         }
18475         vassert(nRegs >= 1 && nRegs <= 9); // at most r0..r7 plus r14
18476
18477         /* Move SP down first of all, so we're "covered".  And don't
18478            mess with its alignment. */
18479         IRTemp newSP = newTemp(Ity_I32);
18480         assign(newSP, binop(Iop_Sub32, getIRegT(13), mkU32(4 * nRegs)));
18481         putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
18482
18483         /* Generate a transfer base address as a forced-aligned
18484            version of the final SP value. */
18485         IRTemp base = newTemp(Ity_I32);
18486         assign(base, binop(Iop_And32, mkexpr(newSP), mkU32(~3)));
18487
18488         /* Now the transfers: lowest-numbered register at the lowest address */
18489         nRegs = 0; // reused as the running slot index
18490         for (i = 0; i < 16; i++) {
18491            if ((regList & (1 << i)) != 0) {
18492               storeLE( binop(Iop_Add32, mkexpr(base), mkU32(4 * nRegs)),
18493                        getIRegT(i) );
18494               nRegs++;
18495            }
18496         }
18497
18498         /* Reinstate the ITSTATE update. */
18499         put_ITSTATE(new_itstate);
18500
18501         DIP("push {%s0x%04x}\n", bitR ? "lr," : "", regList & 0xFF);
18502         goto decode_success;
18503      }
18504      break;
18505   }
18506
18507   case BITS7(1,0,1,1,1,1,0): {
18508      /* ---------------- POP ---------------- */
18509      Int  i, nRegs;
18510      UInt bitR    = INSN0(8,8); // when set, r15 (PC) is popped as well
18511      UInt regList = INSN0(7,0); // one bit per register r0..r7
18512
18513      /* At least one register must be transferred, else result is
18514         UNPREDICTABLE. */
18515      if (regList != 0 || bitR) {
18516         /* Since we can't generate a guaranteed non-trapping IR
18517            sequence, (1) jump over the insn if it is gated false, and
18518            (2) back out the ITSTATE update. */
18519         mk_skip_over_T16_if_cond_is_false(condT);
18520         condT = IRTemp_INVALID;
18521         put_ITSTATE(old_itstate);
18522         // now uncond
18523
18524         nRegs = 0; // first pass: count the low registers to transfer
18525         for (i = 0; i < 8; i++) {
18526            if ((regList & (1 << i)) != 0)
18527               nRegs++;
18528         }
18529         vassert(nRegs >= 0 && nRegs <= 8);
18530         vassert(bitR == 0 || bitR == 1); // it is added to nRegs below
18531
18532         IRTemp oldSP = newTemp(Ity_I32);
18533         assign(oldSP, getIRegT(13));
18534
18535         /* Generate a transfer base address as a forced-aligned
18536            version of the original SP value. */
18537         IRTemp base = newTemp(Ity_I32);
18538         assign(base, binop(Iop_And32, mkexpr(oldSP), mkU32(~3)));
18539
18540         /* Compute a new value for SP, but don't install it yet, so
18541            that we're "covered" until all the transfers are done.
18542            And don't mess with its alignment. */
18543         IRTemp newSP = newTemp(Ity_I32);
18544         assign(newSP, binop(Iop_Add32, mkexpr(oldSP),
18545                                        mkU32(4 * (nRegs + bitR))));
18546
18547         /* Now the transfers, not including PC */
18548         nRegs = 0; // reused as the running slot index
18549         for (i = 0; i < 8; i++) {
18550            if ((regList & (1 << i)) != 0) {
18551               putIRegT(i, loadLE( Ity_I32,
18552                                   binop(Iop_Add32, mkexpr(base),
18553                                                    mkU32(4 * nRegs))),
18554                           IRTemp_INVALID );
18555               nRegs++;
18556            }
18557         }
18558
18559         IRTemp newPC = IRTemp_INVALID; // only becomes a real temp when bitR
18560         if (bitR) {
18561            newPC = newTemp(Ity_I32);
18562            assign( newPC, loadLE( Ity_I32,
18563                                   binop(Iop_Add32, mkexpr(base),
18564                                                    mkU32(4 * nRegs))));
18565         }
18566
18567         /* Now we can safely install the new SP value */
18568         putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
18569
18570         /* Reinstate the ITSTATE update. */
18571         put_ITSTATE(new_itstate);
18572
18573         /* now, do we also have to do a branch?  If so, it turns out
18574            that the new PC value is encoded exactly as we need it to
18575            be -- with CPSR.T in the bottom bit.  So we can simply use
18576            it as is, no need to mess with it.  Note, therefore, this
18577            is an interworking return. */
18578         if (bitR) {
18579            llPutIReg(15, mkexpr(newPC));
18580            dres.jk_StopHere = Ijk_Ret;
18581            dres.whatNext    = Dis_StopHere;
18582         }
18583
18584         DIP("pop {%s0x%04x}\n", bitR ? "pc," : "", regList & 0xFF);
18585         goto decode_success;
18586      }
18587      break;
18588   }
18589
18590   case BITS7(0,0,0,1,1,1,0):   /* ADDS */
18591   case BITS7(0,0,0,1,1,1,1): { /* SUBS */
18592      /* ---------------- ADDS Rd, Rn, #uimm3 ---------------- */
18593      /* ---------------- SUBS Rd, Rn, #uimm3 ---------------- */
18594      UInt   uimm3 = INSN0(8,6);
18595      UInt   rN    = INSN0(5,3);
18596      UInt   rD    = INSN0(2,0);
18597      UInt   isSub = INSN0(9,9); // the one bit that differs between the cases
18598      IRTemp argL  = newTemp(Ity_I32);
18599      IRTemp argR  = newTemp(Ity_I32);
18600      assign( argL, getIRegT(rN) ); // snapshot so setFlags sees the old rN
18601      assign( argR, mkU32(uimm3) );
18602      putIRegT(rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
18603                         mkexpr(argL), mkexpr(argR)),
18604                   condT);
18605      setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
18606                      argL, argR, cond_AND_notInIT_T ); // flags from operands
18607      DIP("%s r%u, r%u, #%u\n", isSub ? "subs" : "adds", rD, rN, uimm3);
18608      goto decode_success;
18609   }
18610
18611   case BITS7(0,0,0,1,1,0,0):   /* ADDS */
18612   case BITS7(0,0,0,1,1,0,1): { /* SUBS */
18613      /* ---------------- ADDS Rd, Rn, Rm ---------------- */
18614      /* ---------------- SUBS Rd, Rn, Rm ---------------- */
18615      UInt   rM    = INSN0(8,6);
18616      UInt   rN    = INSN0(5,3);
18617      UInt   rD    = INSN0(2,0);
18618      UInt   isSub = INSN0(9,9); // the one bit that differs between the cases
18619      IRTemp argL  = newTemp(Ity_I32);
18620      IRTemp argR  = newTemp(Ity_I32);
18621      assign( argL, getIRegT(rN) ); // snapshots, so setFlags sees the values
18622      assign( argR, getIRegT(rM) ); // from before rD is written
18623      putIRegT( rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
18624                          mkexpr(argL), mkexpr(argR)),
18625                    condT );
18626      setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
18627                      argL, argR, cond_AND_notInIT_T ); // flags from operands
18628      DIP("%s r%u, r%u, r%u\n", isSub ? "subs" : "adds", rD, rN, rM);
18629      goto decode_success;
18630   }
18631
18632   case BITS7(0,1,0,1,0,0,0):   /* STR */
18633   case BITS7(0,1,0,1,1,0,0): { /* LDR */
18634      /* ------------- LDR Rd, [Rn, Rm] ------------- */
18635      /* ------------- STR Rd, [Rn, Rm] ------------- */
18636      /* LDR/STR Rd, [Rn + Rm]; the access itself is guarded by condT */
18637      UInt    rD   = INSN0(2,0);
18638      UInt    rN   = INSN0(5,3);
18639      UInt    rM   = INSN0(8,6);
18640      UInt    isLD = INSN0(11,11);
18641
18642      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
18643      put_ITSTATE(old_itstate); // backout
18644      if (isLD) {
18645         IRTemp tD = newTemp(Ity_I32);
18646         loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
18647         putIRegT(rD, mkexpr(tD), IRTemp_INVALID); // old rD is the alt value
18648      } else {
18649         storeGuardedLE(ea, getIRegT(rD), condT);
18650      }
18651      put_ITSTATE(new_itstate); // restore
18652
18653      DIP("%s r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
18654      goto decode_success;
18655   }
18656
18657   case BITS7(0,1,0,1,0,0,1):   /* STRH */
18658   case BITS7(0,1,0,1,1,0,1): { /* LDRH */
18659      /* ------------- LDRH Rd, [Rn, Rm] ------------- */
18660      /* ------------- STRH Rd, [Rn, Rm] ------------- */
18661      /* LDRH/STRH Rd, [Rn + Rm]; the access itself is guarded by condT */
18662      UInt    rD   = INSN0(2,0);
18663      UInt    rN   = INSN0(5,3);
18664      UInt    rM   = INSN0(8,6);
18665      UInt    isLD = INSN0(11,11);
18666
18667      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
18668      put_ITSTATE(old_itstate); // backout
18669      if (isLD) {
18670         IRTemp tD = newTemp(Ity_I32);
18671         loadGuardedLE(tD, ILGop_16Uto32, ea, llGetIReg(rD), condT);
18672         putIRegT(rD, mkexpr(tD), IRTemp_INVALID); // old rD is the alt value
18673      } else {
18674         storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
18675      }
18676      put_ITSTATE(new_itstate); // restore
18677
18678      DIP("%sh r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
18679      goto decode_success;
18680   }
18681
18682   case BITS7(0,1,0,1,1,1,1): {
18683      /* ------------- LDRSH Rd, [Rn, Rm] ------------- */
18684      /* LDRSH Rd, [Rn + Rm]; the load itself is guarded by condT */
18685      UInt    rD = INSN0(2,0);
18686      UInt    rN = INSN0(5,3);
18687      UInt    rM = INSN0(8,6);
18688
18689      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
18690      put_ITSTATE(old_itstate); // backout
18691      IRTemp tD = newTemp(Ity_I32);
18692      loadGuardedLE(tD, ILGop_16Sto32, ea, llGetIReg(rD), condT);
18693      putIRegT(rD, mkexpr(tD), IRTemp_INVALID); // old rD is the alt value
18694      put_ITSTATE(new_itstate); // restore
18695
18696      DIP("ldrsh r%u, [r%u, r%u]\n", rD, rN, rM);
18697      goto decode_success;
18698   }
18699
18700   case BITS7(0,1,0,1,0,1,1): {
18701      /* ------------- LDRSB Rd, [Rn, Rm] ------------- */
18702      /* LDRSB Rd, [Rn + Rm]; the load itself is guarded by condT */
18703      UInt    rD = INSN0(2,0);
18704      UInt    rN = INSN0(5,3);
18705      UInt    rM = INSN0(8,6);
18706
18707      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
18708      put_ITSTATE(old_itstate); // backout
18709      IRTemp tD = newTemp(Ity_I32);
18710      loadGuardedLE(tD, ILGop_8Sto32, ea, llGetIReg(rD), condT);
18711      putIRegT(rD, mkexpr(tD), IRTemp_INVALID); // old rD is the alt value
18712      put_ITSTATE(new_itstate); // restore
18713
18714      DIP("ldrsb r%u, [r%u, r%u]\n", rD, rN, rM);
18715      goto decode_success;
18716   }
18717
18718   case BITS7(0,1,0,1,0,1,0):   /* STRB */
18719   case BITS7(0,1,0,1,1,1,0): { /* LDRB */
18720      /* ------------- LDRB Rd, [Rn, Rm] ------------- */
18721      /* ------------- STRB Rd, [Rn, Rm] ------------- */
18722      /* LDRB/STRB Rd, [Rn + Rm]; the access itself is guarded by condT */
18723      UInt    rD   = INSN0(2,0);
18724      UInt    rN   = INSN0(5,3);
18725      UInt    rM   = INSN0(8,6);
18726      UInt    isLD = INSN0(11,11);
18727
18728      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
18729      put_ITSTATE(old_itstate); // backout
18730      if (isLD) {
18731         IRTemp tD = newTemp(Ity_I32);
18732         loadGuardedLE(tD, ILGop_8Uto32, ea, llGetIReg(rD), condT);
18733         putIRegT(rD, mkexpr(tD), IRTemp_INVALID); // old rD is the alt value
18734      } else {
18735         storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
18736      }
18737      put_ITSTATE(new_itstate); // restore
18738
18739      DIP("%sb r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
18740      goto decode_success;
18741   }
18742
18743   default:
18744      break; /* examine the next shortest prefix */
18745
18746   }
18747
18748
18749   /* ================ 16-bit 15:11 cases ================ */
18750
18751   switch (INSN0(15,11)) {
18752
18753   case BITS5(0,0,1,1,0):   /* ADDS */
18754   case BITS5(0,0,1,1,1): { /* SUBS */
18755      /* ---------------- ADDS Rn, #uimm8 ---------------- */
18756      /* ---------------- SUBS Rn, #uimm8 ---------------- */
18757      UInt   isSub = INSN0(11,11); // the one bit that differs between the cases
18758      UInt   rN    = INSN0(10,8);  // both source and destination
18759      UInt   uimm8 = INSN0(7,0);
18760      IRTemp argL  = newTemp(Ity_I32);
18761      IRTemp argR  = newTemp(Ity_I32);
18762      assign( argL, getIRegT(rN) ); // snapshot so setFlags sees the old rN
18763      assign( argR, mkU32(uimm8) );
18764      putIRegT( rN, binop(isSub ? Iop_Sub32 : Iop_Add32,
18765                          mkexpr(argL), mkexpr(argR)), condT );
18766      setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
18767                      argL, argR, cond_AND_notInIT_T ); // flags from operands
18768      DIP("%s r%u, #%u\n", isSub ? "subs" : "adds", rN, uimm8);
18769      goto decode_success;
18770   }
18771
18772   case BITS5(1,0,1,0,0): {
18773      /* ---------------- ADD rD, PC, #imm8 * 4 ---------------- */
18774      /* a.k.a. ADR */
18775      /* rD = align4(PC) + imm8 * 4 */
18776      UInt rD   = INSN0(10,8);
18777      UInt imm8 = INSN0(7,0);
18778      putIRegT(rD, binop(Iop_Add32,
18779                         binop(Iop_And32, getIRegT(15), mkU32(~3U)),
18780                         mkU32(imm8 * 4)),
18781                   condT);
18782      DIP("add r%u, pc, #%u\n", rD, imm8 * 4);
18783      goto decode_success;
18784   }
18785
18786   case BITS5(1,0,1,0,1): {
18787      /* ---------------- ADD rD, SP, #imm8 * 4 ---------------- */
18788      UInt rD   = INSN0(10,8);
18789      UInt imm8 = INSN0(7,0);
18790      putIRegT(rD, binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4)),
18791                   condT);
18792      DIP("add r%u, r13, #%u\n", rD, imm8 * 4);
18793      goto decode_success;
18794   }
18795
18796   case BITS5(0,0,1,0,1): {
18797      /* ---------------- CMP Rn, #uimm8 ---------------- */
18798      UInt   rN    = INSN0(10,8);
18799      UInt   uimm8 = INSN0(7,0);
18800      IRTemp argL  = newTemp(Ity_I32);
18801      IRTemp argR  = newTemp(Ity_I32);
18802      assign( argL, getIRegT(rN) );
18803      assign( argR, mkU32(uimm8) );
18804      /* Update flags regardless of whether in an IT block or not. */
18805      setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
18806      DIP("cmp r%u, #%u\n", rN, uimm8);
18807      goto decode_success;
18808   }
18809
18810   case BITS5(0,0,1,0,0): {
18811      /* -------------- (T1) MOVS Rn, #uimm8 -------------- */
18812      UInt   rD    = INSN0(10,8);
18813      UInt   uimm8 = INSN0(7,0);
18814      IRTemp oldV  = newTemp(Ity_I32);
18815      IRTemp oldC  = newTemp(Ity_I32);
18816      IRTemp res   = newTemp(Ity_I32);
18817      assign( oldV, mk_armg_calculate_flag_v() );
18818      assign( oldC, mk_armg_calculate_flag_c() );
18819      assign( res, mkU32(uimm8) );
18820      putIRegT(rD, mkexpr(res), condT);
18821      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
18822                         cond_AND_notInIT_T );
18823      DIP("movs r%u, #%u\n", rD, uimm8);
18824      goto decode_success;
18825   }
18826
18827   case BITS5(0,1,0,0,1): {
18828      /* ------------- LDR Rd, [PC, #imm8 * 4] ------------- */
18829      /* LDR Rd, [align4(PC) + imm8 * 4] */
18830      UInt   rD   = INSN0(10,8);
18831      UInt   imm8 = INSN0(7,0);
18832      IRTemp ea   = newTemp(Ity_I32);
18833
18834      assign(ea, binop(Iop_Add32,
18835                       binop(Iop_And32, getIRegT(15), mkU32(~3U)),
18836                       mkU32(imm8 * 4)));
18837      put_ITSTATE(old_itstate); // backout
18838      IRTemp tD = newTemp(Ity_I32);
18839      loadGuardedLE( tD, ILGop_Ident32, mkexpr(ea), llGetIReg(rD), condT );
18840      putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18841      put_ITSTATE(new_itstate); // restore
18842
18843      DIP("ldr r%u, [pc, #%u]\n", rD, imm8 * 4);
18844      goto decode_success;
18845   }
18846
18847   case BITS5(0,1,1,0,0):   /* STR */
18848   case BITS5(0,1,1,0,1): { /* LDR */
18849      /* ------------- LDR Rd, [Rn, #imm5 * 4] ------------- */
18850      /* ------------- STR Rd, [Rn, #imm5 * 4] ------------- */
18851      /* LDR/STR Rd, [Rn + imm5 * 4] */
18852      UInt    rD   = INSN0(2,0);
18853      UInt    rN   = INSN0(5,3);
18854      UInt    imm5 = INSN0(10,6);
18855      UInt    isLD = INSN0(11,11);
18856
18857      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 4));
18858      put_ITSTATE(old_itstate); // backout
18859      if (isLD) {
18860         IRTemp tD = newTemp(Ity_I32);
18861         loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
18862         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18863      } else {
18864         storeGuardedLE( ea, getIRegT(rD), condT );
18865      }
18866      put_ITSTATE(new_itstate); // restore
18867
18868      DIP("%s r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 4);
18869      goto decode_success;
18870   }
18871
18872   case BITS5(1,0,0,0,0):   /* STRH */
18873   case BITS5(1,0,0,0,1): { /* LDRH */
18874      /* ------------- LDRH Rd, [Rn, #imm5 * 2] ------------- */
18875      /* ------------- STRH Rd, [Rn, #imm5 * 2] ------------- */
18876      /* LDRH/STRH Rd, [Rn + imm5 * 2] */
18877      UInt    rD   = INSN0(2,0);
18878      UInt    rN   = INSN0(5,3);
18879      UInt    imm5 = INSN0(10,6);
18880      UInt    isLD = INSN0(11,11);
18881
18882      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 2));
18883      put_ITSTATE(old_itstate); // backout
18884      if (isLD) {
18885         IRTemp tD = newTemp(Ity_I32);
18886         loadGuardedLE( tD, ILGop_16Uto32, ea, llGetIReg(rD), condT );
18887         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18888      } else {
18889         storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
18890      }
18891      put_ITSTATE(new_itstate); // restore
18892
18893      DIP("%sh r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 2);
18894      goto decode_success;
18895   }
18896
18897   case BITS5(0,1,1,1,0):   /* STRB */
18898   case BITS5(0,1,1,1,1): { /* LDRB */
18899      /* ------------- LDRB Rd, [Rn, #imm5] ------------- */
18900      /* ------------- STRB Rd, [Rn, #imm5] ------------- */
18901      /* LDRB/STRB Rd, [Rn + imm5] */
18902      UInt    rD   = INSN0(2,0);
18903      UInt    rN   = INSN0(5,3);
18904      UInt    imm5 = INSN0(10,6);
18905      UInt    isLD = INSN0(11,11);
18906
18907      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5));
18908      put_ITSTATE(old_itstate); // backout
18909      if (isLD) {
18910         IRTemp tD = newTemp(Ity_I32);
18911         loadGuardedLE( tD, ILGop_8Uto32, ea, llGetIReg(rD), condT );
18912         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18913      } else {
18914         storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
18915      }
18916      put_ITSTATE(new_itstate); // restore
18917
18918      DIP("%sb r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5);
18919      goto decode_success;
18920   }
18921
18922   case BITS5(1,0,0,1,0):   /* STR */
18923   case BITS5(1,0,0,1,1): { /* LDR */
18924      /* ------------- LDR Rd, [SP, #imm8 * 4] ------------- */
18925      /* ------------- STR Rd, [SP, #imm8 * 4] ------------- */
18926      /* LDR/STR Rd, [SP + imm8 * 4] */
18927      UInt rD    = INSN0(10,8);
18928      UInt imm8  = INSN0(7,0);
18929      UInt isLD  = INSN0(11,11);
18930
18931      IRExpr* ea = binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4));
18932      put_ITSTATE(old_itstate); // backout
18933      if (isLD) {
18934         IRTemp tD = newTemp(Ity_I32);
18935         loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
18936         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18937      } else {
18938         storeGuardedLE(ea, getIRegT(rD), condT);
18939      }
18940      put_ITSTATE(new_itstate); // restore
18941
18942      DIP("%s r%u, [sp, #%u]\n", isLD ? "ldr" : "str", rD, imm8 * 4);
18943      goto decode_success;
18944   }
18945
18946   case BITS5(1,1,0,0,1): {
18947      /* ------------- LDMIA Rn!, {reglist} ------------- */
18948      Int i, nRegs = 0;
18949      UInt rN   = INSN0(10,8);
18950      UInt list = INSN0(7,0);
18951      /* Empty lists aren't allowed. */
18952      if (list != 0) {
18953         mk_skip_over_T16_if_cond_is_false(condT);
18954         condT = IRTemp_INVALID;
18955         put_ITSTATE(old_itstate);
18956         // now uncond
18957
18958         IRTemp oldRn = newTemp(Ity_I32);
18959         IRTemp base  = newTemp(Ity_I32);
18960         assign(oldRn, getIRegT(rN));
18961         assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
18962         for (i = 0; i < 8; i++) {
18963            if (0 == (list & (1 << i)))
18964               continue;
18965            nRegs++;
18966            putIRegT(
18967               i, loadLE(Ity_I32,
18968                         binop(Iop_Add32, mkexpr(base),
18969                                          mkU32(nRegs * 4 - 4))),
18970               IRTemp_INVALID
18971            );
18972         }
18973         /* Only do the writeback for rN if it isn't in the list of
18974            registers to be transferred. */
18975         if (0 == (list & (1 << rN))) {
18976            putIRegT(rN,
18977                     binop(Iop_Add32, mkexpr(oldRn),
18978                                      mkU32(nRegs * 4)),
18979                     IRTemp_INVALID
18980            );
18981         }
18982
18983         /* Reinstate the ITSTATE update. */
18984         put_ITSTATE(new_itstate);
18985
18986         DIP("ldmia r%u!, {0x%04x}\n", rN, list);
18987         goto decode_success;
18988      }
18989      break;
18990   }
18991
18992   case BITS5(1,1,0,0,0): {
18993      /* ------------- STMIA Rn!, {reglist} ------------- */
18994      Int i, nRegs = 0;
18995      UInt rN   = INSN0(10,8);
18996      UInt list = INSN0(7,0);
18997      /* Empty lists aren't allowed.  Also, if rN is in the list then
18998         it must be the lowest numbered register in the list. */
18999      Bool valid = list != 0;
19000      if (valid && 0 != (list & (1 << rN))) {
19001         for (i = 0; i < rN; i++) {
19002            if (0 != (list & (1 << i)))
19003               valid = False;
19004         }
19005      }
19006      if (valid) {
19007         mk_skip_over_T16_if_cond_is_false(condT);
19008         condT = IRTemp_INVALID;
19009         put_ITSTATE(old_itstate);
19010         // now uncond
19011
19012         IRTemp oldRn = newTemp(Ity_I32);
19013         IRTemp base = newTemp(Ity_I32);
19014         assign(oldRn, getIRegT(rN));
19015         assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
19016         for (i = 0; i < 8; i++) {
19017            if (0 == (list & (1 << i)))
19018               continue;
19019            nRegs++;
19020            storeLE( binop(Iop_Add32, mkexpr(base), mkU32(nRegs * 4 - 4)),
19021                     getIRegT(i) );
19022         }
19023         /* Always do the writeback. */
19024         putIRegT(rN,
19025                  binop(Iop_Add32, mkexpr(oldRn),
19026                                   mkU32(nRegs * 4)),
19027                  IRTemp_INVALID);
19028
19029         /* Reinstate the ITSTATE update. */
19030         put_ITSTATE(new_itstate);
19031
19032         DIP("stmia r%u!, {0x%04x}\n", rN, list);
19033         goto decode_success;
19034      }
19035      break;
19036   }
19037
19038   case BITS5(0,0,0,0,0):   /* LSLS */
19039   case BITS5(0,0,0,0,1):   /* LSRS */
19040   case BITS5(0,0,0,1,0): { /* ASRS */
19041      /* ---------------- LSLS Rd, Rm, #imm5 ---------------- */
19042      /* ---------------- LSRS Rd, Rm, #imm5 ---------------- */
19043      /* ---------------- ASRS Rd, Rm, #imm5 ---------------- */
19044      UInt   rD   = INSN0(2,0);
19045      UInt   rM   = INSN0(5,3);
19046      UInt   imm5 = INSN0(10,6);
19047      IRTemp res  = newTemp(Ity_I32);
19048      IRTemp resC = newTemp(Ity_I32);
19049      IRTemp rMt  = newTemp(Ity_I32);
19050      IRTemp oldV = newTemp(Ity_I32);
19051      const HChar* wot  = "???";
19052      assign(rMt, getIRegT(rM));
19053      assign(oldV, mk_armg_calculate_flag_v());
19054      /* Looks like INSN0(12,11) are the standard 'how' encoding.
19055         Could compactify if the ROR case later appears. */
19056      switch (INSN0(15,11)) {
19057         case BITS5(0,0,0,0,0):
19058            compute_result_and_C_after_LSL_by_imm5(
19059               dis_buf, &res, &resC, rMt, imm5, rM
19060            );
19061            wot = "lsl";
19062            break;
19063         case BITS5(0,0,0,0,1):
19064            compute_result_and_C_after_LSR_by_imm5(
19065               dis_buf, &res, &resC, rMt, imm5, rM
19066            );
19067            wot = "lsr";
19068            break;
19069         case BITS5(0,0,0,1,0):
19070            compute_result_and_C_after_ASR_by_imm5(
19071               dis_buf, &res, &resC, rMt, imm5, rM
19072            );
19073            wot = "asr";
19074            break;
19075         default:
19076            /*NOTREACHED*/vassert(0);
19077      }
19078      // not safe to read guest state after this point
19079      putIRegT(rD, mkexpr(res), condT);
19080      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
19081                         cond_AND_notInIT_T );
19082      /* ignore buf and roll our own output */
19083      DIP("%ss r%u, r%u, #%u\n", wot, rD, rM, imm5);
19084      goto decode_success;
19085   }
19086
19087   case BITS5(1,1,1,0,0): {
19088      /* ---------------- B #simm11 ---------------- */
19089      Int  simm11 = INSN0(10,0);
19090           simm11 = (simm11 << 21) >> 20;
19091      UInt dst    = simm11 + guest_R15_curr_instr_notENC + 4;
19092      /* Only allowed outside or last-in IT block; SIGILL if not so. */
19093      gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19094      // and skip this insn if not selected; being cleverer is too
19095      // difficult
19096      mk_skip_over_T16_if_cond_is_false(condT);
19097      condT = IRTemp_INVALID;
19098      // now uncond
19099      llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
19100      dres.jk_StopHere = Ijk_Boring;
19101      dres.whatNext    = Dis_StopHere;
19102      DIP("b 0x%x\n", dst);
19103      goto decode_success;
19104   }
19105
19106   default:
19107      break; /* examine the next shortest prefix */
19108
19109   }
19110
19111
19112   /* ================ 16-bit 15:12 cases ================ */
19113
19114   switch (INSN0(15,12)) {
19115
19116   case BITS4(1,1,0,1): {
19117      /* ---------------- Bcond #simm8 ---------------- */
19118      UInt cond  = INSN0(11,8);
19119      Int  simm8 = INSN0(7,0);
19120           simm8 = (simm8 << 24) >> 23;
19121      UInt dst   = simm8 + guest_R15_curr_instr_notENC + 4;
19122      if (cond != ARMCondAL && cond != ARMCondNV) {
19123         /* Not allowed in an IT block; SIGILL if so. */
19124         gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
19125
19126         IRTemp kondT = newTemp(Ity_I32);
19127         assign( kondT, mk_armg_calculate_condition(cond) );
19128         stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
19129                            Ijk_Boring,
19130                            IRConst_U32(dst | 1/*CPSR.T*/),
19131                            OFFB_R15T ));
19132         llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2)
19133                              | 1 /*CPSR.T*/ ));
19134         dres.jk_StopHere = Ijk_Boring;
19135         dres.whatNext    = Dis_StopHere;
19136         DIP("b%s 0x%x\n", nCC(cond), dst);
19137         goto decode_success;
19138      }
19139      break;
19140   }
19141
19142   default:
19143      break; /* hmm, nothing matched */
19144
19145   }
19146
19147   /* ================ 16-bit misc cases ================ */
19148
19149   switch (INSN0(15,0)) {
19150      case 0xBF00:
19151         /* ------ NOP ------ */
19152         DIP("nop\n");
19153         goto decode_success;
19154      case 0xBF20:
19155         /* ------ WFE ------ */
19156         /* WFE gets used as a spin-loop hint.  Do the usual thing,
19157            which is to continue after yielding. */
19158         stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
19159                            Ijk_Yield,
19160                            IRConst_U32((guest_R15_curr_instr_notENC + 2)
19161                                        | 1 /*CPSR.T*/),
19162                            OFFB_R15T ));
19163         DIP("wfe\n");
19164         goto decode_success;
19165      case 0xBF40:
19166         /* ------ SEV ------ */
19167         /* Treat this as a no-op.  Any matching WFEs won't really
19168            cause the host CPU to snooze; they just cause V to try to
19169            run some other thread for a while.  So there's no point in
19170            really doing anything for SEV. */
19171         DIP("sev\n");
19172         goto decode_success;
19173      default:
19174         break; /* fall through */
19175   }
19176
19177   /* ----------------------------------------------------------- */
19178   /* --                                                       -- */
19179   /* -- Thumb 32-bit integer instructions                     -- */
19180   /* --                                                       -- */
19181   /* ----------------------------------------------------------- */
19182
19183#  define INSN1(_bMax,_bMin)  SLICE_UInt(((UInt)insn1), (_bMax), (_bMin))
19184
19185   /* second 16 bits of the instruction, if any */
19186   vassert(insn1 == 0);
19187   insn1 = getUShortLittleEndianly( guest_instr+2 );
19188
19189   anOp   = Iop_INVALID; /* paranoia */
19190   anOpNm = NULL;        /* paranoia */
19191
19192   /* Change result defaults to suit 32-bit insns. */
19193   vassert(dres.whatNext   == Dis_Continue);
19194   vassert(dres.len        == 2);
19195   vassert(dres.continueAt == 0);
19196   dres.len = 4;
19197
19198   /* ---------------- BL/BLX simm25 ---------------- */
19199   if (BITS5(1,1,1,1,0) == INSN0(15,11) && BITS2(1,1) == INSN1(15,14)) {
19200      UInt isBL = INSN1(12,12);
19201      UInt bS   = INSN0(10,10);
19202      UInt bJ1  = INSN1(13,13);
19203      UInt bJ2  = INSN1(11,11);
19204      UInt bI1  = 1 ^ (bJ1 ^ bS);
19205      UInt bI2  = 1 ^ (bJ2 ^ bS);
19206      Int simm25
19207         =   (bS          << (1 + 1 + 10 + 11 + 1))
19208           | (bI1         << (1 + 10 + 11 + 1))
19209           | (bI2         << (10 + 11 + 1))
19210           | (INSN0(9,0)  << (11 + 1))
19211           | (INSN1(10,0) << 1);
19212      simm25 = (simm25 << 7) >> 7;
19213
19214      vassert(0 == (guest_R15_curr_instr_notENC & 1));
19215      UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
19216
19217      /* One further validity case to check: in the case of BLX
19218         (not-BL), that insn1[0] must be zero. */
19219      Bool valid = True;
19220      if (isBL == 0 && INSN1(0,0) == 1) valid = False;
19221      if (valid) {
19222         /* Only allowed outside or last-in IT block; SIGILL if not so. */
19223         gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19224         // and skip this insn if not selected; being cleverer is too
19225         // difficult
19226         mk_skip_over_T32_if_cond_is_false(condT);
19227         condT = IRTemp_INVALID;
19228         // now uncond
19229
19230         /* We're returning to Thumb code, hence "| 1" */
19231         putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 4) | 1 ),
19232                   IRTemp_INVALID);
19233         if (isBL) {
19234            /* BL: unconditional T -> T call */
19235            /* we're calling Thumb code, hence "| 1" */
19236            llPutIReg(15, mkU32( dst | 1 ));
19237            DIP("bl 0x%x (stay in Thumb mode)\n", dst);
19238         } else {
19239            /* BLX: unconditional T -> A call */
19240            /* we're calling ARM code, hence "& ~3" to align to a
19241               valid ARM insn address */
19242            llPutIReg(15, mkU32( dst & ~3 ));
19243            DIP("blx 0x%x (switch to ARM mode)\n", dst & ~3);
19244         }
19245         dres.whatNext    = Dis_StopHere;
19246         dres.jk_StopHere = Ijk_Call;
19247         goto decode_success;
19248      }
19249   }
19250
19251   /* ---------------- {LD,ST}M{IA,DB} ---------------- */
19252   if (0x3a2 == INSN0(15,6) // {LD,ST}MIA
19253       || 0x3a4 == INSN0(15,6)) { // {LD,ST}MDB
19254      UInt bW      = INSN0(5,5); /* writeback Rn ? */
19255      UInt bL      = INSN0(4,4);
19256      UInt rN      = INSN0(3,0);
19257      UInt bP      = INSN1(15,15); /* reglist entry for r15 */
19258      UInt bM      = INSN1(14,14); /* reglist entry for r14 */
19259      UInt rLmost  = INSN1(12,0);  /* reglist entry for r0 .. 12 */
19260      UInt rL13    = INSN1(13,13); /* must be zero */
19261      UInt regList = 0;
19262      Bool valid   = True;
19263
19264      UInt bINC    = 1;
19265      UInt bBEFORE = 0;
19266      if (INSN0(15,6) == 0x3a4) {
19267         bINC    = 0;
19268         bBEFORE = 1;
19269      }
19270
19271      /* detect statically invalid cases, and construct the final
19272         reglist */
19273      if (rL13 == 1)
19274         valid = False;
19275
19276      if (bL == 1) {
19277         regList = (bP << 15) | (bM << 14) | rLmost;
19278         if (rN == 15)                       valid = False;
19279         if (popcount32(regList) < 2)        valid = False;
19280         if (bP == 1 && bM == 1)             valid = False;
19281         if (bW == 1 && (regList & (1<<rN))) valid = False;
19282      } else {
19283         regList = (bM << 14) | rLmost;
19284         if (bP == 1)                        valid = False;
19285         if (rN == 15)                       valid = False;
19286         if (popcount32(regList) < 2)        valid = False;
19287         if (bW == 1 && (regList & (1<<rN))) valid = False;
19288      }
19289
19290      if (valid) {
19291         if (bL == 1 && bP == 1) {
19292            // We'll be writing the PC.  Hence:
19293            /* Only allowed outside or last-in IT block; SIGILL if not so. */
19294            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19295         }
19296
19297         /* Go uncond: */
19298         mk_skip_over_T32_if_cond_is_false(condT);
19299         condT = IRTemp_INVALID;
19300         // now uncond
19301
19302         /* Generate the IR.  This might generate a write to R15. */
19303         mk_ldm_stm(False/*!arm*/, rN, bINC, bBEFORE, bW, bL, regList);
19304
19305         if (bL == 1 && (regList & (1<<15))) {
19306            // If we wrote to R15, we have an interworking return to
19307            // deal with.
19308            llPutIReg(15, llGetIReg(15));
19309            dres.jk_StopHere = Ijk_Ret;
19310            dres.whatNext    = Dis_StopHere;
19311         }
19312
19313         DIP("%sm%c%c r%u%s, {0x%04x}\n",
19314              bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
19315              rN, bW ? "!" : "", regList);
19316
19317         goto decode_success;
19318      }
19319   }
19320
19321   /* -------------- (T3) ADD{S}.W Rd, Rn, #constT -------------- */
19322   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19323       && INSN0(9,5) == BITS5(0,1,0,0,0)
19324       && INSN1(15,15) == 0) {
19325      UInt bS = INSN0(4,4);
19326      UInt rN = INSN0(3,0);
19327      UInt rD = INSN1(11,8);
19328      Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
19329      /* but allow "add.w reg, sp, #constT" for reg != PC */
19330      if (!valid && rD <= 14 && rN == 13)
19331         valid = True;
19332      if (valid) {
19333         IRTemp argL  = newTemp(Ity_I32);
19334         IRTemp argR  = newTemp(Ity_I32);
19335         IRTemp res   = newTemp(Ity_I32);
19336         UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
19337         assign(argL, getIRegT(rN));
19338         assign(argR, mkU32(imm32));
19339         assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
19340         putIRegT(rD, mkexpr(res), condT);
19341         if (bS == 1)
19342            setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
19343         DIP("add%s.w r%u, r%u, #%u\n",
19344             bS == 1 ? "s" : "", rD, rN, imm32);
19345         goto decode_success;
19346      }
19347   }
19348
19349   /* ---------------- (T4) ADDW Rd, Rn, #uimm12 -------------- */
19350   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19351       && INSN0(9,4) == BITS6(1,0,0,0,0,0)
19352       && INSN1(15,15) == 0) {
19353      UInt rN = INSN0(3,0);
19354      UInt rD = INSN1(11,8);
19355      Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
19356      /* but allow "addw reg, sp, #uimm12" for reg != PC */
19357      if (!valid && rD <= 14 && rN == 13)
19358         valid = True;
19359      if (valid) {
19360         IRTemp argL = newTemp(Ity_I32);
19361         IRTemp argR = newTemp(Ity_I32);
19362         IRTemp res  = newTemp(Ity_I32);
19363         UInt imm12  = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
19364         assign(argL, getIRegT(rN));
19365         assign(argR, mkU32(imm12));
19366         assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
19367         putIRegT(rD, mkexpr(res), condT);
19368         DIP("addw r%u, r%u, #%u\n", rD, rN, imm12);
19369         goto decode_success;
19370      }
19371   }
19372
19373   /* ---------------- (T2) CMP.W Rn, #constT ---------------- */
19374   /* ---------------- (T2) CMN.W Rn, #constT ---------------- */
19375   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19376       && (   INSN0(9,4) == BITS6(0,1,1,0,1,1)  // CMP
19377           || INSN0(9,4) == BITS6(0,1,0,0,0,1)) // CMN
19378       && INSN1(15,15) == 0
19379       && INSN1(11,8) == BITS4(1,1,1,1)) {
19380      UInt rN = INSN0(3,0);
19381      if (rN != 15) {
19382         IRTemp argL  = newTemp(Ity_I32);
19383         IRTemp argR  = newTemp(Ity_I32);
19384         Bool   isCMN = INSN0(9,4) == BITS6(0,1,0,0,0,1);
19385         UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
19386         assign(argL, getIRegT(rN));
19387         assign(argR, mkU32(imm32));
19388         setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
19389                         argL, argR, condT );
19390         DIP("%s.w r%u, #%u\n", isCMN ? "cmn" : "cmp", rN, imm32);
19391         goto decode_success;
19392      }
19393   }
19394
19395   /* -------------- (T1) TST.W Rn, #constT -------------- */
19396   /* -------------- (T1) TEQ.W Rn, #constT -------------- */
19397   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19398       && (   INSN0(9,4) == BITS6(0,0,0,0,0,1)  // TST
19399           || INSN0(9,4) == BITS6(0,0,1,0,0,1)) // TEQ
19400       && INSN1(15,15) == 0
19401       && INSN1(11,8) == BITS4(1,1,1,1)) {
19402      UInt rN = INSN0(3,0);
19403      if (!isBadRegT(rN)) { // yes, really, it's inconsistent with CMP.W
19404         Bool  isTST  = INSN0(9,4) == BITS6(0,0,0,0,0,1);
19405         IRTemp argL  = newTemp(Ity_I32);
19406         IRTemp argR  = newTemp(Ity_I32);
19407         IRTemp res   = newTemp(Ity_I32);
19408         IRTemp oldV  = newTemp(Ity_I32);
19409         IRTemp oldC  = newTemp(Ity_I32);
19410         Bool   updC  = False;
19411         UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
19412         assign(argL, getIRegT(rN));
19413         assign(argR, mkU32(imm32));
19414         assign(res,  binop(isTST ? Iop_And32 : Iop_Xor32,
19415                            mkexpr(argL), mkexpr(argR)));
19416         assign( oldV, mk_armg_calculate_flag_v() );
19417         assign( oldC, updC
19418                       ? mkU32((imm32 >> 31) & 1)
19419                       : mk_armg_calculate_flag_c() );
19420         setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
19421         DIP("%s.w r%u, #%u\n", isTST ? "tst" : "teq", rN, imm32);
19422         goto decode_success;
19423      }
19424   }
19425
19426   /* -------------- (T3) SUB{S}.W Rd, Rn, #constT -------------- */
19427   /* -------------- (T3) RSB{S}.W Rd, Rn, #constT -------------- */
19428   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19429       && (INSN0(9,5) == BITS5(0,1,1,0,1) // SUB
19430           || INSN0(9,5) == BITS5(0,1,1,1,0)) // RSB
19431       && INSN1(15,15) == 0) {
19432      Bool isRSB = INSN0(9,5) == BITS5(0,1,1,1,0);
19433      UInt bS    = INSN0(4,4);
19434      UInt rN    = INSN0(3,0);
19435      UInt rD    = INSN1(11,8);
19436      Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
19437      /* but allow "sub{s}.w reg, sp, #constT" for reg != PC;
19438         this is (T2) of "SUB (SP minus immediate)" */
19439      if (!valid && !isRSB && rN == 13 && rD != 15)
19440         valid = True;
19441      if (valid) {
19442         IRTemp argL  = newTemp(Ity_I32);
19443         IRTemp argR  = newTemp(Ity_I32);
19444         IRTemp res   = newTemp(Ity_I32);
19445         UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
19446         assign(argL, getIRegT(rN));
19447         assign(argR, mkU32(imm32));
19448         assign(res,  isRSB
19449                      ? binop(Iop_Sub32, mkexpr(argR), mkexpr(argL))
19450                      : binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
19451         putIRegT(rD, mkexpr(res), condT);
19452         if (bS == 1) {
19453            if (isRSB)
19454               setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
19455            else
19456               setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
19457         }
19458         DIP("%s%s.w r%u, r%u, #%u\n",
19459             isRSB ? "rsb" : "sub", bS == 1 ? "s" : "", rD, rN, imm32);
19460         goto decode_success;
19461      }
19462   }
19463
19464   /* -------------- (T4) SUBW Rd, Rn, #uimm12 ------------------- */
19465   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19466       && INSN0(9,4) == BITS6(1,0,1,0,1,0)
19467       && INSN1(15,15) == 0) {
19468      UInt rN = INSN0(3,0);
19469      UInt rD = INSN1(11,8);
19470      Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
19471      /* but allow "subw sp, sp, #uimm12" */
19472      if (!valid && rD == 13 && rN == 13)
19473         valid = True;
19474      if (valid) {
19475         IRTemp argL  = newTemp(Ity_I32);
19476         IRTemp argR  = newTemp(Ity_I32);
19477         IRTemp res   = newTemp(Ity_I32);
19478         UInt imm12   = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
19479         assign(argL, getIRegT(rN));
19480         assign(argR, mkU32(imm12));
19481         assign(res,  binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
19482         putIRegT(rD, mkexpr(res), condT);
19483         DIP("subw r%u, r%u, #%u\n", rD, rN, imm12);
19484         goto decode_success;
19485      }
19486   }
19487
19488   /* -------------- (T1) ADC{S}.W Rd, Rn, #constT -------------- */
19489   /* -------------- (T1) SBC{S}.W Rd, Rn, #constT -------------- */
19490   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19491       && (   INSN0(9,5) == BITS5(0,1,0,1,0)  // ADC
19492           || INSN0(9,5) == BITS5(0,1,0,1,1)) // SBC
19493       && INSN1(15,15) == 0) {
19494      /* ADC:  Rd = Rn + constT + oldC */
19495      /* SBC:  Rd = Rn - constT - (oldC ^ 1) */
19496      UInt bS    = INSN0(4,4);
19497      UInt rN    = INSN0(3,0);
19498      UInt rD    = INSN1(11,8);
19499      if (!isBadRegT(rN) && !isBadRegT(rD)) {
19500         IRTemp argL  = newTemp(Ity_I32);
19501         IRTemp argR  = newTemp(Ity_I32);
19502         IRTemp res   = newTemp(Ity_I32);
19503         IRTemp oldC  = newTemp(Ity_I32);
19504         UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
19505         assign(argL, getIRegT(rN));
19506         assign(argR, mkU32(imm32));
19507         assign(oldC, mk_armg_calculate_flag_c() );
19508         const HChar* nm  = "???";
19509         switch (INSN0(9,5)) {
19510            case BITS5(0,1,0,1,0): // ADC
19511               nm = "adc";
19512               assign(res,
19513                      binop(Iop_Add32,
19514                            binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
19515                            mkexpr(oldC) ));
19516               putIRegT(rD, mkexpr(res), condT);
19517               if (bS)
19518                  setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
19519                                     argL, argR, oldC, condT );
19520               break;
19521            case BITS5(0,1,0,1,1): // SBC
19522               nm = "sbc";
19523               assign(res,
19524                      binop(Iop_Sub32,
19525                            binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
19526                            binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
19527               putIRegT(rD, mkexpr(res), condT);
19528               if (bS)
19529                  setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
19530                                     argL, argR, oldC, condT );
19531               break;
19532            default:
19533              vassert(0);
19534         }
19535         DIP("%s%s.w r%u, r%u, #%u\n",
19536             nm, bS == 1 ? "s" : "", rD, rN, imm32);
19537         goto decode_success;
19538      }
19539   }
19540
19541   /* -------------- (T1) ORR{S}.W Rd, Rn, #constT -------------- */
19542   /* -------------- (T1) AND{S}.W Rd, Rn, #constT -------------- */
19543   /* -------------- (T1) BIC{S}.W Rd, Rn, #constT -------------- */
19544   /* ---------- (T1) EOR{S}.W / ORN{S} Rd, Rn, #constT ---------- */
19545   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19546       && (   INSN0(9,5) == BITS5(0,0,0,1,0)  // ORR
19547           || INSN0(9,5) == BITS5(0,0,0,0,0)  // AND
19548           || INSN0(9,5) == BITS5(0,0,0,0,1)  // BIC
19549           || INSN0(9,5) == BITS5(0,0,1,0,0)  // EOR
19550           || INSN0(9,5) == BITS5(0,0,0,1,1)) // ORN
19551       && INSN1(15,15) == 0) {  /* Logical op of Rn with an expanded immediate, result to Rd. */
19552      UInt bS = INSN0(4,4);  /* S bit: when set, the flags thunk is updated below */
19553      UInt rN = INSN0(3,0);
19554      UInt rD = INSN1(11,8);
19555      if (!isBadRegT(rN) && !isBadRegT(rD)) {  /* otherwise fall through to later decode cases */
19556         Bool   notArgR = False;  /* True for BIC/ORN: the immediate is complemented */
19557         IROp   op      = Iop_INVALID;
19558         const HChar* nm = "???";
19559         switch (INSN0(9,5)) {
19560            case BITS5(0,0,0,1,0): op = Iop_Or32;  nm = "orr"; break;
19561            case BITS5(0,0,0,0,0): op = Iop_And32; nm = "and"; break;
19562            case BITS5(0,0,0,0,1): op = Iop_And32; nm = "bic";
19563                                   notArgR = True; break;
19564            case BITS5(0,0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
19565            case BITS5(0,0,0,1,1): op = Iop_Or32;  nm = "orn";
19566                                   notArgR = True; break;
19567            default: vassert(0);  /* excluded by the opcode test in the enclosing if */
19568         }
19569         IRTemp argL  = newTemp(Ity_I32);
19570         IRTemp argR  = newTemp(Ity_I32);
19571         IRTemp res   = newTemp(Ity_I32);
19572         Bool   updC  = False;  /* filled in by the immediate expander; picks the C source below */
19573         UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
19574         assign(argL, getIRegT(rN));
19575         assign(argR, mkU32(notArgR ? ~imm32 : imm32));  /* NOT is folded into the constant at decode time */
19576         assign(res,  binop(op, mkexpr(argL), mkexpr(argR)));
19577         putIRegT(rD, mkexpr(res), condT);
19578         if (bS) {
19579            IRTemp oldV = newTemp(Ity_I32);
19580            IRTemp oldC = newTemp(Ity_I32);
19581            assign( oldV, mk_armg_calculate_flag_v() );
19582            assign( oldC, updC  /* if updC, C becomes bit 31 of imm32; else the current C is passed on */
19583                          ? mkU32((imm32 >> 31) & 1)
19584                          : mk_armg_calculate_flag_c() );
19585            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19586                               condT );
19587         }
19588         DIP("%s%s.w r%u, r%u, #%u\n",  /* prints imm32 as expanded, also for BIC/ORN where ~imm32 is used */
19589             nm, bS == 1 ? "s" : "", rD, rN, imm32);
19590         goto decode_success;
19591      }
19592   }
19593
19594   /* ---------- (T3) ADD{S}.W Rd, Rn, Rm, {shift} ---------- */
19595   /* ---------- (T3) SUB{S}.W Rd, Rn, Rm, {shift} ---------- */
19596   /* ---------- (T3) RSB{S}.W Rd, Rn, Rm, {shift} ---------- */
19597   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
19598       && (   INSN0(8,5) == BITS4(1,0,0,0)  // add subopc
19599           || INSN0(8,5) == BITS4(1,1,0,1)  // sub subopc
19600           || INSN0(8,5) == BITS4(1,1,1,0)) // rsb subopc
19601       && INSN1(15,15) == 0) {  /* Add/subtract of Rn and an immediate-shifted Rm, result to Rd. */
19602      UInt rN   = INSN0(3,0);
19603      UInt rD   = INSN1(11,8);
19604      UInt rM   = INSN1(3,0);
19605      UInt bS   = INSN0(4,4);  /* S bit: when set, the flags thunk is updated below */
19606      UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);  /* shift amount: insn1 bits 14:12 (high) and 7:6 (low) */
19607      UInt how  = INSN1(5,4);  /* shift-kind selector handed to the shift helper */
19608      /* Default rule: none of the three registers may be rejected by isBadRegT. */
19609      Bool valid = !isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM);
19610      /* but allow "add.w reg, sp, reg, lsl #N" for N=0,1,2 or 3
19611         (T3) "ADD (SP plus register)" */
19612      if (!valid && INSN0(8,5) == BITS4(1,0,0,0) // add
19613          && rD != 15 && rN == 13 && imm5 <= 3 && how == 0) {  /* NOTE(review): rM is not re-checked here */
19614         valid = True;
19615      }
19616      /* also allow "sub.w reg, sp, reg"   w/ no shift
19617         (T1) "SUB (SP minus register)" */
19618      if (!valid && INSN0(8,5) == BITS4(1,1,0,1) // sub
19619          && rD != 15 && rN == 13 && imm5 == 0 && how == 0) {
19620         valid = True;
19621      }
19622      if (valid) {
19623         Bool   swap = False;  /* True for RSB: operands are reversed */
19624         IROp   op   = Iop_INVALID;
19625         const HChar* nm = "???";
19626         switch (INSN0(8,5)) {
19627            case BITS4(1,0,0,0): op = Iop_Add32; nm = "add"; break;
19628            case BITS4(1,1,0,1): op = Iop_Sub32; nm = "sub"; break;
19629            case BITS4(1,1,1,0): op = Iop_Sub32; nm = "rsb";
19630                                 swap = True; break;
19631            default: vassert(0);  /* excluded by the subopc test in the enclosing if */
19632         }
19633
19634         IRTemp argL = newTemp(Ity_I32);
19635         assign(argL, getIRegT(rN));
19636
19637         IRTemp rMt = newTemp(Ity_I32);
19638         assign(rMt, getIRegT(rM));
19639
19640         IRTemp argR = newTemp(Ity_I32);
19641         compute_result_and_C_after_shift_by_imm5(  /* fills argR and dis_buf; NULL presumably means no carry-out wanted */
19642            dis_buf, &argR, NULL, rMt, how, imm5, rM
19643         );
19644
19645         IRTemp res = newTemp(Ity_I32);
19646         assign(res, swap
19647                     ? binop(op, mkexpr(argR), mkexpr(argL))
19648                     : binop(op, mkexpr(argL), mkexpr(argR)));
19649
19650         putIRegT(rD, mkexpr(res), condT);
19651         if (bS) {
19652            switch (op) {
19653               case Iop_Add32:
19654                  setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
19655                  break;
19656               case Iop_Sub32:  /* operand order matches the subtraction actually performed */
19657                  if (swap)
19658                     setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
19659                  else
19660                     setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
19661                  break;
19662               default:
19663                  vassert(0);
19664            }
19665         }
19666
19667         DIP("%s%s.w r%u, r%u, %s\n",
19668             nm, bS ? "s" : "", rD, rN, dis_buf);
19669         goto decode_success;
19670      }
19671   }
19672
19673   /* ---------- (T3) ADC{S}.W Rd, Rn, Rm, {shift} ---------- */
19674   /* ---------- (T2) SBC{S}.W Rd, Rn, Rm, {shift} ---------- */
19675   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
19676       && (   INSN0(8,5) == BITS4(1,0,1,0)   // adc subopc
19677           || INSN0(8,5) == BITS4(1,0,1,1))  // sbc subopc
19678       && INSN1(15,15) == 0) {
19679      /* ADC:  Rd = Rn + shifter_operand + oldC */
19680      /* SBC:  Rd = Rn - shifter_operand - (oldC ^ 1) */
19681      UInt rN = INSN0(3,0);
19682      UInt rD = INSN1(11,8);
19683      UInt rM = INSN1(3,0);
19684      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {  /* otherwise fall through to later decode cases */
19685         UInt bS   = INSN0(4,4);  /* S bit: when set, the flags thunk is updated below */
19686         UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);  /* shift amount: insn1 bits 14:12 (high) and 7:6 (low) */
19687         UInt how  = INSN1(5,4);  /* shift-kind selector handed to the shift helper */
19688
19689         IRTemp argL = newTemp(Ity_I32);
19690         assign(argL, getIRegT(rN));
19691
19692         IRTemp rMt = newTemp(Ity_I32);
19693         assign(rMt, getIRegT(rM));
19694
19695         IRTemp oldC = newTemp(Ity_I32);  /* incoming carry, needed by the result even when bS is clear */
19696         assign(oldC, mk_armg_calculate_flag_c());
19697
19698         IRTemp argR = newTemp(Ity_I32);
19699         compute_result_and_C_after_shift_by_imm5(  /* fills argR and dis_buf; NULL presumably means no carry-out wanted */
19700            dis_buf, &argR, NULL, rMt, how, imm5, rM
19701         );
19702
19703         const HChar* nm  = "???";
19704         IRTemp res = newTemp(Ity_I32);
19705         switch (INSN0(8,5)) {
19706            case BITS4(1,0,1,0): // ADC
19707               nm = "adc";
19708               assign(res,
19709                      binop(Iop_Add32,
19710                            binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
19711                            mkexpr(oldC) ));
19712               putIRegT(rD, mkexpr(res), condT);
19713               if (bS)
19714                  setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
19715                                     argL, argR, oldC, condT );
19716               break;
19717            case BITS4(1,0,1,1): // SBC
19718               nm = "sbc";
19719               assign(res,
19720                      binop(Iop_Sub32,
19721                            binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
19722                            binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));  /* oldC ^ 1 is the extra amount subtracted */
19723               putIRegT(rD, mkexpr(res), condT);
19724               if (bS)
19725                  setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
19726                                     argL, argR, oldC, condT );
19727               break;
19728            default:
19729               vassert(0);  /* excluded by the subopc test in the enclosing if */
19730         }
19731
19732         DIP("%s%s.w r%u, r%u, %s\n",
19733             nm, bS ? "s" : "", rD, rN, dis_buf);
19734         goto decode_success;
19735      }
19736   }
19737
19738   /* ---------- (T3) AND{S}.W Rd, Rn, Rm, {shift} ---------- */
19739   /* ---------- (T3) ORR{S}.W Rd, Rn, Rm, {shift} ---------- */
19740   /* ---------- (T3) EOR{S}.W Rd, Rn, Rm, {shift} ---------- */
19741   /* ---------- (T3) BIC{S}.W Rd, Rn, Rm, {shift} ---------- */
19742   /* ---------- (T1) ORN{S}.W Rd, Rn, Rm, {shift} ---------- */
19743   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
19744       && (   INSN0(8,5) == BITS4(0,0,0,0)  // and subopc
19745           || INSN0(8,5) == BITS4(0,0,1,0)  // orr subopc
19746           || INSN0(8,5) == BITS4(0,1,0,0)  // eor subopc
19747           || INSN0(8,5) == BITS4(0,0,0,1)  // bic subopc
19748           || INSN0(8,5) == BITS4(0,0,1,1)) // orn subopc
19749       && INSN1(15,15) == 0) {  /* Logical op of Rn with an immediate-shifted Rm, result to Rd. */
19750      UInt rN = INSN0(3,0);
19751      UInt rD = INSN1(11,8);
19752      UInt rM = INSN1(3,0);
19753      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {  /* otherwise fall through to later decode cases */
19754         Bool notArgR = False;  /* True for BIC/ORN: the shifted operand is complemented */
19755         IROp op      = Iop_INVALID;
19756         const HChar* nm  = "???";
19757         switch (INSN0(8,5)) {
19758            case BITS4(0,0,0,0): op = Iop_And32; nm = "and"; break;
19759            case BITS4(0,0,1,0): op = Iop_Or32;  nm = "orr"; break;
19760            case BITS4(0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
19761            case BITS4(0,0,0,1): op = Iop_And32; nm = "bic";
19762                                 notArgR = True; break;
19763            case BITS4(0,0,1,1): op = Iop_Or32; nm = "orn";
19764                                 notArgR = True; break;
19765            default: vassert(0);  /* excluded by the subopc test in the enclosing if */
19766         }
19767         UInt bS   = INSN0(4,4);  /* S bit: when set, the flags thunk is updated below */
19768         UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);  /* shift amount: insn1 bits 14:12 (high) and 7:6 (low) */
19769         UInt how  = INSN1(5,4);  /* shift-kind selector handed to the shift helper */
19770
19771         IRTemp rNt = newTemp(Ity_I32);
19772         assign(rNt, getIRegT(rN));
19773
19774         IRTemp rMt = newTemp(Ity_I32);
19775         assign(rMt, getIRegT(rM));
19776
19777         IRTemp argR = newTemp(Ity_I32);
19778         IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;  /* only allocated when flags are wanted */
19779
19780         compute_result_and_C_after_shift_by_imm5(  /* fills argR, dis_buf and (if requested) oldC */
19781            dis_buf, &argR, bS ? &oldC : NULL, rMt, how, imm5, rM
19782         );
19783
19784         IRTemp res = newTemp(Ity_I32);
19785         if (notArgR) {
19786            vassert(op == Iop_And32 || op == Iop_Or32);  /* only BIC and ORN set notArgR */
19787            assign(res, binop(op, mkexpr(rNt),
19788                                  unop(Iop_Not32, mkexpr(argR))));
19789         } else {
19790            assign(res, binop(op, mkexpr(rNt), mkexpr(argR)));
19791         }
19792
19793         putIRegT(rD, mkexpr(res), condT);
19794         if (bS) {
19795            IRTemp oldV = newTemp(Ity_I32);
19796            assign( oldV, mk_armg_calculate_flag_v() );
19797            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19798                               condT );
19799         }
19800
19801         DIP("%s%s.w r%u, r%u, %s\n",
19802             nm, bS ? "s" : "", rD, rN, dis_buf);
19803         goto decode_success;
19804      }
19805   }
19806
19807   /* -------------- (T?) LSL{S}.W Rd, Rn, Rm -------------- */
19808   /* -------------- (T?) LSR{S}.W Rd, Rn, Rm -------------- */
19809   /* -------------- (T?) ASR{S}.W Rd, Rn, Rm -------------- */
19810   /* -------------- (T?) ROR{S}.W Rd, Rn, Rm -------------- */
19811   if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,0,0)
19812       && INSN1(15,12) == BITS4(1,1,1,1)
19813       && INSN1(7,4) == BITS4(0,0,0,0)) {  /* Shift of Rn by an amount taken from Rm, result to Rd. */
19814      UInt how = INSN0(6,5); // standard encoding
19815      UInt rN  = INSN0(3,0);
19816      UInt rD  = INSN1(11,8);
19817      UInt rM  = INSN1(3,0);
19818      UInt bS  = INSN0(4,4);  /* S bit: when set, the flags thunk is updated below */
19819      Bool valid = !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rD);
19820      if (valid) {
19821         IRTemp rNt    = newTemp(Ity_I32);
19822         IRTemp rMt    = newTemp(Ity_I32);
19823         IRTemp res    = newTemp(Ity_I32);
19824         IRTemp oldC   = bS ? newTemp(Ity_I32) : IRTemp_INVALID;  /* flag temps only allocated when bS is set */
19825         IRTemp oldV   = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
19826         const HChar* nms[4] = { "lsl", "lsr", "asr", "ror" };  /* indexed by the 2-bit 'how' field */
19827         const HChar* nm     = nms[how];
19828         assign(rNt, getIRegT(rN));
19829         assign(rMt, getIRegT(rM));
19830         compute_result_and_C_after_shift_by_reg(  /* fills res and (if requested) oldC */
19831            dis_buf, &res, bS ? &oldC : NULL,
19832            rNt, how, rMt, rN, rM
19833         );
19834         if (bS)
19835            assign(oldV, mk_armg_calculate_flag_v());  /* V is read before Rd is written and passed on below */
19836         putIRegT(rD, mkexpr(res), condT);
19837         if (bS) {
19838            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19839                               condT );
19840         }
19841         DIP("%s%s.w r%u, r%u, r%u\n",  /* formatted from the register numbers; dis_buf is not used here */
19842             nm, bS ? "s" : "", rD, rN, rM);
19843         goto decode_success;
19844      }
19845   }
19846
19847   /* ------------ (T?) MOV{S}.W Rd, Rn, {shift} ------------ */
19848   /* ------------ (T?) MVN{S}.W Rd, Rn, {shift} ------------ */
19849   if ((INSN0(15,0) & 0xFFCF) == 0xEA4F  /* mask ignores bits 5 (MVN) and 4 (S), which are read below */
19850       && INSN1(15,15) == 0) {
19851      UInt rD = INSN1(11,8);
19852      UInt rN = INSN1(3,0);  /* source register; note it comes from insn1, not insn0 */
19853      if (!isBadRegT(rD) && !isBadRegT(rN)) {  /* otherwise fall through to later decode cases */
19854         UInt bS    = INSN0(4,4);  /* S bit: when set, the flags thunk is updated below */
19855         UInt isMVN = INSN0(5,5);  /* 1 selects MVN (result is complemented), 0 selects MOV */
19856         UInt imm5  = (INSN1(14,12) << 2) | INSN1(7,6);  /* shift amount: insn1 bits 14:12 (high) and 7:6 (low) */
19857         UInt how   = INSN1(5,4);  /* shift-kind selector handed to the shift helper */
19858
19859         IRTemp rNt = newTemp(Ity_I32);
19860         assign(rNt, getIRegT(rN));
19861
19862         IRTemp oldRn = newTemp(Ity_I32);  /* receives the shifted source value */
19863         IRTemp oldC  = bS ? newTemp(Ity_I32) : IRTemp_INVALID;  /* only allocated when flags are wanted */
19864         compute_result_and_C_after_shift_by_imm5(
19865            dis_buf, &oldRn, bS ? &oldC : NULL, rNt, how, imm5, rN
19866         );
19867
19868         IRTemp res = newTemp(Ity_I32);
19869         assign(res, isMVN ? unop(Iop_Not32, mkexpr(oldRn))
19870                           : mkexpr(oldRn));
19871
19872         putIRegT(rD, mkexpr(res), condT);
19873         if (bS) {
19874            IRTemp oldV = newTemp(Ity_I32);
19875            assign( oldV, mk_armg_calculate_flag_v() );
19876            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT);
19877         }
19878         DIP("%s%s.w r%u, %s\n",
19879             isMVN ? "mvn" : "mov", bS ? "s" : "", rD, dis_buf);
19880         goto decode_success;
19881      }
19882   }
19883
19884   /* -------------- (T?) TST.W Rn, Rm, {shift} -------------- */
19885   /* -------------- (T?) TEQ.W Rn, Rm, {shift} -------------- */
19886   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
19887       && (   INSN0(8,4) == BITS5(0,0,0,0,1)  // TST
19888           || INSN0(8,4) == BITS5(0,1,0,0,1)) // TEQ
19889       && INSN1(15,15) == 0
19890       && INSN1(11,8) == BITS4(1,1,1,1)) {  /* destination field must be all ones: flags only, no register result */
19891      UInt rN = INSN0(3,0);
19892      UInt rM = INSN1(3,0);
19893      if (!isBadRegT(rN) && !isBadRegT(rM)) {  /* otherwise fall through to later decode cases */
19894         Bool isTST = INSN0(8,4) == BITS5(0,0,0,0,1);  /* True: AND (TST); False: XOR (TEQ) */
19895
19896         UInt how  = INSN1(5,4);  /* shift-kind selector handed to the shift helper */
19897         UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);  /* shift amount: insn1 bits 14:12 (high) and 7:6 (low) */
19898
19899         IRTemp argL = newTemp(Ity_I32);
19900         assign(argL, getIRegT(rN));
19901
19902         IRTemp rMt = newTemp(Ity_I32);
19903         assign(rMt, getIRegT(rM));
19904
19905         IRTemp argR = newTemp(Ity_I32);
19906         IRTemp oldC = newTemp(Ity_I32);  /* always needed: this case sets flags unconditionally */
19907         compute_result_and_C_after_shift_by_imm5(
19908            dis_buf, &argR, &oldC, rMt, how, imm5, rM
19909         );
19910
19911         IRTemp oldV = newTemp(Ity_I32);
19912         assign( oldV, mk_armg_calculate_flag_v() );
19913
19914         IRTemp res = newTemp(Ity_I32);  /* used only for the flags; no putIRegT in this case */
19915         assign(res, binop(isTST ? Iop_And32 : Iop_Xor32,
19916                           mkexpr(argL), mkexpr(argR)));
19917
19918         setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19919                            condT );
19920         DIP("%s.w r%u, %s\n", isTST ? "tst" : "teq", rN, dis_buf);
19921         goto decode_success;
19922      }
19923   }
19924
19925   /* -------------- (T3) CMP.W Rn, Rm, {shift} -------------- */
19926   /* -------------- (T2) CMN.W Rn, Rm, {shift} -------------- */
19927   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
19928       && (   INSN0(8,4) == BITS5(1,1,0,1,1)  // CMP
19929           || INSN0(8,4) == BITS5(1,0,0,0,1)) // CMN
19930       && INSN1(15,15) == 0
19931       && INSN1(11,8) == BITS4(1,1,1,1)) {  /* destination field must be all ones: flags only, no register result */
19932      UInt rN = INSN0(3,0);
19933      UInt rM = INSN1(3,0);
19934      if (!isBadRegT(rN) && !isBadRegT(rM)) {  /* otherwise fall through to later decode cases */
19935         Bool isCMN = INSN0(8,4) == BITS5(1,0,0,0,1);  /* True: flags as for an add; False: as for a subtract */
19936         UInt how   = INSN1(5,4);  /* shift-kind selector handed to the shift helper */
19937         UInt imm5  = (INSN1(14,12) << 2) | INSN1(7,6);  /* shift amount: insn1 bits 14:12 (high) and 7:6 (low) */
19938
19939         IRTemp argL = newTemp(Ity_I32);
19940         assign(argL, getIRegT(rN));
19941
19942         IRTemp rMt = newTemp(Ity_I32);
19943         assign(rMt, getIRegT(rM));
19944
19945         IRTemp argR = newTemp(Ity_I32);
19946         compute_result_and_C_after_shift_by_imm5(  /* fills argR and dis_buf; NULL presumably means no carry-out wanted */
19947            dis_buf, &argR, NULL, rMt, how, imm5, rM
19948         );
19949
19950         setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,  /* only the two operands are recorded; no result is built here */
19951                         argL, argR, condT );
19952
19953         DIP("%s.w r%u, %s\n", isCMN ? "cmn" : "cmp", rN, dis_buf);
19954         goto decode_success;
19955      }
19956   }
19957
19958   /* -------------- (T2) MOV{S}.W Rd, #constT -------------- */
19959   /* -------------- (T2) MVN{S}.W Rd, #constT -------------- */
19960   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19961       && (   INSN0(9,5) == BITS5(0,0,0,1,0)  // MOV
19962           || INSN0(9,5) == BITS5(0,0,0,1,1)) // MVN
19963       && INSN0(3,0) == BITS4(1,1,1,1)  /* insn0 bits 3:0 must be all ones for this case */
19964       && INSN1(15,15) == 0) {
19965      UInt rD = INSN1(11,8);
19966      if (!isBadRegT(rD)) {  /* otherwise fall through to later decode cases */
19967         Bool   updC  = False;  /* filled in by the immediate expander; picks the C source below */
19968         UInt   bS    = INSN0(4,4);  /* S bit: when set, the flags thunk is updated below */
19969         Bool   isMVN = INSN0(5,5) == 1;
19970         UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
19971         IRTemp res   = newTemp(Ity_I32);
19972         assign(res, mkU32(isMVN ? ~imm32 : imm32));  /* result is a decode-time constant */
19973         putIRegT(rD, mkexpr(res), condT);
19974         if (bS) {
19975            IRTemp oldV = newTemp(Ity_I32);
19976            IRTemp oldC = newTemp(Ity_I32);
19977            assign( oldV, mk_armg_calculate_flag_v() );
19978            assign( oldC, updC  /* if updC, C becomes bit 31 of imm32; else the current C is passed on */
19979                          ? mkU32((imm32 >> 31) & 1)
19980                          : mk_armg_calculate_flag_c() );
19981            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19982                               condT );
19983         }
19984         DIP("%s%s.w r%u, #%u\n",  /* prints imm32 as expanded, also for MVN where ~imm32 is written */
19985             isMVN ? "mvn" : "mov", bS ? "s" : "", rD, imm32);
19986         goto decode_success;
19987      }
19988   }
19989
19990   /* -------------- (T3) MOVW Rd, #imm16 -------------- */
19991   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19992       && INSN0(9,4) == BITS6(1,0,0,1,0,0)
19993       && INSN1(15,15) == 0) {  /* Writes a 16-bit immediate, zero-extended to 32 bits, into Rd. */
19994      UInt rD = INSN1(11,8);
19995      if (!isBadRegT(rD)) {  /* otherwise fall through to later decode cases */
19996         UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)  /* 4 + 1 + 3 + 8 bits, most significant first */
19997                      | (INSN1(14,12) << 8) | INSN1(7,0);
19998         putIRegT(rD, mkU32(imm16), condT);  /* no flags are touched in this case */
19999         DIP("movw r%u, #%u\n", rD, imm16);
20000         goto decode_success;
20001      }
20002   }
20003
20004   /* ---------------- MOVT Rd, #imm16 ---------------- */
20005   if (INSN0(15,11) == BITS5(1,1,1,1,0)
20006       && INSN0(9,4) == BITS6(1,0,1,1,0,0)
20007       && INSN1(15,15) == 0) {  /* Replaces the top 16 bits of Rd with imm16; the low 16 bits are kept. */
20008      UInt rD = INSN1(11,8);
20009      if (!isBadRegT(rD)) {  /* otherwise fall through to later decode cases */
20010         UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)  /* 4 + 1 + 3 + 8 bits, most significant first */
20011                      | (INSN1(14,12) << 8) | INSN1(7,0);
20012         IRTemp res = newTemp(Ity_I32);
20013         assign(res,
20014                binop(Iop_Or32,
20015                      binop(Iop_And32, getIRegT(rD), mkU32(0xFFFF)),  /* old Rd, low half only */
20016                      mkU32(imm16 << 16)));  /* new high half */
20017         putIRegT(rD, mkexpr(res), condT);
20018         DIP("movt r%u, #%u\n", rD, imm16);
20019         goto decode_success;
20020      }
20021   }
20022
20023   /* ---------------- LD/ST reg+/-#imm8 ---------------- */
20024   /* Loads and stores of the form:
20025         op  Rt, [Rn, #-imm8]      or
20026         op  Rt, [Rn], #+/-imm8    or
20027         op  Rt, [Rn, #+/-imm8]!
20028      where op is one of
20029         ldrb ldrh ldr  ldrsb ldrsh
20030         strb strh str
20031   */
20032   if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0) && INSN1(11,11) == 1) {
20033      Bool   valid  = True;
20034      Bool   syned  = False;  /* True for the sign-extending loads (ldrsb, ldrsh) */
20035      Bool   isST   = False;
20036      IRType ty     = Ity_I8;  /* transfer size; byte unless the switch below widens it */
20037      const HChar* nm = "???";
20038
20039      switch (INSN0(8,4)) {  /* selects mnemonic, size, signedness and direction */
20040         case BITS5(0,0,0,0,0):   // strb
20041            nm = "strb"; isST = True; break;
20042         case BITS5(0,0,0,0,1):   // ldrb
20043            nm = "ldrb"; break;
20044         case BITS5(1,0,0,0,1):   // ldrsb
20045            nm = "ldrsb"; syned = True; break;
20046         case BITS5(0,0,0,1,0):   // strh
20047            nm = "strh"; ty = Ity_I16; isST = True; break;
20048         case BITS5(0,0,0,1,1):   // ldrh
20049            nm = "ldrh"; ty = Ity_I16; break;
20050         case BITS5(1,0,0,1,1):   // ldrsh
20051            nm = "ldrsh"; ty = Ity_I16; syned = True; break;
20052         case BITS5(0,0,1,0,0):   // str
20053            nm = "str"; ty = Ity_I32; isST = True; break;
20054         case BITS5(0,0,1,0,1):
20055            nm = "ldr"; ty = Ity_I32; break;  // ldr
20056         default:
20057            valid = False; break;  /* any other opcode is left for later decode cases */
20058      }
20059
20060      UInt rN      = INSN0(3,0);
20061      UInt rT      = INSN1(15,12);
20062      UInt bP      = INSN1(10,10);  /* 1: transfer at Rn+/-imm8; 0: transfer at the unmodified Rn */
20063      UInt bU      = INSN1(9,9);    /* 1: add imm8; 0: subtract imm8 */
20064      UInt bW      = INSN1(8,8);    /* 1: write Rn+/-imm8 back to Rn */
20065      UInt imm8    = INSN1(7,0);
20066      Bool loadsPC = False;  /* set for a 32-bit load into r15, handled as a branch below */
20067
20068      if (valid) {
20069         if (bP == 1 && bU == 1 && bW == 0)  /* positive offset without writeback is not handled by this case */
20070            valid = False;
20071         if (bP == 0 && bW == 0)  /* post-indexed form without writeback is rejected */
20072            valid = False;
20073         if (rN == 15)
20074            valid = False;
20075         if (bW == 1 && rN == rT)  /* writeback to the transfer register is rejected */
20076            valid = False;
20077         if (ty == Ity_I8 || ty == Ity_I16) {
20078            if (isBadRegT(rT))
20079               valid = False;
20080         } else {
20081            /* ty == Ity_I32 */
20082            if (isST && rT == 15)
20083               valid = False;
20084            if (!isST && rT == 15)
20085               loadsPC = True;
20086         }
20087      }
20088
20089      if (valid) {
20090         // if it's a branch, it can't happen in the middle of an IT block
20091         // Also, if it is a branch, make it unconditional at this point.
20092         // Doing conditional branches in-line is too complex (for now)
20093         if (loadsPC) {
20094            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20095            // go uncond
20096            mk_skip_over_T32_if_cond_is_false(condT);
20097            condT = IRTemp_INVALID;
20098            // now uncond
20099         }
20100
20101         IRTemp preAddr = newTemp(Ity_I32);  /* Rn as it is before this instruction */
20102         assign(preAddr, getIRegT(rN));
20103
20104         IRTemp postAddr = newTemp(Ity_I32);  /* Rn +/- imm8 */
20105         assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
20106                                mkexpr(preAddr), mkU32(imm8)));
20107
20108         IRTemp transAddr = bP == 1 ? postAddr : preAddr;  /* address actually accessed */
20109
20110         if (isST) {
20111
20112            /* Store.  If necessary, update the base register before
20113               the store itself, so that the common idiom of "str rX,
20114               [sp, #-4]!" (store rX at sp-4, then do new sp = sp-4,
20115               a.k.a "push rX") doesn't cause Memcheck to complain
20116               that the access is below the stack pointer.  Also, not
20117               updating sp before the store confuses Valgrind's
20118               dynamic stack-extending logic.  So do it before the
20119               store.  Hence we need to snarf the store data before
20120               doing the basereg update. */
20121
20122            /* get hold of the data to be stored */
20123            IRTemp oldRt = newTemp(Ity_I32);
20124            assign(oldRt, getIRegT(rT));
20125
20126            /* Update Rn if necessary. */
20127            if (bW == 1) {
20128               vassert(rN != rT); // assured by validity check above
20129               putIRegT(rN, mkexpr(postAddr), condT);
20130            }
20131
20132            /* generate the transfer */
20133            IRExpr* data = NULL;
20134            switch (ty) {  /* narrow the 32-bit register value to the transfer size */
20135               case Ity_I8:
20136                  data = unop(Iop_32to8, mkexpr(oldRt));
20137                  break;
20138               case Ity_I16:
20139                  data = unop(Iop_32to16, mkexpr(oldRt));
20140                  break;
20141               case Ity_I32:
20142                  data = mkexpr(oldRt);
20143                  break;
20144               default:
20145                  vassert(0);
20146            }
20147            storeGuardedLE(mkexpr(transAddr), data, condT);
20148
20149         } else {
20150
20151            /* Load. */
20152            IRTemp llOldRt = newTemp(Ity_I32);  /* current Rt, passed to the guarded load as its alternative value */
20153            assign(llOldRt, llGetIReg(rT));
20154
20155            /* generate the transfer */
20156            IRTemp    newRt = newTemp(Ity_I32);
20157            IRLoadGOp widen = ILGop_INVALID;
20158            switch (ty) {  /* choose how the loaded value is widened to 32 bits */
20159               case Ity_I8:
20160                  widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
20161               case Ity_I16:
20162                  widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
20163               case Ity_I32:
20164                  widen = ILGop_Ident32; break;
20165               default:
20166                  vassert(0);
20167            }
20168            loadGuardedLE(newRt, widen,
20169                          mkexpr(transAddr), mkexpr(llOldRt), condT);
20170            if (rT == 15) {
20171               vassert(loadsPC);
20172               /* We'll do the write to the PC just below */
20173            } else {
20174               vassert(!loadsPC);
20175               /* IRTemp_INVALID is OK here because in the case where
20176                  condT is false at run time, we're just putting the
20177                  old rT value back. */
20178               putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
20179            }
20180
20181            /* Update Rn if necessary. */
20182            if (bW == 1) {
20183               vassert(rN != rT); // assured by validity check above
20184               putIRegT(rN, mkexpr(postAddr), condT);
20185            }
20186
20187            if (loadsPC) {
20188               /* Presumably this is an interworking branch. */
20189               vassert(rN != 15); // assured by validity check above
20190               vassert(rT == 15);
20191               vassert(condT == IRTemp_INVALID); /* due to check above */
20192               llPutIReg(15, mkexpr(newRt));
20193               dres.jk_StopHere = Ijk_Boring;  /* or _Ret ? */
20194               dres.whatNext    = Dis_StopHere;  /* decoding stops after this instruction */
20195            }
20196         }
20197
20198         if (bP == 1 && bW == 0) {  /* the three DIP forms mirror the addressing modes listed at the top */
20199            DIP("%s.w r%u, [r%u, #%c%u]\n",
20200                nm, rT, rN, bU ? '+' : '-', imm8);
20201         }
20202         else if (bP == 1 && bW == 1) {
20203            DIP("%s.w r%u, [r%u, #%c%u]!\n",
20204                nm, rT, rN, bU ? '+' : '-', imm8);
20205         }
20206         else {
20207            vassert(bP == 0 && bW == 1);  /* bP == 0 && bW == 0 was rejected by the validity checks */
20208            DIP("%s.w r%u, [r%u], #%c%u\n",
20209                nm, rT, rN, bU ? '+' : '-', imm8);
20210         }
20211
20212         goto decode_success;
20213      }
20214   }
20215
20216   /* ------------- LD/ST reg+(reg<<imm2) ------------- */
20217   /* Loads and stores of the form:
20218         op  Rt, [Rn, Rm, LSL #imm2]
20219      where op is one of
20220         ldrb ldrh ldr  ldrsb ldrsh
20221         strb strh str
20222   */
20223   if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)
20224       && INSN1(11,6) == BITS6(0,0,0,0,0,0)) {
20225      Bool   valid  = True;
20226      Bool   syned  = False;
20227      Bool   isST   = False;
20228      IRType ty     = Ity_I8;
20229      const HChar* nm = "???";
20230
20231      switch (INSN0(8,4)) {
20232         case BITS5(0,0,0,0,0):   // strb
20233            nm = "strb"; isST = True; break;
20234         case BITS5(0,0,0,0,1):   // ldrb
20235            nm = "ldrb"; break;
20236         case BITS5(1,0,0,0,1):   // ldrsb
20237            nm = "ldrsb"; syned = True; break;
20238         case BITS5(0,0,0,1,0):   // strh
20239            nm = "strh"; ty = Ity_I16; isST = True; break;
20240         case BITS5(0,0,0,1,1):   // ldrh
20241            nm = "ldrh"; ty = Ity_I16; break;
20242         case BITS5(1,0,0,1,1):   // ldrsh
20243            nm = "ldrsh"; ty = Ity_I16; syned = True; break;
20244         case BITS5(0,0,1,0,0):   // str
20245            nm = "str"; ty = Ity_I32; isST = True; break;
20246         case BITS5(0,0,1,0,1):
20247            nm = "ldr"; ty = Ity_I32; break;  // ldr
20248         default:
20249            valid = False; break;
20250      }
20251
20252      UInt rN      = INSN0(3,0);
20253      UInt rM      = INSN1(3,0);
20254      UInt rT      = INSN1(15,12);
20255      UInt imm2    = INSN1(5,4);
20256      Bool loadsPC = False;
20257
20258      if (ty == Ity_I8 || ty == Ity_I16) {
20259         /* all 8- and 16-bit load and store cases have the
20260            same exclusion set. */
20261         if (rN == 15 || isBadRegT(rT) || isBadRegT(rM))
20262            valid = False;
20263      } else {
20264         vassert(ty == Ity_I32);
20265         if (rN == 15 || isBadRegT(rM))
20266            valid = False;
20267         if (isST && rT == 15)
20268            valid = False;
20269         /* If it is a load and rT is 15, that's only allowable if we
20270            not in an IT block, or are the last in it.  Need to insert
20271            a dynamic check for that. */
20272         if (!isST && rT == 15)
20273            loadsPC = True;
20274      }
20275
20276      if (valid) {
20277         // if it's a branch, it can't happen in the middle of an IT block
20278         // Also, if it is a branch, make it unconditional at this point.
20279         // Doing conditional branches in-line is too complex (for now)
20280         if (loadsPC) {
20281            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20282            // go uncond
20283            mk_skip_over_T32_if_cond_is_false(condT);
20284            condT = IRTemp_INVALID;
20285            // now uncond
20286         }
20287
20288         IRTemp transAddr = newTemp(Ity_I32);
20289         assign(transAddr,
20290                binop( Iop_Add32,
20291                       getIRegT(rN),
20292                       binop(Iop_Shl32, getIRegT(rM), mkU8(imm2)) ));
20293
20294         if (isST) {
20295
20296            /* get hold of the data to be stored */
20297            IRTemp oldRt = newTemp(Ity_I32);
20298            assign(oldRt, getIRegT(rT));
20299
20300            /* generate the transfer */
20301            IRExpr* data = NULL;
20302            switch (ty) {
20303               case Ity_I8:
20304                  data = unop(Iop_32to8, mkexpr(oldRt));
20305                  break;
20306               case Ity_I16:
20307                  data = unop(Iop_32to16, mkexpr(oldRt));
20308                  break;
20309              case Ity_I32:
20310                  data = mkexpr(oldRt);
20311                  break;
20312              default:
20313                 vassert(0);
20314            }
20315            storeGuardedLE(mkexpr(transAddr), data, condT);
20316
20317         } else {
20318
20319            /* Load. */
20320            IRTemp llOldRt = newTemp(Ity_I32);
20321            assign(llOldRt, llGetIReg(rT));
20322
20323            /* generate the transfer */
20324            IRTemp    newRt = newTemp(Ity_I32);
20325            IRLoadGOp widen = ILGop_INVALID;
20326            switch (ty) {
20327               case Ity_I8:
20328                  widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
20329               case Ity_I16:
20330                  widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
20331               case Ity_I32:
20332                  widen = ILGop_Ident32; break;
20333               default:
20334                  vassert(0);
20335            }
20336            loadGuardedLE(newRt, widen,
20337                          mkexpr(transAddr), mkexpr(llOldRt), condT);
20338
20339            if (rT == 15) {
20340               vassert(loadsPC);
20341               /* We'll do the write to the PC just below */
20342            } else {
20343               vassert(!loadsPC);
20344               /* IRTemp_INVALID is OK here because in the case where
20345                  condT is false at run time, we're just putting the
20346                  old rT value back. */
20347               putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
20348            }
20349
20350            if (loadsPC) {
20351               /* Presumably this is an interworking branch. */
20352               vassert(rN != 15); // assured by validity check above
20353               vassert(rT == 15);
20354               vassert(condT == IRTemp_INVALID); /* due to check above */
20355               llPutIReg(15, mkexpr(newRt));
20356               dres.jk_StopHere = Ijk_Boring;  /* or _Ret ? */
20357               dres.whatNext    = Dis_StopHere;
20358            }
20359         }
20360
20361         DIP("%s.w r%u, [r%u, r%u, LSL #%u]\n",
20362             nm, rT, rN, rM, imm2);
20363
20364         goto decode_success;
20365      }
20366   }
20367
20368   /* --------------- LD/ST reg+imm12 --------------- */
20369   /* Loads and stores of the form:
20370         op  Rt, [Rn, #+-imm12]
20371      where op is one of
20372         ldrb ldrh ldr  ldrsb ldrsh
20373         strb strh str
20374   */
20375   if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)) {
20376      Bool   valid  = True;
20377      Bool   syned  = INSN0(8,8) == 1;
20378      Bool   isST   = False;
20379      IRType ty     = Ity_I8;
20380      UInt   bU     = INSN0(7,7); // 1: +imm   0: -imm
20381                                  // -imm is only supported by literal versions
20382      const HChar* nm = "???";
20383
20384      switch (INSN0(6,4)) {
20385         case BITS3(0,0,0):   // strb
20386            nm = "strb"; isST = True; break;
20387         case BITS3(0,0,1):   // ldrb
20388            nm = syned ? "ldrsb" : "ldrb"; break;
20389         case BITS3(0,1,0):   // strh
20390            nm = "strh"; ty = Ity_I16; isST = True; break;
20391         case BITS3(0,1,1):   // ldrh
20392            nm = syned ? "ldrsh" : "ldrh"; ty = Ity_I16; break;
20393         case BITS3(1,0,0):   // str
20394            nm = "str"; ty = Ity_I32; isST = True; break;
20395         case BITS3(1,0,1):
20396            nm = "ldr"; ty = Ity_I32; break;  // ldr
20397         default:
20398            valid = False; break;
20399      }
20400
20401      UInt rN      = INSN0(3,0);
20402      UInt rT      = INSN1(15,12);
20403      UInt imm12   = INSN1(11,0);
20404      Bool loadsPC = False;
20405
20406      if (rN != 15 && bU == 0) {
20407         // only pc supports #-imm12
20408         valid = False;
20409      }
20410
20411      if (isST) {
20412         if (syned) valid = False;
20413         if (rN == 15 || rT == 15)
20414            valid = False;
20415      } else {
20416         /* For a 32-bit load, rT == 15 is only allowable if we are not
20417            in an IT block, or are the last in it.  Need to insert
20418            a dynamic check for that.  Also, in this particular
20419            case, rN == 15 is allowable.  In this case however, the
20420            value obtained for rN is (apparently)
20421            "word-align(address of current insn + 4)". */
20422         if (rT == 15) {
20423            if (ty == Ity_I32)
20424               loadsPC = True;
20425            else // Can't do it for B/H loads
20426               valid = False;
20427         }
20428      }
20429
20430      if (valid) {
20431         // if it's a branch, it can't happen in the middle of an IT block
20432         // Also, if it is a branch, make it unconditional at this point.
20433         // Doing conditional branches in-line is too complex (for now)
20434         if (loadsPC) {
20435            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20436            // go uncond
20437            mk_skip_over_T32_if_cond_is_false(condT);
20438            condT = IRTemp_INVALID;
20439            // now uncond
20440         }
20441
20442         IRTemp rNt = newTemp(Ity_I32);
20443         if (rN == 15) {
20444            vassert(!isST);
20445            assign(rNt, binop(Iop_And32, getIRegT(15), mkU32(~3)));
20446         } else {
20447            assign(rNt, getIRegT(rN));
20448         }
20449
20450         IRTemp transAddr = newTemp(Ity_I32);
20451         assign(transAddr,
20452                binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
20453                      mkexpr(rNt), mkU32(imm12)));
20454
20455         IRTemp oldRt = newTemp(Ity_I32);
20456         assign(oldRt, getIRegT(rT));
20457
20458         IRTemp llOldRt = newTemp(Ity_I32);
20459         assign(llOldRt, llGetIReg(rT));
20460
20461         if (isST) {
20462            IRExpr* data = NULL;
20463            switch (ty) {
20464               case Ity_I8:
20465                  data = unop(Iop_32to8, mkexpr(oldRt));
20466                  break;
20467               case Ity_I16:
20468                  data = unop(Iop_32to16, mkexpr(oldRt));
20469                  break;
20470              case Ity_I32:
20471                  data = mkexpr(oldRt);
20472                  break;
20473              default:
20474                 vassert(0);
20475            }
20476            storeGuardedLE(mkexpr(transAddr), data, condT);
20477         } else {
20478            IRTemp    newRt = newTemp(Ity_I32);
20479            IRLoadGOp widen = ILGop_INVALID;
20480            switch (ty) {
20481               case Ity_I8:
20482                  widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
20483               case Ity_I16:
20484                  widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
20485               case Ity_I32:
20486                  widen = ILGop_Ident32; break;
20487               default:
20488                  vassert(0);
20489            }
20490            loadGuardedLE(newRt, widen,
20491                          mkexpr(transAddr), mkexpr(llOldRt), condT);
20492            if (rT == 15) {
20493               vassert(loadsPC);
20494               /* We'll do the write to the PC just below */
20495            } else {
20496               vassert(!loadsPC);
20497               /* IRTemp_INVALID is OK here because in the case where
20498                  condT is false at run time, we're just putting the
20499                  old rT value back. */
20500               putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
20501            }
20502
20503            if (loadsPC) {
20504               /* Presumably this is an interworking branch. */
20505               vassert(rT == 15);
20506               vassert(condT == IRTemp_INVALID); /* due to check above */
20507               llPutIReg(15, mkexpr(newRt));
20508               dres.jk_StopHere = Ijk_Boring;
20509               dres.whatNext    = Dis_StopHere;
20510            }
20511         }
20512
20513         DIP("%s.w r%u, [r%u, +#%u]\n", nm, rT, rN, imm12);
20514
20515         goto decode_success;
20516      }
20517   }
20518
20519   /* -------------- LDRD/STRD reg+/-#imm8 -------------- */
20520   /* Doubleword loads and stores of the form:
20521         ldrd/strd  Rt, Rt2, [Rn, #+/-imm8]    or
20522         ldrd/strd  Rt, Rt2, [Rn], #+/-imm8    or
20523         ldrd/strd  Rt, Rt2, [Rn, #+/-imm8]!
20524   */
20525   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,0) && INSN0(6,6) == 1) {
20526      UInt bP   = INSN0(8,8);
20527      UInt bU   = INSN0(7,7);
20528      UInt bW   = INSN0(5,5);
20529      UInt bL   = INSN0(4,4);  // 1: load  0: store
20530      UInt rN   = INSN0(3,0);
20531      UInt rT   = INSN1(15,12);
20532      UInt rT2  = INSN1(11,8);
20533      UInt imm8 = INSN1(7,0);
20534
20535      Bool valid = True;
20536      if (bP == 0 && bW == 0)                 valid = False;
20537      if (bW == 1 && (rN == rT || rN == rT2)) valid = False;
20538      if (isBadRegT(rT) || isBadRegT(rT2))    valid = False;
20539      if (bL == 1 && rT == rT2)               valid = False;
20540      /* It's OK to use PC as the base register only in the
20541         following case: ldrd Rt, Rt2, [PC, #+/-imm8] */
20542      if (rN == 15 && (bL == 0/*store*/
20543                       || bW == 1/*wb*/))     valid = False;
20544
20545      if (valid) {
20546         IRTemp preAddr = newTemp(Ity_I32);
20547         assign(preAddr, 15 == rN
20548                           ? binop(Iop_And32, getIRegT(15), mkU32(~3U))
20549                           : getIRegT(rN));
20550
20551         IRTemp postAddr = newTemp(Ity_I32);
20552         assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
20553                                mkexpr(preAddr), mkU32(imm8 << 2)));
20554
20555         IRTemp transAddr = bP == 1 ? postAddr : preAddr;
20556
20557         /* For almost all cases, we do the writeback after the transfers.
20558            However, that leaves the stack "uncovered" in this case:
20559               strd    rD, [sp, #-8]
20560            In which case, do the writeback to SP now, instead of later.
20561            This is bad in that it makes the insn non-restartable if the
20562            accesses fault, but at least keeps Memcheck happy. */
20563         Bool writeback_already_done = False;
20564         if (bL == 0/*store*/ && bW == 1/*wb*/
20565             && rN == 13 && rN != rT && rN != rT2
20566             && bU == 0/*minus*/ && (imm8 << 2) == 8) {
20567            putIRegT(rN, mkexpr(postAddr), condT);
20568            writeback_already_done = True;
20569         }
20570
20571         if (bL == 0) {
20572            IRTemp oldRt  = newTemp(Ity_I32);
20573            IRTemp oldRt2 = newTemp(Ity_I32);
20574            assign(oldRt,  getIRegT(rT));
20575            assign(oldRt2, getIRegT(rT2));
20576            storeGuardedLE( mkexpr(transAddr),
20577                            mkexpr(oldRt), condT );
20578            storeGuardedLE( binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
20579                            mkexpr(oldRt2), condT );
20580         } else {
20581            IRTemp oldRt  = newTemp(Ity_I32);
20582            IRTemp oldRt2 = newTemp(Ity_I32);
20583            IRTemp newRt  = newTemp(Ity_I32);
20584            IRTemp newRt2 = newTemp(Ity_I32);
20585            assign(oldRt,  llGetIReg(rT));
20586            assign(oldRt2, llGetIReg(rT2));
20587            loadGuardedLE( newRt, ILGop_Ident32,
20588                           mkexpr(transAddr),
20589                           mkexpr(oldRt), condT );
20590            loadGuardedLE( newRt2, ILGop_Ident32,
20591                           binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
20592                           mkexpr(oldRt2), condT );
20593            /* Put unconditionally, since we already switched on the condT
20594               in the guarded loads. */
20595            putIRegT(rT,  mkexpr(newRt),  IRTemp_INVALID);
20596            putIRegT(rT2, mkexpr(newRt2), IRTemp_INVALID);
20597         }
20598
20599         if (bW == 1 && !writeback_already_done) {
20600            putIRegT(rN, mkexpr(postAddr), condT);
20601         }
20602
20603         const HChar* nm = bL ? "ldrd" : "strd";
20604
20605         if (bP == 1 && bW == 0) {
20606            DIP("%s.w r%u, r%u, [r%u, #%c%u]\n",
20607                nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
20608         }
20609         else if (bP == 1 && bW == 1) {
20610            DIP("%s.w r%u, r%u, [r%u, #%c%u]!\n",
20611                nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
20612         }
20613         else {
20614            vassert(bP == 0 && bW == 1);
20615            DIP("%s.w r%u, r%u, [r%u], #%c%u\n",
20616                nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
20617         }
20618
20619         goto decode_success;
20620      }
20621   }
20622
20623   /* -------------- (T3) Bcond.W label -------------- */
20624   /* This variant carries its own condition, so can't be part of an
20625      IT block ... */
20626   if (INSN0(15,11) == BITS5(1,1,1,1,0)
20627       && INSN1(15,14) == BITS2(1,0)
20628       && INSN1(12,12) == 0) {
20629      UInt cond = INSN0(9,6);
20630      if (cond != ARMCondAL && cond != ARMCondNV) {
20631         Int simm21
20632            =   (INSN0(10,10) << (1 + 1 + 6 + 11 + 1))
20633              | (INSN1(11,11) << (1 + 6 + 11 + 1))
20634              | (INSN1(13,13) << (6 + 11 + 1))
20635              | (INSN0(5,0)   << (11 + 1))
20636              | (INSN1(10,0)  << 1);
20637         simm21 = (simm21 << 11) >> 11;
20638
20639         vassert(0 == (guest_R15_curr_instr_notENC & 1));
20640         UInt dst = simm21 + guest_R15_curr_instr_notENC + 4;
20641
20642         /* Not allowed in an IT block; SIGILL if so. */
20643         gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
20644
20645         IRTemp kondT = newTemp(Ity_I32);
20646         assign( kondT, mk_armg_calculate_condition(cond) );
20647         stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
20648                            Ijk_Boring,
20649                            IRConst_U32(dst | 1/*CPSR.T*/),
20650                            OFFB_R15T ));
20651         llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 4)
20652                              | 1 /*CPSR.T*/ ));
20653         dres.jk_StopHere = Ijk_Boring;
20654         dres.whatNext    = Dis_StopHere;
20655         DIP("b%s.w 0x%x\n", nCC(cond), dst);
20656         goto decode_success;
20657      }
20658   }
20659
20660   /* ---------------- (T4) B.W label ---------------- */
20661   /* ... whereas this variant doesn't carry its own condition, so it
20662      has to be either unconditional or the conditional by virtue of
20663      being the last in an IT block.  The upside is that there's 4
20664      more bits available for the jump offset, so it has a 16-times
20665      greater branch range than the T3 variant. */
20666   if (INSN0(15,11) == BITS5(1,1,1,1,0)
20667       && INSN1(15,14) == BITS2(1,0)
20668       && INSN1(12,12) == 1) {
20669      if (1) {
20670         UInt bS  = INSN0(10,10);
20671         UInt bJ1 = INSN1(13,13);
20672         UInt bJ2 = INSN1(11,11);
20673         UInt bI1 = 1 ^ (bJ1 ^ bS);
20674         UInt bI2 = 1 ^ (bJ2 ^ bS);
20675         Int simm25
20676            =   (bS          << (1 + 1 + 10 + 11 + 1))
20677              | (bI1         << (1 + 10 + 11 + 1))
20678              | (bI2         << (10 + 11 + 1))
20679              | (INSN0(9,0)  << (11 + 1))
20680              | (INSN1(10,0) << 1);
20681         simm25 = (simm25 << 7) >> 7;
20682
20683         vassert(0 == (guest_R15_curr_instr_notENC & 1));
20684         UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
20685
20686         /* If in an IT block, must be the last insn. */
20687         gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20688
20689         // go uncond
20690         mk_skip_over_T32_if_cond_is_false(condT);
20691         condT = IRTemp_INVALID;
20692         // now uncond
20693
20694         // branch to dst
20695         llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
20696         dres.jk_StopHere = Ijk_Boring;
20697         dres.whatNext    = Dis_StopHere;
20698         DIP("b.w 0x%x\n", dst);
20699         goto decode_success;
20700      }
20701   }
20702
20703   /* ------------------ TBB, TBH ------------------ */
20704   if (INSN0(15,4) == 0xE8D && INSN1(15,5) == 0x780) {
20705      UInt rN = INSN0(3,0);
20706      UInt rM = INSN1(3,0);
20707      UInt bH = INSN1(4,4);
20708      if (bH/*ATC*/ || (rN != 13 && !isBadRegT(rM))) {
20709         /* Must be last or not-in IT block */
20710         gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20711         /* Go uncond */
20712         mk_skip_over_T32_if_cond_is_false(condT);
20713         condT = IRTemp_INVALID;
20714
20715         IRExpr* ea
20716             = binop(Iop_Add32,
20717                     getIRegT(rN),
20718                     bH ? binop(Iop_Shl32, getIRegT(rM), mkU8(1))
20719                        : getIRegT(rM));
20720
20721         IRTemp delta = newTemp(Ity_I32);
20722         if (bH) {
20723            assign(delta, unop(Iop_16Uto32, loadLE(Ity_I16, ea)));
20724         } else {
20725            assign(delta, unop(Iop_8Uto32, loadLE(Ity_I8, ea)));
20726         }
20727
20728         llPutIReg(
20729            15,
20730            binop(Iop_Or32,
20731                  binop(Iop_Add32,
20732                        getIRegT(15),
20733                        binop(Iop_Shl32, mkexpr(delta), mkU8(1))
20734                  ),
20735                  mkU32(1)
20736         ));
20737         dres.jk_StopHere = Ijk_Boring;
20738         dres.whatNext    = Dis_StopHere;
20739         DIP("tb%c [r%u, r%u%s]\n",
20740             bH ? 'h' : 'b', rN, rM, bH ? ", LSL #1" : "");
20741         goto decode_success;
20742      }
20743   }
20744
20745   /* ------------------ UBFX ------------------ */
20746   /* ------------------ SBFX ------------------ */
20747   /* There's also ARM versions of same, but it doesn't seem worth the
20748      hassle to common up the handling (it's only a couple of C
20749      statements). */
20750   if ((INSN0(15,4) == 0xF3C // UBFX
20751        || INSN0(15,4) == 0xF34) // SBFX
20752       && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
20753      UInt rN  = INSN0(3,0);
20754      UInt rD  = INSN1(11,8);
20755      UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
20756      UInt wm1 = INSN1(4,0);
20757      UInt msb =  lsb + wm1;
20758      if (!isBadRegT(rD) && !isBadRegT(rN) && msb <= 31) {
20759         Bool   isU  = INSN0(15,4) == 0xF3C;
20760         IRTemp src  = newTemp(Ity_I32);
20761         IRTemp tmp  = newTemp(Ity_I32);
20762         IRTemp res  = newTemp(Ity_I32);
20763         UInt   mask = ((1 << wm1) - 1) + (1 << wm1);
20764         vassert(msb >= 0 && msb <= 31);
20765         vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
20766
20767         assign(src, getIRegT(rN));
20768         assign(tmp, binop(Iop_And32,
20769                           binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
20770                           mkU32(mask)));
20771         assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
20772                           binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
20773                           mkU8(31-wm1)));
20774
20775         putIRegT(rD, mkexpr(res), condT);
20776
20777         DIP("%s r%u, r%u, #%u, #%u\n",
20778             isU ? "ubfx" : "sbfx", rD, rN, lsb, wm1 + 1);
20779         goto decode_success;
20780      }
20781   }
20782
20783   /* ------------------ UXTB ------------------ */
20784   /* ------------------ UXTH ------------------ */
20785   /* ------------------ SXTB ------------------ */
20786   /* ------------------ SXTH ------------------ */
20787   /* ----------------- UXTB16 ----------------- */
20788   /* ----------------- SXTB16 ----------------- */
20789   /* FIXME: this is an exact duplicate of the ARM version.  They
20790      should be commoned up. */
20791   if ((INSN0(15,0) == 0xFA5F     // UXTB
20792        || INSN0(15,0) == 0xFA1F  // UXTH
20793        || INSN0(15,0) == 0xFA4F  // SXTB
20794        || INSN0(15,0) == 0xFA0F  // SXTH
20795        || INSN0(15,0) == 0xFA3F  // UXTB16
20796        || INSN0(15,0) == 0xFA2F) // SXTB16
20797       && INSN1(15,12) == BITS4(1,1,1,1)
20798       && INSN1(7,6) == BITS2(1,0)) {
20799      UInt rD = INSN1(11,8);
20800      UInt rM = INSN1(3,0);
20801      UInt rot = INSN1(5,4);
20802      if (!isBadRegT(rD) && !isBadRegT(rM)) {
20803         const HChar* nm = "???";
20804         IRTemp srcT = newTemp(Ity_I32);
20805         IRTemp rotT = newTemp(Ity_I32);
20806         IRTemp dstT = newTemp(Ity_I32);
20807         assign(srcT, getIRegT(rM));
20808         assign(rotT, genROR32(srcT, 8 * rot));
20809         switch (INSN0(15,0)) {
20810            case 0xFA5F: // UXTB
20811               nm = "uxtb";
20812               assign(dstT, unop(Iop_8Uto32,
20813                                 unop(Iop_32to8, mkexpr(rotT))));
20814               break;
20815            case 0xFA1F: // UXTH
20816               nm = "uxth";
20817               assign(dstT, unop(Iop_16Uto32,
20818                                 unop(Iop_32to16, mkexpr(rotT))));
20819               break;
20820            case 0xFA4F: // SXTB
20821               nm = "sxtb";
20822               assign(dstT, unop(Iop_8Sto32,
20823                                 unop(Iop_32to8, mkexpr(rotT))));
20824               break;
20825            case 0xFA0F: // SXTH
20826               nm = "sxth";
20827               assign(dstT, unop(Iop_16Sto32,
20828                                 unop(Iop_32to16, mkexpr(rotT))));
20829               break;
20830            case 0xFA3F: // UXTB16
20831               nm = "uxtb16";
20832               assign(dstT, binop(Iop_And32, mkexpr(rotT),
20833                                             mkU32(0x00FF00FF)));
20834               break;
20835            case 0xFA2F: { // SXTB16
20836               nm = "sxtb16";
20837               IRTemp lo32 = newTemp(Ity_I32);
20838               IRTemp hi32 = newTemp(Ity_I32);
20839               assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
20840               assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
20841               assign(
20842                  dstT,
20843                  binop(Iop_Or32,
20844                        binop(Iop_And32,
20845                              unop(Iop_8Sto32,
20846                                   unop(Iop_32to8, mkexpr(lo32))),
20847                              mkU32(0xFFFF)),
20848                        binop(Iop_Shl32,
20849                              unop(Iop_8Sto32,
20850                                   unop(Iop_32to8, mkexpr(hi32))),
20851                              mkU8(16))
20852               ));
20853               break;
20854            }
20855            default:
20856               vassert(0);
20857         }
20858         putIRegT(rD, mkexpr(dstT), condT);
20859         DIP("%s r%u, r%u, ror #%u\n", nm, rD, rM, 8 * rot);
20860         goto decode_success;
20861      }
20862   }
20863
20864   /* -------------- MUL.W Rd, Rn, Rm -------------- */
20865   if (INSN0(15,4) == 0xFB0
20866       && (INSN1(15,0) & 0xF0F0) == 0xF000) {
20867      UInt rN = INSN0(3,0);
20868      UInt rD = INSN1(11,8);
20869      UInt rM = INSN1(3,0);
20870      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
20871         IRTemp res = newTemp(Ity_I32);
20872         assign(res, binop(Iop_Mul32, getIRegT(rN), getIRegT(rM)));
20873         putIRegT(rD, mkexpr(res), condT);
20874         DIP("mul.w r%u, r%u, r%u\n", rD, rN, rM);
20875         goto decode_success;
20876      }
20877   }
20878
20879   /* -------------- SDIV.W Rd, Rn, Rm -------------- */
20880   if (INSN0(15,4) == 0xFB9
20881       && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
20882      UInt rN = INSN0(3,0);
20883      UInt rD = INSN1(11,8);
20884      UInt rM = INSN1(3,0);
20885      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
20886         IRTemp res  = newTemp(Ity_I32);
20887         IRTemp argL = newTemp(Ity_I32);
20888         IRTemp argR = newTemp(Ity_I32);
20889         assign(argL, getIRegT(rN));
20890         assign(argR, getIRegT(rM));
20891         assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
20892         putIRegT(rD, mkexpr(res), condT);
20893         DIP("sdiv.w r%u, r%u, r%u\n", rD, rN, rM);
20894         goto decode_success;
20895      }
20896   }
20897
20898   /* -------------- UDIV.W Rd, Rn, Rm -------------- */
20899   if (INSN0(15,4) == 0xFBB
20900       && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
20901      UInt rN = INSN0(3,0);
20902      UInt rD = INSN1(11,8);
20903      UInt rM = INSN1(3,0);
20904      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
20905         IRTemp res  = newTemp(Ity_I32);
20906         IRTemp argL = newTemp(Ity_I32);
20907         IRTemp argR = newTemp(Ity_I32);
20908         assign(argL, getIRegT(rN));
20909         assign(argR, getIRegT(rM));
20910         assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
20911         putIRegT(rD, mkexpr(res), condT);
20912         DIP("udiv.w r%u, r%u, r%u\n", rD, rN, rM);
20913         goto decode_success;
20914      }
20915   }
20916
20917   /* ------------------ {U,S}MULL ------------------ */
20918   if ((INSN0(15,4) == 0xFB8 || INSN0(15,4) == 0xFBA)
20919       && INSN1(7,4) == BITS4(0,0,0,0)) {
20920      UInt isU  = INSN0(5,5);
20921      UInt rN   = INSN0(3,0);
20922      UInt rDlo = INSN1(15,12);
20923      UInt rDhi = INSN1(11,8);
20924      UInt rM   = INSN1(3,0);
20925      if (!isBadRegT(rDhi) && !isBadRegT(rDlo)
20926          && !isBadRegT(rN) && !isBadRegT(rM) && rDlo != rDhi) {
20927         IRTemp res   = newTemp(Ity_I64);
20928         assign(res, binop(isU ? Iop_MullU32 : Iop_MullS32,
20929                           getIRegT(rN), getIRegT(rM)));
20930         putIRegT( rDhi, unop(Iop_64HIto32, mkexpr(res)), condT );
20931         putIRegT( rDlo, unop(Iop_64to32, mkexpr(res)), condT );
20932         DIP("%cmull r%u, r%u, r%u, r%u\n",
20933             isU ? 'u' : 's', rDlo, rDhi, rN, rM);
20934         goto decode_success;
20935      }
20936   }
20937
20938   /* ------------------ ML{A,S} ------------------ */
20939   if (INSN0(15,4) == 0xFB0
20940       && (   INSN1(7,4) == BITS4(0,0,0,0)    // MLA
20941           || INSN1(7,4) == BITS4(0,0,0,1))) { // MLS
20942      UInt rN = INSN0(3,0);
20943      UInt rA = INSN1(15,12);
20944      UInt rD = INSN1(11,8);
20945      UInt rM = INSN1(3,0);
20946      if (!isBadRegT(rD) && !isBadRegT(rN)
20947          && !isBadRegT(rM) && !isBadRegT(rA)) {
20948         Bool   isMLA = INSN1(7,4) == BITS4(0,0,0,0);
20949         IRTemp res   = newTemp(Ity_I32);
20950         assign(res,
20951                binop(isMLA ? Iop_Add32 : Iop_Sub32,
20952                      getIRegT(rA),
20953                      binop(Iop_Mul32, getIRegT(rN), getIRegT(rM))));
20954         putIRegT(rD, mkexpr(res), condT);
20955         DIP("%s r%u, r%u, r%u, r%u\n",
20956             isMLA ? "mla" : "mls", rD, rN, rM, rA);
20957         goto decode_success;
20958      }
20959   }
20960
20961   /* ------------------ (T3) ADR ------------------ */
20962   if ((INSN0(15,0) == 0xF20F || INSN0(15,0) == 0xF60F)
20963       && INSN1(15,15) == 0) {
20964      /* rD = align4(PC) + imm32 */
20965      UInt rD = INSN1(11,8);
20966      if (!isBadRegT(rD)) {
20967         UInt imm32 = (INSN0(10,10) << 11)
20968                      | (INSN1(14,12) << 8) | INSN1(7,0);
20969         putIRegT(rD, binop(Iop_Add32,
20970                            binop(Iop_And32, getIRegT(15), mkU32(~3U)),
20971                            mkU32(imm32)),
20972                      condT);
20973         DIP("add r%u, pc, #%u\n", rD, imm32);
20974         goto decode_success;
20975      }
20976   }
20977
20978   /* ----------------- (T1) UMLAL ----------------- */
20979   /* ----------------- (T1) SMLAL ----------------- */
20980   if ((INSN0(15,4) == 0xFBE // UMLAL
20981        || INSN0(15,4) == 0xFBC) // SMLAL
20982       && INSN1(7,4) == BITS4(0,0,0,0)) {
20983      UInt rN   = INSN0(3,0);
20984      UInt rDlo = INSN1(15,12);
20985      UInt rDhi = INSN1(11,8);
20986      UInt rM   = INSN1(3,0);
20987      if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
20988          && !isBadRegT(rM) && rDhi != rDlo) {
20989         Bool   isS   = INSN0(15,4) == 0xFBC;
20990         IRTemp argL  = newTemp(Ity_I32);
20991         IRTemp argR  = newTemp(Ity_I32);
20992         IRTemp old   = newTemp(Ity_I64);
20993         IRTemp res   = newTemp(Ity_I64);
20994         IRTemp resHi = newTemp(Ity_I32);
20995         IRTemp resLo = newTemp(Ity_I32);
20996         IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
20997         assign( argL, getIRegT(rM));
20998         assign( argR, getIRegT(rN));
20999         assign( old, binop(Iop_32HLto64, getIRegT(rDhi), getIRegT(rDlo)) );
21000         assign( res, binop(Iop_Add64,
21001                            mkexpr(old),
21002                            binop(mulOp, mkexpr(argL), mkexpr(argR))) );
21003         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
21004         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
21005         putIRegT( rDhi, mkexpr(resHi), condT );
21006         putIRegT( rDlo, mkexpr(resLo), condT );
21007         DIP("%cmlal r%u, r%u, r%u, r%u\n",
21008             isS ? 's' : 'u', rDlo, rDhi, rN, rM);
21009         goto decode_success;
21010      }
21011   }
21012
21013   /* ------------------ (T1) UMAAL ------------------ */
21014   if (INSN0(15,4) == 0xFBE && INSN1(7,4) == BITS4(0,1,1,0)) {
21015      UInt rN   = INSN0(3,0);
21016      UInt rDlo = INSN1(15,12);
21017      UInt rDhi = INSN1(11,8);
21018      UInt rM   = INSN1(3,0);
21019      if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
21020          && !isBadRegT(rM) && rDhi != rDlo) {
21021         IRTemp argN   = newTemp(Ity_I32);
21022         IRTemp argM   = newTemp(Ity_I32);
21023         IRTemp argDhi = newTemp(Ity_I32);
21024         IRTemp argDlo = newTemp(Ity_I32);
21025         IRTemp res    = newTemp(Ity_I64);
21026         IRTemp resHi  = newTemp(Ity_I32);
21027         IRTemp resLo  = newTemp(Ity_I32);
21028         assign( argN,   getIRegT(rN) );
21029         assign( argM,   getIRegT(rM) );
21030         assign( argDhi, getIRegT(rDhi) );
21031         assign( argDlo, getIRegT(rDlo) );
21032         assign( res,
21033                 binop(Iop_Add64,
21034                       binop(Iop_Add64,
21035                             binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
21036                             unop(Iop_32Uto64, mkexpr(argDhi))),
21037                       unop(Iop_32Uto64, mkexpr(argDlo))) );
21038         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
21039         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
21040         putIRegT( rDhi, mkexpr(resHi), condT );
21041         putIRegT( rDlo, mkexpr(resLo), condT );
21042         DIP("umaal r%u, r%u, r%u, r%u\n", rDlo, rDhi, rN, rM);
21043         goto decode_success;
21044      }
21045   }
21046
21047   /* ------------------- (T1) SMMUL{R} ------------------ */
21048   if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
21049       && INSN0(6,4) == BITS3(1,0,1)
21050       && INSN1(15,12) == BITS4(1,1,1,1)
21051       && INSN1(7,5) == BITS3(0,0,0)) {
21052      UInt bitR = INSN1(4,4);
21053      UInt rD = INSN1(11,8);
21054      UInt rM = INSN1(3,0);
21055      UInt rN = INSN0(3,0);
21056      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
21057         IRExpr* res
21058         = unop(Iop_64HIto32,
21059                binop(Iop_Add64,
21060                      binop(Iop_MullS32, getIRegT(rN), getIRegT(rM)),
21061                      mkU64(bitR ? 0x80000000ULL : 0ULL)));
21062         putIRegT(rD, res, condT);
21063         DIP("smmul%s r%u, r%u, r%u\n",
21064             bitR ? "r" : "", rD, rN, rM);
21065         goto decode_success;
21066      }
21067   }
21068
21069   /* ------------------- (T1) SMMLA{R} ------------------ */
21070   if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
21071       && INSN0(6,4) == BITS3(1,0,1)
21072       && INSN1(7,5) == BITS3(0,0,0)) {
21073      UInt bitR = INSN1(4,4);
21074      UInt rA = INSN1(15,12);
21075      UInt rD = INSN1(11,8);
21076      UInt rM = INSN1(3,0);
21077      UInt rN = INSN0(3,0);
21078      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && (rA != 13)) {
21079         IRExpr* res
21080         = unop(Iop_64HIto32,
21081                binop(Iop_Add64,
21082                      binop(Iop_Add64,
21083                            binop(Iop_32HLto64, getIRegT(rA), mkU32(0)),
21084                            binop(Iop_MullS32, getIRegT(rN), getIRegT(rM))),
21085                      mkU64(bitR ? 0x80000000ULL : 0ULL)));
21086         putIRegT(rD, res, condT);
21087         DIP("smmla%s r%u, r%u, r%u, r%u\n",
21088             bitR ? "r" : "", rD, rN, rM, rA);
21089         goto decode_success;
21090      }
21091   }
21092
21093   /* ------------------ (T2) ADR ------------------ */
21094   if ((INSN0(15,0) == 0xF2AF || INSN0(15,0) == 0xF6AF)
21095       && INSN1(15,15) == 0) {
21096      /* rD = align4(PC) - imm32 */
21097      UInt rD = INSN1(11,8);
21098      if (!isBadRegT(rD)) {
21099         UInt imm32 = (INSN0(10,10) << 11)
21100                      | (INSN1(14,12) << 8) | INSN1(7,0);
21101         putIRegT(rD, binop(Iop_Sub32,
21102                            binop(Iop_And32, getIRegT(15), mkU32(~3U)),
21103                            mkU32(imm32)),
21104                      condT);
21105         DIP("sub r%u, pc, #%u\n", rD, imm32);
21106         goto decode_success;
21107      }
21108   }
21109
21110   /* ------------------- (T1) BFI ------------------- */
21111   /* ------------------- (T1) BFC ------------------- */
21112   if (INSN0(15,4) == 0xF36 && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
21113      UInt rD  = INSN1(11,8);
21114      UInt rN  = INSN0(3,0);
21115      UInt msb = INSN1(4,0);
21116      UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
21117      if (isBadRegT(rD) || rN == 13 || msb < lsb) {
21118         /* undecodable; fall through */
21119      } else {
21120         IRTemp src    = newTemp(Ity_I32);
21121         IRTemp olddst = newTemp(Ity_I32);
21122         IRTemp newdst = newTemp(Ity_I32);
21123         UInt   mask = 1 << (msb - lsb);
21124         mask = (mask - 1) + mask;
21125         vassert(mask != 0); // guaranteed by "msb < lsb" check above
21126         mask <<= lsb;
21127
21128         assign(src, rN == 15 ? mkU32(0) : getIRegT(rN));
21129         assign(olddst, getIRegT(rD));
21130         assign(newdst,
21131                binop(Iop_Or32,
21132                   binop(Iop_And32,
21133                         binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
21134                         mkU32(mask)),
21135                   binop(Iop_And32,
21136                         mkexpr(olddst),
21137                         mkU32(~mask)))
21138               );
21139
21140         putIRegT(rD, mkexpr(newdst), condT);
21141
21142         if (rN == 15) {
21143            DIP("bfc r%u, #%u, #%u\n",
21144                rD, lsb, msb-lsb+1);
21145         } else {
21146            DIP("bfi r%u, r%u, #%u, #%u\n",
21147                rD, rN, lsb, msb-lsb+1);
21148         }
21149         goto decode_success;
21150      }
21151   }
21152
21153   /* ------------------- (T1) SXTAH ------------------- */
21154   /* ------------------- (T1) UXTAH ------------------- */
21155   if ((INSN0(15,4) == 0xFA1      // UXTAH
21156        || INSN0(15,4) == 0xFA0)  // SXTAH
21157       && INSN1(15,12) == BITS4(1,1,1,1)
21158       && INSN1(7,6) == BITS2(1,0)) {
21159      Bool isU = INSN0(15,4) == 0xFA1;
21160      UInt rN  = INSN0(3,0);
21161      UInt rD  = INSN1(11,8);
21162      UInt rM  = INSN1(3,0);
21163      UInt rot = INSN1(5,4);
21164      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
21165         IRTemp srcL = newTemp(Ity_I32);
21166         IRTemp srcR = newTemp(Ity_I32);
21167         IRTemp res  = newTemp(Ity_I32);
21168         assign(srcR, getIRegT(rM));
21169         assign(srcL, getIRegT(rN));
21170         assign(res,  binop(Iop_Add32,
21171                            mkexpr(srcL),
21172                            unop(isU ? Iop_16Uto32 : Iop_16Sto32,
21173                                 unop(Iop_32to16,
21174                                      genROR32(srcR, 8 * rot)))));
21175         putIRegT(rD, mkexpr(res), condT);
21176         DIP("%cxtah r%u, r%u, r%u, ror #%u\n",
21177             isU ? 'u' : 's', rD, rN, rM, rot);
21178         goto decode_success;
21179      }
21180   }
21181
21182   /* ------------------- (T1) SXTAB ------------------- */
21183   /* ------------------- (T1) UXTAB ------------------- */
21184   if ((INSN0(15,4) == 0xFA5      // UXTAB
21185        || INSN0(15,4) == 0xFA4)  // SXTAB
21186       && INSN1(15,12) == BITS4(1,1,1,1)
21187       && INSN1(7,6) == BITS2(1,0)) {
21188      Bool isU = INSN0(15,4) == 0xFA5;
21189      UInt rN  = INSN0(3,0);
21190      UInt rD  = INSN1(11,8);
21191      UInt rM  = INSN1(3,0);
21192      UInt rot = INSN1(5,4);
21193      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
21194         IRTemp srcL = newTemp(Ity_I32);
21195         IRTemp srcR = newTemp(Ity_I32);
21196         IRTemp res  = newTemp(Ity_I32);
21197         assign(srcR, getIRegT(rM));
21198         assign(srcL, getIRegT(rN));
21199         assign(res,  binop(Iop_Add32,
21200                            mkexpr(srcL),
21201                            unop(isU ? Iop_8Uto32 : Iop_8Sto32,
21202                                 unop(Iop_32to8,
21203                                      genROR32(srcR, 8 * rot)))));
21204         putIRegT(rD, mkexpr(res), condT);
21205         DIP("%cxtab r%u, r%u, r%u, ror #%u\n",
21206             isU ? 'u' : 's', rD, rN, rM, rot);
21207         goto decode_success;
21208      }
21209   }
21210
21211   /* ------------------- (T1) CLZ ------------------- */
21212   if (INSN0(15,4) == 0xFAB
21213       && INSN1(15,12) == BITS4(1,1,1,1)
21214       && INSN1(7,4) == BITS4(1,0,0,0)) {
21215      UInt rM1 = INSN0(3,0);
21216      UInt rD  = INSN1(11,8);
21217      UInt rM2 = INSN1(3,0);
21218      if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
21219         IRTemp arg = newTemp(Ity_I32);
21220         IRTemp res = newTemp(Ity_I32);
21221         assign(arg, getIRegT(rM1));
21222         assign(res, IRExpr_ITE(
21223                        binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
21224                        mkU32(32),
21225                        unop(Iop_Clz32, mkexpr(arg))
21226         ));
21227         putIRegT(rD, mkexpr(res), condT);
21228         DIP("clz r%u, r%u\n", rD, rM1);
21229         goto decode_success;
21230      }
21231   }
21232
21233   /* ------------------- (T1) RBIT ------------------- */
21234   if (INSN0(15,4) == 0xFA9
21235       && INSN1(15,12) == BITS4(1,1,1,1)
21236       && INSN1(7,4) == BITS4(1,0,1,0)) {
21237      UInt rM1 = INSN0(3,0);
21238      UInt rD  = INSN1(11,8);
21239      UInt rM2 = INSN1(3,0);
21240      if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
21241         IRTemp arg = newTemp(Ity_I32);
21242         assign(arg, getIRegT(rM1));
21243         IRTemp res = gen_BITREV(arg);
21244         putIRegT(rD, mkexpr(res), condT);
21245         DIP("rbit r%u, r%u\n", rD, rM1);
21246         goto decode_success;
21247      }
21248   }
21249
21250   /* ------------------- (T2) REV   ------------------- */
21251   /* ------------------- (T2) REV16 ------------------- */
21252   if (INSN0(15,4) == 0xFA9
21253       && INSN1(15,12) == BITS4(1,1,1,1)
21254       && (   INSN1(7,4) == BITS4(1,0,0,0)     // REV
21255           || INSN1(7,4) == BITS4(1,0,0,1))) { // REV16
21256      UInt rM1   = INSN0(3,0);
21257      UInt rD    = INSN1(11,8);
21258      UInt rM2   = INSN1(3,0);
21259      Bool isREV = INSN1(7,4) == BITS4(1,0,0,0);
21260      if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
21261         IRTemp arg = newTemp(Ity_I32);
21262         assign(arg, getIRegT(rM1));
21263         IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
21264         putIRegT(rD, mkexpr(res), condT);
21265         DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM1);
21266         goto decode_success;
21267      }
21268   }
21269
21270   /* ------------------- (T2) REVSH ------------------ */
21271   if (INSN0(15,4) == 0xFA9
21272       && INSN1(15,12) == BITS4(1,1,1,1)
21273       && INSN1(7,4) == BITS4(1,0,1,1)) {
21274      UInt rM1 = INSN0(3,0);
21275      UInt rM2 = INSN1(3,0);
21276      UInt rD  = INSN1(11,8);
21277      if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
21278         IRTemp irt_rM  = newTemp(Ity_I32);
21279         IRTemp irt_hi  = newTemp(Ity_I32);
21280         IRTemp irt_low = newTemp(Ity_I32);
21281         IRTemp irt_res = newTemp(Ity_I32);
21282         assign(irt_rM, getIRegT(rM1));
21283         assign(irt_hi,
21284                binop(Iop_Sar32,
21285                      binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
21286                      mkU8(16)
21287                )
21288         );
21289         assign(irt_low,
21290                binop(Iop_And32,
21291                      binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
21292                      mkU32(0xFF)
21293                )
21294         );
21295         assign(irt_res,
21296                binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
21297         );
21298         putIRegT(rD, mkexpr(irt_res), condT);
21299         DIP("revsh r%u, r%u\n", rD, rM1);
21300         goto decode_success;
21301      }
21302   }
21303
21304   /* -------------- (T1) MSR apsr, reg -------------- */
21305   if (INSN0(15,4) == 0xF38
21306       && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(9,0) == 0x000) {
21307      UInt rN          = INSN0(3,0);
21308      UInt write_ge    = INSN1(10,10);
21309      UInt write_nzcvq = INSN1(11,11);
21310      if (!isBadRegT(rN) && (write_nzcvq || write_ge)) {
21311         IRTemp rNt = newTemp(Ity_I32);
21312         assign(rNt, getIRegT(rN));
21313         desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
21314         DIP("msr cpsr_%s%s, r%u\n",
21315             write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
21316         goto decode_success;
21317      }
21318   }
21319
21320   /* -------------- (T1) MRS reg, apsr -------------- */
21321   if (INSN0(15,0) == 0xF3EF
21322       && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(7,0) == 0x00) {
21323      UInt rD = INSN1(11,8);
21324      if (!isBadRegT(rD)) {
21325         IRTemp apsr = synthesise_APSR();
21326         putIRegT( rD, mkexpr(apsr), condT );
21327         DIP("mrs r%u, cpsr\n", rD);
21328         goto decode_success;
21329      }
21330   }
21331
   /* ----------------- (T1) LDREX ----------------- */
   /* Word-sized load-exclusive from [rN + imm8*4] into rT.  The case
      is rejected (control falls through to the later decoders) when
      rT is a bad register or rN is r15. */
   if (INSN0(15,4) == 0xE85 && INSN1(11,8) == BITS4(1,1,1,1)) {
      UInt rN   = INSN0(3,0);
      UInt rT   = INSN1(15,12);
      UInt imm8 = INSN1(7,0);
      if (!isBadRegT(rT) && rN != 15) {
         IRTemp res;
         // go uncond
         mk_skip_over_T32_if_cond_is_false( condT );
         // now uncond
         res = newTemp(Ity_I32);
         /* The NULL store-data argument marks this LLSC as a load;
            'res' receives the loaded 32-bit value. */
         stmt( IRStmt_LLSC(Iend_LE,
                           res,
                           binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
                           NULL/*this is a load*/ ));
         /* Unguarded write: the condition was already dealt with by
            the skip emitted above. */
         putIRegT(rT, mkexpr(res), IRTemp_INVALID);
         DIP("ldrex r%u, [r%u, #+%u]\n", rT, rN, imm8 * 4);
         goto decode_success;
      }
   }
21352
   /* --------------- (T1) LDREX{B,H} --------------- */
   /* Byte or halfword load-exclusive from [rN] into rT, with the
      loaded value zero-extended to 32 bits.  INSN1(11,0) selects the
      size: 0xF4F is the byte form, 0xF5F the halfword form. */
   if (INSN0(15,4) == 0xE8D
       && (INSN1(11,0) == 0xF4F || INSN1(11,0) == 0xF5F)) {
      UInt rN  = INSN0(3,0);
      UInt rT  = INSN1(15,12);
      Bool isH = INSN1(11,0) == 0xF5F;
      if (!isBadRegT(rT) && rN != 15) {
         IRTemp res;
         // go uncond
         mk_skip_over_T32_if_cond_is_false( condT );
         // now uncond
         /* The type of 'res' determines the width of the access. */
         res = newTemp(isH ? Ity_I16 : Ity_I8);
         stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
                           NULL/*this is a load*/ ));
         /* Widen (unsigned) to 32 bits before writing rT. */
         putIRegT(rT, unop(isH ? Iop_16Uto32 : Iop_8Uto32, mkexpr(res)),
                      IRTemp_INVALID);
         DIP("ldrex%c r%u, [r%u]\n", isH ? 'h' : 'b', rT, rN);
         goto decode_success;
      }
   }
21373
   /* --------------- (T1) LDREXD --------------- */
   /* Doubleword load-exclusive from [rN]: the low 32 bits of the
      loaded value go to rT and the high 32 bits to rT2.  Rejected if
      either destination is a bad register, if they are the same
      register, or if rN is r15. */
   if (INSN0(15,4) == 0xE8D && INSN1(7,0) == 0x7F) {
      UInt rN  = INSN0(3,0);
      UInt rT  = INSN1(15,12);
      UInt rT2 = INSN1(11,8);
      if (!isBadRegT(rT) && !isBadRegT(rT2) && rT != rT2 && rN != 15) {
         IRTemp res;
         // go uncond
         mk_skip_over_T32_if_cond_is_false( condT );
         // now uncond
         res = newTemp(Ity_I64);
         // FIXME: assumes little-endian guest
         stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
                           NULL/*this is a load*/ ));
         // FIXME: assumes little-endian guest
         /* Split the 64-bit result across the register pair. */
         putIRegT(rT,  unop(Iop_64to32,   mkexpr(res)), IRTemp_INVALID);
         putIRegT(rT2, unop(Iop_64HIto32, mkexpr(res)), IRTemp_INVALID);
         DIP("ldrexd r%u, r%u, [r%u]\n", rT, rT2, rN);
         goto decode_success;
      }
   }
21395
   /* ----------------- (T1) STREX ----------------- */
   /* Word-sized store-exclusive of rT to [rN + imm8*4]; rD receives
      the status (0 on success, 1 on failure).  Rejected if rD or rT
      is a bad register, if rN is r15, or if rD coincides with rN or
      rT. */
   if (INSN0(15,4) == 0xE84) {
      UInt rN   = INSN0(3,0);
      UInt rT   = INSN1(15,12);
      UInt rD   = INSN1(11,8);
      UInt imm8 = INSN1(7,0);
      if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
          && rD != rN && rD != rT) {
         IRTemp resSC1, resSC32;
         // go uncond
         mk_skip_over_T32_if_cond_is_false( condT );
         // now uncond
         /* Ok, now we're unconditional.  Do the store. */
         resSC1 = newTemp(Ity_I1);
         /* A non-NULL data argument makes this LLSC a
            store-conditional; resSC1 receives its 1-bit outcome. */
         stmt( IRStmt_LLSC(Iend_LE,
                           resSC1,
                           binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
                           getIRegT(rT)) );
         /* Set rD to 1 on failure, 0 on success.  Currently we have
            resSC1 == 0 on failure, 1 on success. */
         resSC32 = newTemp(Ity_I32);
         /* Invert the 1-bit outcome, then widen it to 32 bits. */
         assign(resSC32,
                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
         putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
         DIP("strex r%u, r%u, [r%u, #+%u]\n", rD, rT, rN, imm8 * 4);
         goto decode_success;
      }
   }
21424
   /* --------------- (T1) STREX{B,H} --------------- */
   /* Byte or halfword store-exclusive of the low bits of rT to [rN];
      rD receives the status (0 on success, 1 on failure).
      INSN1(11,4) selects the size: 0xF4 is the byte form, 0xF5 the
      halfword form. */
   if (INSN0(15,4) == 0xE8C
       && (INSN1(11,4) == 0xF4 || INSN1(11,4) == 0xF5)) {
      UInt rN  = INSN0(3,0);
      UInt rT  = INSN1(15,12);
      UInt rD  = INSN1(3,0);
      Bool isH = INSN1(11,4) == 0xF5;
      if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
          && rD != rN && rD != rT) {
         IRTemp resSC1, resSC32;
         // go uncond
         mk_skip_over_T32_if_cond_is_false( condT );
         // now uncond
         /* Ok, now we're unconditional.  Do the store. */
         resSC1 = newTemp(Ity_I1);
         /* The stored data is rT narrowed to 16 or 8 bits. */
         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN),
                           unop(isH ? Iop_32to16 : Iop_32to8,
                                getIRegT(rT))) );
         /* Set rD to 1 on failure, 0 on success.  Currently we have
            resSC1 == 0 on failure, 1 on success. */
         resSC32 = newTemp(Ity_I32);
         /* Invert the 1-bit outcome, then widen it to 32 bits. */
         assign(resSC32,
                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
         putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
         DIP("strex%c r%u, r%u, [r%u]\n", isH ? 'h' : 'b', rD, rT, rN);
         goto decode_success;
      }
   }
21453
21454   /* ---------------- (T1) STREXD ---------------- */
21455   if (INSN0(15,4) == 0xE8C && INSN1(7,4) == BITS4(0,1,1,1)) {
21456      UInt rN  = INSN0(3,0);
21457      UInt rT  = INSN1(15,12);
21458      UInt rT2 = INSN1(11,8);
21459      UInt rD  = INSN1(3,0);
21460      if (!isBadRegT(rD) && !isBadRegT(rT) && !isBadRegT(rT2)
21461          && rN != 15 && rD != rN && rD != rT && rD != rT) {
21462         IRTemp resSC1, resSC32, data;
21463         // go uncond
21464         mk_skip_over_T32_if_cond_is_false( condT );
21465         // now uncond
21466         /* Ok, now we're unconditional.  Do the store. */
21467         resSC1 = newTemp(Ity_I1);
21468         data = newTemp(Ity_I64);
21469         // FIXME: assumes little-endian guest
21470         assign(data, binop(Iop_32HLto64, getIRegT(rT2), getIRegT(rT)));
21471         // FIXME: assumes little-endian guest
21472         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN), mkexpr(data)));
21473         /* Set rD to 1 on failure, 0 on success.  Currently we have
21474            resSC1 == 0 on failure, 1 on success. */
21475         resSC32 = newTemp(Ity_I32);
21476         assign(resSC32,
21477                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
21478         putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
21479         DIP("strexd r%u, r%u, r%u, [r%u]\n", rD, rT, rT2, rN);
21480         goto decode_success;
21481      }
21482   }
21483
21484   /* -------------- v7 barrier insns -------------- */
21485   if (INSN0(15,0) == 0xF3BF && (INSN1(15,0) & 0xFF00) == 0x8F00) {
21486      /* FIXME: should this be unconditional? */
21487      /* XXX this isn't really right, is it?  The generated IR does
21488         them unconditionally.  I guess it doesn't matter since it
21489         doesn't do any harm to do them even when the guarding
21490         condition is false -- it's just a performance loss. */
21491      switch (INSN1(7,0)) {
21492         case 0x4F: /* DSB sy */
21493         case 0x4E: /* DSB st */
21494         case 0x4B: /* DSB ish */
21495         case 0x4A: /* DSB ishst */
21496         case 0x47: /* DSB nsh */
21497         case 0x46: /* DSB nshst */
21498         case 0x43: /* DSB osh */
21499         case 0x42: /* DSB oshst */
21500            stmt( IRStmt_MBE(Imbe_Fence) );
21501            DIP("DSB\n");
21502            goto decode_success;
21503         case 0x5F: /* DMB sy */
21504         case 0x5E: /* DMB st */
21505         case 0x5B: /* DMB ish */
21506         case 0x5A: /* DMB ishst */
21507         case 0x57: /* DMB nsh */
21508         case 0x56: /* DMB nshst */
21509         case 0x53: /* DMB osh */
21510         case 0x52: /* DMB oshst */
21511            stmt( IRStmt_MBE(Imbe_Fence) );
21512            DIP("DMB\n");
21513            goto decode_success;
21514         case 0x6F: /* ISB */
21515            stmt( IRStmt_MBE(Imbe_Fence) );
21516            DIP("ISB\n");
21517            goto decode_success;
21518         default:
21519            break;
21520      }
21521   }
21522
21523   /* ---------------------- PLD{,W} ---------------------- */
21524   if ((INSN0(15,4) & 0xFFD) == 0xF89 && INSN1(15,12) == 0xF) {
21525      /* FIXME: should this be unconditional? */
21526      /* PLD/PLDW immediate, encoding T1 */
21527      UInt rN    = INSN0(3,0);
21528      UInt bW    = INSN0(5,5);
21529      UInt imm12 = INSN1(11,0);
21530      DIP("pld%s [r%u, #%u]\n", bW ? "w" : "",  rN, imm12);
21531      goto decode_success;
21532   }
21533
21534   if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,8) == 0xFC) {
21535      /* FIXME: should this be unconditional? */
21536      /* PLD/PLDW immediate, encoding T2 */
21537      UInt rN    = INSN0(3,0);
21538      UInt bW    = INSN0(5,5);
21539      UInt imm8  = INSN1(7,0);
21540      DIP("pld%s [r%u, #-%u]\n", bW ? "w" : "",  rN, imm8);
21541      goto decode_success;
21542   }
21543
21544   if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,6) == 0x3C0) {
21545      /* FIXME: should this be unconditional? */
21546      /* PLD/PLDW register, encoding T1 */
21547      UInt rN   = INSN0(3,0);
21548      UInt rM   = INSN1(3,0);
21549      UInt bW   = INSN0(5,5);
21550      UInt imm2 = INSN1(5,4);
21551      if (!isBadRegT(rM)) {
21552         DIP("pld%s [r%u, r%u, lsl %d]\n", bW ? "w" : "", rN, rM, imm2);
21553         goto decode_success;
21554      }
21555      /* fall through */
21556   }
21557
21558   /* -------------- read CP15 TPIDRURO register ------------- */
21559   /* mrc     p15, 0,  r0, c13, c0, 3  up to
21560      mrc     p15, 0, r14, c13, c0, 3
21561   */
21562   /* I don't know whether this is really v7-only.  But anyway, we
21563      have to support it since arm-linux uses TPIDRURO as a thread
21564      state register. */
21565   if ((INSN0(15,0) == 0xEE1D) && (INSN1(11,0) == 0x0F70)) {
21566      /* FIXME: should this be unconditional? */
21567      UInt rD = INSN1(15,12);
21568      if (!isBadRegT(rD)) {
21569         putIRegT(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32), IRTemp_INVALID);
21570         DIP("mrc p15,0, r%u, c13, c0, 3\n", rD);
21571         goto decode_success;
21572      }
21573      /* fall through */
21574   }
21575
   /* ------------------- CLREX ------------------ */
   /* Matches the single fixed encoding 0xF3BF 0x8F2F. */
   if (INSN0(15,0) == 0xF3BF && INSN1(15,0) == 0x8F2F) {
      /* AFAICS, this simply cancels a (all?) reservations made by a
         (any?) preceding LDREX(es).  Arrange to hand it through to
         the back end. */
      /* The conditional skip is emitted before the
         Imbe_CancelReservation statement, in the same way as the
         LDREX/STREX cases do before their LLSC statements. */
      mk_skip_over_T32_if_cond_is_false( condT );
      stmt( IRStmt_MBE(Imbe_CancelReservation) );
      DIP("clrex\n");
      goto decode_success;
   }
21586
   /* ------------------- NOP ------------------ */
   /* Matches the fixed encoding 0xF3AF 0x8000.  No IR is generated;
      only the trace line is printed. */
   if (INSN0(15,0) == 0xF3AF && INSN1(15,0) == 0x8000) {
      DIP("nop\n");
      goto decode_success;
   }
21592
21593   /* -------------- (T1) LDRT reg+#imm8 -------------- */
21594   /* Load Register Unprivileged:
21595      ldrt Rt, [Rn, #imm8]
21596   */
21597   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,1)
21598       && INSN1(11,8) == BITS4(1,1,1,0)) {
21599      UInt rT    = INSN1(15,12);
21600      UInt rN    = INSN0(3,0);
21601      UInt imm8  = INSN1(7,0);
21602      Bool valid = True;
21603      if (rN == 15 || isBadRegT(rT)) valid = False;
21604      if (valid) {
21605         put_ITSTATE(old_itstate);
21606         IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21607         IRTemp newRt = newTemp(Ity_I32);
21608         loadGuardedLE( newRt, ILGop_Ident32, ea, llGetIReg(rT), condT );
21609         putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21610         put_ITSTATE(new_itstate);
21611         DIP("ldrt r%u, [r%u, #%u]\n", rT, rN, imm8);
21612         goto decode_success;
21613      }
21614   }
21615
21616   /* -------------- (T1) STRT reg+#imm8 -------------- */
21617   /* Store Register Unprivileged:
21618      strt Rt, [Rn, #imm8]
21619   */
21620   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,0)
21621       && INSN1(11,8) == BITS4(1,1,1,0)) {
21622      UInt rT    = INSN1(15,12);
21623      UInt rN    = INSN0(3,0);
21624      UInt imm8  = INSN1(7,0);
21625      Bool valid = True;
21626      if (rN == 15 || isBadRegT(rT)) valid = False;
21627      if (valid) {
21628         put_ITSTATE(old_itstate);
21629         IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21630         storeGuardedLE( address, llGetIReg(rT), condT );
21631         put_ITSTATE(new_itstate);
21632         DIP("strt r%u, [r%u, #%u]\n", rT, rN, imm8);
21633         goto decode_success;
21634      }
21635   }
21636
21637   /* -------------- (T1) STRBT reg+#imm8 -------------- */
21638   /* Store Register Byte Unprivileged:
21639      strbt Rt, [Rn, #imm8]
21640   */
21641   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,0)
21642       && INSN1(11,8) == BITS4(1,1,1,0)) {
21643      UInt rT    = INSN1(15,12);
21644      UInt rN    = INSN0(3,0);
21645      UInt imm8  = INSN1(7,0);
21646      Bool valid = True;
21647      if (rN == 15 || isBadRegT(rT)) valid = False;
21648      if (valid) {
21649         put_ITSTATE(old_itstate);
21650         IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21651         IRExpr* data = unop(Iop_32to8, llGetIReg(rT));
21652         storeGuardedLE( address, data, condT );
21653         put_ITSTATE(new_itstate);
21654         DIP("strbt r%u, [r%u, #%u]\n", rT, rN, imm8);
21655         goto decode_success;
21656      }
21657   }
21658
21659   /* -------------- (T1) LDRHT reg+#imm8 -------------- */
21660   /* Load Register Halfword Unprivileged:
21661      ldrht Rt, [Rn, #imm8]
21662   */
21663   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,1)
21664       && INSN1(11,8) == BITS4(1,1,1,0)) {
21665      UInt rN    = INSN0(3,0);
21666      Bool valid = True;
21667      if (rN == 15) {
21668         /* In this case our instruction is LDRH (literal), in fact:
21669            LDRH (literal) was realized earlier, so we don't want to
21670            make it twice. */
21671         valid = False;
21672      }
21673      UInt rT    = INSN1(15,12);
21674      UInt imm8  = INSN1(7,0);
21675      if (isBadRegT(rT)) valid = False;
21676      if (valid) {
21677         put_ITSTATE(old_itstate);
21678         IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21679         IRTemp newRt = newTemp(Ity_I32);
21680         loadGuardedLE( newRt, ILGop_16Uto32, ea, llGetIReg(rT), condT );
21681         putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21682         put_ITSTATE(new_itstate);
21683         DIP("ldrht r%u, [r%u, #%u]\n", rT, rN, imm8);
21684         goto decode_success;
21685      }
21686   }
21687
21688   /* -------------- (T1) LDRSHT reg+#imm8 -------------- */
21689   /* Load Register Signed Halfword Unprivileged:
21690      ldrsht Rt, [Rn, #imm8]
21691   */
21692   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(1,1)
21693       && INSN1(11,8) == BITS4(1,1,1,0)) {
21694      UInt rN    = INSN0(3,0);
21695      Bool valid = True;
21696      if (rN == 15) {
21697         /* In this case our instruction is LDRSH (literal), in fact:
21698            LDRSH (literal) was realized earlier, so we don't want to
21699            make it twice. */
21700         valid = False;
21701      }
21702      UInt rT    = INSN1(15,12);
21703      UInt imm8  = INSN1(7,0);
21704      if (isBadRegT(rT)) valid = False;
21705      if (valid) {
21706         put_ITSTATE(old_itstate);
21707         IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21708         IRTemp newRt = newTemp(Ity_I32);
21709         loadGuardedLE( newRt, ILGop_16Sto32, ea, llGetIReg(rT), condT );
21710         putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21711         put_ITSTATE(new_itstate);
21712         DIP("ldrsht r%u, [r%u, #%u]\n", rT, rN, imm8);
21713         goto decode_success;
21714      }
21715   }
21716
21717   /* -------------- (T1) STRHT reg+#imm8 -------------- */
21718   /* Store Register Halfword Unprivileged:
21719      strht Rt, [Rn, #imm8]
21720   */
21721   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,0)
21722       && INSN1(11,8) == BITS4(1,1,1,0)) {
21723      UInt rT    = INSN1(15,12);
21724      UInt rN    = INSN0(3,0);
21725      UInt imm8  = INSN1(7,0);
21726      Bool valid = True;
21727      if (rN == 15 || isBadRegT(rT)) valid = False;
21728      if (valid) {
21729         put_ITSTATE(old_itstate);
21730         IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21731         IRExpr* data = unop(Iop_32to16, llGetIReg(rT));
21732         storeGuardedLE( address, data, condT );
21733         put_ITSTATE(new_itstate);
21734         DIP("strht r%u, [r%u, #%u]\n", rT, rN, imm8);
21735         goto decode_success;
21736      }
21737   }
21738
21739   /* -------------- (T1) LDRBT reg+#imm8 -------------- */
21740   /* Load Register Byte Unprivileged:
21741      ldrbt Rt, [Rn, #imm8]
21742   */
21743   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,1)
21744       && INSN1(11,8) == BITS4(1,1,1,0)) {
21745      UInt rN    = INSN0(3,0);
21746      UInt rT    = INSN1(15,12);
21747      UInt imm8  = INSN1(7,0);
21748      Bool valid = True;
21749      if (rN == 15 /* insn is LDRB (literal) */) valid = False;
21750      if (isBadRegT(rT)) valid = False;
21751      if (valid) {
21752         put_ITSTATE(old_itstate);
21753         IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21754         IRTemp newRt = newTemp(Ity_I32);
21755         loadGuardedLE( newRt, ILGop_8Uto32, ea, llGetIReg(rT), condT );
21756         putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21757         put_ITSTATE(new_itstate);
21758         DIP("ldrbt r%u, [r%u, #%u]\n", rT, rN, imm8);
21759         goto decode_success;
21760      }
21761   }
21762
21763   /* -------------- (T1) LDRSBT reg+#imm8 -------------- */
21764   /* Load Register Signed Byte Unprivileged:
21765      ldrsbt Rt, [Rn, #imm8]
21766   */
21767   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
21768       && INSN1(11,8) == BITS4(1,1,1,0)) {
21769      UInt rN    = INSN0(3,0);
21770      Bool valid = True;
21771      UInt rT    = INSN1(15,12);
21772      UInt imm8  = INSN1(7,0);
21773      if (rN == 15 /* insn is LDRSB (literal) */) valid = False;
21774      if (isBadRegT(rT)) valid = False;
21775      if (valid) {
21776         put_ITSTATE(old_itstate);
21777         IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21778         IRTemp newRt = newTemp(Ity_I32);
21779         loadGuardedLE( newRt, ILGop_8Sto32, ea, llGetIReg(rT), condT );
21780         putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21781         put_ITSTATE(new_itstate);
21782         DIP("ldrsbt r%u, [r%u, #%u]\n", rT, rN, imm8);
21783         goto decode_success;
21784      }
21785   }
21786
21787   /* -------------- (T1) PLI reg+#imm12 -------------- */
21788   /* Preload Instruction:
21789      pli [Rn, #imm12]
21790   */
21791   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,1,0) && INSN0(5,4) == BITS2(0,1)
21792       && INSN1(15,12) == BITS4(1,1,1,1)) {
21793      UInt rN    = INSN0(3,0);
21794      UInt imm12 = INSN1(11,0);
21795      if (rN != 15) {
21796         DIP("pli [r%u, #%u]\n", rN, imm12);
21797         goto decode_success;
21798      }
21799   }
21800
21801   /* -------------- (T2) PLI reg-#imm8 -------------- */
21802   /* Preload Instruction:
21803      pli [Rn, #-imm8]
21804   */
21805   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
21806       && INSN1(15,8) == BITS8(1,1,1,1,1,1,0,0)) {
21807      UInt rN   = INSN0(3,0);
21808      UInt imm8 = INSN1(7,0);
21809      if (rN != 15) {
21810         DIP("pli [r%u, #-%u]\n", rN, imm8);
21811         goto decode_success;
21812      }
21813   }
21814
21815   /* -------------- (T3) PLI PC+/-#imm12 -------------- */
21816   /* Preload Instruction:
21817      pli [PC, #+/-imm12]
21818   */
21819   if (INSN0(15,8) == BITS8(1,1,1,1,1,0,0,1)
21820       && INSN0(6,0) == BITS7(0,0,1,1,1,1,1)
21821       && INSN1(15,12) == BITS4(1,1,1,1)) {
21822      UInt imm12 = INSN1(11,0);
21823      UInt bU    = INSN0(7,7);
21824      DIP("pli [pc, #%c%u]\n", bU == 1 ? '+' : '-', imm12);
21825      goto decode_success;
21826   }
21827
21828   /* ----------------------------------------------------------- */
21829   /* -- VFP (CP 10, CP 11) instructions (in Thumb mode)       -- */
21830   /* ----------------------------------------------------------- */
21831
21832   if (INSN0(15,12) == BITS4(1,1,1,0)) {
21833      UInt insn28 = (INSN0(11,0) << 16) | INSN1(15,0);
21834      Bool ok_vfp = decode_CP10_CP11_instruction (
21835                       &dres, insn28, condT, ARMCondAL/*bogus*/,
21836                       True/*isT*/
21837                    );
21838      if (ok_vfp)
21839         goto decode_success;
21840   }
21841
21842   /* ----------------------------------------------------------- */
21843   /* -- NEON instructions (in Thumb mode)                     -- */
21844   /* ----------------------------------------------------------- */
21845
21846   if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
21847      UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
21848      Bool ok_neon = decode_NEON_instruction(
21849                        &dres, insn32, condT, True/*isT*/
21850                     );
21851      if (ok_neon)
21852         goto decode_success;
21853   }
21854
21855   /* ----------------------------------------------------------- */
21856   /* -- v6 media instructions (in Thumb mode)                 -- */
21857   /* ----------------------------------------------------------- */
21858
21859   { UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
21860     Bool ok_v6m = decode_V6MEDIA_instruction(
21861                      &dres, insn32, condT, ARMCondAL/*bogus*/,
21862                      True/*isT*/
21863                   );
21864     if (ok_v6m)
21865        goto decode_success;
21866   }
21867
21868   /* ----------------------------------------------------------- */
21869   /* -- Undecodable                                           -- */
21870   /* ----------------------------------------------------------- */
21871
21872   goto decode_failure;
21873   /*NOTREACHED*/
21874
21875  decode_failure:
21876   /* All decode failures end up here. */
21877   if (sigill_diag)
21878      vex_printf("disInstr(thumb): unhandled instruction: "
21879                 "0x%04x 0x%04x\n", (UInt)insn0, (UInt)insn1);
21880
21881   /* Back up ITSTATE to the initial value for this instruction.
21882      If we don't do that, any subsequent restart of the instruction
21883      will restart with the wrong value. */
21884   if (old_itstate != IRTemp_INVALID)
21885      put_ITSTATE(old_itstate);
21886
   /* Tell the dispatcher that this insn cannot be decoded, and so has
      not been executed, and (is currently) the next to be executed.
      R15 should be up-to-date since it is made so at the start of each
      insn, but nevertheless be paranoid and update it again right
      now. */
21892   vassert(0 == (guest_R15_curr_instr_notENC & 1));
21893   llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) );
21894   dres.len         = 0;
21895   dres.whatNext    = Dis_StopHere;
21896   dres.jk_StopHere = Ijk_NoDecode;
21897   dres.continueAt  = 0;
21898   return dres;
21899
21900  decode_success:
21901   /* All decode successes end up here. */
21902   vassert(dres.len == 4 || dres.len == 2 || dres.len == 20);
21903   switch (dres.whatNext) {
21904      case Dis_Continue:
21905         llPutIReg(15, mkU32(dres.len + (guest_R15_curr_instr_notENC | 1)));
21906         break;
21907      case Dis_ResteerU:
21908      case Dis_ResteerC:
21909         llPutIReg(15, mkU32(dres.continueAt));
21910         break;
21911      case Dis_StopHere:
21912         break;
21913      default:
21914         vassert(0);
21915   }
21916
21917   DIP("\n");
21918
21919   return dres;
21920
21921#  undef INSN0
21922#  undef INSN1
21923}
21924
21925#undef DIP
21926#undef DIS
21927
21928
21929/* Helper table for figuring out how many insns an IT insn
21930   conditionalises.
21931
21932   An ITxyz instruction of the format "1011 1111 firstcond mask"
21933   conditionalises some number of instructions, as indicated by the
21934   following table.  A value of zero indicates the instruction is
21935   invalid in some way.
21936
21937   mask = 0 means this isn't an IT instruction
21938   fc = 15 (NV) means unpredictable
21939
21940   The line fc = 14 (AL) is different from the others; there are
21941   additional constraints in this case.
21942
21943          mask(0 ..                   15)
21944        +--------------------------------
21945   fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21946   ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21947        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21948        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21949        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21950        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21951        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21952        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21953        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21954        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21955        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21956        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21957        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21958        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21959        | 0 4 3 0 2 0 0 0 1 0 0 0 0 0 0 0
21960   15)  | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
21961
21962   To be conservative with the analysis, let's rule out the mask = 0
21963   case, since that isn't an IT insn at all.  But for all the other
21964   cases where the table contains zero, that means unpredictable, so
21965   let's say 4 to be conservative.  Hence we have a safe value for any
21966   IT (mask,fc) pair that the CPU would actually identify as an IT
21967   instruction.  The final table is
21968
21969          mask(0 ..                   15)
21970        +--------------------------------
21971   fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21972   ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21973        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21974        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21975        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21976        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21977        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21978        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21979        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21980        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21981        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21982        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21983        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21984        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21985        | 0 4 3 4 2 4 4 4 1 4 4 4 4 4 4 4
21986   15)  | 0 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
21987*/
21988static const UChar it_length_table[256]
21989   = { 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21990       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21991       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21992       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21993       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21994       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21995       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21996       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21997       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21998       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21999       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
22000       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
22001       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
22002       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
22003       0, 4, 3, 4, 2, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4,
22004       0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
22005     };
22006
22007
22008/*------------------------------------------------------------*/
22009/*--- Top-level fn                                         ---*/
22010/*------------------------------------------------------------*/
22011
22012/* Disassemble a single instruction into IR.  The instruction
22013   is located in host memory at &guest_code[delta]. */
22014
22015DisResult disInstr_ARM ( IRSB*        irsb_IN,
22016                         Bool         (*resteerOkFn) ( void*, Addr ),
22017                         Bool         resteerCisOk,
22018                         void*        callback_opaque,
22019                         const UChar* guest_code_IN,
22020                         Long         delta_ENCODED,
22021                         Addr         guest_IP_ENCODED,
22022                         VexArch      guest_arch,
22023                         const VexArchInfo* archinfo,
22024                         const VexAbiInfo*  abiinfo,
22025                         VexEndness   host_endness_IN,
22026                         Bool         sigill_diag_IN )
22027{
22028   DisResult dres;
22029   Bool isThumb = (Bool)(guest_IP_ENCODED & 1);
22030
22031   /* Set globals (see top of this file) */
22032   vassert(guest_arch == VexArchARM);
22033
22034   irsb            = irsb_IN;
22035   host_endness    = host_endness_IN;
22036   __curr_is_Thumb = isThumb;
22037
22038   if (isThumb) {
22039      guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED - 1;
22040   } else {
22041      guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED;
22042   }
22043
22044   if (isThumb) {
22045      dres = disInstr_THUMB_WRK ( resteerOkFn,
22046                                  resteerCisOk, callback_opaque,
22047                                  &guest_code_IN[delta_ENCODED - 1],
22048                                  archinfo, abiinfo, sigill_diag_IN );
22049   } else {
22050      dres = disInstr_ARM_WRK ( resteerOkFn,
22051                                resteerCisOk, callback_opaque,
22052                                &guest_code_IN[delta_ENCODED],
22053                                archinfo, abiinfo, sigill_diag_IN );
22054   }
22055
22056   return dres;
22057}
22058
22059/* Test program for the conversion of IRCmpF64Result values to VFP
22060   nzcv values.  See handling of FCMPD et al above. */
22061/*
22062UInt foo ( UInt x )
22063{
22064   UInt ix    = ((x >> 5) & 3) | (x & 1);
22065   UInt termL = (((((ix ^ 1) << 30) - 1) >> 29) + 1);
22066   UInt termR = (ix & (ix >> 1) & 1);
22067   return termL  -  termR;
22068}
22069
22070void try ( char* s, UInt ir, UInt req )
22071{
22072   UInt act = foo(ir);
22073   printf("%s 0x%02x -> req %d%d%d%d act %d%d%d%d (0x%x)\n",
22074          s, ir, (req >> 3) & 1, (req >> 2) & 1,
22075                 (req >> 1) & 1, (req >> 0) & 1,
22076                 (act >> 3) & 1, (act >> 2) & 1,
22077                 (act >> 1) & 1, (act >> 0) & 1, act);
22078
22079}
22080
22081int main ( void )
22082{
22083   printf("\n");
22084   try("UN", 0x45, 0b0011);
22085   try("LT", 0x01, 0b1000);
22086   try("GT", 0x00, 0b0010);
22087   try("EQ", 0x40, 0b0110);
22088   printf("\n");
22089   return 0;
22090}
22091*/
22092
22093/* Spare code for doing reference implementations of various 64-bit
22094   SIMD interleaves/deinterleaves/concatenation ops. */
22095/*
22096// Split a 64 bit value into 4 16 bit ones, in 32-bit IRTemps with
22097// the top halves guaranteed to be zero.
22098static void break64to16s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
22099                           IRTemp* out0, IRTemp v64 )
22100{
22101  if (out3) *out3 = newTemp(Ity_I32);
22102  if (out2) *out2 = newTemp(Ity_I32);
22103  if (out1) *out1 = newTemp(Ity_I32);
22104  if (out0) *out0 = newTemp(Ity_I32);
22105  IRTemp hi32 = newTemp(Ity_I32);
22106  IRTemp lo32 = newTemp(Ity_I32);
22107  assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
22108  assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
22109  if (out3) assign(*out3, binop(Iop_Shr32, mkexpr(hi32), mkU8(16)));
22110  if (out2) assign(*out2, binop(Iop_And32, mkexpr(hi32), mkU32(0xFFFF)));
22111  if (out1) assign(*out1, binop(Iop_Shr32, mkexpr(lo32), mkU8(16)));
22112  if (out0) assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFFFF)));
22113}
22114
22115// Make a 64 bit value from 4 16 bit ones, each of which is in a 32 bit
22116// IRTemp.
22117static IRTemp mk64from16s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
22118{
22119  IRTemp hi32 = newTemp(Ity_I32);
22120  IRTemp lo32 = newTemp(Ity_I32);
22121  assign(hi32,
22122         binop(Iop_Or32,
22123               binop(Iop_Shl32, mkexpr(in3), mkU8(16)),
22124               binop(Iop_And32, mkexpr(in2), mkU32(0xFFFF))));
22125  assign(lo32,
22126         binop(Iop_Or32,
22127               binop(Iop_Shl32, mkexpr(in1), mkU8(16)),
22128               binop(Iop_And32, mkexpr(in0), mkU32(0xFFFF))));
22129  IRTemp res = newTemp(Ity_I64);
22130  assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
22131  return res;
22132}
22133
22134static IRExpr* mk_InterleaveLO16x4 ( IRTemp a3210, IRTemp b3210 )
22135{
22136  // returns a1 b1 a0 b0
22137  IRTemp a1, a0, b1, b0;
22138  break64to16s(NULL, NULL, &a1, &a0, a3210);
22139  break64to16s(NULL, NULL, &b1, &b0, b3210);
22140  return mkexpr(mk64from16s(a1, b1, a0, b0));
22141}
22142
22143static IRExpr* mk_InterleaveHI16x4 ( IRTemp a3210, IRTemp b3210 )
22144{
22145  // returns a3 b3 a2 b2
22146  IRTemp a3, a2, b3, b2;
22147  break64to16s(&a3, &a2, NULL, NULL, a3210);
22148  break64to16s(&b3, &b2, NULL, NULL, b3210);
22149  return mkexpr(mk64from16s(a3, b3, a2, b2));
22150}
22151
22152static IRExpr* mk_CatEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
22153{
22154  // returns a2 a0 b2 b0
22155  IRTemp a2, a0, b2, b0;
22156  break64to16s(NULL, &a2, NULL, &a0, a3210);
22157  break64to16s(NULL, &b2, NULL, &b0, b3210);
22158  return mkexpr(mk64from16s(a2, a0, b2, b0));
22159}
22160
22161static IRExpr* mk_CatOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
22162{
22163  // returns a3 a1 b3 b1
22164  IRTemp a3, a1, b3, b1;
22165  break64to16s(&a3, NULL, &a1, NULL, a3210);
22166  break64to16s(&b3, NULL, &b1, NULL, b3210);
22167  return mkexpr(mk64from16s(a3, a1, b3, b1));
22168}
22169
22170static IRExpr* mk_InterleaveOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
22171{
22172  // returns a3 b3 a1 b1
22173  IRTemp a3, b3, a1, b1;
22174  break64to16s(&a3, NULL, &a1, NULL, a3210);
22175  break64to16s(&b3, NULL, &b1, NULL, b3210);
22176  return mkexpr(mk64from16s(a3, b3, a1, b1));
22177}
22178
22179static IRExpr* mk_InterleaveEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
22180{
22181  // returns a2 b2 a0 b0
22182  IRTemp a2, b2, a0, b0;
22183  break64to16s(NULL, &a2, NULL, &a0, a3210);
22184  break64to16s(NULL, &b2, NULL, &b0, b3210);
22185  return mkexpr(mk64from16s(a2, b2, a0, b0));
22186}
22187
22188static void break64to8s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
22189                          IRTemp* out4, IRTemp* out3, IRTemp* out2,
22190                          IRTemp* out1,IRTemp* out0, IRTemp v64 )
22191{
22192  if (out7) *out7 = newTemp(Ity_I32);
22193  if (out6) *out6 = newTemp(Ity_I32);
22194  if (out5) *out5 = newTemp(Ity_I32);
22195  if (out4) *out4 = newTemp(Ity_I32);
22196  if (out3) *out3 = newTemp(Ity_I32);
22197  if (out2) *out2 = newTemp(Ity_I32);
22198  if (out1) *out1 = newTemp(Ity_I32);
22199  if (out0) *out0 = newTemp(Ity_I32);
22200  IRTemp hi32 = newTemp(Ity_I32);
22201  IRTemp lo32 = newTemp(Ity_I32);
22202  assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
22203  assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
22204  if (out7)
22205    assign(*out7, binop(Iop_And32,
22206                        binop(Iop_Shr32, mkexpr(hi32), mkU8(24)),
22207                        mkU32(0xFF)));
22208  if (out6)
22209    assign(*out6, binop(Iop_And32,
22210                        binop(Iop_Shr32, mkexpr(hi32), mkU8(16)),
22211                        mkU32(0xFF)));
22212  if (out5)
22213    assign(*out5, binop(Iop_And32,
22214                        binop(Iop_Shr32, mkexpr(hi32), mkU8(8)),
22215                        mkU32(0xFF)));
22216  if (out4)
22217    assign(*out4, binop(Iop_And32, mkexpr(hi32), mkU32(0xFF)));
22218  if (out3)
22219    assign(*out3, binop(Iop_And32,
22220                        binop(Iop_Shr32, mkexpr(lo32), mkU8(24)),
22221                        mkU32(0xFF)));
22222  if (out2)
22223    assign(*out2, binop(Iop_And32,
22224                        binop(Iop_Shr32, mkexpr(lo32), mkU8(16)),
22225                        mkU32(0xFF)));
22226  if (out1)
22227    assign(*out1, binop(Iop_And32,
22228                        binop(Iop_Shr32, mkexpr(lo32), mkU8(8)),
22229                        mkU32(0xFF)));
22230  if (out0)
22231    assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFF)));
22232}
22233
22234static IRTemp mk64from8s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
22235                           IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
22236{
22237  IRTemp hi32 = newTemp(Ity_I32);
22238  IRTemp lo32 = newTemp(Ity_I32);
22239  assign(hi32,
22240         binop(Iop_Or32,
22241               binop(Iop_Or32,
22242                     binop(Iop_Shl32,
22243                           binop(Iop_And32, mkexpr(in7), mkU32(0xFF)),
22244                           mkU8(24)),
22245                     binop(Iop_Shl32,
22246                           binop(Iop_And32, mkexpr(in6), mkU32(0xFF)),
22247                           mkU8(16))),
22248               binop(Iop_Or32,
22249                     binop(Iop_Shl32,
22250                           binop(Iop_And32, mkexpr(in5), mkU32(0xFF)), mkU8(8)),
22251                     binop(Iop_And32,
22252                           mkexpr(in4), mkU32(0xFF)))));
22253  assign(lo32,
22254         binop(Iop_Or32,
22255               binop(Iop_Or32,
22256                     binop(Iop_Shl32,
22257                           binop(Iop_And32, mkexpr(in3), mkU32(0xFF)),
22258                           mkU8(24)),
22259                     binop(Iop_Shl32,
22260                           binop(Iop_And32, mkexpr(in2), mkU32(0xFF)),
22261                           mkU8(16))),
22262               binop(Iop_Or32,
22263                     binop(Iop_Shl32,
22264                           binop(Iop_And32, mkexpr(in1), mkU32(0xFF)), mkU8(8)),
22265                     binop(Iop_And32,
22266                           mkexpr(in0), mkU32(0xFF)))));
22267  IRTemp res = newTemp(Ity_I64);
22268  assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
22269  return res;
22270}
22271
22272static IRExpr* mk_InterleaveLO8x8 ( IRTemp a76543210, IRTemp b76543210 )
22273{
22274  // returns a3 b3 a2 b2 a1 b1 a0 b0
22275  IRTemp a3, b3, a2, b2, a1, a0, b1, b0;
22276  break64to8s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210);
22277  break64to8s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210);
22278  return mkexpr(mk64from8s(a3, b3, a2, b2, a1, b1, a0, b0));
22279}
22280
22281static IRExpr* mk_InterleaveHI8x8 ( IRTemp a76543210, IRTemp b76543210 )
22282{
22283  // returns a7 b7 a6 b6 a5 b5 a4 b4
22284  IRTemp a7, b7, a6, b6, a5, b5, a4, b4;
22285  break64to8s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210);
22286  break64to8s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210);
22287  return mkexpr(mk64from8s(a7, b7, a6, b6, a5, b5, a4, b4));
22288}
22289
22290static IRExpr* mk_CatEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
22291{
22292  // returns a6 a4 a2 a0 b6 b4 b2 b0
22293  IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
22294  break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
22295  break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
22296  return mkexpr(mk64from8s(a6, a4, a2, a0, b6, b4, b2, b0));
22297}
22298
22299static IRExpr* mk_CatOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
22300{
22301  // returns a7 a5 a3 a1 b7 b5 b3 b1
22302  IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
22303  break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
22304  break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
22305  return mkexpr(mk64from8s(a7, a5, a3, a1, b7, b5, b3, b1));
22306}
22307
22308static IRExpr* mk_InterleaveEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
22309{
22310  // returns a6 b6 a4 b4 a2 b2 a0 b0
22311  IRTemp a6, b6, a4, b4, a2, b2, a0, b0;
22312  break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
22313  break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
22314  return mkexpr(mk64from8s(a6, b6, a4, b4, a2, b2, a0, b0));
22315}
22316
22317static IRExpr* mk_InterleaveOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
22318{
22319  // returns a7 b7 a5 b5 a3 b3 a1 b1
22320  IRTemp a7, b7, a5, b5, a3, b3, a1, b1;
22321  break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
22322  break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
22323  return mkexpr(mk64from8s(a7, b7, a5, b5, a3, b3, a1, b1));
22324}
22325
22326static IRExpr* mk_InterleaveLO32x2 ( IRTemp a10, IRTemp b10 )
22327{
22328  // returns a0 b0
22329  return binop(Iop_32HLto64, unop(Iop_64to32, mkexpr(a10)),
22330                             unop(Iop_64to32, mkexpr(b10)));
22331}
22332
22333static IRExpr* mk_InterleaveHI32x2 ( IRTemp a10, IRTemp b10 )
22334{
22335  // returns a1 b1
22336  return binop(Iop_32HLto64, unop(Iop_64HIto32, mkexpr(a10)),
22337                             unop(Iop_64HIto32, mkexpr(b10)));
22338}
22339*/
22340
22341/*--------------------------------------------------------------------*/
22342/*--- end                                         guest_arm_toIR.c ---*/
22343/*--------------------------------------------------------------------*/
22344