1
2/*--------------------------------------------------------------------*/
3/*--- begin                                       guest_arm_toIR.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2011 OpenWorks LLP
11      info@open-works.net
12
13   NEON support is
14   Copyright (C) 2010-2011 Samsung Electronics
15   contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16              and Kirill Batuzov <batuzovk@ispras.ru>
17
18   This program is free software; you can redistribute it and/or
19   modify it under the terms of the GNU General Public License as
20   published by the Free Software Foundation; either version 2 of the
21   License, or (at your option) any later version.
22
23   This program is distributed in the hope that it will be useful, but
24   WITHOUT ANY WARRANTY; without even the implied warranty of
25   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
26   General Public License for more details.
27
28   You should have received a copy of the GNU General Public License
29   along with this program; if not, write to the Free Software
30   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31   02110-1301, USA.
32
33   The GNU General Public License is contained in the file COPYING.
34*/
35
36/* XXXX thumb to check:
37   that all cases where putIRegT writes r15, we generate a jump.
38
39   All uses of newTemp assign to an IRTemp and not a UInt
40
41   For all thumb loads and stores, including VFP ones, new-ITSTATE is
42   backed out before the memory op, and restored afterwards.  This
43   needs to happen even after we go uncond.  (and for sure it doesn't
44   happen for VFP loads/stores right now).
45
46   VFP on thumb: check that we exclude all r13/r15 cases that we
47   should.
48
49   XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
50   taking into account the number of insns guarded by an IT.
51
   remove the nasty hack, in the spechelper, of looking for Or32(...,
   0xE0) as the first arg to armg_calculate_condition, and instead
   use Slice44 as specified in comments in the spechelper.
55
56   add specialisations for armg_calculate_flag_c and _v, as they
57   are moderately often needed in Thumb code.
58
59   Correctness: ITSTATE handling in Thumb SVCs is wrong.
60
61   Correctness (obscure): in m_transtab, when invalidating code
62   address ranges, invalidate up to 18 bytes after the end of the
63   range.  This is because the ITSTATE optimisation at the top of
64   _THUMB_WRK below analyses up to 18 bytes before the start of any
65   given instruction, and so might depend on the invalidated area.
66*/
67
68/* Limitations, etc
69
70   - pretty dodgy exception semantics for {LD,ST}Mxx, no doubt
71
72   - SWP: the restart jump back is Ijk_Boring; it should be
73     Ijk_NoRedir but that's expensive.  See comments on casLE() in
74     guest_x86_toIR.c.
75*/
76
77/* "Special" instructions.
78
   This instruction decoder can decode three special instructions
80   which mean nothing natively (are no-ops as far as regs/mem are
81   concerned) but have meaning for supporting Valgrind.  A special
82   instruction is flagged by a 16-byte preamble:
83
84      E1A0C1EC E1A0C6EC E1A0CEEC E1A0C9EC
85      (mov r12, r12, ROR #3;   mov r12, r12, ROR #13;
86       mov r12, r12, ROR #29;  mov r12, r12, ROR #19)
87
   Following that, one of the following 3 is allowed
89   (standard interpretation in parentheses):
90
91      E18AA00A (orr r10,r10,r10)   R3 = client_request ( R4 )
92      E18BB00B (orr r11,r11,r11)   R3 = guest_NRADDR
93      E18CC00C (orr r12,r12,r12)   branch-and-link-to-noredir R4
94
95   Any other bytes following the 16-byte preamble are illegal and
96   constitute a failure in instruction decoding.  This all assumes
97   that the preamble will never occur except in specific code
98   fragments designed for Valgrind to catch.
99*/
100
101/* Translates ARM(v5) code to IR. */
102
103#include "libvex_basictypes.h"
104#include "libvex_ir.h"
105#include "libvex.h"
106#include "libvex_guest_arm.h"
107
108#include "main_util.h"
109#include "main_globals.h"
110#include "guest_generic_bb_to_IR.h"
111#include "guest_arm_defs.h"
112
113
114/*------------------------------------------------------------*/
115/*--- Globals                                              ---*/
116/*------------------------------------------------------------*/
117
/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means does
   not change during translation of the instruction.
*/
122
123/* CONST: is the host bigendian?  This has to do with float vs double
124   register accesses on VFP, but it's complex and not properly thought
125   out. */
126static Bool host_is_bigendian;
127
128/* CONST: The guest address for the instruction currently being
129   translated.  This is the real, "decoded" address (not subject
130   to the CPSR.T kludge). */
131static Addr32 guest_R15_curr_instr_notENC;
132
133/* CONST, FOR ASSERTIONS ONLY.  Indicates whether currently processed
134   insn is Thumb (True) or ARM (False). */
135static Bool __curr_is_Thumb;
136
137/* MOD: The IRSB* into which we're generating code. */
138static IRSB* irsb;
139
140/* These are to do with handling writes to r15.  They are initially
141   set at the start of disInstr_ARM_WRK to indicate no update,
142   possibly updated during the routine, and examined again at the end.
143   If they have been set to indicate a r15 update then a jump is
144   generated.  Note, "explicit" jumps (b, bx, etc) are generated
145   directly, not using this mechanism -- this is intended to handle
146   the implicit-style jumps resulting from (eg) assigning to r15 as
147   the result of insns we wouldn't normally consider branchy. */
148
149/* MOD.  Initially False; set to True iff abovementioned handling is
150   required. */
151static Bool r15written;
152
153/* MOD.  Initially IRTemp_INVALID.  If the r15 branch to be generated
154   is conditional, this holds the gating IRTemp :: Ity_I32.  If the
155   branch to be generated is unconditional, this remains
156   IRTemp_INVALID. */
157static IRTemp r15guard; /* :: Ity_I32, 0 or 1 */
158
159/* MOD.  Initially Ijk_Boring.  If an r15 branch is to be generated,
160   this holds the jump kind. */
161static IRTemp r15kind;
162
163
164/*------------------------------------------------------------*/
165/*--- Debugging output                                     ---*/
166/*------------------------------------------------------------*/
167
/* Print front-end trace output via vex_printf, but only when the
   VEX_TRACE_FE bit is set in vex_traceflags.  The body is wrapped in
   do { } while (0) so the expansion is a single statement: a bare
   'if' here would capture any 'else' that follows the macro call. */
#define DIP(format, args...)              \
   do {                                   \
      if (vex_traceflags & VEX_TRACE_FE)  \
         vex_printf(format, ## args);     \
   } while (0)

/* As DIP, but formats into 'buf' with vex_sprintf instead of
   printing.  'buf' is left untouched when tracing is off. */
#define DIS(buf, format, args...)         \
   do {                                   \
      if (vex_traceflags & VEX_TRACE_FE)  \
         vex_sprintf(buf, format, ## args); \
   } while (0)

/* Assert that the instruction currently being translated is Thumb. */
#define ASSERT_IS_THUMB \
   do { vassert(__curr_is_Thumb); } while (0)

/* Assert that the instruction currently being translated is ARM. */
#define ASSERT_IS_ARM \
   do { vassert(! __curr_is_Thumb); } while (0)
181
182
183/*------------------------------------------------------------*/
184/*--- Helper bits and pieces for deconstructing the        ---*/
185/*--- arm insn stream.                                     ---*/
186/*------------------------------------------------------------*/
187
188/* Do a little-endian load of a 32-bit word, regardless of the
189   endianness of the underlying host. */
190static inline UInt getUIntLittleEndianly ( UChar* p )
191{
192   UInt w = 0;
193   w = (w << 8) | p[3];
194   w = (w << 8) | p[2];
195   w = (w << 8) | p[1];
196   w = (w << 8) | p[0];
197   return w;
198}
199
200/* Do a little-endian load of a 16-bit word, regardless of the
201   endianness of the underlying host. */
202static inline UShort getUShortLittleEndianly ( UChar* p )
203{
204   UShort w = 0;
205   w = (w << 8) | p[1];
206   w = (w << 8) | p[0];
207   return w;
208}
209
210static UInt ROR32 ( UInt x, UInt sh ) {
211   vassert(sh >= 0 && sh < 32);
212   if (sh == 0)
213      return x;
214   else
215      return (x << (32-sh)) | (x >> sh);
216}
217
218static Int popcount32 ( UInt x )
219{
220   Int res = 0, i;
221   for (i = 0; i < 32; i++) {
222      res += (x & 1);
223      x >>= 1;
224   }
225   return res;
226}
227
228static UInt setbit32 ( UInt x, Int ix, UInt b )
229{
230   UInt mask = 1 << ix;
231   x &= ~mask;
232   x |= ((b << ix) & mask);
233   return x;
234}
235
/* BITSn(b_{n-1}, ..., b_0) packs n individual bit values into an
   integer, with the first argument as the most significant bit.
   Each macro is built from the next smaller one by OR-ing in the
   new top bit.  Every argument is evaluated exactly once. */
#define BITS2(_b1,_b0) \
   ((_b0) | ((_b1) << 1))

#define BITS3(_b2,_b1,_b0) \
   (((_b2) << 2) | BITS2((_b1),(_b0)))

#define BITS4(_b3,_b2,_b1,_b0) \
   (((_b3) << 3) | BITS3((_b2),(_b1),(_b0)))

#define BITS5(_b4,_b3,_b2,_b1,_b0) \
   (((_b4) << 4) | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b5) << 5) | BITS5((_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b6) << 6) | BITS6((_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b7) << 7) \
    | BITS7((_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b9) << 9) \
    | BITS9((_b8),(_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
263
/* Extract the bit field _uint[_bMax:_bMin] (both bounds inclusive)
   and return it right-justified as a UInt.  The mask is computed in
   64 bits so that a full 32-bit wide field works.  Note that _bMin
   is evaluated twice. */
#define SLICE_UInt(_uint,_bMax,_bMin) \
   ( (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL) \
     & (((UInt)(_uint)) >> (_bMin)) )
268
269
270/*------------------------------------------------------------*/
271/*--- Helper bits and pieces for creating IR fragments.    ---*/
272/*------------------------------------------------------------*/
273
274static IRExpr* mkU64 ( ULong i )
275{
276   return IRExpr_Const(IRConst_U64(i));
277}
278
279static IRExpr* mkU32 ( UInt i )
280{
281   return IRExpr_Const(IRConst_U32(i));
282}
283
284static IRExpr* mkU8 ( UInt i )
285{
286   vassert(i < 256);
287   return IRExpr_Const(IRConst_U8( (UChar)i ));
288}
289
290static IRExpr* mkexpr ( IRTemp tmp )
291{
292   return IRExpr_RdTmp(tmp);
293}
294
295static IRExpr* unop ( IROp op, IRExpr* a )
296{
297   return IRExpr_Unop(op, a);
298}
299
300static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
301{
302   return IRExpr_Binop(op, a1, a2);
303}
304
305static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
306{
307   return IRExpr_Triop(op, a1, a2, a3);
308}
309
310static IRExpr* loadLE ( IRType ty, IRExpr* addr )
311{
312   return IRExpr_Load(Iend_LE, ty, addr);
313}
314
315/* Add a statement to the list held by "irbb". */
316static void stmt ( IRStmt* st )
317{
318   addStmtToIRSB( irsb, st );
319}
320
321static void assign ( IRTemp dst, IRExpr* e )
322{
323   stmt( IRStmt_WrTmp(dst, e) );
324}
325
326static void storeLE ( IRExpr* addr, IRExpr* data )
327{
328   stmt( IRStmt_Store(Iend_LE, addr, data) );
329}
330
331/* Generate a new temporary of the given type. */
332static IRTemp newTemp ( IRType ty )
333{
334   vassert(isPlausibleIRType(ty));
335   return newIRTemp( irsb->tyenv, ty );
336}
337
338/* Produces a value in 0 .. 3, which is encoded as per the type
339   IRRoundingMode. */
340static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
341{
342   return mkU32(Irrm_NEAREST);
343}
344
345/* Generate an expression for SRC rotated right by ROT. */
346static IRExpr* genROR32( IRTemp src, Int rot )
347{
348   vassert(rot >= 0 && rot < 32);
349   if (rot == 0)
350      return mkexpr(src);
351   return
352      binop(Iop_Or32,
353            binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
354            binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
355}
356
357static IRExpr* mkU128 ( ULong i )
358{
359   return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
360}
361
362/* Generate a 4-aligned version of the given expression if
363   the given condition is true.  Else return it unchanged. */
364static IRExpr* align4if ( IRExpr* e, Bool b )
365{
366   if (b)
367      return binop(Iop_And32, e, mkU32(~3));
368   else
369      return e;
370}
371
372
373/*------------------------------------------------------------*/
374/*--- Helpers for accessing guest registers.               ---*/
375/*------------------------------------------------------------*/
376
/* Byte offsets of individual fields within VexGuestARMState, for use
   in guest state Get/Put operations. */

/* Integer registers r0 .. r14, plus guest_R15T, which
   integerGuestRegOffset uses for register number 15. */
#define OFFB_R0       offsetof(VexGuestARMState,guest_R0)
#define OFFB_R1       offsetof(VexGuestARMState,guest_R1)
#define OFFB_R2       offsetof(VexGuestARMState,guest_R2)
#define OFFB_R3       offsetof(VexGuestARMState,guest_R3)
#define OFFB_R4       offsetof(VexGuestARMState,guest_R4)
#define OFFB_R5       offsetof(VexGuestARMState,guest_R5)
#define OFFB_R6       offsetof(VexGuestARMState,guest_R6)
#define OFFB_R7       offsetof(VexGuestARMState,guest_R7)
#define OFFB_R8       offsetof(VexGuestARMState,guest_R8)
#define OFFB_R9       offsetof(VexGuestARMState,guest_R9)
#define OFFB_R10      offsetof(VexGuestARMState,guest_R10)
#define OFFB_R11      offsetof(VexGuestARMState,guest_R11)
#define OFFB_R12      offsetof(VexGuestARMState,guest_R12)
#define OFFB_R13      offsetof(VexGuestARMState,guest_R13)
#define OFFB_R14      offsetof(VexGuestARMState,guest_R14)
#define OFFB_R15T     offsetof(VexGuestARMState,guest_R15T)

/* The guest_CC_* fields and guest_NRADDR. */
#define OFFB_CC_OP    offsetof(VexGuestARMState,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARMState,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARMState,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARMState,guest_CC_NDEP)
#define OFFB_NRADDR   offsetof(VexGuestARMState,guest_NRADDR)

/* Double registers d0 .. d31.  The Q-register and F-register offset
   functions (quadGuestRegOffset, floatGuestRegOffset) are expressed
   in terms of these too. */
#define OFFB_D0       offsetof(VexGuestARMState,guest_D0)
#define OFFB_D1       offsetof(VexGuestARMState,guest_D1)
#define OFFB_D2       offsetof(VexGuestARMState,guest_D2)
#define OFFB_D3       offsetof(VexGuestARMState,guest_D3)
#define OFFB_D4       offsetof(VexGuestARMState,guest_D4)
#define OFFB_D5       offsetof(VexGuestARMState,guest_D5)
#define OFFB_D6       offsetof(VexGuestARMState,guest_D6)
#define OFFB_D7       offsetof(VexGuestARMState,guest_D7)
#define OFFB_D8       offsetof(VexGuestARMState,guest_D8)
#define OFFB_D9       offsetof(VexGuestARMState,guest_D9)
#define OFFB_D10      offsetof(VexGuestARMState,guest_D10)
#define OFFB_D11      offsetof(VexGuestARMState,guest_D11)
#define OFFB_D12      offsetof(VexGuestARMState,guest_D12)
#define OFFB_D13      offsetof(VexGuestARMState,guest_D13)
#define OFFB_D14      offsetof(VexGuestARMState,guest_D14)
#define OFFB_D15      offsetof(VexGuestARMState,guest_D15)
#define OFFB_D16      offsetof(VexGuestARMState,guest_D16)
#define OFFB_D17      offsetof(VexGuestARMState,guest_D17)
#define OFFB_D18      offsetof(VexGuestARMState,guest_D18)
#define OFFB_D19      offsetof(VexGuestARMState,guest_D19)
#define OFFB_D20      offsetof(VexGuestARMState,guest_D20)
#define OFFB_D21      offsetof(VexGuestARMState,guest_D21)
#define OFFB_D22      offsetof(VexGuestARMState,guest_D22)
#define OFFB_D23      offsetof(VexGuestARMState,guest_D23)
#define OFFB_D24      offsetof(VexGuestARMState,guest_D24)
#define OFFB_D25      offsetof(VexGuestARMState,guest_D25)
#define OFFB_D26      offsetof(VexGuestARMState,guest_D26)
#define OFFB_D27      offsetof(VexGuestARMState,guest_D27)
#define OFFB_D28      offsetof(VexGuestARMState,guest_D28)
#define OFFB_D29      offsetof(VexGuestARMState,guest_D29)
#define OFFB_D30      offsetof(VexGuestARMState,guest_D30)
#define OFFB_D31      offsetof(VexGuestARMState,guest_D31)

/* Miscellaneous state.  Of these, FPSCR, QFLAG32 and GEFLAG0..3 are
   the offsets accepted by putMiscReg32. */
#define OFFB_FPSCR    offsetof(VexGuestARMState,guest_FPSCR)
#define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
#define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
#define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
#define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
#define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
#define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
#define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)
441
442
443/* ---------------- Integer registers ---------------- */
444
445static Int integerGuestRegOffset ( UInt iregNo )
446{
447   /* Do we care about endianness here?  We do if sub-parts of integer
448      registers are accessed, but I don't think that ever happens on
449      ARM. */
450   switch (iregNo) {
451      case 0:  return OFFB_R0;
452      case 1:  return OFFB_R1;
453      case 2:  return OFFB_R2;
454      case 3:  return OFFB_R3;
455      case 4:  return OFFB_R4;
456      case 5:  return OFFB_R5;
457      case 6:  return OFFB_R6;
458      case 7:  return OFFB_R7;
459      case 8:  return OFFB_R8;
460      case 9:  return OFFB_R9;
461      case 10: return OFFB_R10;
462      case 11: return OFFB_R11;
463      case 12: return OFFB_R12;
464      case 13: return OFFB_R13;
465      case 14: return OFFB_R14;
466      case 15: return OFFB_R15T;
467      default: vassert(0);
468   }
469}
470
471/* Plain ("low level") read from a reg; no +8 offset magic for r15. */
472static IRExpr* llGetIReg ( UInt iregNo )
473{
474   vassert(iregNo < 16);
475   return IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
476}
477
478/* Architected read from a reg in ARM mode.  This automagically adds 8
479   to all reads of r15. */
480static IRExpr* getIRegA ( UInt iregNo )
481{
482   IRExpr* e;
483   ASSERT_IS_ARM;
484   vassert(iregNo < 16);
485   if (iregNo == 15) {
486      /* If asked for r15, don't read the guest state value, as that
487         may not be up to date in the case where loop unrolling has
488         happened, because the first insn's write to the block is
489         omitted; hence in the 2nd and subsequent unrollings we don't
490         have a correct value in guest r15.  Instead produce the
491         constant that we know would be produced at this point. */
492      vassert(0 == (guest_R15_curr_instr_notENC & 3));
493      e = mkU32(guest_R15_curr_instr_notENC + 8);
494   } else {
495      e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
496   }
497   return e;
498}
499
500/* Architected read from a reg in Thumb mode.  This automagically adds
501   4 to all reads of r15. */
502static IRExpr* getIRegT ( UInt iregNo )
503{
504   IRExpr* e;
505   ASSERT_IS_THUMB;
506   vassert(iregNo < 16);
507   if (iregNo == 15) {
508      /* Ditto comment in getIReg. */
509      vassert(0 == (guest_R15_curr_instr_notENC & 1));
510      e = mkU32(guest_R15_curr_instr_notENC + 4);
511   } else {
512      e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
513   }
514   return e;
515}
516
517/* Plain ("low level") write to a reg; no jump or alignment magic for
518   r15. */
519static void llPutIReg ( UInt iregNo, IRExpr* e )
520{
521   vassert(iregNo < 16);
522   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
523   stmt( IRStmt_Put(integerGuestRegOffset(iregNo), e) );
524}
525
526/* Architected write to an integer register in ARM mode.  If it is to
527   r15, record info so at the end of this insn's translation, a branch
528   to it can be made.  Also handles conditional writes to the
529   register: if guardT == IRTemp_INVALID then the write is
530   unconditional.  If writing r15, also 4-align it. */
531static void putIRegA ( UInt       iregNo,
532                       IRExpr*    e,
533                       IRTemp     guardT /* :: Ity_I32, 0 or 1 */,
534                       IRJumpKind jk /* if a jump is generated */ )
535{
536   /* if writing r15, force e to be 4-aligned. */
537   // INTERWORKING FIXME.  this needs to be relaxed so that
538   // puts caused by LDMxx which load r15 interwork right.
539   // but is no aligned too relaxed?
540   //if (iregNo == 15)
541   //   e = binop(Iop_And32, e, mkU32(~3));
542   ASSERT_IS_ARM;
543   /* So, generate either an unconditional or a conditional write to
544      the reg. */
545   if (guardT == IRTemp_INVALID) {
546      /* unconditional write */
547      llPutIReg( iregNo, e );
548   } else {
549      llPutIReg( iregNo,
550                 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
551                               llGetIReg(iregNo),
552                               e ));
553   }
554   if (iregNo == 15) {
555      // assert against competing r15 updates.  Shouldn't
556      // happen; should be ruled out by the instr matching
557      // logic.
558      vassert(r15written == False);
559      vassert(r15guard   == IRTemp_INVALID);
560      vassert(r15kind    == Ijk_Boring);
561      r15written = True;
562      r15guard   = guardT;
563      r15kind    = jk;
564   }
565}
566
567
568/* Architected write to an integer register in Thumb mode.  Writes to
569   r15 are not allowed.  Handles conditional writes to the register:
570   if guardT == IRTemp_INVALID then the write is unconditional. */
571static void putIRegT ( UInt       iregNo,
572                       IRExpr*    e,
573                       IRTemp     guardT /* :: Ity_I32, 0 or 1 */ )
574{
575   /* So, generate either an unconditional or a conditional write to
576      the reg. */
577   ASSERT_IS_THUMB;
578   vassert(iregNo >= 0 && iregNo <= 14);
579   if (guardT == IRTemp_INVALID) {
580      /* unconditional write */
581      llPutIReg( iregNo, e );
582   } else {
583      llPutIReg( iregNo,
584                 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
585                               llGetIReg(iregNo),
586                               e ));
587   }
588}
589
590
591/* Thumb16 and Thumb32 only.
592   Returns true if reg is 13 or 15.  Implements the BadReg
593   predicate in the ARM ARM. */
594static Bool isBadRegT ( UInt r )
595{
596   vassert(r <= 15);
597   ASSERT_IS_THUMB;
598   return r == 13 || r == 15;
599}
600
601
602/* ---------------- Double registers ---------------- */
603
604static Int doubleGuestRegOffset ( UInt dregNo )
605{
606   /* Do we care about endianness here?  Probably do if we ever get
607      into the situation of dealing with the single-precision VFP
608      registers. */
609   switch (dregNo) {
610      case 0:  return OFFB_D0;
611      case 1:  return OFFB_D1;
612      case 2:  return OFFB_D2;
613      case 3:  return OFFB_D3;
614      case 4:  return OFFB_D4;
615      case 5:  return OFFB_D5;
616      case 6:  return OFFB_D6;
617      case 7:  return OFFB_D7;
618      case 8:  return OFFB_D8;
619      case 9:  return OFFB_D9;
620      case 10: return OFFB_D10;
621      case 11: return OFFB_D11;
622      case 12: return OFFB_D12;
623      case 13: return OFFB_D13;
624      case 14: return OFFB_D14;
625      case 15: return OFFB_D15;
626      case 16: return OFFB_D16;
627      case 17: return OFFB_D17;
628      case 18: return OFFB_D18;
629      case 19: return OFFB_D19;
630      case 20: return OFFB_D20;
631      case 21: return OFFB_D21;
632      case 22: return OFFB_D22;
633      case 23: return OFFB_D23;
634      case 24: return OFFB_D24;
635      case 25: return OFFB_D25;
636      case 26: return OFFB_D26;
637      case 27: return OFFB_D27;
638      case 28: return OFFB_D28;
639      case 29: return OFFB_D29;
640      case 30: return OFFB_D30;
641      case 31: return OFFB_D31;
642      default: vassert(0);
643   }
644}
645
646/* Plain ("low level") read from a VFP Dreg. */
647static IRExpr* llGetDReg ( UInt dregNo )
648{
649   vassert(dregNo < 32);
650   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_F64 );
651}
652
653/* Architected read from a VFP Dreg. */
654static IRExpr* getDReg ( UInt dregNo ) {
655   return llGetDReg( dregNo );
656}
657
658/* Plain ("low level") write to a VFP Dreg. */
659static void llPutDReg ( UInt dregNo, IRExpr* e )
660{
661   vassert(dregNo < 32);
662   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
663   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
664}
665
666/* Architected write to a VFP Dreg.  Handles conditional writes to the
667   register: if guardT == IRTemp_INVALID then the write is
668   unconditional. */
669static void putDReg ( UInt    dregNo,
670                      IRExpr* e,
671                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
672{
673   /* So, generate either an unconditional or a conditional write to
674      the reg. */
675   if (guardT == IRTemp_INVALID) {
676      /* unconditional write */
677      llPutDReg( dregNo, e );
678   } else {
679      llPutDReg( dregNo,
680                 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
681                               llGetDReg(dregNo),
682                               e ));
683   }
684}
685
686/* And now exactly the same stuff all over again, but this time
687   taking/returning I64 rather than F64, to support 64-bit Neon
688   ops. */
689
690/* Plain ("low level") read from a Neon Integer Dreg. */
691static IRExpr* llGetDRegI64 ( UInt dregNo )
692{
693   vassert(dregNo < 32);
694   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_I64 );
695}
696
697/* Architected read from a Neon Integer Dreg. */
698static IRExpr* getDRegI64 ( UInt dregNo ) {
699   return llGetDRegI64( dregNo );
700}
701
702/* Plain ("low level") write to a Neon Integer Dreg. */
703static void llPutDRegI64 ( UInt dregNo, IRExpr* e )
704{
705   vassert(dregNo < 32);
706   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
707   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
708}
709
710/* Architected write to a Neon Integer Dreg.  Handles conditional
711   writes to the register: if guardT == IRTemp_INVALID then the write
712   is unconditional. */
713static void putDRegI64 ( UInt    dregNo,
714                         IRExpr* e,
715                         IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
716{
717   /* So, generate either an unconditional or a conditional write to
718      the reg. */
719   if (guardT == IRTemp_INVALID) {
720      /* unconditional write */
721      llPutDRegI64( dregNo, e );
722   } else {
723      llPutDRegI64( dregNo,
724                    IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
725                                  llGetDRegI64(dregNo),
726                                  e ));
727   }
728}
729
730/* ---------------- Quad registers ---------------- */
731
732static Int quadGuestRegOffset ( UInt qregNo )
733{
734   /* Do we care about endianness here?  Probably do if we ever get
735      into the situation of dealing with the 64 bit Neon registers. */
736   switch (qregNo) {
737      case 0:  return OFFB_D0;
738      case 1:  return OFFB_D2;
739      case 2:  return OFFB_D4;
740      case 3:  return OFFB_D6;
741      case 4:  return OFFB_D8;
742      case 5:  return OFFB_D10;
743      case 6:  return OFFB_D12;
744      case 7:  return OFFB_D14;
745      case 8:  return OFFB_D16;
746      case 9:  return OFFB_D18;
747      case 10: return OFFB_D20;
748      case 11: return OFFB_D22;
749      case 12: return OFFB_D24;
750      case 13: return OFFB_D26;
751      case 14: return OFFB_D28;
752      case 15: return OFFB_D30;
753      default: vassert(0);
754   }
755}
756
757/* Plain ("low level") read from a Neon Qreg. */
758static IRExpr* llGetQReg ( UInt qregNo )
759{
760   vassert(qregNo < 16);
761   return IRExpr_Get( quadGuestRegOffset(qregNo), Ity_V128 );
762}
763
764/* Architected read from a Neon Qreg. */
765static IRExpr* getQReg ( UInt qregNo ) {
766   return llGetQReg( qregNo );
767}
768
769/* Plain ("low level") write to a Neon Qreg. */
770static void llPutQReg ( UInt qregNo, IRExpr* e )
771{
772   vassert(qregNo < 16);
773   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
774   stmt( IRStmt_Put(quadGuestRegOffset(qregNo), e) );
775}
776
777/* Architected write to a Neon Qreg.  Handles conditional writes to the
778   register: if guardT == IRTemp_INVALID then the write is
779   unconditional. */
780static void putQReg ( UInt    qregNo,
781                      IRExpr* e,
782                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
783{
784   /* So, generate either an unconditional or a conditional write to
785      the reg. */
786   if (guardT == IRTemp_INVALID) {
787      /* unconditional write */
788      llPutQReg( qregNo, e );
789   } else {
790      llPutQReg( qregNo,
791                 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
792                               llGetQReg(qregNo),
793                               e ));
794   }
795}
796
797
798/* ---------------- Float registers ---------------- */
799
800static Int floatGuestRegOffset ( UInt fregNo )
801{
802   /* Start with the offset of the containing double, and then correct
803      for endianness.  Actually this is completely bogus and needs
804      careful thought. */
805   Int off;
806   vassert(fregNo < 32);
807   off = doubleGuestRegOffset(fregNo >> 1);
808   if (host_is_bigendian) {
809      vassert(0);
810   } else {
811      if (fregNo & 1)
812         off += 4;
813   }
814   return off;
815}
816
817/* Plain ("low level") read from a VFP Freg. */
818static IRExpr* llGetFReg ( UInt fregNo )
819{
820   vassert(fregNo < 32);
821   return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
822}
823
824/* Architected read from a VFP Freg. */
825static IRExpr* getFReg ( UInt fregNo ) {
826   return llGetFReg( fregNo );
827}
828
829/* Plain ("low level") write to a VFP Freg. */
830static void llPutFReg ( UInt fregNo, IRExpr* e )
831{
832   vassert(fregNo < 32);
833   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
834   stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
835}
836
837/* Architected write to a VFP Freg.  Handles conditional writes to the
838   register: if guardT == IRTemp_INVALID then the write is
839   unconditional. */
840static void putFReg ( UInt    fregNo,
841                      IRExpr* e,
842                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
843{
844   /* So, generate either an unconditional or a conditional write to
845      the reg. */
846   if (guardT == IRTemp_INVALID) {
847      /* unconditional write */
848      llPutFReg( fregNo, e );
849   } else {
850      llPutFReg( fregNo,
851                 IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
852                               llGetFReg(fregNo),
853                               e ));
854   }
855}
856
857
858/* ---------------- Misc registers ---------------- */
859
860static void putMiscReg32 ( UInt    gsoffset,
861                           IRExpr* e, /* :: Ity_I32 */
862                           IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
863{
864   switch (gsoffset) {
865      case OFFB_FPSCR:   break;
866      case OFFB_QFLAG32: break;
867      case OFFB_GEFLAG0: break;
868      case OFFB_GEFLAG1: break;
869      case OFFB_GEFLAG2: break;
870      case OFFB_GEFLAG3: break;
871      default: vassert(0); /* awaiting more cases */
872   }
873   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
874
875   if (guardT == IRTemp_INVALID) {
876      /* unconditional write */
877      stmt(IRStmt_Put(gsoffset, e));
878   } else {
879      stmt(IRStmt_Put(
880         gsoffset,
881         IRExpr_Mux0X( unop(Iop_32to8, mkexpr(guardT)),
882                       IRExpr_Get(gsoffset, Ity_I32),
883                       e
884         )
885      ));
886   }
887}
888
889static IRTemp get_ITSTATE ( void )
890{
891   ASSERT_IS_THUMB;
892   IRTemp t = newTemp(Ity_I32);
893   assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
894   return t;
895}
896
897static void put_ITSTATE ( IRTemp t )
898{
899   ASSERT_IS_THUMB;
900   stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
901}
902
903static IRTemp get_QFLAG32 ( void )
904{
905   IRTemp t = newTemp(Ity_I32);
906   assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
907   return t;
908}
909
910static void put_QFLAG32 ( IRTemp t, IRTemp condT )
911{
912   putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
913}
914
915/* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
916   Status Register) to indicate that overflow or saturation occurred.
917   Nb: t must be zero to denote no saturation, and any nonzero
918   value to indicate saturation. */
919static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
920{
921   IRTemp old = get_QFLAG32();
922   IRTemp nyu = newTemp(Ity_I32);
923   assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
924   put_QFLAG32(nyu, condT);
925}
926
927/* Generate code to set APSR.GE[flagNo]. Each fn call sets 1 bit.
928   flagNo: which flag bit to set [3...0]
929   lowbits_to_ignore:  0 = look at all 32 bits
930                       8 = look at top 24 bits only
931                      16 = look at top 16 bits only
932                      31 = look at the top bit only
933   e: input value to be evaluated.
934   The new value is taken from 'e' with the lowest 'lowbits_to_ignore'
935   masked out.  If the resulting value is zero then the GE flag is
936   set to 0; any other value sets the flag to 1. */
937static void put_GEFLAG32 ( Int flagNo,            /* 0, 1, 2 or 3 */
938                           Int lowbits_to_ignore, /* 0, 8, 16 or 31   */
939                           IRExpr* e,             /* Ity_I32 */
940                           IRTemp condT )
941{
942   vassert( flagNo >= 0 && flagNo <= 3 );
943   vassert( lowbits_to_ignore == 0  ||
944            lowbits_to_ignore == 8  ||
945            lowbits_to_ignore == 16 ||
946            lowbits_to_ignore == 31 );
947   IRTemp masked = newTemp(Ity_I32);
948   assign(masked, binop(Iop_Shr32, e, mkU8(lowbits_to_ignore)));
949
950   switch (flagNo) {
951      case 0: putMiscReg32(OFFB_GEFLAG0, mkexpr(masked), condT); break;
952      case 1: putMiscReg32(OFFB_GEFLAG1, mkexpr(masked), condT); break;
953      case 2: putMiscReg32(OFFB_GEFLAG2, mkexpr(masked), condT); break;
954      case 3: putMiscReg32(OFFB_GEFLAG3, mkexpr(masked), condT); break;
955      default: vassert(0);
956   }
957}
958
959/* Return the (32-bit, zero-or-nonzero representation scheme) of
960   the specified GE flag. */
961static IRExpr* get_GEFLAG32( Int flagNo /* 0, 1, 2, 3 */ )
962{
963   switch (flagNo) {
964      case 0: return IRExpr_Get( OFFB_GEFLAG0, Ity_I32 );
965      case 1: return IRExpr_Get( OFFB_GEFLAG1, Ity_I32 );
966      case 2: return IRExpr_Get( OFFB_GEFLAG2, Ity_I32 );
967      case 3: return IRExpr_Get( OFFB_GEFLAG3, Ity_I32 );
968      default: vassert(0);
969   }
970}
971
972/* Set all 4 GE flags from the given 32-bit value as follows: GE 3 and
973   2 are set from bit 31 of the value, and GE 1 and 0 are set from bit
974   15 of the value.  All other bits are ignored. */
975static void set_GE_32_10_from_bits_31_15 ( IRTemp t32, IRTemp condT )
976{
977   IRTemp ge10 = newTemp(Ity_I32);
978   IRTemp ge32 = newTemp(Ity_I32);
979   assign(ge10, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
980   assign(ge32, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
981   put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
982   put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
983   put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
984   put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
985}
986
987
988/* Set all 4 GE flags from the given 32-bit value as follows: GE 3
989   from bit 31, GE 2 from bit 23, GE 1 from bit 15, and GE0 from
990   bit 7.  All other bits are ignored. */
991static void set_GE_3_2_1_0_from_bits_31_23_15_7 ( IRTemp t32, IRTemp condT )
992{
993   IRTemp ge0 = newTemp(Ity_I32);
994   IRTemp ge1 = newTemp(Ity_I32);
995   IRTemp ge2 = newTemp(Ity_I32);
996   IRTemp ge3 = newTemp(Ity_I32);
997   assign(ge0, binop(Iop_And32, mkexpr(t32), mkU32(0x00000080)));
998   assign(ge1, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
999   assign(ge2, binop(Iop_And32, mkexpr(t32), mkU32(0x00800000)));
1000   assign(ge3, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
1001   put_GEFLAG32( 0, 0, mkexpr(ge0), condT );
1002   put_GEFLAG32( 1, 0, mkexpr(ge1), condT );
1003   put_GEFLAG32( 2, 0, mkexpr(ge2), condT );
1004   put_GEFLAG32( 3, 0, mkexpr(ge3), condT );
1005}
1006
1007
1008/* ---------------- FPSCR stuff ---------------- */
1009
1010/* Generate IR to get hold of the rounding mode bits in FPSCR, and
1011   convert them to IR format.  Bind the final result to the
1012   returned temp. */
1013static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1014{
1015   /* The ARMvfp encoding for rounding mode bits is:
1016         00  to nearest
1017         01  to +infinity
1018         10  to -infinity
1019         11  to zero
1020      We need to convert that to the IR encoding:
1021         00  to nearest (the default)
1022         10  to +infinity
1023         01  to -infinity
1024         11  to zero
1025      Which can be done by swapping bits 0 and 1.
1026      The rmode bits are at 23:22 in FPSCR.
1027   */
1028   IRTemp armEncd = newTemp(Ity_I32);
1029   IRTemp swapped = newTemp(Ity_I32);
1030   /* Fish FPSCR[23:22] out, and slide to bottom.  Doesn't matter that
1031      we don't zero out bits 24 and above, since the assignment to
1032      'swapped' will mask them out anyway. */
1033   assign(armEncd,
1034          binop(Iop_Shr32, IRExpr_Get(OFFB_FPSCR, Ity_I32), mkU8(22)));
1035   /* Now swap them. */
1036   assign(swapped,
1037          binop(Iop_Or32,
1038                binop(Iop_And32,
1039                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1040                      mkU32(2)),
1041                binop(Iop_And32,
1042                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1043                      mkU32(1))
1044         ));
1045   return swapped;
1046}
1047
1048
1049/*------------------------------------------------------------*/
1050/*--- Helpers for flag handling and conditional insns      ---*/
1051/*------------------------------------------------------------*/
1052
1053static HChar* name_ARMCondcode ( ARMCondcode cond )
1054{
1055   switch (cond) {
1056      case ARMCondEQ:  return "{eq}";
1057      case ARMCondNE:  return "{ne}";
1058      case ARMCondHS:  return "{hs}";  // or 'cs'
1059      case ARMCondLO:  return "{lo}";  // or 'cc'
1060      case ARMCondMI:  return "{mi}";
1061      case ARMCondPL:  return "{pl}";
1062      case ARMCondVS:  return "{vs}";
1063      case ARMCondVC:  return "{vc}";
1064      case ARMCondHI:  return "{hi}";
1065      case ARMCondLS:  return "{ls}";
1066      case ARMCondGE:  return "{ge}";
1067      case ARMCondLT:  return "{lt}";
1068      case ARMCondGT:  return "{gt}";
1069      case ARMCondLE:  return "{le}";
1070      case ARMCondAL:  return ""; // {al}: is the default
1071      case ARMCondNV:  return "{nv}";
1072      default: vpanic("name_ARMCondcode");
1073   }
1074}
1075/* and a handy shorthand for it */
1076static HChar* nCC ( ARMCondcode cond ) {
1077   return name_ARMCondcode(cond);
1078}
1079
1080
1081/* Build IR to calculate some particular condition from stored
1082   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1083   Ity_I32, suitable for narrowing.  Although the return type is
1084   Ity_I32, the returned value is either 0 or 1.  'cond' must be
1085   :: Ity_I32 and must denote the condition to compute in
1086   bits 7:4, and be zero everywhere else.
1087*/
1088static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
1089{
1090   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
1091   /* And 'cond' had better produce a value in which only bits 7:4 are
1092      nonzero.  However, obviously we can't assert for that. */
1093
1094   /* So what we're constructing for the first argument is
1095      "(cond << 4) | stored-operation".
1096      However, as per comments above, 'cond' must be supplied
1097      pre-shifted to this function.
1098
1099      This pairing scheme requires that the ARM_CC_OP_ values all fit
1100      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
1101      8 bits of the first argument. */
1102   IRExpr** args
1103      = mkIRExprVec_4(
1104           binop(Iop_Or32, IRExpr_Get(OFFB_CC_OP, Ity_I32), cond),
1105           IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1106           IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1107           IRExpr_Get(OFFB_CC_NDEP, Ity_I32)
1108        );
1109   IRExpr* call
1110      = mkIRExprCCall(
1111           Ity_I32,
1112           0/*regparm*/,
1113           "armg_calculate_condition", &armg_calculate_condition,
1114           args
1115        );
1116
1117   /* Exclude the requested condition, OP and NDEP from definedness
1118      checking.  We're only interested in DEP1 and DEP2. */
1119   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1120   return call;
1121}
1122
1123
1124/* Build IR to calculate some particular condition from stored
1125   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1126   Ity_I32, suitable for narrowing.  Although the return type is
1127   Ity_I32, the returned value is either 0 or 1.
1128*/
1129static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
1130{
1131  /* First arg is "(cond << 4) | condition".  This requires that the
1132     ARM_CC_OP_ values all fit in 4 bits.  Hence we are passing a
1133     (COND, OP) pair in the lowest 8 bits of the first argument. */
1134   vassert(cond >= 0 && cond <= 15);
1135   return mk_armg_calculate_condition_dyn( mkU32(cond << 4) );
1136}
1137
1138
1139/* Build IR to calculate just the carry flag from stored
1140   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
1141   Ity_I32. */
1142static IRExpr* mk_armg_calculate_flag_c ( void )
1143{
1144   IRExpr** args
1145      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1146                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1147                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1148                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1149   IRExpr* call
1150      = mkIRExprCCall(
1151           Ity_I32,
1152           0/*regparm*/,
1153           "armg_calculate_flag_c", &armg_calculate_flag_c,
1154           args
1155        );
1156   /* Exclude OP and NDEP from definedness checking.  We're only
1157      interested in DEP1 and DEP2. */
1158   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1159   return call;
1160}
1161
1162
1163/* Build IR to calculate just the overflow flag from stored
1164   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
1165   Ity_I32. */
1166static IRExpr* mk_armg_calculate_flag_v ( void )
1167{
1168   IRExpr** args
1169      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1170                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1171                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1172                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1173   IRExpr* call
1174      = mkIRExprCCall(
1175           Ity_I32,
1176           0/*regparm*/,
1177           "armg_calculate_flag_v", &armg_calculate_flag_v,
1178           args
1179        );
1180   /* Exclude OP and NDEP from definedness checking.  We're only
1181      interested in DEP1 and DEP2. */
1182   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1183   return call;
1184}
1185
1186
1187/* Build IR to calculate N Z C V in bits 31:28 of the
1188   returned word. */
1189static IRExpr* mk_armg_calculate_flags_nzcv ( void )
1190{
1191   IRExpr** args
1192      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1193                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1194                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1195                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1196   IRExpr* call
1197      = mkIRExprCCall(
1198           Ity_I32,
1199           0/*regparm*/,
1200           "armg_calculate_flags_nzcv", &armg_calculate_flags_nzcv,
1201           args
1202        );
1203   /* Exclude OP and NDEP from definedness checking.  We're only
1204      interested in DEP1 and DEP2. */
1205   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1206   return call;
1207}
1208
1209static IRExpr* mk_armg_calculate_flag_qc ( IRExpr* resL, IRExpr* resR, Bool Q )
1210{
1211   IRExpr** args1;
1212   IRExpr** args2;
1213   IRExpr *call1, *call2, *res;
1214
1215   if (Q) {
1216      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(0)),
1217                              binop(Iop_GetElem32x4, resL, mkU8(1)),
1218                              binop(Iop_GetElem32x4, resR, mkU8(0)),
1219                              binop(Iop_GetElem32x4, resR, mkU8(1)) );
1220      args2 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(2)),
1221                              binop(Iop_GetElem32x4, resL, mkU8(3)),
1222                              binop(Iop_GetElem32x4, resR, mkU8(2)),
1223                              binop(Iop_GetElem32x4, resR, mkU8(3)) );
1224   } else {
1225      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x2, resL, mkU8(0)),
1226                              binop(Iop_GetElem32x2, resL, mkU8(1)),
1227                              binop(Iop_GetElem32x2, resR, mkU8(0)),
1228                              binop(Iop_GetElem32x2, resR, mkU8(1)) );
1229   }
1230
1231#if 1
1232   call1 = mkIRExprCCall(
1233             Ity_I32,
1234             0/*regparm*/,
1235             "armg_calculate_flag_qc", &armg_calculate_flag_qc,
1236             args1
1237          );
1238   if (Q) {
1239      call2 = mkIRExprCCall(
1240                Ity_I32,
1241                0/*regparm*/,
1242                "armg_calculate_flag_qc", &armg_calculate_flag_qc,
1243                args2
1244             );
1245   }
1246   if (Q) {
1247      res = binop(Iop_Or32, call1, call2);
1248   } else {
1249      res = call1;
1250   }
1251#else
1252   if (Q) {
1253      res = unop(Iop_1Uto32,
1254                 binop(Iop_CmpNE32,
1255                       binop(Iop_Or32,
1256                             binop(Iop_Or32,
1257                                   binop(Iop_Xor32,
1258                                         args1[0],
1259                                         args1[2]),
1260                                   binop(Iop_Xor32,
1261                                         args1[1],
1262                                         args1[3])),
1263                             binop(Iop_Or32,
1264                                   binop(Iop_Xor32,
1265                                         args2[0],
1266                                         args2[2]),
1267                                   binop(Iop_Xor32,
1268                                         args2[1],
1269                                         args2[3]))),
1270                       mkU32(0)));
1271   } else {
1272      res = unop(Iop_1Uto32,
1273                 binop(Iop_CmpNE32,
1274                       binop(Iop_Or32,
1275                             binop(Iop_Xor32,
1276                                   args1[0],
1277                                   args1[2]),
1278                             binop(Iop_Xor32,
1279                                   args1[1],
1280                                   args1[3])),
1281                       mkU32(0)));
1282   }
1283#endif
1284   return res;
1285}
1286
1287// FIXME: this is named wrongly .. looks like a sticky set of
1288// QC, not a write to it.
1289static void setFlag_QC ( IRExpr* resL, IRExpr* resR, Bool Q,
1290                         IRTemp condT )
1291{
1292   putMiscReg32 (OFFB_FPSCR,
1293                 binop(Iop_Or32,
1294                       IRExpr_Get(OFFB_FPSCR, Ity_I32),
1295                       binop(Iop_Shl32,
1296                             mk_armg_calculate_flag_qc(resL, resR, Q),
1297                             mkU8(27))),
1298                 condT);
1299}
1300
1301/* Build IR to conditionally set the flags thunk.  As with putIReg, if
1302   guard is IRTemp_INVALID then it's unconditional, else it holds a
1303   condition :: Ity_I32. */
1304static
1305void setFlags_D1_D2_ND ( UInt cc_op, IRTemp t_dep1,
1306                         IRTemp t_dep2, IRTemp t_ndep,
1307                         IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1308{
1309   IRTemp c8;
1310   vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I32));
1311   vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I32));
1312   vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I32));
1313   vassert(cc_op >= ARMG_CC_OP_COPY && cc_op < ARMG_CC_OP_NUMBER);
1314   if (guardT == IRTemp_INVALID) {
1315      /* unconditional */
1316      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(cc_op) ));
1317      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1318      stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1319      stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1320   } else {
1321      /* conditional */
1322      c8 = newTemp(Ity_I8);
1323      assign( c8, unop(Iop_32to8, mkexpr(guardT)) );
1324      stmt( IRStmt_Put(
1325               OFFB_CC_OP,
1326               IRExpr_Mux0X( mkexpr(c8),
1327                             IRExpr_Get(OFFB_CC_OP, Ity_I32),
1328                             mkU32(cc_op) )));
1329      stmt( IRStmt_Put(
1330               OFFB_CC_DEP1,
1331               IRExpr_Mux0X( mkexpr(c8),
1332                             IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1333                             mkexpr(t_dep1) )));
1334      stmt( IRStmt_Put(
1335               OFFB_CC_DEP2,
1336               IRExpr_Mux0X( mkexpr(c8),
1337                             IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1338                             mkexpr(t_dep2) )));
1339      stmt( IRStmt_Put(
1340               OFFB_CC_NDEP,
1341               IRExpr_Mux0X( mkexpr(c8),
1342                             IRExpr_Get(OFFB_CC_NDEP, Ity_I32),
1343                             mkexpr(t_ndep) )));
1344   }
1345}
1346
1347
1348/* Minor variant of the above that sets NDEP to zero (if it
1349   sets it at all) */
1350static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
1351                             IRTemp t_dep2,
1352                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1353{
1354   IRTemp z32 = newTemp(Ity_I32);
1355   assign( z32, mkU32(0) );
1356   setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
1357}
1358
1359
1360/* Minor variant of the above that sets DEP2 to zero (if it
1361   sets it at all) */
1362static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
1363                             IRTemp t_ndep,
1364                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1365{
1366   IRTemp z32 = newTemp(Ity_I32);
1367   assign( z32, mkU32(0) );
1368   setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
1369}
1370
1371
1372/* Minor variant of the above that sets DEP2 and NDEP to zero (if it
1373   sets them at all) */
1374static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
1375                          IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1376{
1377   IRTemp z32 = newTemp(Ity_I32);
1378   assign( z32, mkU32(0) );
1379   setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
1380}
1381
1382
1383/* ARM only */
1384/* Generate a side-exit to the next instruction, if the given guard
1385   expression :: Ity_I32 is 0 (note!  the side exit is taken if the
1386   condition is false!)  This is used to skip over conditional
1387   instructions which we can't generate straight-line code for, either
1388   because they are too complex or (more likely) they potentially
1389   generate exceptions.
1390*/
1391static void mk_skip_over_A32_if_cond_is_false (
1392               IRTemp guardT /* :: Ity_I32, 0 or 1 */
1393            )
1394{
1395   ASSERT_IS_ARM;
1396   vassert(guardT != IRTemp_INVALID);
1397   vassert(0 == (guest_R15_curr_instr_notENC & 3));
1398   stmt( IRStmt_Exit(
1399            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1400            Ijk_Boring,
1401            IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4))
1402       ));
1403}
1404
1405/* Thumb16 only */
1406/* ditto, but jump over a 16-bit thumb insn */
1407static void mk_skip_over_T16_if_cond_is_false (
1408               IRTemp guardT /* :: Ity_I32, 0 or 1 */
1409            )
1410{
1411   ASSERT_IS_THUMB;
1412   vassert(guardT != IRTemp_INVALID);
1413   vassert(0 == (guest_R15_curr_instr_notENC & 1));
1414   stmt( IRStmt_Exit(
1415            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1416            Ijk_Boring,
1417            IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1))
1418       ));
1419}
1420
1421
1422/* Thumb32 only */
1423/* ditto, but jump over a 32-bit thumb insn */
1424static void mk_skip_over_T32_if_cond_is_false (
1425               IRTemp guardT /* :: Ity_I32, 0 or 1 */
1426            )
1427{
1428   ASSERT_IS_THUMB;
1429   vassert(guardT != IRTemp_INVALID);
1430   vassert(0 == (guest_R15_curr_instr_notENC & 1));
1431   stmt( IRStmt_Exit(
1432            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1433            Ijk_Boring,
1434            IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1))
1435       ));
1436}
1437
1438
1439/* Thumb16 and Thumb32 only
1440   Generate a SIGILL followed by a restart of the current instruction
1441   if the given temp is nonzero. */
1442static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ )
1443{
1444   ASSERT_IS_THUMB;
1445   vassert(t != IRTemp_INVALID);
1446   vassert(0 == (guest_R15_curr_instr_notENC & 1));
1447   stmt(
1448      IRStmt_Exit(
1449         binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
1450         Ijk_NoDecode,
1451         IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1))
1452      )
1453   );
1454}
1455
1456
1457/* Inspect the old_itstate, and generate a SIGILL if it indicates that
1458   we are currently in an IT block and are not the last in the block.
1459   This also rolls back guest_ITSTATE to its old value before the exit
1460   and restores it to its new value afterwards.  This is so that if
1461   the exit is taken, we have an up to date version of ITSTATE
1462   available.  Without doing that, we have no hope of making precise
1463   exceptions work. */
1464static void gen_SIGILL_T_if_in_but_NLI_ITBlock (
1465               IRTemp old_itstate /* :: Ity_I32 */,
1466               IRTemp new_itstate /* :: Ity_I32 */
1467            )
1468{
1469   ASSERT_IS_THUMB;
1470   put_ITSTATE(old_itstate); // backout
1471   IRTemp guards_for_next3 = newTemp(Ity_I32);
1472   assign(guards_for_next3,
1473          binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
1474   gen_SIGILL_T_if_nonzero(guards_for_next3);
1475   put_ITSTATE(new_itstate); //restore
1476}
1477
1478
1479/* Simpler version of the above, which generates a SIGILL if
1480   we're anywhere within an IT block. */
1481static void gen_SIGILL_T_if_in_ITBlock (
1482               IRTemp old_itstate /* :: Ity_I32 */,
1483               IRTemp new_itstate /* :: Ity_I32 */
1484            )
1485{
1486   put_ITSTATE(old_itstate); // backout
1487   gen_SIGILL_T_if_nonzero(old_itstate);
1488   put_ITSTATE(new_itstate); //restore
1489}
1490
1491
1492/* Generate an APSR value, from the NZCV thunk, and
1493   from QFLAG32 and GEFLAG0 .. GEFLAG3. */
1494static IRTemp synthesise_APSR ( void )
1495{
1496   IRTemp res1 = newTemp(Ity_I32);
1497   // Get NZCV
1498   assign( res1, mk_armg_calculate_flags_nzcv() );
1499   // OR in the Q value
1500   IRTemp res2 = newTemp(Ity_I32);
1501   assign(
1502      res2,
1503      binop(Iop_Or32,
1504            mkexpr(res1),
1505            binop(Iop_Shl32,
1506                  unop(Iop_1Uto32,
1507                       binop(Iop_CmpNE32,
1508                             mkexpr(get_QFLAG32()),
1509                             mkU32(0))),
1510                  mkU8(ARMG_CC_SHIFT_Q)))
1511   );
1512   // OR in GE0 .. GE3
1513   IRExpr* ge0
1514      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(0), mkU32(0)));
1515   IRExpr* ge1
1516      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(1), mkU32(0)));
1517   IRExpr* ge2
1518      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(2), mkU32(0)));
1519   IRExpr* ge3
1520      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(3), mkU32(0)));
1521   IRTemp res3 = newTemp(Ity_I32);
1522   assign(res3,
1523          binop(Iop_Or32,
1524                mkexpr(res2),
1525                binop(Iop_Or32,
1526                      binop(Iop_Or32,
1527                            binop(Iop_Shl32, ge0, mkU8(16)),
1528                            binop(Iop_Shl32, ge1, mkU8(17))),
1529                      binop(Iop_Or32,
1530                            binop(Iop_Shl32, ge2, mkU8(18)),
1531                            binop(Iop_Shl32, ge3, mkU8(19))) )));
1532   return res3;
1533}
1534
1535
1536/* and the inverse transformation: given an APSR value,
1537   set the NZCV thunk, the Q flag, and the GE flags. */
1538static void desynthesise_APSR ( Bool write_nzcvq, Bool write_ge,
1539                                IRTemp apsrT, IRTemp condT )
1540{
1541   vassert(write_nzcvq || write_ge);
1542   if (write_nzcvq) {
1543      // Do NZCV
1544      IRTemp immT = newTemp(Ity_I32);
1545      assign(immT, binop(Iop_And32, mkexpr(apsrT), mkU32(0xF0000000)) );
1546      setFlags_D1(ARMG_CC_OP_COPY, immT, condT);
1547      // Do Q
1548      IRTemp qnewT = newTemp(Ity_I32);
1549      assign(qnewT, binop(Iop_And32, mkexpr(apsrT), mkU32(ARMG_CC_MASK_Q)));
1550      put_QFLAG32(qnewT, condT);
1551   }
1552   if (write_ge) {
1553      // Do GE3..0
1554      put_GEFLAG32(0, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<16)),
1555                   condT);
1556      put_GEFLAG32(1, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<17)),
1557                   condT);
1558      put_GEFLAG32(2, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<18)),
1559                   condT);
1560      put_GEFLAG32(3, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<19)),
1561                   condT);
1562   }
1563}
1564
1565
1566/*------------------------------------------------------------*/
1567/*--- Helpers for saturation                               ---*/
1568/*------------------------------------------------------------*/
1569
/* FIXME: absolutely the only difference between (a) armUnsignedSatQ
   and (b) armSignedSatQ is that in (a) the floor is set to 0, whereas
   in (b) the floor is computed from the value of imm5.  These two
   functions should be commoned up. */
1574
1575/* UnsignedSatQ(): 'clamp' each value so it lies between 0 <= x <= (2^N)-1
1576   Optionally return flag resQ saying whether saturation occurred.
1577   See definition in manual, section A2.2.1, page 41
1578   (bits(N), boolean) UnsignedSatQ( integer i, integer N )
1579   {
1580     if ( i > (2^N)-1 ) { result = (2^N)-1; saturated = TRUE; }
1581     elsif ( i < 0 )    { result = 0; saturated = TRUE; }
1582     else               { result = i; saturated = FALSE; }
1583     return ( result<N-1:0>, saturated );
1584   }
1585*/
1586static void armUnsignedSatQ( IRTemp* res,  /* OUT - Ity_I32 */
1587                             IRTemp* resQ, /* OUT - Ity_I32  */
1588                             IRTemp regT,  /* value to clamp - Ity_I32 */
1589                             UInt imm5 )   /* saturation ceiling */
1590{
1591   UInt ceil  = (1 << imm5) - 1;    // (2^imm5)-1
1592   UInt floor = 0;
1593
1594   IRTemp node0 = newTemp(Ity_I32);
1595   IRTemp node1 = newTemp(Ity_I32);
1596   IRTemp node2 = newTemp(Ity_I1);
1597   IRTemp node3 = newTemp(Ity_I32);
1598   IRTemp node4 = newTemp(Ity_I32);
1599   IRTemp node5 = newTemp(Ity_I1);
1600   IRTemp node6 = newTemp(Ity_I32);
1601
1602   assign( node0, mkexpr(regT) );
1603   assign( node1, mkU32(ceil) );
1604   assign( node2, binop( Iop_CmpLT32S, mkexpr(node1), mkexpr(node0) ) );
1605   assign( node3, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node2)),
1606                                mkexpr(node0),
1607                                mkexpr(node1) ) );
1608   assign( node4, mkU32(floor) );
1609   assign( node5, binop( Iop_CmpLT32S, mkexpr(node3), mkexpr(node4) ) );
1610   assign( node6, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node5)),
1611                                mkexpr(node3),
1612                                mkexpr(node4) ) );
1613   assign( *res, mkexpr(node6) );
1614
1615   /* if saturation occurred, then resQ is set to some nonzero value
1616      if sat did not occur, resQ is guaranteed to be zero. */
1617   if (resQ) {
1618      assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
1619   }
1620}
1621
1622
1623/* SignedSatQ(): 'clamp' each value so it lies between  -2^N <= x <= (2^N) - 1
1624   Optionally return flag resQ saying whether saturation occurred.
1625   - see definition in manual, section A2.2.1, page 41
1626   (bits(N), boolean ) SignedSatQ( integer i, integer N )
1627   {
1628     if ( i > 2^(N-1) - 1 )    { result = 2^(N-1) - 1; saturated = TRUE; }
1629     elsif ( i < -(2^(N-1)) )  { result = -(2^(N-1));  saturated = FALSE; }
1630     else                      { result = i;           saturated = FALSE; }
1631     return ( result[N-1:0], saturated );
1632   }
1633*/
1634static void armSignedSatQ( IRTemp regT,    /* value to clamp - Ity_I32 */
1635                           UInt imm5,      /* saturation ceiling */
1636                           IRTemp* res,    /* OUT - Ity_I32 */
1637                           IRTemp* resQ )  /* OUT - Ity_I32  */
1638{
1639   Int ceil  =  (1 << (imm5-1)) - 1;  //  (2^(imm5-1))-1
1640   Int floor = -(1 << (imm5-1));      // -(2^(imm5-1))
1641
1642   IRTemp node0 = newTemp(Ity_I32);
1643   IRTemp node1 = newTemp(Ity_I32);
1644   IRTemp node2 = newTemp(Ity_I1);
1645   IRTemp node3 = newTemp(Ity_I32);
1646   IRTemp node4 = newTemp(Ity_I32);
1647   IRTemp node5 = newTemp(Ity_I1);
1648   IRTemp node6 = newTemp(Ity_I32);
1649
1650   assign( node0, mkexpr(regT) );
1651   assign( node1, mkU32(ceil) );
1652   assign( node2, binop( Iop_CmpLT32S, mkexpr(node1), mkexpr(node0) ) );
1653   assign( node3, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node2)),
1654                                mkexpr(node0),  mkexpr(node1) ) );
1655   assign( node4, mkU32(floor) );
1656   assign( node5, binop( Iop_CmpLT32S, mkexpr(node3), mkexpr(node4) ) );
1657   assign( node6, IRExpr_Mux0X( unop(Iop_1Uto8, mkexpr(node5)),
1658                                mkexpr(node3),  mkexpr(node4) ) );
1659   assign( *res, mkexpr(node6) );
1660
1661   /* if saturation occurred, then resQ is set to some nonzero value
1662      if sat did not occur, resQ is guaranteed to be zero. */
1663   if (resQ) {
1664     assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
1665   }
1666}
1667
1668
1669/* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
1670   overflow occurred for 32-bit addition.  Needs both args and the
1671   result.  HD p27. */
1672static
1673IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
1674                                      IRTemp argL, IRTemp argR )
1675{
1676   IRTemp res = newTemp(Ity_I32);
1677   assign(res, resE);
1678   return
1679      binop( Iop_Shr32,
1680             binop( Iop_And32,
1681                    binop( Iop_Xor32, mkexpr(res), mkexpr(argL) ),
1682                    binop( Iop_Xor32, mkexpr(res), mkexpr(argR) )),
1683             mkU8(31) );
1684}
1685
1686
1687/*------------------------------------------------------------*/
1688/*--- Larger helpers                                       ---*/
1689/*------------------------------------------------------------*/
1690
1691/* Compute both the result and new C flag value for a LSL by an imm5
1692   or by a register operand.  May generate reads of the old C value
1693   (hence only safe to use before any writes to guest state happen).
1694   Are factored out so can be used by both ARM and Thumb.
1695
1696   Note that in compute_result_and_C_after_{LSL,LSR,ASR}_by{imm5,reg},
1697   "res" (the result)  is a.k.a. "shop", shifter operand
1698   "newC" (the new C)  is a.k.a. "shco", shifter carry out
1699
1700   The calling convention for res and newC is a bit funny.  They could
1701   be passed by value, but instead are passed by ref.
1702
1703   The C (shco) value computed must be zero in bits 31:1, as the IR
1704   optimisations for flag handling (guest_arm_spechelper) rely on
1705   that, and the slow-path handlers (armg_calculate_flags_nzcv) assert
1706   for it.  Same applies to all these functions that compute shco
1707   after a shift or rotate, not just this one.
1708*/
1709
1710static void compute_result_and_C_after_LSL_by_imm5 (
1711               /*OUT*/HChar* buf,
1712               IRTemp* res,
1713               IRTemp* newC,
1714               IRTemp rMt, UInt shift_amt, /* operands */
1715               UInt rM      /* only for debug printing */
1716            )
1717{
1718   if (shift_amt == 0) {
1719      if (newC) {
1720         assign( *newC, mk_armg_calculate_flag_c() );
1721      }
1722      assign( *res, mkexpr(rMt) );
1723      DIS(buf, "r%u", rM);
1724   } else {
1725      vassert(shift_amt >= 1 && shift_amt <= 31);
1726      if (newC) {
1727         assign( *newC,
1728                 binop(Iop_And32,
1729                       binop(Iop_Shr32, mkexpr(rMt),
1730                                        mkU8(32 - shift_amt)),
1731                       mkU32(1)));
1732      }
1733      assign( *res,
1734              binop(Iop_Shl32, mkexpr(rMt), mkU8(shift_amt)) );
1735      DIS(buf, "r%u, LSL #%u", rM, shift_amt);
1736   }
1737}
1738
1739
1740static void compute_result_and_C_after_LSL_by_reg (
1741               /*OUT*/HChar* buf,
1742               IRTemp* res,
1743               IRTemp* newC,
1744               IRTemp rMt, IRTemp rSt,  /* operands */
1745               UInt rM,    UInt rS      /* only for debug printing */
1746            )
1747{
1748   // shift left in range 0 .. 255
1749   // amt  = rS & 255
1750   // res  = amt < 32 ?  Rm << amt  : 0
1751   // newC = amt == 0     ? oldC  :
1752   //        amt in 1..32 ?  Rm[32-amt]  : 0
1753   IRTemp amtT = newTemp(Ity_I32);
1754   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1755   if (newC) {
1756      /* mux0X(amt == 0,
1757               mux0X(amt < 32,
1758                     0,
1759                     Rm[(32-amt) & 31]),
1760               oldC)
1761      */
1762      /* About the best you can do is pray that iropt is able
1763         to nuke most or all of the following junk. */
1764      IRTemp oldC = newTemp(Ity_I32);
1765      assign(oldC, mk_armg_calculate_flag_c() );
1766      assign(
1767         *newC,
1768         IRExpr_Mux0X(
1769            unop(Iop_1Uto8,
1770                 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))),
1771            IRExpr_Mux0X(
1772               unop(Iop_1Uto8,
1773                    binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))),
1774               mkU32(0),
1775               binop(Iop_And32,
1776                     binop(Iop_Shr32,
1777                           mkexpr(rMt),
1778                           unop(Iop_32to8,
1779                                binop(Iop_And32,
1780                                      binop(Iop_Sub32,
1781                                            mkU32(32),
1782                                            mkexpr(amtT)),
1783                                      mkU32(31)
1784                                )
1785                           )
1786                     ),
1787                     mkU32(1)
1788               )
1789            ),
1790            mkexpr(oldC)
1791         )
1792      );
1793   }
1794   // (Rm << (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
1795   // Lhs of the & limits the shift to 31 bits, so as to
1796   // give known IR semantics.  Rhs of the & is all 1s for
1797   // Rs <= 31 and all 0s for Rs >= 32.
1798   assign(
1799      *res,
1800      binop(
1801         Iop_And32,
1802         binop(Iop_Shl32,
1803               mkexpr(rMt),
1804               unop(Iop_32to8,
1805                    binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
1806         binop(Iop_Sar32,
1807               binop(Iop_Sub32,
1808                     mkexpr(amtT),
1809                     mkU32(32)),
1810               mkU8(31))));
1811    DIS(buf, "r%u, LSL r%u", rM, rS);
1812}
1813
1814
1815static void compute_result_and_C_after_LSR_by_imm5 (
1816               /*OUT*/HChar* buf,
1817               IRTemp* res,
1818               IRTemp* newC,
1819               IRTemp rMt, UInt shift_amt, /* operands */
1820               UInt rM      /* only for debug printing */
1821            )
1822{
1823   if (shift_amt == 0) {
1824      // conceptually a 32-bit shift, however:
1825      // res  = 0
1826      // newC = Rm[31]
1827      if (newC) {
1828         assign( *newC,
1829                 binop(Iop_And32,
1830                       binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
1831                       mkU32(1)));
1832      }
1833      assign( *res, mkU32(0) );
1834      DIS(buf, "r%u, LSR #0(a.k.a. 32)", rM);
1835   } else {
1836      // shift in range 1..31
1837      // res  = Rm >>u shift_amt
1838      // newC = Rm[shift_amt - 1]
1839      vassert(shift_amt >= 1 && shift_amt <= 31);
1840      if (newC) {
1841         assign( *newC,
1842                 binop(Iop_And32,
1843                       binop(Iop_Shr32, mkexpr(rMt),
1844                                        mkU8(shift_amt - 1)),
1845                       mkU32(1)));
1846      }
1847      assign( *res,
1848              binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)) );
1849      DIS(buf, "r%u, LSR #%u", rM, shift_amt);
1850   }
1851}
1852
1853
1854static void compute_result_and_C_after_LSR_by_reg (
1855               /*OUT*/HChar* buf,
1856               IRTemp* res,
1857               IRTemp* newC,
1858               IRTemp rMt, IRTemp rSt,  /* operands */
1859               UInt rM,    UInt rS      /* only for debug printing */
1860            )
1861{
1862   // shift right in range 0 .. 255
1863   // amt = rS & 255
1864   // res  = amt < 32 ?  Rm >>u amt  : 0
1865   // newC = amt == 0     ? oldC  :
1866   //        amt in 1..32 ?  Rm[amt-1]  : 0
1867   IRTemp amtT = newTemp(Ity_I32);
1868   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1869   if (newC) {
1870      /* mux0X(amt == 0,
1871               mux0X(amt < 32,
1872                     0,
1873                     Rm[(amt-1) & 31]),
1874               oldC)
1875      */
1876      IRTemp oldC = newTemp(Ity_I32);
1877      assign(oldC, mk_armg_calculate_flag_c() );
1878      assign(
1879         *newC,
1880         IRExpr_Mux0X(
1881            unop(Iop_1Uto8,
1882                 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))),
1883            IRExpr_Mux0X(
1884               unop(Iop_1Uto8,
1885                    binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))),
1886               mkU32(0),
1887               binop(Iop_And32,
1888                     binop(Iop_Shr32,
1889                           mkexpr(rMt),
1890                           unop(Iop_32to8,
1891                                binop(Iop_And32,
1892                                      binop(Iop_Sub32,
1893                                            mkexpr(amtT),
1894                                            mkU32(1)),
1895                                      mkU32(31)
1896                                )
1897                           )
1898                     ),
1899                     mkU32(1)
1900               )
1901            ),
1902            mkexpr(oldC)
1903         )
1904      );
1905   }
1906   // (Rm >>u (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
1907   // Lhs of the & limits the shift to 31 bits, so as to
1908   // give known IR semantics.  Rhs of the & is all 1s for
1909   // Rs <= 31 and all 0s for Rs >= 32.
1910   assign(
1911      *res,
1912      binop(
1913         Iop_And32,
1914         binop(Iop_Shr32,
1915               mkexpr(rMt),
1916               unop(Iop_32to8,
1917                    binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
1918         binop(Iop_Sar32,
1919               binop(Iop_Sub32,
1920                     mkexpr(amtT),
1921                     mkU32(32)),
1922               mkU8(31))));
1923    DIS(buf, "r%u, LSR r%u", rM, rS);
1924}
1925
1926
1927static void compute_result_and_C_after_ASR_by_imm5 (
1928               /*OUT*/HChar* buf,
1929               IRTemp* res,
1930               IRTemp* newC,
1931               IRTemp rMt, UInt shift_amt, /* operands */
1932               UInt rM      /* only for debug printing */
1933            )
1934{
1935   if (shift_amt == 0) {
1936      // conceptually a 32-bit shift, however:
1937      // res  = Rm >>s 31
1938      // newC = Rm[31]
1939      if (newC) {
1940         assign( *newC,
1941                 binop(Iop_And32,
1942                       binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
1943                       mkU32(1)));
1944      }
1945      assign( *res, binop(Iop_Sar32, mkexpr(rMt), mkU8(31)) );
1946      DIS(buf, "r%u, ASR #0(a.k.a. 32)", rM);
1947   } else {
1948      // shift in range 1..31
1949      // res = Rm >>s shift_amt
1950      // newC = Rm[shift_amt - 1]
1951      vassert(shift_amt >= 1 && shift_amt <= 31);
1952      if (newC) {
1953         assign( *newC,
1954                 binop(Iop_And32,
1955                       binop(Iop_Shr32, mkexpr(rMt),
1956                                        mkU8(shift_amt - 1)),
1957                       mkU32(1)));
1958      }
1959      assign( *res,
1960              binop(Iop_Sar32, mkexpr(rMt), mkU8(shift_amt)) );
1961      DIS(buf, "r%u, ASR #%u", rM, shift_amt);
1962   }
1963}
1964
1965
1966static void compute_result_and_C_after_ASR_by_reg (
1967               /*OUT*/HChar* buf,
1968               IRTemp* res,
1969               IRTemp* newC,
1970               IRTemp rMt, IRTemp rSt,  /* operands */
1971               UInt rM,    UInt rS      /* only for debug printing */
1972            )
1973{
1974   // arithmetic shift right in range 0 .. 255
1975   // amt = rS & 255
1976   // res  = amt < 32 ?  Rm >>s amt  : Rm >>s 31
1977   // newC = amt == 0     ? oldC  :
1978   //        amt in 1..32 ?  Rm[amt-1]  : Rm[31]
1979   IRTemp amtT = newTemp(Ity_I32);
1980   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1981   if (newC) {
1982      /* mux0X(amt == 0,
1983               mux0X(amt < 32,
1984                     Rm[31],
1985                     Rm[(amt-1) & 31])
1986               oldC)
1987      */
1988      IRTemp oldC = newTemp(Ity_I32);
1989      assign(oldC, mk_armg_calculate_flag_c() );
1990      assign(
1991         *newC,
1992         IRExpr_Mux0X(
1993            unop(Iop_1Uto8,
1994                 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0))),
1995            IRExpr_Mux0X(
1996               unop(Iop_1Uto8,
1997                    binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))),
1998               binop(Iop_And32,
1999                     binop(Iop_Shr32,
2000                           mkexpr(rMt),
2001                           mkU8(31)
2002                     ),
2003                     mkU32(1)
2004               ),
2005               binop(Iop_And32,
2006                     binop(Iop_Shr32,
2007                           mkexpr(rMt),
2008                           unop(Iop_32to8,
2009                                binop(Iop_And32,
2010                                      binop(Iop_Sub32,
2011                                            mkexpr(amtT),
2012                                            mkU32(1)),
2013                                      mkU32(31)
2014                                )
2015                           )
2016                     ),
2017                     mkU32(1)
2018               )
2019            ),
2020            mkexpr(oldC)
2021         )
2022      );
2023   }
2024   // (Rm >>s (amt <u 32 ? amt : 31))
2025   assign(
2026      *res,
2027      binop(
2028         Iop_Sar32,
2029         mkexpr(rMt),
2030         unop(
2031            Iop_32to8,
2032            IRExpr_Mux0X(
2033               unop(
2034                 Iop_1Uto8,
2035                 binop(Iop_CmpLT32U, mkexpr(amtT), mkU32(32))),
2036               mkU32(31),
2037               mkexpr(amtT)))));
2038    DIS(buf, "r%u, ASR r%u", rM, rS);
2039}
2040
2041
2042static void compute_result_and_C_after_ROR_by_reg (
2043               /*OUT*/HChar* buf,
2044               IRTemp* res,
2045               IRTemp* newC,
2046               IRTemp rMt, IRTemp rSt,  /* operands */
2047               UInt rM,    UInt rS      /* only for debug printing */
2048            )
2049{
2050   // rotate right in range 0 .. 255
2051   // amt = rS & 255
2052   // shop =  Rm `ror` (amt & 31)
2053   // shco =  amt == 0 ? oldC : Rm[(amt-1) & 31]
2054   IRTemp amtT = newTemp(Ity_I32);
2055   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
2056   IRTemp amt5T = newTemp(Ity_I32);
2057   assign( amt5T, binop(Iop_And32, mkexpr(rSt), mkU32(31)) );
2058   IRTemp oldC = newTemp(Ity_I32);
2059   assign(oldC, mk_armg_calculate_flag_c() );
2060   if (newC) {
2061      assign(
2062         *newC,
2063         IRExpr_Mux0X(
2064            unop(Iop_32to8, mkexpr(amtT)),
2065            mkexpr(oldC),
2066            binop(Iop_And32,
2067                  binop(Iop_Shr32,
2068                        mkexpr(rMt),
2069                        unop(Iop_32to8,
2070                             binop(Iop_And32,
2071                                   binop(Iop_Sub32,
2072                                         mkexpr(amtT),
2073                                         mkU32(1)
2074                                   ),
2075                                   mkU32(31)
2076                             )
2077                        )
2078                  ),
2079                  mkU32(1)
2080            )
2081         )
2082      );
2083   }
2084   assign(
2085      *res,
2086      IRExpr_Mux0X(
2087         unop(Iop_32to8, mkexpr(amt5T)), mkexpr(rMt),
2088         binop(Iop_Or32,
2089               binop(Iop_Shr32,
2090                     mkexpr(rMt),
2091                     unop(Iop_32to8, mkexpr(amt5T))
2092               ),
2093               binop(Iop_Shl32,
2094                     mkexpr(rMt),
2095                     unop(Iop_32to8,
2096                          binop(Iop_Sub32, mkU32(32), mkexpr(amt5T))
2097                     )
2098               )
2099         )
2100      )
2101   );
2102   DIS(buf, "r%u, ROR r#%u", rM, rS);
2103}
2104
2105
2106/* Generate an expression corresponding to the immediate-shift case of
2107   a shifter operand.  This is used both for ARM and Thumb2.
2108
2109   Bind it to a temporary, and return that via *res.  If newC is
2110   non-NULL, also compute a value for the shifter's carry out (in the
2111   LSB of a word), bind it to a temporary, and return that via *shco.
2112
2113   Generates GETs from the guest state and is therefore not safe to
2114   use once we start doing PUTs to it, for any given instruction.
2115
2116   'how' is encoded thusly:
2117      00b LSL,  01b LSR,  10b ASR,  11b ROR
2118   Most but not all ARM and Thumb integer insns use this encoding.
2119   Be careful to ensure the right value is passed here.
2120*/
2121static void compute_result_and_C_after_shift_by_imm5 (
2122               /*OUT*/HChar* buf,
2123               /*OUT*/IRTemp* res,
2124               /*OUT*/IRTemp* newC,
2125               IRTemp  rMt,       /* reg to shift */
2126               UInt    how,       /* what kind of shift */
2127               UInt    shift_amt, /* shift amount (0..31) */
2128               UInt    rM         /* only for debug printing */
2129            )
2130{
2131   vassert(shift_amt < 32);
2132   vassert(how < 4);
2133
2134   switch (how) {
2135
2136      case 0:
2137         compute_result_and_C_after_LSL_by_imm5(
2138            buf, res, newC, rMt, shift_amt, rM
2139         );
2140         break;
2141
2142      case 1:
2143         compute_result_and_C_after_LSR_by_imm5(
2144            buf, res, newC, rMt, shift_amt, rM
2145         );
2146         break;
2147
2148      case 2:
2149         compute_result_and_C_after_ASR_by_imm5(
2150            buf, res, newC, rMt, shift_amt, rM
2151         );
2152         break;
2153
2154      case 3:
2155         if (shift_amt == 0) {
2156            IRTemp oldcT = newTemp(Ity_I32);
2157            // rotate right 1 bit through carry (?)
2158            // RRX -- described at ARM ARM A5-17
2159            // res  = (oldC << 31) | (Rm >>u 1)
2160            // newC = Rm[0]
2161            if (newC) {
2162               assign( *newC,
2163                       binop(Iop_And32, mkexpr(rMt), mkU32(1)));
2164            }
2165            assign( oldcT, mk_armg_calculate_flag_c() );
2166            assign( *res,
2167                    binop(Iop_Or32,
2168                          binop(Iop_Shl32, mkexpr(oldcT), mkU8(31)),
2169                          binop(Iop_Shr32, mkexpr(rMt), mkU8(1))) );
2170            DIS(buf, "r%u, RRX", rM);
2171         } else {
2172            // rotate right in range 1..31
2173            // res  = Rm `ror` shift_amt
2174            // newC = Rm[shift_amt - 1]
2175            vassert(shift_amt >= 1 && shift_amt <= 31);
2176            if (newC) {
2177               assign( *newC,
2178                       binop(Iop_And32,
2179                             binop(Iop_Shr32, mkexpr(rMt),
2180                                              mkU8(shift_amt - 1)),
2181                             mkU32(1)));
2182            }
2183            assign( *res,
2184                    binop(Iop_Or32,
2185                          binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)),
2186                          binop(Iop_Shl32, mkexpr(rMt),
2187                                           mkU8(32-shift_amt))));
2188            DIS(buf, "r%u, ROR #%u", rM, shift_amt);
2189         }
2190         break;
2191
2192      default:
2193         /*NOTREACHED*/
2194         vassert(0);
2195   }
2196}
2197
2198
2199/* Generate an expression corresponding to the register-shift case of
2200   a shifter operand.  This is used both for ARM and Thumb2.
2201
2202   Bind it to a temporary, and return that via *res.  If newC is
2203   non-NULL, also compute a value for the shifter's carry out (in the
2204   LSB of a word), bind it to a temporary, and return that via *shco.
2205
2206   Generates GETs from the guest state and is therefore not safe to
2207   use once we start doing PUTs to it, for any given instruction.
2208
2209   'how' is encoded thusly:
2210      00b LSL,  01b LSR,  10b ASR,  11b ROR
2211   Most but not all ARM and Thumb integer insns use this encoding.
2212   Be careful to ensure the right value is passed here.
2213*/
2214static void compute_result_and_C_after_shift_by_reg (
2215               /*OUT*/HChar*  buf,
2216               /*OUT*/IRTemp* res,
2217               /*OUT*/IRTemp* newC,
2218               IRTemp  rMt,       /* reg to shift */
2219               UInt    how,       /* what kind of shift */
2220               IRTemp  rSt,       /* shift amount */
2221               UInt    rM,        /* only for debug printing */
2222               UInt    rS         /* only for debug printing */
2223            )
2224{
2225   vassert(how < 4);
2226   switch (how) {
2227      case 0: { /* LSL */
2228         compute_result_and_C_after_LSL_by_reg(
2229            buf, res, newC, rMt, rSt, rM, rS
2230         );
2231         break;
2232      }
2233      case 1: { /* LSR */
2234         compute_result_and_C_after_LSR_by_reg(
2235            buf, res, newC, rMt, rSt, rM, rS
2236         );
2237         break;
2238      }
2239      case 2: { /* ASR */
2240         compute_result_and_C_after_ASR_by_reg(
2241            buf, res, newC, rMt, rSt, rM, rS
2242         );
2243         break;
2244      }
2245      case 3: { /* ROR */
2246         compute_result_and_C_after_ROR_by_reg(
2247             buf, res, newC, rMt, rSt, rM, rS
2248         );
2249         break;
2250      }
2251      default:
2252         /*NOTREACHED*/
2253         vassert(0);
2254   }
2255}
2256
2257
2258/* Generate an expression corresponding to a shifter_operand, bind it
2259   to a temporary, and return that via *shop.  If shco is non-NULL,
2260   also compute a value for the shifter's carry out (in the LSB of a
2261   word), bind it to a temporary, and return that via *shco.
2262
2263   If for some reason we can't come up with a shifter operand (missing
2264   case?  not really a shifter operand?) return False.
2265
2266   Generates GETs from the guest state and is therefore not safe to
2267   use once we start doing PUTs to it, for any given instruction.
2268
2269   For ARM insns only; not for Thumb.
2270*/
2271static Bool mk_shifter_operand ( UInt insn_25, UInt insn_11_0,
2272                                 /*OUT*/IRTemp* shop,
2273                                 /*OUT*/IRTemp* shco,
2274                                 /*OUT*/HChar* buf )
2275{
2276   UInt insn_4 = (insn_11_0 >> 4) & 1;
2277   UInt insn_7 = (insn_11_0 >> 7) & 1;
2278   vassert(insn_25 <= 0x1);
2279   vassert(insn_11_0 <= 0xFFF);
2280
2281   vassert(shop && *shop == IRTemp_INVALID);
2282   *shop = newTemp(Ity_I32);
2283
2284   if (shco) {
2285      vassert(*shco == IRTemp_INVALID);
2286      *shco = newTemp(Ity_I32);
2287   }
2288
2289   /* 32-bit immediate */
2290
2291   if (insn_25 == 1) {
2292      /* immediate: (7:0) rotated right by 2 * (11:8) */
2293      UInt imm = (insn_11_0 >> 0) & 0xFF;
2294      UInt rot = 2 * ((insn_11_0 >> 8) & 0xF);
2295      vassert(rot <= 30);
2296      imm = ROR32(imm, rot);
2297      if (shco) {
2298         if (rot == 0) {
2299            assign( *shco, mk_armg_calculate_flag_c() );
2300         } else {
2301            assign( *shco, mkU32( (imm >> 31) & 1 ) );
2302         }
2303      }
2304      DIS(buf, "#0x%x", imm);
2305      assign( *shop, mkU32(imm) );
2306      return True;
2307   }
2308
2309   /* Shift/rotate by immediate */
2310
2311   if (insn_25 == 0 && insn_4 == 0) {
2312      /* Rm (3:0) shifted (6:5) by immediate (11:7) */
2313      UInt shift_amt = (insn_11_0 >> 7) & 0x1F;
2314      UInt rM        = (insn_11_0 >> 0) & 0xF;
2315      UInt how       = (insn_11_0 >> 5) & 3;
2316      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
2317      IRTemp rMt = newTemp(Ity_I32);
2318      assign(rMt, getIRegA(rM));
2319
2320      vassert(shift_amt <= 31);
2321
2322      compute_result_and_C_after_shift_by_imm5(
2323         buf, shop, shco, rMt, how, shift_amt, rM
2324      );
2325      return True;
2326   }
2327
2328   /* Shift/rotate by register */
2329   if (insn_25 == 0 && insn_4 == 1) {
2330      /* Rm (3:0) shifted (6:5) by Rs (11:8) */
2331      UInt rM  = (insn_11_0 >> 0) & 0xF;
2332      UInt rS  = (insn_11_0 >> 8) & 0xF;
2333      UInt how = (insn_11_0 >> 5) & 3;
2334      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
2335      IRTemp rMt = newTemp(Ity_I32);
2336      IRTemp rSt = newTemp(Ity_I32);
2337
2338      if (insn_7 == 1)
2339         return False; /* not really a shifter operand */
2340
2341      assign(rMt, getIRegA(rM));
2342      assign(rSt, getIRegA(rS));
2343
2344      compute_result_and_C_after_shift_by_reg(
2345         buf, shop, shco, rMt, how, rSt, rM, rS
2346      );
2347      return True;
2348   }
2349
2350   vex_printf("mk_shifter_operand(0x%x,0x%x)\n", insn_25, insn_11_0 );
2351   return False;
2352}
2353
2354
2355/* ARM only */
2356static
2357IRExpr* mk_EA_reg_plusminus_imm12 ( UInt rN, UInt bU, UInt imm12,
2358                                    /*OUT*/HChar* buf )
2359{
2360   vassert(rN < 16);
2361   vassert(bU < 2);
2362   vassert(imm12 < 0x1000);
2363   UChar opChar = bU == 1 ? '+' : '-';
2364   DIS(buf, "[r%u, #%c%u]", rN, opChar, imm12);
2365   return
2366      binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2367             getIRegA(rN),
2368             mkU32(imm12) );
2369}
2370
2371
2372/* ARM only.
2373   NB: This is "DecodeImmShift" in newer versions of the the ARM ARM.
2374*/
2375static
2376IRExpr* mk_EA_reg_plusminus_shifted_reg ( UInt rN, UInt bU, UInt rM,
2377                                          UInt sh2, UInt imm5,
2378                                          /*OUT*/HChar* buf )
2379{
2380   vassert(rN < 16);
2381   vassert(bU < 2);
2382   vassert(rM < 16);
2383   vassert(sh2 < 4);
2384   vassert(imm5 < 32);
2385   UChar   opChar = bU == 1 ? '+' : '-';
2386   IRExpr* index  = NULL;
2387   switch (sh2) {
2388      case 0: /* LSL */
2389         /* imm5 can be in the range 0 .. 31 inclusive. */
2390         index = binop(Iop_Shl32, getIRegA(rM), mkU8(imm5));
2391         DIS(buf, "[r%u, %c r%u LSL #%u]", rN, opChar, rM, imm5);
2392         break;
2393      case 1: /* LSR */
2394         if (imm5 == 0) {
2395            index = mkU32(0);
2396            vassert(0); // ATC
2397         } else {
2398            index = binop(Iop_Shr32, getIRegA(rM), mkU8(imm5));
2399         }
2400         DIS(buf, "[r%u, %cr%u, LSR #%u]",
2401                  rN, opChar, rM, imm5 == 0 ? 32 : imm5);
2402         break;
2403      case 2: /* ASR */
2404         /* Doesn't this just mean that the behaviour with imm5 == 0
2405            is the same as if it had been 31 ? */
2406         if (imm5 == 0) {
2407            index = binop(Iop_Sar32, getIRegA(rM), mkU8(31));
2408            vassert(0); // ATC
2409         } else {
2410            index = binop(Iop_Sar32, getIRegA(rM), mkU8(imm5));
2411         }
2412         DIS(buf, "[r%u, %cr%u, ASR #%u]",
2413                  rN, opChar, rM, imm5 == 0 ? 32 : imm5);
2414         break;
2415      case 3: /* ROR or RRX */
2416         if (imm5 == 0) {
2417            IRTemp rmT    = newTemp(Ity_I32);
2418            IRTemp cflagT = newTemp(Ity_I32);
2419            assign(rmT, getIRegA(rM));
2420            assign(cflagT, mk_armg_calculate_flag_c());
2421            index = binop(Iop_Or32,
2422                          binop(Iop_Shl32, mkexpr(cflagT), mkU8(31)),
2423                          binop(Iop_Shr32, mkexpr(rmT), mkU8(1)));
2424            DIS(buf, "[r%u, %cr%u, RRX]", rN, opChar, rM);
2425         } else {
2426            IRTemp rmT = newTemp(Ity_I32);
2427            assign(rmT, getIRegA(rM));
2428            vassert(imm5 >= 1 && imm5 <= 31);
2429            index = binop(Iop_Or32,
2430                          binop(Iop_Shl32, mkexpr(rmT), mkU8(32-imm5)),
2431                          binop(Iop_Shr32, mkexpr(rmT), mkU8(imm5)));
2432            DIS(buf, "[r%u, %cr%u, ROR #%u]", rN, opChar, rM, imm5);
2433         }
2434         break;
2435      default:
2436         vassert(0);
2437   }
2438   vassert(index);
2439   return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
2440                getIRegA(rN), index);
2441}
2442
2443
2444/* ARM only */
2445static
2446IRExpr* mk_EA_reg_plusminus_imm8 ( UInt rN, UInt bU, UInt imm8,
2447                                   /*OUT*/HChar* buf )
2448{
2449   vassert(rN < 16);
2450   vassert(bU < 2);
2451   vassert(imm8 < 0x100);
2452   UChar opChar = bU == 1 ? '+' : '-';
2453   DIS(buf, "[r%u, #%c%u]", rN, opChar, imm8);
2454   return
2455      binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2456             getIRegA(rN),
2457             mkU32(imm8) );
2458}
2459
2460
2461/* ARM only */
2462static
2463IRExpr* mk_EA_reg_plusminus_reg ( UInt rN, UInt bU, UInt rM,
2464                                  /*OUT*/HChar* buf )
2465{
2466   vassert(rN < 16);
2467   vassert(bU < 2);
2468   vassert(rM < 16);
2469   UChar   opChar = bU == 1 ? '+' : '-';
2470   IRExpr* index  = getIRegA(rM);
2471   DIS(buf, "[r%u, %c r%u]", rN, opChar, rM);
2472   return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
2473                getIRegA(rN), index);
2474}
2475
2476
2477/* irRes :: Ity_I32 holds a floating point comparison result encoded
2478   as an IRCmpF64Result.  Generate code to convert it to an
2479   ARM-encoded (N,Z,C,V) group in the lowest 4 bits of an I32 value.
2480   Assign a new temp to hold that value, and return the temp. */
2481static
2482IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes )
2483{
2484   IRTemp ix       = newTemp(Ity_I32);
2485   IRTemp termL    = newTemp(Ity_I32);
2486   IRTemp termR    = newTemp(Ity_I32);
2487   IRTemp nzcv     = newTemp(Ity_I32);
2488
2489   /* This is where the fun starts.  We have to convert 'irRes' from
2490      an IR-convention return result (IRCmpF64Result) to an
2491      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
2492      4 bits of 'nzcv'. */
2493   /* Map compare result from IR to ARM(nzcv) */
2494   /*
2495      FP cmp result | IR   | ARM(nzcv)
2496      --------------------------------
2497      UN              0x45   0011
2498      LT              0x01   1000
2499      GT              0x00   0010
2500      EQ              0x40   0110
2501   */
2502   /* Now since you're probably wondering WTF ..
2503
2504      ix fishes the useful bits out of the IR value, bits 6 and 0, and
2505      places them side by side, giving a number which is 0, 1, 2 or 3.
2506
2507      termL is a sequence cooked up by GNU superopt.  It converts ix
2508         into an almost correct value NZCV value (incredibly), except
2509         for the case of UN, where it produces 0100 instead of the
2510         required 0011.
2511
2512      termR is therefore a correction term, also computed from ix.  It
2513         is 1 in the UN case and 0 for LT, GT and UN.  Hence, to get
2514         the final correct value, we subtract termR from termL.
2515
2516      Don't take my word for it.  There's a test program at the bottom
2517      of this file, to try this out with.
2518   */
2519   assign(
2520      ix,
2521      binop(Iop_Or32,
2522            binop(Iop_And32,
2523                  binop(Iop_Shr32, mkexpr(irRes), mkU8(5)),
2524                  mkU32(3)),
2525            binop(Iop_And32, mkexpr(irRes), mkU32(1))));
2526
2527   assign(
2528      termL,
2529      binop(Iop_Add32,
2530            binop(Iop_Shr32,
2531                  binop(Iop_Sub32,
2532                        binop(Iop_Shl32,
2533                              binop(Iop_Xor32, mkexpr(ix), mkU32(1)),
2534                              mkU8(30)),
2535                        mkU32(1)),
2536                  mkU8(29)),
2537            mkU32(1)));
2538
2539   assign(
2540      termR,
2541      binop(Iop_And32,
2542            binop(Iop_And32,
2543                  mkexpr(ix),
2544                  binop(Iop_Shr32, mkexpr(ix), mkU8(1))),
2545            mkU32(1)));
2546
2547   assign(nzcv, binop(Iop_Sub32, mkexpr(termL), mkexpr(termR)));
2548   return nzcv;
2549}
2550
2551
2552/* Thumb32 only.  This is "ThumbExpandImm" in the ARM ARM.  If
2553   updatesC is non-NULL, a boolean is written to it indicating whether
2554   or not the C flag is updated, as per ARM ARM "ThumbExpandImm_C".
2555*/
2556static UInt thumbExpandImm ( Bool* updatesC,
2557                             UInt imm1, UInt imm3, UInt imm8 )
2558{
2559   vassert(imm1 < (1<<1));
2560   vassert(imm3 < (1<<3));
2561   vassert(imm8 < (1<<8));
2562   UInt i_imm3_a = (imm1 << 4) | (imm3 << 1) | ((imm8 >> 7) & 1);
2563   UInt abcdefgh = imm8;
2564   UInt lbcdefgh = imm8 | 0x80;
2565   if (updatesC) {
2566      *updatesC = i_imm3_a >= 8;
2567   }
2568   switch (i_imm3_a) {
2569      case 0: case 1:
2570         return abcdefgh;
2571      case 2: case 3:
2572         return (abcdefgh << 16) | abcdefgh;
2573      case 4: case 5:
2574         return (abcdefgh << 24) | (abcdefgh << 8);
2575      case 6: case 7:
2576         return (abcdefgh << 24) | (abcdefgh << 16)
2577                | (abcdefgh << 8) | abcdefgh;
2578      case 8 ... 31:
2579         return lbcdefgh << (32 - i_imm3_a);
2580      default:
2581         break;
2582   }
2583   /*NOTREACHED*/vassert(0);
2584}
2585
2586
2587/* Version of thumbExpandImm where we simply feed it the
2588   instruction halfwords (the lowest addressed one is I0). */
2589static UInt thumbExpandImm_from_I0_I1 ( Bool* updatesC,
2590                                        UShort i0s, UShort i1s )
2591{
2592   UInt i0    = (UInt)i0s;
2593   UInt i1    = (UInt)i1s;
2594   UInt imm1  = SLICE_UInt(i0,10,10);
2595   UInt imm3  = SLICE_UInt(i1,14,12);
2596   UInt imm8  = SLICE_UInt(i1,7,0);
2597   return thumbExpandImm(updatesC, imm1, imm3, imm8);
2598}
2599
2600
2601/* Thumb16 only.  Given the firstcond and mask fields from an IT
2602   instruction, compute the 32-bit ITSTATE value implied, as described
2603   in libvex_guest_arm.h.  This is not the ARM ARM representation.
2604   Also produce the t/e chars for the 2nd, 3rd, 4th insns, for
2605   disassembly printing.  Returns False if firstcond or mask
2606   denote something invalid.
2607
2608   The number and conditions for the instructions to be
2609   conditionalised depend on firstcond and mask:
2610
2611   mask      cond 1    cond 2      cond 3      cond 4
2612
2613   1000      fc[3:0]
2614   x100      fc[3:0]   fc[3:1]:x
2615   xy10      fc[3:0]   fc[3:1]:x   fc[3:1]:y
2616   xyz1      fc[3:0]   fc[3:1]:x   fc[3:1]:y   fc[3:1]:z
2617
2618   The condition fields are assembled in *itstate backwards (cond 4 at
2619   the top, cond 1 at the bottom).  Conditions are << 4'd and then
2620   ^0xE'd, and those fields that correspond to instructions in the IT
2621   block are tagged with a 1 bit.
2622*/
2623static Bool compute_ITSTATE ( /*OUT*/UInt*  itstate,
2624                              /*OUT*/UChar* ch1,
2625                              /*OUT*/UChar* ch2,
2626                              /*OUT*/UChar* ch3,
2627                              UInt firstcond, UInt mask )
2628{
2629   vassert(firstcond <= 0xF);
2630   vassert(mask <= 0xF);
2631   *itstate = 0;
2632   *ch1 = *ch2 = *ch3 = '.';
2633   if (mask == 0)
2634      return False; /* the logic below actually ensures this anyway,
2635                       but clearer to make it explicit. */
2636   if (firstcond == 0xF)
2637      return False; /* NV is not allowed */
2638   if (firstcond == 0xE && popcount32(mask) != 1)
2639      return False; /* if firstcond is AL then all the rest must be too */
2640
2641   UInt m3 = (mask >> 3) & 1;
2642   UInt m2 = (mask >> 2) & 1;
2643   UInt m1 = (mask >> 1) & 1;
2644   UInt m0 = (mask >> 0) & 1;
2645
2646   UInt fc = (firstcond << 4) | 1/*in-IT-block*/;
2647   UInt ni = (0xE/*AL*/ << 4) | 0/*not-in-IT-block*/;
2648
2649   if (m3 == 1 && (m2|m1|m0) == 0) {
2650      *itstate = (ni << 24) | (ni << 16) | (ni << 8) | fc;
2651      *itstate ^= 0xE0E0E0E0;
2652      return True;
2653   }
2654
2655   if (m2 == 1 && (m1|m0) == 0) {
2656      *itstate = (ni << 24) | (ni << 16) | (setbit32(fc, 4, m3) << 8) | fc;
2657      *itstate ^= 0xE0E0E0E0;
2658      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2659      return True;
2660   }
2661
2662   if (m1 == 1 && m0 == 0) {
2663      *itstate = (ni << 24)
2664                 | (setbit32(fc, 4, m2) << 16)
2665                 | (setbit32(fc, 4, m3) << 8) | fc;
2666      *itstate ^= 0xE0E0E0E0;
2667      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2668      *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
2669      return True;
2670   }
2671
2672   if (m0 == 1) {
2673      *itstate = (setbit32(fc, 4, m1) << 24)
2674                 | (setbit32(fc, 4, m2) << 16)
2675                 | (setbit32(fc, 4, m3) << 8) | fc;
2676      *itstate ^= 0xE0E0E0E0;
2677      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2678      *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
2679      *ch3 = m1 == (firstcond & 1) ? 't' : 'e';
2680      return True;
2681   }
2682
2683   return False;
2684}
2685
2686
2687/* Generate IR to do 32-bit bit reversal, a la Hacker's Delight
2688   Chapter 7 Section 1. */
2689static IRTemp gen_BITREV ( IRTemp x0 )
2690{
2691   IRTemp x1 = newTemp(Ity_I32);
2692   IRTemp x2 = newTemp(Ity_I32);
2693   IRTemp x3 = newTemp(Ity_I32);
2694   IRTemp x4 = newTemp(Ity_I32);
2695   IRTemp x5 = newTemp(Ity_I32);
2696   UInt   c1 = 0x55555555;
2697   UInt   c2 = 0x33333333;
2698   UInt   c3 = 0x0F0F0F0F;
2699   UInt   c4 = 0x00FF00FF;
2700   UInt   c5 = 0x0000FFFF;
2701   assign(x1,
2702          binop(Iop_Or32,
2703                binop(Iop_Shl32,
2704                      binop(Iop_And32, mkexpr(x0), mkU32(c1)),
2705                      mkU8(1)),
2706                binop(Iop_Shr32,
2707                      binop(Iop_And32, mkexpr(x0), mkU32(~c1)),
2708                      mkU8(1))
2709   ));
2710   assign(x2,
2711          binop(Iop_Or32,
2712                binop(Iop_Shl32,
2713                      binop(Iop_And32, mkexpr(x1), mkU32(c2)),
2714                      mkU8(2)),
2715                binop(Iop_Shr32,
2716                      binop(Iop_And32, mkexpr(x1), mkU32(~c2)),
2717                      mkU8(2))
2718   ));
2719   assign(x3,
2720          binop(Iop_Or32,
2721                binop(Iop_Shl32,
2722                      binop(Iop_And32, mkexpr(x2), mkU32(c3)),
2723                      mkU8(4)),
2724                binop(Iop_Shr32,
2725                      binop(Iop_And32, mkexpr(x2), mkU32(~c3)),
2726                      mkU8(4))
2727   ));
2728   assign(x4,
2729          binop(Iop_Or32,
2730                binop(Iop_Shl32,
2731                      binop(Iop_And32, mkexpr(x3), mkU32(c4)),
2732                      mkU8(8)),
2733                binop(Iop_Shr32,
2734                      binop(Iop_And32, mkexpr(x3), mkU32(~c4)),
2735                      mkU8(8))
2736   ));
2737   assign(x5,
2738          binop(Iop_Or32,
2739                binop(Iop_Shl32,
2740                      binop(Iop_And32, mkexpr(x4), mkU32(c5)),
2741                      mkU8(16)),
2742                binop(Iop_Shr32,
2743                      binop(Iop_And32, mkexpr(x4), mkU32(~c5)),
2744                      mkU8(16))
2745   ));
2746   return x5;
2747}
2748
2749
2750/* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
2751   0:1:2:3 (aka byte-swap). */
2752static IRTemp gen_REV ( IRTemp arg )
2753{
2754   IRTemp res = newTemp(Ity_I32);
2755   assign(res,
2756          binop(Iop_Or32,
2757                binop(Iop_Shl32, mkexpr(arg), mkU8(24)),
2758          binop(Iop_Or32,
2759                binop(Iop_And32, binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
2760                                 mkU32(0x00FF0000)),
2761          binop(Iop_Or32,
2762                binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
2763                                       mkU32(0x0000FF00)),
2764                binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(24)),
2765                                       mkU32(0x000000FF) )
2766   ))));
2767   return res;
2768}
2769
2770
2771/* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
2772   2:3:0:1 (swap within lo and hi halves). */
2773static IRTemp gen_REV16 ( IRTemp arg )
2774{
2775   IRTemp res = newTemp(Ity_I32);
2776   assign(res,
2777          binop(Iop_Or32,
2778                binop(Iop_And32,
2779                      binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
2780                      mkU32(0xFF00FF00)),
2781                binop(Iop_And32,
2782                      binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
2783                      mkU32(0x00FF00FF))));
2784   return res;
2785}
2786
2787
2788/*------------------------------------------------------------*/
2789/*--- Advanced SIMD (NEON) instructions                    ---*/
2790/*------------------------------------------------------------*/
2791
2792/*------------------------------------------------------------*/
2793/*--- NEON data processing                                 ---*/
2794/*------------------------------------------------------------*/
2795
2796/* For all NEON DP ops, we use the normal scheme to handle conditional
2797   writes to registers -- pass in condT and hand that on to the
2798   put*Reg functions.  In ARM mode condT is always IRTemp_INVALID
2799   since NEON is unconditional for ARM.  In Thumb mode condT is
2800   derived from the ITSTATE shift register in the normal way. */
2801
2802static
2803UInt get_neon_d_regno(UInt theInstr)
2804{
2805   UInt x = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
2806   if (theInstr & 0x40) {
2807      if (x & 1) {
2808         x = x + 0x100;
2809      } else {
2810         x = x >> 1;
2811      }
2812   }
2813   return x;
2814}
2815
2816static
2817UInt get_neon_n_regno(UInt theInstr)
2818{
2819   UInt x = ((theInstr >> 3) & 0x10) | ((theInstr >> 16) & 0xF);
2820   if (theInstr & 0x40) {
2821      if (x & 1) {
2822         x = x + 0x100;
2823      } else {
2824         x = x >> 1;
2825      }
2826   }
2827   return x;
2828}
2829
2830static
2831UInt get_neon_m_regno(UInt theInstr)
2832{
2833   UInt x = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
2834   if (theInstr & 0x40) {
2835      if (x & 1) {
2836         x = x + 0x100;
2837      } else {
2838         x = x >> 1;
2839      }
2840   }
2841   return x;
2842}
2843
2844static
2845Bool dis_neon_vext ( UInt theInstr, IRTemp condT )
2846{
2847   UInt dreg = get_neon_d_regno(theInstr);
2848   UInt mreg = get_neon_m_regno(theInstr);
2849   UInt nreg = get_neon_n_regno(theInstr);
2850   UInt imm4 = (theInstr >> 8) & 0xf;
2851   UInt Q = (theInstr >> 6) & 1;
2852   HChar reg_t = Q ? 'q' : 'd';
2853
2854   if (Q) {
2855      putQReg(dreg, triop(Iop_ExtractV128, getQReg(nreg),
2856               getQReg(mreg), mkU8(imm4)), condT);
2857   } else {
2858      putDRegI64(dreg, triop(Iop_Extract64, getDRegI64(nreg),
2859                 getDRegI64(mreg), mkU8(imm4)), condT);
2860   }
2861   DIP("vext.8 %c%d, %c%d, %c%d, #%d\n", reg_t, dreg, reg_t, nreg,
2862                                         reg_t, mreg, imm4);
2863   return True;
2864}
2865
2866/* VTBL, VTBX */
2867static
2868Bool dis_neon_vtb ( UInt theInstr, IRTemp condT )
2869{
2870   UInt op = (theInstr >> 6) & 1;
2871   UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
2872   UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
2873   UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
2874   UInt len = (theInstr >> 8) & 3;
2875   Int i;
2876   IROp cmp;
2877   ULong imm;
2878   IRTemp arg_l;
2879   IRTemp old_mask, new_mask, cur_mask;
2880   IRTemp old_res, new_res;
2881   IRTemp old_arg, new_arg;
2882
2883   if (dreg >= 0x100 || mreg >= 0x100 || nreg >= 0x100)
2884      return False;
2885   if (nreg + len > 31)
2886      return False;
2887
2888   cmp = Iop_CmpGT8Ux8;
2889
2890   old_mask = newTemp(Ity_I64);
2891   old_res = newTemp(Ity_I64);
2892   old_arg = newTemp(Ity_I64);
2893   assign(old_mask, mkU64(0));
2894   assign(old_res, mkU64(0));
2895   assign(old_arg, getDRegI64(mreg));
2896   imm = 8;
2897   imm = (imm <<  8) | imm;
2898   imm = (imm << 16) | imm;
2899   imm = (imm << 32) | imm;
2900
2901   for (i = 0; i <= len; i++) {
2902      arg_l = newTemp(Ity_I64);
2903      new_mask = newTemp(Ity_I64);
2904      cur_mask = newTemp(Ity_I64);
2905      new_res = newTemp(Ity_I64);
2906      new_arg = newTemp(Ity_I64);
2907      assign(arg_l, getDRegI64(nreg+i));
2908      assign(new_arg, binop(Iop_Sub8x8, mkexpr(old_arg), mkU64(imm)));
2909      assign(cur_mask, binop(cmp, mkU64(imm), mkexpr(old_arg)));
2910      assign(new_mask, binop(Iop_Or64, mkexpr(old_mask), mkexpr(cur_mask)));
2911      assign(new_res, binop(Iop_Or64,
2912                            mkexpr(old_res),
2913                            binop(Iop_And64,
2914                                  binop(Iop_Perm8x8,
2915                                        mkexpr(arg_l),
2916                                        binop(Iop_And64,
2917                                              mkexpr(old_arg),
2918                                              mkexpr(cur_mask))),
2919                                  mkexpr(cur_mask))));
2920
2921      old_arg = new_arg;
2922      old_mask = new_mask;
2923      old_res = new_res;
2924   }
2925   if (op) {
2926      new_res = newTemp(Ity_I64);
2927      assign(new_res, binop(Iop_Or64,
2928                            binop(Iop_And64,
2929                                  getDRegI64(dreg),
2930                                  unop(Iop_Not64, mkexpr(old_mask))),
2931                            mkexpr(old_res)));
2932      old_res = new_res;
2933   }
2934
2935   putDRegI64(dreg, mkexpr(old_res), condT);
2936   DIP("vtb%c.8 d%u, {", op ? 'x' : 'l', dreg);
2937   if (len > 0) {
2938      DIP("d%u-d%u", nreg, nreg + len);
2939   } else {
2940      DIP("d%u", nreg);
2941   }
2942   DIP("}, d%u\n", mreg);
2943   return True;
2944}
2945
2946/* VDUP (scalar)  */
2947static
2948Bool dis_neon_vdup ( UInt theInstr, IRTemp condT )
2949{
2950   UInt Q = (theInstr >> 6) & 1;
2951   UInt dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
2952   UInt mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
2953   UInt imm4 = (theInstr >> 16) & 0xF;
2954   UInt index;
2955   UInt size;
2956   IRTemp arg_m;
2957   IRTemp res;
2958   IROp op, op2;
2959
2960   if ((imm4 == 0) || (imm4 == 8))
2961      return False;
2962   if ((Q == 1) && ((dreg & 1) == 1))
2963      return False;
2964   if (Q)
2965      dreg >>= 1;
2966   arg_m = newTemp(Ity_I64);
2967   assign(arg_m, getDRegI64(mreg));
2968   if (Q)
2969      res = newTemp(Ity_V128);
2970   else
2971      res = newTemp(Ity_I64);
2972   if ((imm4 & 1) == 1) {
2973      op = Q ? Iop_Dup8x16 : Iop_Dup8x8;
2974      op2 = Iop_GetElem8x8;
2975      index = imm4 >> 1;
2976      size = 8;
2977   } else if ((imm4 & 3) == 2) {
2978      op = Q ? Iop_Dup16x8 : Iop_Dup16x4;
2979      op2 = Iop_GetElem16x4;
2980      index = imm4 >> 2;
2981      size = 16;
2982   } else if ((imm4 & 7) == 4) {
2983      op = Q ? Iop_Dup32x4 : Iop_Dup32x2;
2984      op2 = Iop_GetElem32x2;
2985      index = imm4 >> 3;
2986      size = 32;
2987   } else {
2988      return False; // can this ever happen?
2989   }
2990   assign(res, unop(op, binop(op2, mkexpr(arg_m), mkU8(index))));
2991   if (Q) {
2992      putQReg(dreg, mkexpr(res), condT);
2993   } else {
2994      putDRegI64(dreg, mkexpr(res), condT);
2995   }
2996   DIP("vdup.%d %c%d, d%d[%d]\n", size, Q ? 'q' : 'd', dreg, mreg, index);
2997   return True;
2998}
2999
3000/* A7.4.1 Three registers of the same length */
3001static
3002Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT )
3003{
3004   UInt Q = (theInstr >> 6) & 1;
3005   UInt dreg = get_neon_d_regno(theInstr);
3006   UInt nreg = get_neon_n_regno(theInstr);
3007   UInt mreg = get_neon_m_regno(theInstr);
3008   UInt A = (theInstr >> 8) & 0xF;
3009   UInt B = (theInstr >> 4) & 1;
3010   UInt C = (theInstr >> 20) & 0x3;
3011   UInt U = (theInstr >> 24) & 1;
3012   UInt size = C;
3013
3014   IRTemp arg_n;
3015   IRTemp arg_m;
3016   IRTemp res;
3017
3018   if (Q) {
3019      arg_n = newTemp(Ity_V128);
3020      arg_m = newTemp(Ity_V128);
3021      res = newTemp(Ity_V128);
3022      assign(arg_n, getQReg(nreg));
3023      assign(arg_m, getQReg(mreg));
3024   } else {
3025      arg_n = newTemp(Ity_I64);
3026      arg_m = newTemp(Ity_I64);
3027      res = newTemp(Ity_I64);
3028      assign(arg_n, getDRegI64(nreg));
3029      assign(arg_m, getDRegI64(mreg));
3030   }
3031
3032   switch(A) {
3033      case 0:
3034         if (B == 0) {
3035            /* VHADD */
3036            ULong imm = 0;
3037            IRExpr *imm_val;
3038            IROp addOp;
3039            IROp andOp;
3040            IROp shOp;
3041            char regType = Q ? 'q' : 'd';
3042
3043            if (size == 3)
3044               return False;
3045            switch(size) {
3046               case 0: imm = 0x101010101010101LL; break;
3047               case 1: imm = 0x1000100010001LL; break;
3048               case 2: imm = 0x100000001LL; break;
3049               default: vassert(0);
3050            }
3051            if (Q) {
3052               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
3053               andOp = Iop_AndV128;
3054            } else {
3055               imm_val = mkU64(imm);
3056               andOp = Iop_And64;
3057            }
3058            if (U) {
3059               switch(size) {
3060                  case 0:
3061                     addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
3062                     shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3063                     break;
3064                  case 1:
3065                     addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
3066                     shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3067                     break;
3068                  case 2:
3069                     addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
3070                     shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3071                     break;
3072                  default:
3073                     vassert(0);
3074               }
3075            } else {
3076               switch(size) {
3077                  case 0:
3078                     addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
3079                     shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
3080                     break;
3081                  case 1:
3082                     addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
3083                     shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
3084                     break;
3085                  case 2:
3086                     addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
3087                     shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
3088                     break;
3089                  default:
3090                     vassert(0);
3091               }
3092            }
3093            assign(res,
3094                   binop(addOp,
3095                         binop(addOp,
3096                               binop(shOp, mkexpr(arg_m), mkU8(1)),
3097                               binop(shOp, mkexpr(arg_n), mkU8(1))),
3098                         binop(shOp,
3099                               binop(addOp,
3100                                     binop(andOp, mkexpr(arg_m), imm_val),
3101                                     binop(andOp, mkexpr(arg_n), imm_val)),
3102                               mkU8(1))));
3103            DIP("vhadd.%c%d %c%d, %c%d, %c%d\n",
3104                U ? 'u' : 's', 8 << size, regType,
3105                dreg, regType, nreg, regType, mreg);
3106         } else {
3107            /* VQADD */
3108            IROp op, op2;
3109            IRTemp tmp;
3110            char reg_t = Q ? 'q' : 'd';
3111            if (Q) {
3112               switch (size) {
3113                  case 0:
3114                     op = U ? Iop_QAdd8Ux16 : Iop_QAdd8Sx16;
3115                     op2 = Iop_Add8x16;
3116                     break;
3117                  case 1:
3118                     op = U ? Iop_QAdd16Ux8 : Iop_QAdd16Sx8;
3119                     op2 = Iop_Add16x8;
3120                     break;
3121                  case 2:
3122                     op = U ? Iop_QAdd32Ux4 : Iop_QAdd32Sx4;
3123                     op2 = Iop_Add32x4;
3124                     break;
3125                  case 3:
3126                     op = U ? Iop_QAdd64Ux2 : Iop_QAdd64Sx2;
3127                     op2 = Iop_Add64x2;
3128                     break;
3129                  default:
3130                     vassert(0);
3131               }
3132            } else {
3133               switch (size) {
3134                  case 0:
3135                     op = U ? Iop_QAdd8Ux8 : Iop_QAdd8Sx8;
3136                     op2 = Iop_Add8x8;
3137                     break;
3138                  case 1:
3139                     op = U ? Iop_QAdd16Ux4 : Iop_QAdd16Sx4;
3140                     op2 = Iop_Add16x4;
3141                     break;
3142                  case 2:
3143                     op = U ? Iop_QAdd32Ux2 : Iop_QAdd32Sx2;
3144                     op2 = Iop_Add32x2;
3145                     break;
3146                  case 3:
3147                     op = U ? Iop_QAdd64Ux1 : Iop_QAdd64Sx1;
3148                     op2 = Iop_Add64;
3149                     break;
3150                  default:
3151                     vassert(0);
3152               }
3153            }
3154            if (Q) {
3155               tmp = newTemp(Ity_V128);
3156            } else {
3157               tmp = newTemp(Ity_I64);
3158            }
3159            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3160#ifndef DISABLE_QC_FLAG
3161            assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
3162            setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
3163#endif
3164            DIP("vqadd.%c%d %c%d, %c%d, %c%d\n",
3165                U ? 'u' : 's',
3166                8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
3167         }
3168         break;
3169      case 1:
3170         if (B == 0) {
3171            /* VRHADD */
3172            /* VRHADD C, A, B ::=
3173                 C = (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) */
3174            IROp shift_op, add_op;
3175            IRTemp cc;
3176            ULong one = 1;
3177            HChar reg_t = Q ? 'q' : 'd';
3178            switch (size) {
3179               case 0: one = (one <<  8) | one; /* fall through */
3180               case 1: one = (one << 16) | one; /* fall through */
3181               case 2: one = (one << 32) | one; break;
3182               case 3: return False;
3183               default: vassert(0);
3184            }
3185            if (Q) {
3186               switch (size) {
3187                  case 0:
3188                     shift_op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
3189                     add_op = Iop_Add8x16;
3190                     break;
3191                  case 1:
3192                     shift_op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
3193                     add_op = Iop_Add16x8;
3194                     break;
3195                  case 2:
3196                     shift_op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
3197                     add_op = Iop_Add32x4;
3198                     break;
3199                  case 3:
3200                     return False;
3201                  default:
3202                     vassert(0);
3203               }
3204            } else {
3205               switch (size) {
3206                  case 0:
3207                     shift_op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
3208                     add_op = Iop_Add8x8;
3209                     break;
3210                  case 1:
3211                     shift_op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
3212                     add_op = Iop_Add16x4;
3213                     break;
3214                  case 2:
3215                     shift_op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
3216                     add_op = Iop_Add32x2;
3217                     break;
3218                  case 3:
3219                     return False;
3220                  default:
3221                     vassert(0);
3222               }
3223            }
3224            if (Q) {
3225               cc = newTemp(Ity_V128);
3226               assign(cc, binop(shift_op,
3227                                binop(add_op,
3228                                      binop(add_op,
3229                                            binop(Iop_AndV128,
3230                                                  mkexpr(arg_n),
3231                                                  binop(Iop_64HLtoV128,
3232                                                        mkU64(one),
3233                                                        mkU64(one))),
3234                                            binop(Iop_AndV128,
3235                                                  mkexpr(arg_m),
3236                                                  binop(Iop_64HLtoV128,
3237                                                        mkU64(one),
3238                                                        mkU64(one)))),
3239                                      binop(Iop_64HLtoV128,
3240                                            mkU64(one),
3241                                            mkU64(one))),
3242                                mkU8(1)));
3243               assign(res, binop(add_op,
3244                                 binop(add_op,
3245                                       binop(shift_op,
3246                                             mkexpr(arg_n),
3247                                             mkU8(1)),
3248                                       binop(shift_op,
3249                                             mkexpr(arg_m),
3250                                             mkU8(1))),
3251                                 mkexpr(cc)));
3252            } else {
3253               cc = newTemp(Ity_I64);
3254               assign(cc, binop(shift_op,
3255                                binop(add_op,
3256                                      binop(add_op,
3257                                            binop(Iop_And64,
3258                                                  mkexpr(arg_n),
3259                                                  mkU64(one)),
3260                                            binop(Iop_And64,
3261                                                  mkexpr(arg_m),
3262                                                  mkU64(one))),
3263                                      mkU64(one)),
3264                                mkU8(1)));
3265               assign(res, binop(add_op,
3266                                 binop(add_op,
3267                                       binop(shift_op,
3268                                             mkexpr(arg_n),
3269                                             mkU8(1)),
3270                                       binop(shift_op,
3271                                             mkexpr(arg_m),
3272                                             mkU8(1))),
3273                                 mkexpr(cc)));
3274            }
3275            DIP("vrhadd.%c%d %c%d, %c%d, %c%d\n",
3276                U ? 'u' : 's',
3277                8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
3278         } else {
3279            if (U == 0)  {
3280               switch(C) {
3281                  case 0: {
3282                     /* VAND  */
3283                     HChar reg_t = Q ? 'q' : 'd';
3284                     if (Q) {
3285                        assign(res, binop(Iop_AndV128, mkexpr(arg_n),
3286                                                       mkexpr(arg_m)));
3287                     } else {
3288                        assign(res, binop(Iop_And64, mkexpr(arg_n),
3289                                                     mkexpr(arg_m)));
3290                     }
3291                     DIP("vand %c%d, %c%d, %c%d\n",
3292                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
3293                     break;
3294                  }
3295                  case 1: {
3296                     /* VBIC  */
3297                     HChar reg_t = Q ? 'q' : 'd';
3298                     if (Q) {
3299                        assign(res, binop(Iop_AndV128,mkexpr(arg_n),
3300                               unop(Iop_NotV128, mkexpr(arg_m))));
3301                     } else {
3302                        assign(res, binop(Iop_And64, mkexpr(arg_n),
3303                               unop(Iop_Not64, mkexpr(arg_m))));
3304                     }
3305                     DIP("vbic %c%d, %c%d, %c%d\n",
3306                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
3307                     break;
3308                  }
3309                  case 2:
3310                     if ( nreg != mreg) {
3311                        /* VORR  */
3312                        HChar reg_t = Q ? 'q' : 'd';
3313                        if (Q) {
3314                           assign(res, binop(Iop_OrV128, mkexpr(arg_n),
3315                                                         mkexpr(arg_m)));
3316                        } else {
3317                           assign(res, binop(Iop_Or64, mkexpr(arg_n),
3318                                                       mkexpr(arg_m)));
3319                        }
3320                        DIP("vorr %c%d, %c%d, %c%d\n",
3321                            reg_t, dreg, reg_t, nreg, reg_t, mreg);
3322                     } else {
3323                        /* VMOV  */
3324                        HChar reg_t = Q ? 'q' : 'd';
3325                        assign(res, mkexpr(arg_m));
3326                        DIP("vmov %c%d, %c%d\n", reg_t, dreg, reg_t, mreg);
3327                     }
3328                     break;
3329                  case 3:{
3330                     /* VORN  */
3331                     HChar reg_t = Q ? 'q' : 'd';
3332                     if (Q) {
3333                        assign(res, binop(Iop_OrV128,mkexpr(arg_n),
3334                               unop(Iop_NotV128, mkexpr(arg_m))));
3335                     } else {
3336                        assign(res, binop(Iop_Or64, mkexpr(arg_n),
3337                               unop(Iop_Not64, mkexpr(arg_m))));
3338                     }
3339                     DIP("vorn %c%d, %c%d, %c%d\n",
3340                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
3341                     break;
3342                  }
3343               }
3344            } else {
3345               switch(C) {
3346                  case 0:
3347                     /* VEOR (XOR)  */
3348                     if (Q) {
3349                        assign(res, binop(Iop_XorV128, mkexpr(arg_n),
3350                                                       mkexpr(arg_m)));
3351                     } else {
3352                        assign(res, binop(Iop_Xor64, mkexpr(arg_n),
3353                                                     mkexpr(arg_m)));
3354                     }
3355                     DIP("veor %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
3356                           Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3357                     break;
3358                  case 1:
3359                     /* VBSL  */
3360                     if (Q) {
3361                        IRTemp reg_d = newTemp(Ity_V128);
3362                        assign(reg_d, getQReg(dreg));
3363                        assign(res,
3364                               binop(Iop_OrV128,
3365                                     binop(Iop_AndV128, mkexpr(arg_n),
3366                                                        mkexpr(reg_d)),
3367                                     binop(Iop_AndV128,
3368                                           mkexpr(arg_m),
3369                                           unop(Iop_NotV128,
3370                                                 mkexpr(reg_d)) ) ) );
3371                     } else {
3372                        IRTemp reg_d = newTemp(Ity_I64);
3373                        assign(reg_d, getDRegI64(dreg));
3374                        assign(res,
3375                               binop(Iop_Or64,
3376                                     binop(Iop_And64, mkexpr(arg_n),
3377                                                      mkexpr(reg_d)),
3378                                     binop(Iop_And64,
3379                                           mkexpr(arg_m),
3380                                           unop(Iop_Not64, mkexpr(reg_d)))));
3381                     }
3382                     DIP("vbsl %c%u, %c%u, %c%u\n",
3383                         Q ? 'q' : 'd', dreg,
3384                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3385                     break;
3386                  case 2:
3387                     /* VBIT  */
3388                     if (Q) {
3389                        IRTemp reg_d = newTemp(Ity_V128);
3390                        assign(reg_d, getQReg(dreg));
3391                        assign(res,
3392                               binop(Iop_OrV128,
3393                                     binop(Iop_AndV128, mkexpr(arg_n),
3394                                                        mkexpr(arg_m)),
3395                                     binop(Iop_AndV128,
3396                                           mkexpr(reg_d),
3397                                           unop(Iop_NotV128, mkexpr(arg_m)))));
3398                     } else {
3399                        IRTemp reg_d = newTemp(Ity_I64);
3400                        assign(reg_d, getDRegI64(dreg));
3401                        assign(res,
3402                               binop(Iop_Or64,
3403                                     binop(Iop_And64, mkexpr(arg_n),
3404                                                      mkexpr(arg_m)),
3405                                     binop(Iop_And64,
3406                                           mkexpr(reg_d),
3407                                           unop(Iop_Not64, mkexpr(arg_m)))));
3408                     }
3409                     DIP("vbit %c%u, %c%u, %c%u\n",
3410                         Q ? 'q' : 'd', dreg,
3411                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3412                     break;
3413                  case 3:
3414                     /* VBIF  */
3415                     if (Q) {
3416                        IRTemp reg_d = newTemp(Ity_V128);
3417                        assign(reg_d, getQReg(dreg));
3418                        assign(res,
3419                               binop(Iop_OrV128,
3420                                     binop(Iop_AndV128, mkexpr(reg_d),
3421                                                        mkexpr(arg_m)),
3422                                     binop(Iop_AndV128,
3423                                           mkexpr(arg_n),
3424                                           unop(Iop_NotV128, mkexpr(arg_m)))));
3425                     } else {
3426                        IRTemp reg_d = newTemp(Ity_I64);
3427                        assign(reg_d, getDRegI64(dreg));
3428                        assign(res,
3429                               binop(Iop_Or64,
3430                                     binop(Iop_And64, mkexpr(reg_d),
3431                                                      mkexpr(arg_m)),
3432                                     binop(Iop_And64,
3433                                           mkexpr(arg_n),
3434                                           unop(Iop_Not64, mkexpr(arg_m)))));
3435                     }
3436                     DIP("vbif %c%u, %c%u, %c%u\n",
3437                         Q ? 'q' : 'd', dreg,
3438                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3439                     break;
3440               }
3441            }
3442         }
3443         break;
3444      case 2:
3445         if (B == 0) {
3446            /* VHSUB */
3447            /* (A >> 1) - (B >> 1) - (NOT (A) & B & 1)   */
3448            ULong imm = 0;
3449            IRExpr *imm_val;
3450            IROp subOp;
3451            IROp notOp;
3452            IROp andOp;
3453            IROp shOp;
3454            if (size == 3)
3455               return False;
3456            switch(size) {
3457               case 0: imm = 0x101010101010101LL; break;
3458               case 1: imm = 0x1000100010001LL; break;
3459               case 2: imm = 0x100000001LL; break;
3460               default: vassert(0);
3461            }
3462            if (Q) {
3463               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
3464               andOp = Iop_AndV128;
3465               notOp = Iop_NotV128;
3466            } else {
3467               imm_val = mkU64(imm);
3468               andOp = Iop_And64;
3469               notOp = Iop_Not64;
3470            }
3471            if (U) {
3472               switch(size) {
3473                  case 0:
3474                     subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3475                     shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3476                     break;
3477                  case 1:
3478                     subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3479                     shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3480                     break;
3481                  case 2:
3482                     subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3483                     shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3484                     break;
3485                  default:
3486                     vassert(0);
3487               }
3488            } else {
3489               switch(size) {
3490                  case 0:
3491                     subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3492                     shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
3493                     break;
3494                  case 1:
3495                     subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3496                     shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
3497                     break;
3498                  case 2:
3499                     subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3500                     shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
3501                     break;
3502                  default:
3503                     vassert(0);
3504               }
3505            }
3506            assign(res,
3507                   binop(subOp,
3508                         binop(subOp,
3509                               binop(shOp, mkexpr(arg_n), mkU8(1)),
3510                               binop(shOp, mkexpr(arg_m), mkU8(1))),
3511                         binop(andOp,
3512                               binop(andOp,
3513                                     unop(notOp, mkexpr(arg_n)),
3514                                     mkexpr(arg_m)),
3515                               imm_val)));
3516            DIP("vhsub.%c%u %c%u, %c%u, %c%u\n",
3517                U ? 'u' : 's', 8 << size,
3518                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3519                mreg);
3520         } else {
3521            /* VQSUB */
3522            IROp op, op2;
3523            IRTemp tmp;
3524            if (Q) {
3525               switch (size) {
3526                  case 0:
3527                     op = U ? Iop_QSub8Ux16 : Iop_QSub8Sx16;
3528                     op2 = Iop_Sub8x16;
3529                     break;
3530                  case 1:
3531                     op = U ? Iop_QSub16Ux8 : Iop_QSub16Sx8;
3532                     op2 = Iop_Sub16x8;
3533                     break;
3534                  case 2:
3535                     op = U ? Iop_QSub32Ux4 : Iop_QSub32Sx4;
3536                     op2 = Iop_Sub32x4;
3537                     break;
3538                  case 3:
3539                     op = U ? Iop_QSub64Ux2 : Iop_QSub64Sx2;
3540                     op2 = Iop_Sub64x2;
3541                     break;
3542                  default:
3543                     vassert(0);
3544               }
3545            } else {
3546               switch (size) {
3547                  case 0:
3548                     op = U ? Iop_QSub8Ux8 : Iop_QSub8Sx8;
3549                     op2 = Iop_Sub8x8;
3550                     break;
3551                  case 1:
3552                     op = U ? Iop_QSub16Ux4 : Iop_QSub16Sx4;
3553                     op2 = Iop_Sub16x4;
3554                     break;
3555                  case 2:
3556                     op = U ? Iop_QSub32Ux2 : Iop_QSub32Sx2;
3557                     op2 = Iop_Sub32x2;
3558                     break;
3559                  case 3:
3560                     op = U ? Iop_QSub64Ux1 : Iop_QSub64Sx1;
3561                     op2 = Iop_Sub64;
3562                     break;
3563                  default:
3564                     vassert(0);
3565               }
3566            }
3567            if (Q)
3568               tmp = newTemp(Ity_V128);
3569            else
3570               tmp = newTemp(Ity_I64);
3571            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3572#ifndef DISABLE_QC_FLAG
3573            assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
3574            setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
3575#endif
3576            DIP("vqsub.%c%u %c%u, %c%u, %c%u\n",
3577                U ? 'u' : 's', 8 << size,
3578                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3579                mreg);
3580         }
3581         break;
3582      case 3: {
3583            IROp op;
3584            if (Q) {
3585               switch (size) {
3586                  case 0: op = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; break;
3587                  case 1: op = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; break;
3588                  case 2: op = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; break;
3589                  case 3: return False;
3590                  default: vassert(0);
3591               }
3592            } else {
3593               switch (size) {
3594                  case 0: op = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; break;
3595                  case 1: op = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; break;
3596                  case 2: op = U ? Iop_CmpGT32Ux2: Iop_CmpGT32Sx2; break;
3597                  case 3: return False;
3598                  default: vassert(0);
3599               }
3600            }
3601            if (B == 0) {
3602               /* VCGT  */
3603               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3604               DIP("vcgt.%c%u %c%u, %c%u, %c%u\n",
3605                   U ? 'u' : 's', 8 << size,
3606                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3607                   mreg);
3608            } else {
3609               /* VCGE  */
3610               /* VCGE res, argn, argm
3611                    is equal to
3612                  VCGT tmp, argm, argn
3613                  VNOT res, tmp */
3614               assign(res,
3615                      unop(Q ? Iop_NotV128 : Iop_Not64,
3616                           binop(op, mkexpr(arg_m), mkexpr(arg_n))));
3617               DIP("vcge.%c%u %c%u, %c%u, %c%u\n",
3618                   U ? 'u' : 's', 8 << size,
3619                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3620                   mreg);
3621            }
3622         }
3623         break;
3624      case 4:
3625         if (B == 0) {
3626            /* VSHL */
3627            IROp op, sub_op;
3628            IRTemp tmp;
3629            if (U) {
3630               switch (size) {
3631                  case 0: op = Q ? Iop_Shl8x16 : Iop_Shl8x8; break;
3632                  case 1: op = Q ? Iop_Shl16x8 : Iop_Shl16x4; break;
3633                  case 2: op = Q ? Iop_Shl32x4 : Iop_Shl32x2; break;
3634                  case 3: op = Q ? Iop_Shl64x2 : Iop_Shl64; break;
3635                  default: vassert(0);
3636               }
3637            } else {
3638               tmp = newTemp(Q ? Ity_V128 : Ity_I64);
3639               switch (size) {
3640                  case 0:
3641                     op = Q ? Iop_Sar8x16 : Iop_Sar8x8;
3642                     sub_op = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3643                     break;
3644                  case 1:
3645                     op = Q ? Iop_Sar16x8 : Iop_Sar16x4;
3646                     sub_op = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3647                     break;
3648                  case 2:
3649                     op = Q ? Iop_Sar32x4 : Iop_Sar32x2;
3650                     sub_op = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3651                     break;
3652                  case 3:
3653                     op = Q ? Iop_Sar64x2 : Iop_Sar64;
3654                     sub_op = Q ? Iop_Sub64x2 : Iop_Sub64;
3655                     break;
3656                  default:
3657                     vassert(0);
3658               }
3659            }
3660            if (U) {
3661               if (!Q && (size == 3))
3662                  assign(res, binop(op, mkexpr(arg_m),
3663                                        unop(Iop_64to8, mkexpr(arg_n))));
3664               else
3665                  assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3666            } else {
3667               if (Q)
3668                  assign(tmp, binop(sub_op,
3669                                    binop(Iop_64HLtoV128, mkU64(0), mkU64(0)),
3670                                    mkexpr(arg_n)));
3671               else
3672                  assign(tmp, binop(sub_op, mkU64(0), mkexpr(arg_n)));
3673               if (!Q && (size == 3))
3674                  assign(res, binop(op, mkexpr(arg_m),
3675                                        unop(Iop_64to8, mkexpr(tmp))));
3676               else
3677                  assign(res, binop(op, mkexpr(arg_m), mkexpr(tmp)));
3678            }
3679            DIP("vshl.%c%u %c%u, %c%u, %c%u\n",
3680                U ? 'u' : 's', 8 << size,
3681                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3682                nreg);
3683         } else {
3684            /* VQSHL */
3685            IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt;
3686            IRTemp tmp, shval, mask, old_shval;
3687            UInt i;
3688            ULong esize;
3689            cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
3690            cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3691            if (U) {
3692               switch (size) {
3693                  case 0:
3694                     op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
3695                     op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
3696                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3697                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3698                     break;
3699                  case 1:
3700                     op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
3701                     op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
3702                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3703                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3704                     break;
3705                  case 2:
3706                     op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
3707                     op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
3708                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3709                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3710                     break;
3711                  case 3:
3712                     op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
3713                     op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
3714                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3715                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3716                     break;
3717                  default:
3718                     vassert(0);
3719               }
3720            } else {
3721               switch (size) {
3722                  case 0:
3723                     op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
3724                     op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
3725                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3726                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3727                     break;
3728                  case 1:
3729                     op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
3730                     op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
3731                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3732                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3733                     break;
3734                  case 2:
3735                     op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
3736                     op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
3737                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3738                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3739                     break;
3740                  case 3:
3741                     op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
3742                     op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
3743                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3744                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3745                     break;
3746                  default:
3747                     vassert(0);
3748               }
3749            }
3750            if (Q) {
3751               tmp = newTemp(Ity_V128);
3752               shval = newTemp(Ity_V128);
3753               mask = newTemp(Ity_V128);
3754            } else {
3755               tmp = newTemp(Ity_I64);
3756               shval = newTemp(Ity_I64);
3757               mask = newTemp(Ity_I64);
3758            }
3759            assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3760#ifndef DISABLE_QC_FLAG
3761            /* Only least significant byte from second argument is used.
3762               Copy this byte to the whole vector element. */
3763            assign(shval, binop(op_shrn,
3764                                binop(op_shln,
3765                                       mkexpr(arg_n),
3766                                       mkU8((8 << size) - 8)),
3767                                mkU8((8 << size) - 8)));
3768            for(i = 0; i < size; i++) {
3769               old_shval = shval;
3770               shval = newTemp(Q ? Ity_V128 : Ity_I64);
3771               assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3772                                   mkexpr(old_shval),
3773                                   binop(op_shln,
3774                                         mkexpr(old_shval),
3775                                         mkU8(8 << i))));
3776            }
3777            /* If shift is greater or equal to the element size and
3778               element is non-zero, then QC flag should be set. */
3779            esize = (8 << size) - 1;
3780            esize = (esize <<  8) | esize;
3781            esize = (esize << 16) | esize;
3782            esize = (esize << 32) | esize;
3783            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3784                             binop(cmp_gt, mkexpr(shval),
3785                                           Q ? mkU128(esize) : mkU64(esize)),
3786                             unop(cmp_neq, mkexpr(arg_m))),
3787                       Q ? mkU128(0) : mkU64(0),
3788                       Q, condT);
3789            /* Otherwise the QC flag should be set if the shift value is
3790               positive and the result, shifted right by the same amount,
3791               is not equal to the left argument. */
3792            assign(mask, binop(cmp_gt, mkexpr(shval),
3793                                       Q ? mkU128(0) : mkU64(0)));
3794            if (!Q && size == 3)
3795               assign(tmp, binop(op_rev, mkexpr(res),
3796                                         unop(Iop_64to8, mkexpr(arg_n))));
3797            else
3798               assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
3799            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3800                             mkexpr(tmp), mkexpr(mask)),
3801                       binop(Q ? Iop_AndV128 : Iop_And64,
3802                             mkexpr(arg_m), mkexpr(mask)),
3803                       Q, condT);
3804#endif
3805            DIP("vqshl.%c%u %c%u, %c%u, %c%u\n",
3806                U ? 'u' : 's', 8 << size,
3807                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3808                nreg);
3809         }
3810         break;
3811      case 5:
3812         if (B == 0) {
3813            /* VRSHL */
3814            IROp op, op_shrn, op_shln, cmp_gt, op_add;
3815            IRTemp shval, old_shval, imm_val, round;
3816            UInt i;
3817            ULong imm;
3818            cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3819            imm = 1L;
3820            switch (size) {
3821               case 0: imm = (imm <<  8) | imm; /* fall through */
3822               case 1: imm = (imm << 16) | imm; /* fall through */
3823               case 2: imm = (imm << 32) | imm; /* fall through */
3824               case 3: break;
3825               default: vassert(0);
3826            }
3827            imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
3828            round = newTemp(Q ? Ity_V128 : Ity_I64);
3829            assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
3830            if (U) {
3831               switch (size) {
3832                  case 0:
3833                     op = Q ? Iop_Shl8x16 : Iop_Shl8x8;
3834                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3835                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3836                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3837                     break;
3838                  case 1:
3839                     op = Q ? Iop_Shl16x8 : Iop_Shl16x4;
3840                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3841                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3842                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3843                     break;
3844                  case 2:
3845                     op = Q ? Iop_Shl32x4 : Iop_Shl32x2;
3846                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3847                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3848                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3849                     break;
3850                  case 3:
3851                     op = Q ? Iop_Shl64x2 : Iop_Shl64;
3852                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
3853                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3854                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3855                     break;
3856                  default:
3857                     vassert(0);
3858               }
3859            } else {
3860               switch (size) {
3861                  case 0:
3862                     op = Q ? Iop_Sal8x16 : Iop_Sal8x8;
3863                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3864                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3865                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3866                     break;
3867                  case 1:
3868                     op = Q ? Iop_Sal16x8 : Iop_Sal16x4;
3869                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3870                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3871                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3872                     break;
3873                  case 2:
3874                     op = Q ? Iop_Sal32x4 : Iop_Sal32x2;
3875                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3876                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3877                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3878                     break;
3879                  case 3:
3880                     op = Q ? Iop_Sal64x2 : Iop_Sal64x1;
3881                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
3882                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3883                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3884                     break;
3885                  default:
3886                     vassert(0);
3887               }
3888            }
3889            if (Q) {
3890               shval = newTemp(Ity_V128);
3891            } else {
3892               shval = newTemp(Ity_I64);
3893            }
3894            /* Only least significant byte from second argument is used.
3895               Copy this byte to the whole vector element. */
3896            assign(shval, binop(op_shrn,
3897                                binop(op_shln,
3898                                       mkexpr(arg_n),
3899                                       mkU8((8 << size) - 8)),
3900                                mkU8((8 << size) - 8)));
3901            for (i = 0; i < size; i++) {
3902               old_shval = shval;
3903               shval = newTemp(Q ? Ity_V128 : Ity_I64);
3904               assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3905                                   mkexpr(old_shval),
3906                                   binop(op_shln,
3907                                         mkexpr(old_shval),
3908                                         mkU8(8 << i))));
3909            }
3910            /* Compute the result */
3911            if (!Q && size == 3 && U) {
3912               assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
3913                                   binop(op,
3914                                         mkexpr(arg_m),
3915                                         unop(Iop_64to8,
3916                                              binop(op_add,
3917                                                    mkexpr(arg_n),
3918                                                    mkexpr(imm_val)))),
3919                                   binop(Q ? Iop_AndV128 : Iop_And64,
3920                                         mkexpr(imm_val),
3921                                         binop(cmp_gt,
3922                                               Q ? mkU128(0) : mkU64(0),
3923                                               mkexpr(arg_n)))));
3924               assign(res, binop(op_add,
3925                                 binop(op,
3926                                       mkexpr(arg_m),
3927                                       unop(Iop_64to8, mkexpr(arg_n))),
3928                                 mkexpr(round)));
3929            } else {
3930               assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
3931                                   binop(op,
3932                                         mkexpr(arg_m),
3933                                         binop(op_add,
3934                                               mkexpr(arg_n),
3935                                               mkexpr(imm_val))),
3936                                   binop(Q ? Iop_AndV128 : Iop_And64,
3937                                         mkexpr(imm_val),
3938                                         binop(cmp_gt,
3939                                               Q ? mkU128(0) : mkU64(0),
3940                                               mkexpr(arg_n)))));
3941               assign(res, binop(op_add,
3942                                 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
3943                                 mkexpr(round)));
3944            }
3945            DIP("vrshl.%c%u %c%u, %c%u, %c%u\n",
3946                U ? 'u' : 's', 8 << size,
3947                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3948                nreg);
3949         } else {
3950            /* VQRSHL */
3951            IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_add;
3952            IRTemp tmp, shval, mask, old_shval, imm_val, round;
3953            UInt i;
3954            ULong esize, imm;
3955            cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
3956            cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3957            imm = 1L;
3958            switch (size) {
3959               case 0: imm = (imm <<  8) | imm; /* fall through */
3960               case 1: imm = (imm << 16) | imm; /* fall through */
3961               case 2: imm = (imm << 32) | imm; /* fall through */
3962               case 3: break;
3963               default: vassert(0);
3964            }
3965            imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
3966            round = newTemp(Q ? Ity_V128 : Ity_I64);
3967            assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
3968            if (U) {
3969               switch (size) {
3970                  case 0:
3971                     op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
3972                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3973                     op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
3974                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3975                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3976                     break;
3977                  case 1:
3978                     op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
3979                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3980                     op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
3981                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3982                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3983                     break;
3984                  case 2:
3985                     op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
3986                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3987                     op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
3988                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3989                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3990                     break;
3991                  case 3:
3992                     op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
3993                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
3994                     op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
3995                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3996                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3997                     break;
3998                  default:
3999                     vassert(0);
4000               }
4001            } else {
4002               switch (size) {
4003                  case 0:
4004                     op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
4005                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
4006                     op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
4007                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
4008                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
4009                     break;
4010                  case 1:
4011                     op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
4012                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
4013                     op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
4014                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
4015                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
4016                     break;
4017                  case 2:
4018                     op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
4019                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
4020                     op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
4021                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
4022                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
4023                     break;
4024                  case 3:
4025                     op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
4026                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
4027                     op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
4028                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
4029                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
4030                     break;
4031                  default:
4032                     vassert(0);
4033               }
4034            }
4035            if (Q) {
4036               tmp = newTemp(Ity_V128);
4037               shval = newTemp(Ity_V128);
4038               mask = newTemp(Ity_V128);
4039            } else {
4040               tmp = newTemp(Ity_I64);
4041               shval = newTemp(Ity_I64);
4042               mask = newTemp(Ity_I64);
4043            }
4044            /* Only least significant byte from second argument is used.
4045               Copy this byte to the whole vector element. */
4046            assign(shval, binop(op_shrn,
4047                                binop(op_shln,
4048                                       mkexpr(arg_n),
4049                                       mkU8((8 << size) - 8)),
4050                                mkU8((8 << size) - 8)));
4051            for (i = 0; i < size; i++) {
4052               old_shval = shval;
4053               shval = newTemp(Q ? Ity_V128 : Ity_I64);
4054               assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
4055                                   mkexpr(old_shval),
4056                                   binop(op_shln,
4057                                         mkexpr(old_shval),
4058                                         mkU8(8 << i))));
4059            }
4060            /* Compute the result */
4061            assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
4062                                binop(op,
4063                                      mkexpr(arg_m),
4064                                      binop(op_add,
4065                                            mkexpr(arg_n),
4066                                            mkexpr(imm_val))),
4067                                binop(Q ? Iop_AndV128 : Iop_And64,
4068                                      mkexpr(imm_val),
4069                                      binop(cmp_gt,
4070                                            Q ? mkU128(0) : mkU64(0),
4071                                            mkexpr(arg_n)))));
4072            assign(res, binop(op_add,
4073                              binop(op, mkexpr(arg_m), mkexpr(arg_n)),
4074                              mkexpr(round)));
4075#ifndef DISABLE_QC_FLAG
4076            /* If shift is greater or equal to the element size and element is
4077               non-zero, then QC flag should be set. */
4078            esize = (8 << size) - 1;
4079            esize = (esize <<  8) | esize;
4080            esize = (esize << 16) | esize;
4081            esize = (esize << 32) | esize;
4082            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4083                             binop(cmp_gt, mkexpr(shval),
4084                                           Q ? mkU128(esize) : mkU64(esize)),
4085                             unop(cmp_neq, mkexpr(arg_m))),
4086                       Q ? mkU128(0) : mkU64(0),
4087                       Q, condT);
4088            /* Otherwise the QC flag should be set if the shift value is
4089               positive and the result, shifted right by the same value, is
4090               not equal to the left argument. */
4091            assign(mask, binop(cmp_gt, mkexpr(shval),
4092                               Q ? mkU128(0) : mkU64(0)));
4093            if (!Q && size == 3)
4094               assign(tmp, binop(op_rev, mkexpr(res),
4095                                         unop(Iop_64to8, mkexpr(arg_n))));
4096            else
4097               assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
4098            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4099                             mkexpr(tmp), mkexpr(mask)),
4100                       binop(Q ? Iop_AndV128 : Iop_And64,
4101                             mkexpr(arg_m), mkexpr(mask)),
4102                       Q, condT);
4103#endif
4104            DIP("vqrshl.%c%u %c%u, %c%u, %c%u\n",
4105                U ? 'u' : 's', 8 << size,
4106                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
4107                nreg);
4108         }
4109         break;
4110      case 6:
4111         /* VMAX, VMIN  */
4112         if (B == 0) {
4113            /* VMAX */
4114            IROp op;
4115            if (U == 0) {
4116               switch (size) {
4117                  case 0: op = Q ? Iop_Max8Sx16 : Iop_Max8Sx8; break;
4118                  case 1: op = Q ? Iop_Max16Sx8 : Iop_Max16Sx4; break;
4119                  case 2: op = Q ? Iop_Max32Sx4 : Iop_Max32Sx2; break;
4120                  case 3: return False;
4121                  default: vassert(0);
4122               }
4123            } else {
4124               switch (size) {
4125                  case 0: op = Q ? Iop_Max8Ux16 : Iop_Max8Ux8; break;
4126                  case 1: op = Q ? Iop_Max16Ux8 : Iop_Max16Ux4; break;
4127                  case 2: op = Q ? Iop_Max32Ux4 : Iop_Max32Ux2; break;
4128                  case 3: return False;
4129                  default: vassert(0);
4130               }
4131            }
4132            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4133            DIP("vmax.%c%u %c%u, %c%u, %c%u\n",
4134                U ? 'u' : 's', 8 << size,
4135                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4136                mreg);
4137         } else {
4138            /* VMIN */
4139            IROp op;
4140            if (U == 0) {
4141               switch (size) {
4142                  case 0: op = Q ? Iop_Min8Sx16 : Iop_Min8Sx8; break;
4143                  case 1: op = Q ? Iop_Min16Sx8 : Iop_Min16Sx4; break;
4144                  case 2: op = Q ? Iop_Min32Sx4 : Iop_Min32Sx2; break;
4145                  case 3: return False;
4146                  default: vassert(0);
4147               }
4148            } else {
4149               switch (size) {
4150                  case 0: op = Q ? Iop_Min8Ux16 : Iop_Min8Ux8; break;
4151                  case 1: op = Q ? Iop_Min16Ux8 : Iop_Min16Ux4; break;
4152                  case 2: op = Q ? Iop_Min32Ux4 : Iop_Min32Ux2; break;
4153                  case 3: return False;
4154                  default: vassert(0);
4155               }
4156            }
4157            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4158            DIP("vmin.%c%u %c%u, %c%u, %c%u\n",
4159                U ? 'u' : 's', 8 << size,
4160                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4161                mreg);
4162         }
4163         break;
4164      case 7:
4165         if (B == 0) {
4166            /* VABD */
4167            IROp op_cmp, op_sub;
4168            IRTemp cond;
4169            if ((theInstr >> 23) & 1) {
4170               vpanic("VABDL should not be in dis_neon_data_3same\n");
4171            }
4172            if (Q) {
4173               switch (size) {
4174                  case 0:
4175                     op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
4176                     op_sub = Iop_Sub8x16;
4177                     break;
4178                  case 1:
4179                     op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
4180                     op_sub = Iop_Sub16x8;
4181                     break;
4182                  case 2:
4183                     op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
4184                     op_sub = Iop_Sub32x4;
4185                     break;
4186                  case 3:
4187                     return False;
4188                  default:
4189                     vassert(0);
4190               }
4191            } else {
4192               switch (size) {
4193                  case 0:
4194                     op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4195                     op_sub = Iop_Sub8x8;
4196                     break;
4197                  case 1:
4198                     op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4199                     op_sub = Iop_Sub16x4;
4200                     break;
4201                  case 2:
4202                     op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4203                     op_sub = Iop_Sub32x2;
4204                     break;
4205                  case 3:
4206                     return False;
4207                  default:
4208                     vassert(0);
4209               }
4210            }
4211            if (Q) {
4212               cond = newTemp(Ity_V128);
4213            } else {
4214               cond = newTemp(Ity_I64);
4215            }
4216            assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
4217            assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
4218                              binop(Q ? Iop_AndV128 : Iop_And64,
4219                                    binop(op_sub, mkexpr(arg_n),
4220                                                  mkexpr(arg_m)),
4221                                    mkexpr(cond)),
4222                              binop(Q ? Iop_AndV128 : Iop_And64,
4223                                    binop(op_sub, mkexpr(arg_m),
4224                                                  mkexpr(arg_n)),
4225                                    unop(Q ? Iop_NotV128 : Iop_Not64,
4226                                         mkexpr(cond)))));
4227            DIP("vabd.%c%u %c%u, %c%u, %c%u\n",
4228                U ? 'u' : 's', 8 << size,
4229                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4230                mreg);
4231         } else {
4232            /* VABA */
4233            IROp op_cmp, op_sub, op_add;
4234            IRTemp cond, acc, tmp;
4235            if ((theInstr >> 23) & 1) {
4236               vpanic("VABAL should not be in dis_neon_data_3same");
4237            }
4238            if (Q) {
4239               switch (size) {
4240                  case 0:
4241                     op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
4242                     op_sub = Iop_Sub8x16;
4243                     op_add = Iop_Add8x16;
4244                     break;
4245                  case 1:
4246                     op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
4247                     op_sub = Iop_Sub16x8;
4248                     op_add = Iop_Add16x8;
4249                     break;
4250                  case 2:
4251                     op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
4252                     op_sub = Iop_Sub32x4;
4253                     op_add = Iop_Add32x4;
4254                     break;
4255                  case 3:
4256                     return False;
4257                  default:
4258                     vassert(0);
4259               }
4260            } else {
4261               switch (size) {
4262                  case 0:
4263                     op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4264                     op_sub = Iop_Sub8x8;
4265                     op_add = Iop_Add8x8;
4266                     break;
4267                  case 1:
4268                     op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4269                     op_sub = Iop_Sub16x4;
4270                     op_add = Iop_Add16x4;
4271                     break;
4272                  case 2:
4273                     op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4274                     op_sub = Iop_Sub32x2;
4275                     op_add = Iop_Add32x2;
4276                     break;
4277                  case 3:
4278                     return False;
4279                  default:
4280                     vassert(0);
4281               }
4282            }
4283            if (Q) {
4284               cond = newTemp(Ity_V128);
4285               acc = newTemp(Ity_V128);
4286               tmp = newTemp(Ity_V128);
4287               assign(acc, getQReg(dreg));
4288            } else {
4289               cond = newTemp(Ity_I64);
4290               acc = newTemp(Ity_I64);
4291               tmp = newTemp(Ity_I64);
4292               assign(acc, getDRegI64(dreg));
4293            }
4294            assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
4295            assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
4296                              binop(Q ? Iop_AndV128 : Iop_And64,
4297                                    binop(op_sub, mkexpr(arg_n),
4298                                                  mkexpr(arg_m)),
4299                                    mkexpr(cond)),
4300                              binop(Q ? Iop_AndV128 : Iop_And64,
4301                                    binop(op_sub, mkexpr(arg_m),
4302                                                  mkexpr(arg_n)),
4303                                    unop(Q ? Iop_NotV128 : Iop_Not64,
4304                                         mkexpr(cond)))));
4305            assign(res, binop(op_add, mkexpr(acc), mkexpr(tmp)));
4306            DIP("vaba.%c%u %c%u, %c%u, %c%u\n",
4307                U ? 'u' : 's', 8 << size,
4308                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4309                mreg);
4310         }
4311         break;
4312      case 8:
4313         if (B == 0) {
4314            IROp op;
4315            if (U == 0) {
4316               /* VADD  */
4317               switch (size) {
4318                  case 0: op = Q ? Iop_Add8x16 : Iop_Add8x8; break;
4319                  case 1: op = Q ? Iop_Add16x8 : Iop_Add16x4; break;
4320                  case 2: op = Q ? Iop_Add32x4 : Iop_Add32x2; break;
4321                  case 3: op = Q ? Iop_Add64x2 : Iop_Add64; break;
4322                  default: vassert(0);
4323               }
4324               DIP("vadd.i%u %c%u, %c%u, %c%u\n",
4325                   8 << size, Q ? 'q' : 'd',
4326                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4327            } else {
4328               /* VSUB  */
4329               switch (size) {
4330                  case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
4331                  case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
4332                  case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
4333                  case 3: op = Q ? Iop_Sub64x2 : Iop_Sub64; break;
4334                  default: vassert(0);
4335               }
4336               DIP("vsub.i%u %c%u, %c%u, %c%u\n",
4337                   8 << size, Q ? 'q' : 'd',
4338                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4339            }
4340            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4341         } else {
4342            IROp op;
4343            switch (size) {
4344               case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
4345               case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
4346               case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
4347               case 3: op = Q ? Iop_CmpNEZ64x2 : Iop_CmpwNEZ64; break;
4348               default: vassert(0);
4349            }
4350            if (U == 0) {
4351               /* VTST  */
4352               assign(res, unop(op, binop(Q ? Iop_AndV128 : Iop_And64,
4353                                          mkexpr(arg_n),
4354                                          mkexpr(arg_m))));
4355               DIP("vtst.%u %c%u, %c%u, %c%u\n",
4356                   8 << size, Q ? 'q' : 'd',
4357                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4358            } else {
4359               /* VCEQ  */
4360               assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
4361                                unop(op,
4362                                     binop(Q ? Iop_XorV128 : Iop_Xor64,
4363                                           mkexpr(arg_n),
4364                                           mkexpr(arg_m)))));
4365               DIP("vceq.i%u %c%u, %c%u, %c%u\n",
4366                   8 << size, Q ? 'q' : 'd',
4367                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4368            }
4369         }
4370         break;
4371      case 9:
4372         if (B == 0) {
4373            /* VMLA, VMLS (integer) */
4374            IROp op, op2;
4375            UInt P = (theInstr >> 24) & 1;
4376            if (P) {
4377               switch (size) {
4378                  case 0:
4379                     op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
4380                     op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
4381                     break;
4382                  case 1:
4383                     op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
4384                     op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
4385                     break;
4386                  case 2:
4387                     op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
4388                     op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
4389                     break;
4390                  case 3:
4391                     return False;
4392                  default:
4393                     vassert(0);
4394               }
4395            } else {
4396               switch (size) {
4397                  case 0:
4398                     op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
4399                     op2 = Q ? Iop_Add8x16 : Iop_Add8x8;
4400                     break;
4401                  case 1:
4402                     op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
4403                     op2 = Q ? Iop_Add16x8 : Iop_Add16x4;
4404                     break;
4405                  case 2:
4406                     op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
4407                     op2 = Q ? Iop_Add32x4 : Iop_Add32x2;
4408                     break;
4409                  case 3:
4410                     return False;
4411                  default:
4412                     vassert(0);
4413               }
4414            }
4415            assign(res, binop(op2,
4416                              Q ? getQReg(dreg) : getDRegI64(dreg),
4417                              binop(op, mkexpr(arg_n), mkexpr(arg_m))));
4418            DIP("vml%c.i%u %c%u, %c%u, %c%u\n",
4419                P ? 's' : 'a', 8 << size,
4420                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4421                mreg);
4422         } else {
4423            /* VMUL */
4424            IROp op;
4425            UInt P = (theInstr >> 24) & 1;
4426            if (P) {
4427               switch (size) {
4428                  case 0:
4429                     op = Q ? Iop_PolynomialMul8x16 : Iop_PolynomialMul8x8;
4430                     break;
4431                  case 1: case 2: case 3: return False;
4432                  default: vassert(0);
4433               }
4434            } else {
4435               switch (size) {
4436                  case 0: op = Q ? Iop_Mul8x16 : Iop_Mul8x8; break;
4437                  case 1: op = Q ? Iop_Mul16x8 : Iop_Mul16x4; break;
4438                  case 2: op = Q ? Iop_Mul32x4 : Iop_Mul32x2; break;
4439                  case 3: return False;
4440                  default: vassert(0);
4441               }
4442            }
4443            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4444            DIP("vmul.%c%u %c%u, %c%u, %c%u\n",
4445                P ? 'p' : 'i', 8 << size,
4446                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4447                mreg);
4448         }
4449         break;
4450      case 10: {
4451         /* VPMAX, VPMIN  */
4452         UInt P = (theInstr >> 4) & 1;
4453         IROp op;
4454         if (Q)
4455            return False;
4456         if (P) {
4457            switch (size) {
4458               case 0: op = U ? Iop_PwMin8Ux8  : Iop_PwMin8Sx8; break;
4459               case 1: op = U ? Iop_PwMin16Ux4 : Iop_PwMin16Sx4; break;
4460               case 2: op = U ? Iop_PwMin32Ux2 : Iop_PwMin32Sx2; break;
4461               case 3: return False;
4462               default: vassert(0);
4463            }
4464         } else {
4465            switch (size) {
4466               case 0: op = U ? Iop_PwMax8Ux8  : Iop_PwMax8Sx8; break;
4467               case 1: op = U ? Iop_PwMax16Ux4 : Iop_PwMax16Sx4; break;
4468               case 2: op = U ? Iop_PwMax32Ux2 : Iop_PwMax32Sx2; break;
4469               case 3: return False;
4470               default: vassert(0);
4471            }
4472         }
4473         assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4474         DIP("vp%s.%c%u %c%u, %c%u, %c%u\n",
4475             P ? "min" : "max", U ? 'u' : 's',
4476             8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
4477             Q ? 'q' : 'd', mreg);
4478         break;
4479      }
4480      case 11:
4481         if (B == 0) {
4482            if (U == 0) {
4483               /* VQDMULH  */
4484               IROp op ,op2;
4485               ULong imm;
4486               switch (size) {
4487                  case 0: case 3:
4488                     return False;
4489                  case 1:
4490                     op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
4491                     op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
4492                     imm = 1LL << 15;
4493                     imm = (imm << 16) | imm;
4494                     imm = (imm << 32) | imm;
4495                     break;
4496                  case 2:
4497                     op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
4498                     op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
4499                     imm = 1LL << 31;
4500                     imm = (imm << 32) | imm;
4501                     break;
4502                  default:
4503                     vassert(0);
4504               }
4505               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4506#ifndef DISABLE_QC_FLAG
4507               setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4508                                binop(op2, mkexpr(arg_n),
4509                                           Q ? mkU128(imm) : mkU64(imm)),
4510                                binop(op2, mkexpr(arg_m),
4511                                           Q ? mkU128(imm) : mkU64(imm))),
4512                          Q ? mkU128(0) : mkU64(0),
4513                          Q, condT);
4514#endif
4515               DIP("vqdmulh.s%u %c%u, %c%u, %c%u\n",
4516                   8 << size, Q ? 'q' : 'd',
4517                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4518            } else {
4519               /* VQRDMULH */
4520               IROp op ,op2;
4521               ULong imm;
4522               switch(size) {
4523                  case 0: case 3:
4524                     return False;
4525                  case 1:
4526                     imm = 1LL << 15;
4527                     imm = (imm << 16) | imm;
4528                     imm = (imm << 32) | imm;
4529                     op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
4530                     op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
4531                     break;
4532                  case 2:
4533                     imm = 1LL << 31;
4534                     imm = (imm << 32) | imm;
4535                     op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
4536                     op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
4537                     break;
4538                  default:
4539                     vassert(0);
4540               }
4541               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4542#ifndef DISABLE_QC_FLAG
4543               setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4544                                binop(op2, mkexpr(arg_n),
4545                                           Q ? mkU128(imm) : mkU64(imm)),
4546                                binop(op2, mkexpr(arg_m),
4547                                           Q ? mkU128(imm) : mkU64(imm))),
4548                          Q ? mkU128(0) : mkU64(0),
4549                          Q, condT);
4550#endif
4551               DIP("vqrdmulh.s%u %c%u, %c%u, %c%u\n",
4552                   8 << size, Q ? 'q' : 'd',
4553                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4554            }
4555         } else {
4556            if (U == 0) {
4557               /* VPADD */
4558               IROp op;
4559               if (Q)
4560                  return False;
4561               switch (size) {
4562                  case 0: op = Q ? Iop_PwAdd8x16 : Iop_PwAdd8x8;  break;
4563                  case 1: op = Q ? Iop_PwAdd16x8 : Iop_PwAdd16x4; break;
4564                  case 2: op = Q ? Iop_PwAdd32x4 : Iop_PwAdd32x2; break;
4565                  case 3: return False;
4566                  default: vassert(0);
4567               }
4568               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4569               DIP("vpadd.i%d %c%u, %c%u, %c%u\n",
4570                   8 << size, Q ? 'q' : 'd',
4571                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4572            }
4573         }
4574         break;
4575      /* Starting from here these are FP SIMD cases */
4576      case 13:
4577         if (B == 0) {
4578            IROp op;
4579            if (U == 0) {
4580               if ((C >> 1) == 0) {
4581                  /* VADD  */
4582                  op = Q ? Iop_Add32Fx4 : Iop_Add32Fx2 ;
4583                  DIP("vadd.f32 %c%u, %c%u, %c%u\n",
4584                      Q ? 'q' : 'd', dreg,
4585                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4586               } else {
4587                  /* VSUB  */
4588                  op = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2 ;
4589                  DIP("vsub.f32 %c%u, %c%u, %c%u\n",
4590                      Q ? 'q' : 'd', dreg,
4591                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4592               }
4593            } else {
4594               if ((C >> 1) == 0) {
4595                  /* VPADD */
4596                  if (Q)
4597                     return False;
4598                  op = Iop_PwAdd32Fx2;
4599                  DIP("vpadd.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4600               } else {
4601                  /* VABD  */
4602                  if (Q) {
4603                     assign(res, unop(Iop_Abs32Fx4,
4604                                      binop(Iop_Sub32Fx4,
4605                                            mkexpr(arg_n),
4606                                            mkexpr(arg_m))));
4607                  } else {
4608                     assign(res, unop(Iop_Abs32Fx2,
4609                                      binop(Iop_Sub32Fx2,
4610                                            mkexpr(arg_n),
4611                                            mkexpr(arg_m))));
4612                  }
4613                  DIP("vabd.f32 %c%u, %c%u, %c%u\n",
4614                      Q ? 'q' : 'd', dreg,
4615                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4616                  break;
4617               }
4618            }
4619            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4620         } else {
4621            if (U == 0) {
4622               /* VMLA, VMLS  */
4623               IROp op, op2;
4624               UInt P = (theInstr >> 21) & 1;
4625               if (P) {
4626                  switch (size & 1) {
4627                     case 0:
4628                        op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
4629                        op2 = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
4630                        break;
4631                     case 1: return False;
4632                     default: vassert(0);
4633                  }
4634               } else {
4635                  switch (size & 1) {
4636                     case 0:
4637                        op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
4638                        op2 = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
4639                        break;
4640                     case 1: return False;
4641                     default: vassert(0);
4642                  }
4643               }
4644               assign(res, binop(op2,
4645                                 Q ? getQReg(dreg) : getDRegI64(dreg),
4646                                 binop(op, mkexpr(arg_n), mkexpr(arg_m))));
4647
4648               DIP("vml%c.f32 %c%u, %c%u, %c%u\n",
4649                   P ? 's' : 'a', Q ? 'q' : 'd',
4650                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4651            } else {
4652               /* VMUL  */
4653               IROp op;
4654               if ((C >> 1) != 0)
4655                  return False;
4656               op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2 ;
4657               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4658               DIP("vmul.f32 %c%u, %c%u, %c%u\n",
4659                   Q ? 'q' : 'd', dreg,
4660                   Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4661            }
4662         }
4663         break;
4664      case 14:
4665         if (B == 0) {
4666            if (U == 0) {
4667               if ((C >> 1) == 0) {
4668                  /* VCEQ  */
4669                  IROp op;
4670                  if ((theInstr >> 20) & 1)
4671                     return False;
4672                  op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2;
4673                  assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4674                  DIP("vceq.f32 %c%u, %c%u, %c%u\n",
4675                      Q ? 'q' : 'd', dreg,
4676                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4677               } else {
4678                  return False;
4679               }
4680            } else {
4681               if ((C >> 1) == 0) {
4682                  /* VCGE  */
4683                  IROp op;
4684                  if ((theInstr >> 20) & 1)
4685                     return False;
4686                  op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
4687                  assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4688                  DIP("vcge.f32 %c%u, %c%u, %c%u\n",
4689                      Q ? 'q' : 'd', dreg,
4690                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4691               } else {
4692                  /* VCGT  */
4693                  IROp op;
4694                  if ((theInstr >> 20) & 1)
4695                     return False;
4696                  op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
4697                  assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4698                  DIP("vcgt.f32 %c%u, %c%u, %c%u\n",
4699                      Q ? 'q' : 'd', dreg,
4700                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4701               }
4702            }
4703         } else {
4704            if (U == 1) {
4705               /* VACGE, VACGT */
4706               UInt op_bit = (theInstr >> 21) & 1;
4707               IROp op, op2;
4708               op2 = Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2;
4709               if (op_bit) {
4710                  op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
4711                  assign(res, binop(op,
4712                                    unop(op2, mkexpr(arg_n)),
4713                                    unop(op2, mkexpr(arg_m))));
4714               } else {
4715                  op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
4716                  assign(res, binop(op,
4717                                    unop(op2, mkexpr(arg_n)),
4718                                    unop(op2, mkexpr(arg_m))));
4719               }
4720               DIP("vacg%c.f32 %c%u, %c%u, %c%u\n", op_bit ? 't' : 'e',
4721                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
4722                   Q ? 'q' : 'd', mreg);
4723            }
4724         }
4725         break;
4726      case 15:
4727         if (B == 0) {
4728            if (U == 0) {
4729               /* VMAX, VMIN  */
4730               IROp op;
4731               if ((theInstr >> 20) & 1)
4732                  return False;
4733               if ((theInstr >> 21) & 1) {
4734                  op = Q ? Iop_Min32Fx4 : Iop_Min32Fx2;
4735                  DIP("vmin.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4736                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4737               } else {
4738                  op = Q ? Iop_Max32Fx4 : Iop_Max32Fx2;
4739                  DIP("vmax.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4740                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4741               }
4742               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4743            } else {
4744               /* VPMAX, VPMIN   */
4745               IROp op;
4746               if (Q)
4747                  return False;
4748               if ((theInstr >> 20) & 1)
4749                  return False;
4750               if ((theInstr >> 21) & 1) {
4751                  op = Iop_PwMin32Fx2;
4752                  DIP("vpmin.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4753               } else {
4754                  op = Iop_PwMax32Fx2;
4755                  DIP("vpmax.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4756               }
4757               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4758            }
4759         } else {
4760            if (U == 0) {
4761               if ((C >> 1) == 0) {
4762                  /* VRECPS */
4763                  if ((theInstr >> 20) & 1)
4764                     return False;
4765                  assign(res, binop(Q ? Iop_Recps32Fx4 : Iop_Recps32Fx2,
4766                                    mkexpr(arg_n),
4767                                    mkexpr(arg_m)));
4768                  DIP("vrecps.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4769                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4770               } else {
4771                  /* VRSQRTS  */
4772                  if ((theInstr >> 20) & 1)
4773                     return False;
4774                  assign(res, binop(Q ? Iop_Rsqrts32Fx4 : Iop_Rsqrts32Fx2,
4775                                    mkexpr(arg_n),
4776                                    mkexpr(arg_m)));
4777                  DIP("vrsqrts.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4778                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4779               }
4780            }
4781         }
4782         break;
4783   }
4784
4785   if (Q) {
4786      putQReg(dreg, mkexpr(res), condT);
4787   } else {
4788      putDRegI64(dreg, mkexpr(res), condT);
4789   }
4790
4791   return True;
4792}
4793
4794/* A7.4.2 Three registers of different length */
4795static
4796Bool dis_neon_data_3diff ( UInt theInstr, IRTemp condT )
4797{
4798   UInt A = (theInstr >> 8) & 0xf;
4799   UInt B = (theInstr >> 20) & 3;
4800   UInt U = (theInstr >> 24) & 1;
4801   UInt P = (theInstr >> 9) & 1;
4802   UInt mreg = get_neon_m_regno(theInstr);
4803   UInt nreg = get_neon_n_regno(theInstr);
4804   UInt dreg = get_neon_d_regno(theInstr);
4805   UInt size = B;
4806   ULong imm;
4807   IRTemp res, arg_m, arg_n, cond, tmp;
4808   IROp cvt, cvt2, cmp, op, op2, sh, add;
4809   switch (A) {
4810      case 0: case 1: case 2: case 3:
4811         /* VADDL, VADDW, VSUBL, VSUBW */
4812         if (dreg & 1)
4813            return False;
4814         dreg >>= 1;
4815         size = B;
4816         switch (size) {
4817            case 0:
4818               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
4819               op = (A & 2) ? Iop_Sub16x8 : Iop_Add16x8;
4820               break;
4821            case 1:
4822               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
4823               op = (A & 2) ? Iop_Sub32x4 : Iop_Add32x4;
4824               break;
4825            case 2:
4826               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
4827               op = (A & 2) ? Iop_Sub64x2 : Iop_Add64x2;
4828               break;
4829            case 3:
4830               return False;
4831            default:
4832               vassert(0);
4833         }
4834         arg_n = newTemp(Ity_V128);
4835         arg_m = newTemp(Ity_V128);
4836         if (A & 1) {
4837            if (nreg & 1)
4838               return False;
4839            nreg >>= 1;
4840            assign(arg_n, getQReg(nreg));
4841         } else {
4842            assign(arg_n, unop(cvt, getDRegI64(nreg)));
4843         }
4844         assign(arg_m, unop(cvt, getDRegI64(mreg)));
4845         putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
4846                       condT);
4847         DIP("v%s%c.%c%u q%u, %c%u, d%u\n", (A & 2) ? "sub" : "add",
4848             (A & 1) ? 'w' : 'l', U ? 'u' : 's', 8 << size, dreg,
4849             (A & 1) ? 'q' : 'd', nreg, mreg);
4850         return True;
4851      case 4:
4852         /* VADDHN, VRADDHN */
4853         if (mreg & 1)
4854            return False;
4855         mreg >>= 1;
4856         if (nreg & 1)
4857            return False;
4858         nreg >>= 1;
4859         size = B;
4860         switch (size) {
4861            case 0:
4862               op = Iop_Add16x8;
4863               cvt = Iop_NarrowUn16to8x8;
4864               sh = Iop_ShrN16x8;
4865               imm = 1U << 7;
4866               imm = (imm << 16) | imm;
4867               imm = (imm << 32) | imm;
4868               break;
4869            case 1:
4870               op = Iop_Add32x4;
4871               cvt = Iop_NarrowUn32to16x4;
4872               sh = Iop_ShrN32x4;
4873               imm = 1U << 15;
4874               imm = (imm << 32) | imm;
4875               break;
4876            case 2:
4877               op = Iop_Add64x2;
4878               cvt = Iop_NarrowUn64to32x2;
4879               sh = Iop_ShrN64x2;
4880               imm = 1U << 31;
4881               break;
4882            case 3:
4883               return False;
4884            default:
4885               vassert(0);
4886         }
4887         tmp = newTemp(Ity_V128);
4888         res = newTemp(Ity_V128);
4889         assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
4890         if (U) {
4891            /* VRADDHN */
4892            assign(res, binop(op, mkexpr(tmp),
4893                     binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
4894         } else {
4895            assign(res, mkexpr(tmp));
4896         }
4897         putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
4898                    condT);
4899         DIP("v%saddhn.i%u d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
4900             nreg, mreg);
4901         return True;
4902      case 5:
4903         /* VABAL */
4904         if (!((theInstr >> 23) & 1)) {
4905            vpanic("VABA should not be in dis_neon_data_3diff\n");
4906         }
4907         if (dreg & 1)
4908            return False;
4909         dreg >>= 1;
4910         switch (size) {
4911            case 0:
4912               cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4913               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
4914               cvt2 = Iop_Widen8Sto16x8;
4915               op = Iop_Sub16x8;
4916               op2 = Iop_Add16x8;
4917               break;
4918            case 1:
4919               cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4920               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
4921               cvt2 = Iop_Widen16Sto32x4;
4922               op = Iop_Sub32x4;
4923               op2 = Iop_Add32x4;
4924               break;
4925            case 2:
4926               cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4927               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
4928               cvt2 = Iop_Widen32Sto64x2;
4929               op = Iop_Sub64x2;
4930               op2 = Iop_Add64x2;
4931               break;
4932            case 3:
4933               return False;
4934            default:
4935               vassert(0);
4936         }
4937         arg_n = newTemp(Ity_V128);
4938         arg_m = newTemp(Ity_V128);
4939         cond = newTemp(Ity_V128);
4940         res = newTemp(Ity_V128);
4941         assign(arg_n, unop(cvt, getDRegI64(nreg)));
4942         assign(arg_m, unop(cvt, getDRegI64(mreg)));
4943         assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
4944                                            getDRegI64(mreg))));
4945         assign(res, binop(op2,
4946                           binop(Iop_OrV128,
4947                                 binop(Iop_AndV128,
4948                                       binop(op, mkexpr(arg_n), mkexpr(arg_m)),
4949                                       mkexpr(cond)),
4950                                 binop(Iop_AndV128,
4951                                       binop(op, mkexpr(arg_m), mkexpr(arg_n)),
4952                                       unop(Iop_NotV128, mkexpr(cond)))),
4953                           getQReg(dreg)));
4954         putQReg(dreg, mkexpr(res), condT);
4955         DIP("vabal.%c%u q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
4956             nreg, mreg);
4957         return True;
4958      case 6:
4959         /* VSUBHN, VRSUBHN */
4960         if (mreg & 1)
4961            return False;
4962         mreg >>= 1;
4963         if (nreg & 1)
4964            return False;
4965         nreg >>= 1;
4966         size = B;
4967         switch (size) {
4968            case 0:
4969               op = Iop_Sub16x8;
4970               op2 = Iop_Add16x8;
4971               cvt = Iop_NarrowUn16to8x8;
4972               sh = Iop_ShrN16x8;
4973               imm = 1U << 7;
4974               imm = (imm << 16) | imm;
4975               imm = (imm << 32) | imm;
4976               break;
4977            case 1:
4978               op = Iop_Sub32x4;
4979               op2 = Iop_Add32x4;
4980               cvt = Iop_NarrowUn32to16x4;
4981               sh = Iop_ShrN32x4;
4982               imm = 1U << 15;
4983               imm = (imm << 32) | imm;
4984               break;
4985            case 2:
4986               op = Iop_Sub64x2;
4987               op2 = Iop_Add64x2;
4988               cvt = Iop_NarrowUn64to32x2;
4989               sh = Iop_ShrN64x2;
4990               imm = 1U << 31;
4991               break;
4992            case 3:
4993               return False;
4994            default:
4995               vassert(0);
4996         }
4997         tmp = newTemp(Ity_V128);
4998         res = newTemp(Ity_V128);
4999         assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
5000         if (U) {
5001            /* VRSUBHN */
5002            assign(res, binop(op2, mkexpr(tmp),
5003                     binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
5004         } else {
5005            assign(res, mkexpr(tmp));
5006         }
5007         putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
5008                    condT);
5009         DIP("v%ssubhn.i%u d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
5010             nreg, mreg);
5011         return True;
5012      case 7:
5013         /* VABDL */
5014         if (!((theInstr >> 23) & 1)) {
5015            vpanic("VABL should not be in dis_neon_data_3diff\n");
5016         }
5017         if (dreg & 1)
5018            return False;
5019         dreg >>= 1;
5020         switch (size) {
5021            case 0:
5022               cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
5023               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
5024               cvt2 = Iop_Widen8Sto16x8;
5025               op = Iop_Sub16x8;
5026               break;
5027            case 1:
5028               cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
5029               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
5030               cvt2 = Iop_Widen16Sto32x4;
5031               op = Iop_Sub32x4;
5032               break;
5033            case 2:
5034               cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
5035               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
5036               cvt2 = Iop_Widen32Sto64x2;
5037               op = Iop_Sub64x2;
5038               break;
5039            case 3:
5040               return False;
5041            default:
5042               vassert(0);
5043         }
5044         arg_n = newTemp(Ity_V128);
5045         arg_m = newTemp(Ity_V128);
5046         cond = newTemp(Ity_V128);
5047         res = newTemp(Ity_V128);
5048         assign(arg_n, unop(cvt, getDRegI64(nreg)));
5049         assign(arg_m, unop(cvt, getDRegI64(mreg)));
5050         assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
5051                                            getDRegI64(mreg))));
5052         assign(res, binop(Iop_OrV128,
5053                           binop(Iop_AndV128,
5054                                 binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5055                                 mkexpr(cond)),
5056                           binop(Iop_AndV128,
5057                                 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
5058                                 unop(Iop_NotV128, mkexpr(cond)))));
5059         putQReg(dreg, mkexpr(res), condT);
5060         DIP("vabdl.%c%u q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
5061             nreg, mreg);
5062         return True;
5063      case 8:
5064      case 10:
5065         /* VMLAL, VMLSL (integer) */
5066         if (dreg & 1)
5067            return False;
5068         dreg >>= 1;
5069         size = B;
5070         switch (size) {
5071            case 0:
5072               op = U ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
5073               op2 = P ? Iop_Sub16x8 : Iop_Add16x8;
5074               break;
5075            case 1:
5076               op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5077               op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5078               break;
5079            case 2:
5080               op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5081               op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5082               break;
5083            case 3:
5084               return False;
5085            default:
5086               vassert(0);
5087         }
5088         res = newTemp(Ity_V128);
5089         assign(res, binop(op, getDRegI64(nreg),getDRegI64(mreg)));
5090         putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
5091         DIP("vml%cl.%c%u q%u, d%u, d%u\n", P ? 's' : 'a', U ? 'u' : 's',
5092             8 << size, dreg, nreg, mreg);
5093         return True;
5094      case 9:
5095      case 11:
5096         /* VQDMLAL, VQDMLSL */
5097         if (U)
5098            return False;
5099         if (dreg & 1)
5100            return False;
5101         dreg >>= 1;
5102         size = B;
5103         switch (size) {
5104            case 0: case 3:
5105               return False;
5106            case 1:
5107               op = Iop_QDMulLong16Sx4;
5108               cmp = Iop_CmpEQ16x4;
5109               add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
5110               op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5111               imm = 1LL << 15;
5112               imm = (imm << 16) | imm;
5113               imm = (imm << 32) | imm;
5114               break;
5115            case 2:
5116               op = Iop_QDMulLong32Sx2;
5117               cmp = Iop_CmpEQ32x2;
5118               add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
5119               op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5120               imm = 1LL << 31;
5121               imm = (imm << 32) | imm;
5122               break;
5123            default:
5124               vassert(0);
5125         }
5126         res = newTemp(Ity_V128);
5127         tmp = newTemp(Ity_V128);
5128         assign(res, binop(op, getDRegI64(nreg), getDRegI64(mreg)));
5129#ifndef DISABLE_QC_FLAG
5130         assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
5131         setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
5132                    True, condT);
5133         setFlag_QC(binop(Iop_And64,
5134                          binop(cmp, getDRegI64(nreg), mkU64(imm)),
5135                          binop(cmp, getDRegI64(mreg), mkU64(imm))),
5136                    mkU64(0),
5137                    False, condT);
5138#endif
5139         putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
5140         DIP("vqdml%cl.s%u q%u, d%u, d%u\n", P ? 's' : 'a', 8 << size, dreg,
5141             nreg, mreg);
5142         return True;
5143      case 12:
5144      case 14:
5145         /* VMULL (integer or polynomial) */
5146         if (dreg & 1)
5147            return False;
5148         dreg >>= 1;
5149         size = B;
5150         switch (size) {
5151            case 0:
5152               op = (U) ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
5153               if (P)
5154                  op = Iop_PolynomialMull8x8;
5155               break;
5156            case 1:
5157               op = (U) ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5158               break;
5159            case 2:
5160               op = (U) ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5161               break;
5162            default:
5163               vassert(0);
5164         }
5165         putQReg(dreg, binop(op, getDRegI64(nreg),
5166                                 getDRegI64(mreg)), condT);
5167         DIP("vmull.%c%u q%u, d%u, d%u\n", P ? 'p' : (U ? 'u' : 's'),
5168               8 << size, dreg, nreg, mreg);
5169         return True;
5170      case 13:
5171         /* VQDMULL */
5172         if (U)
5173            return False;
5174         if (dreg & 1)
5175            return False;
5176         dreg >>= 1;
5177         size = B;
5178         switch (size) {
5179            case 0:
5180            case 3:
5181               return False;
5182            case 1:
5183               op = Iop_QDMulLong16Sx4;
5184               op2 = Iop_CmpEQ16x4;
5185               imm = 1LL << 15;
5186               imm = (imm << 16) | imm;
5187               imm = (imm << 32) | imm;
5188               break;
5189            case 2:
5190               op = Iop_QDMulLong32Sx2;
5191               op2 = Iop_CmpEQ32x2;
5192               imm = 1LL << 31;
5193               imm = (imm << 32) | imm;
5194               break;
5195            default:
5196               vassert(0);
5197         }
5198         putQReg(dreg, binop(op, getDRegI64(nreg), getDRegI64(mreg)),
5199               condT);
5200#ifndef DISABLE_QC_FLAG
5201         setFlag_QC(binop(Iop_And64,
5202                          binop(op2, getDRegI64(nreg), mkU64(imm)),
5203                          binop(op2, getDRegI64(mreg), mkU64(imm))),
5204                    mkU64(0),
5205                    False, condT);
5206#endif
5207         DIP("vqdmull.s%u q%u, d%u, d%u\n", 8 << size, dreg, nreg, mreg);
5208         return True;
5209      default:
5210         return False;
5211   }
5212   return False;
5213}
5214
5215/* A7.4.3 Two registers and a scalar */
5216static
5217Bool dis_neon_data_2reg_and_scalar ( UInt theInstr, IRTemp condT )
5218{
5219#  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
5220   UInt U = INSN(24,24);
5221   UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
5222   UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
5223   UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
5224   UInt size = INSN(21,20);
5225   UInt index;
5226   UInt Q = INSN(24,24);
5227
5228   if (INSN(27,25) != 1 || INSN(23,23) != 1
5229       || INSN(6,6) != 1 || INSN(4,4) != 0)
5230      return False;
5231
5232   /* VMLA, VMLS (scalar)  */
5233   if ((INSN(11,8) & BITS4(1,0,1,0)) == BITS4(0,0,0,0)) {
5234      IRTemp res, arg_m, arg_n;
5235      IROp dup, get, op, op2, add, sub;
5236      if (Q) {
5237         if ((dreg & 1) || (nreg & 1))
5238            return False;
5239         dreg >>= 1;
5240         nreg >>= 1;
5241         res = newTemp(Ity_V128);
5242         arg_m = newTemp(Ity_V128);
5243         arg_n = newTemp(Ity_V128);
5244         assign(arg_n, getQReg(nreg));
5245         switch(size) {
5246            case 1:
5247               dup = Iop_Dup16x8;
5248               get = Iop_GetElem16x4;
5249               index = mreg >> 3;
5250               mreg &= 7;
5251               break;
5252            case 2:
5253               dup = Iop_Dup32x4;
5254               get = Iop_GetElem32x2;
5255               index = mreg >> 4;
5256               mreg &= 0xf;
5257               break;
5258            case 0:
5259            case 3:
5260               return False;
5261            default:
5262               vassert(0);
5263         }
5264         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5265      } else {
5266         res = newTemp(Ity_I64);
5267         arg_m = newTemp(Ity_I64);
5268         arg_n = newTemp(Ity_I64);
5269         assign(arg_n, getDRegI64(nreg));
5270         switch(size) {
5271            case 1:
5272               dup = Iop_Dup16x4;
5273               get = Iop_GetElem16x4;
5274               index = mreg >> 3;
5275               mreg &= 7;
5276               break;
5277            case 2:
5278               dup = Iop_Dup32x2;
5279               get = Iop_GetElem32x2;
5280               index = mreg >> 4;
5281               mreg &= 0xf;
5282               break;
5283            case 0:
5284            case 3:
5285               return False;
5286            default:
5287               vassert(0);
5288         }
5289         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5290      }
5291      if (INSN(8,8)) {
5292         switch (size) {
5293            case 2:
5294               op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
5295               add = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
5296               sub = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
5297               break;
5298            case 0:
5299            case 1:
5300            case 3:
5301               return False;
5302            default:
5303               vassert(0);
5304         }
5305      } else {
5306         switch (size) {
5307            case 1:
5308               op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
5309               add = Q ? Iop_Add16x8 : Iop_Add16x4;
5310               sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
5311               break;
5312            case 2:
5313               op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
5314               add = Q ? Iop_Add32x4 : Iop_Add32x2;
5315               sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
5316               break;
5317            case 0:
5318            case 3:
5319               return False;
5320            default:
5321               vassert(0);
5322         }
5323      }
5324      op2 = INSN(10,10) ? sub : add;
5325      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5326      if (Q)
5327         putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)),
5328               condT);
5329      else
5330         putDRegI64(dreg, binop(op2, getDRegI64(dreg), mkexpr(res)),
5331                    condT);
5332      DIP("vml%c.%c%u %c%u, %c%u, d%u[%u]\n", INSN(10,10) ? 's' : 'a',
5333            INSN(8,8) ? 'f' : 'i', 8 << size,
5334            Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, mreg, index);
5335      return True;
5336   }
5337
5338   /* VMLAL, VMLSL (scalar)   */
5339   if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,0)) {
5340      IRTemp res, arg_m, arg_n;
5341      IROp dup, get, op, op2, add, sub;
5342      if (dreg & 1)
5343         return False;
5344      dreg >>= 1;
5345      res = newTemp(Ity_V128);
5346      arg_m = newTemp(Ity_I64);
5347      arg_n = newTemp(Ity_I64);
5348      assign(arg_n, getDRegI64(nreg));
5349      switch(size) {
5350         case 1:
5351            dup = Iop_Dup16x4;
5352            get = Iop_GetElem16x4;
5353            index = mreg >> 3;
5354            mreg &= 7;
5355            break;
5356         case 2:
5357            dup = Iop_Dup32x2;
5358            get = Iop_GetElem32x2;
5359            index = mreg >> 4;
5360            mreg &= 0xf;
5361            break;
5362         case 0:
5363         case 3:
5364            return False;
5365         default:
5366            vassert(0);
5367      }
5368      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5369      switch (size) {
5370         case 1:
5371            op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5372            add = Iop_Add32x4;
5373            sub = Iop_Sub32x4;
5374            break;
5375         case 2:
5376            op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5377            add = Iop_Add64x2;
5378            sub = Iop_Sub64x2;
5379            break;
5380         case 0:
5381         case 3:
5382            return False;
5383         default:
5384            vassert(0);
5385      }
5386      op2 = INSN(10,10) ? sub : add;
5387      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5388      putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
5389      DIP("vml%cl.%c%u q%u, d%u, d%u[%u]\n",
5390          INSN(10,10) ? 's' : 'a', U ? 'u' : 's',
5391          8 << size, dreg, nreg, mreg, index);
5392      return True;
5393   }
5394
5395   /* VQDMLAL, VQDMLSL (scalar)  */
5396   if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,1) && !U) {
5397      IRTemp res, arg_m, arg_n, tmp;
5398      IROp dup, get, op, op2, add, cmp;
5399      UInt P = INSN(10,10);
5400      ULong imm;
5401      if (dreg & 1)
5402         return False;
5403      dreg >>= 1;
5404      res = newTemp(Ity_V128);
5405      arg_m = newTemp(Ity_I64);
5406      arg_n = newTemp(Ity_I64);
5407      assign(arg_n, getDRegI64(nreg));
5408      switch(size) {
5409         case 1:
5410            dup = Iop_Dup16x4;
5411            get = Iop_GetElem16x4;
5412            index = mreg >> 3;
5413            mreg &= 7;
5414            break;
5415         case 2:
5416            dup = Iop_Dup32x2;
5417            get = Iop_GetElem32x2;
5418            index = mreg >> 4;
5419            mreg &= 0xf;
5420            break;
5421         case 0:
5422         case 3:
5423            return False;
5424         default:
5425            vassert(0);
5426      }
5427      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5428      switch (size) {
5429         case 0:
5430         case 3:
5431            return False;
5432         case 1:
5433            op = Iop_QDMulLong16Sx4;
5434            cmp = Iop_CmpEQ16x4;
5435            add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
5436            op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5437            imm = 1LL << 15;
5438            imm = (imm << 16) | imm;
5439            imm = (imm << 32) | imm;
5440            break;
5441         case 2:
5442            op = Iop_QDMulLong32Sx2;
5443            cmp = Iop_CmpEQ32x2;
5444            add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
5445            op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5446            imm = 1LL << 31;
5447            imm = (imm << 32) | imm;
5448            break;
5449         default:
5450            vassert(0);
5451      }
5452      res = newTemp(Ity_V128);
5453      tmp = newTemp(Ity_V128);
5454      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5455#ifndef DISABLE_QC_FLAG
5456      assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
5457      setFlag_QC(binop(Iop_And64,
5458                       binop(cmp, mkexpr(arg_n), mkU64(imm)),
5459                       binop(cmp, mkexpr(arg_m), mkU64(imm))),
5460                 mkU64(0),
5461                 False, condT);
5462      setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
5463                 True, condT);
5464#endif
5465      putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
5466      DIP("vqdml%cl.s%u q%u, d%u, d%u[%u]\n", P ? 's' : 'a', 8 << size,
5467          dreg, nreg, mreg, index);
5468      return True;
5469   }
5470
5471   /* VMUL (by scalar)  */
5472   if ((INSN(11,8) & BITS4(1,1,1,0)) == BITS4(1,0,0,0)) {
5473      IRTemp res, arg_m, arg_n;
5474      IROp dup, get, op;
5475      if (Q) {
5476         if ((dreg & 1) || (nreg & 1))
5477            return False;
5478         dreg >>= 1;
5479         nreg >>= 1;
5480         res = newTemp(Ity_V128);
5481         arg_m = newTemp(Ity_V128);
5482         arg_n = newTemp(Ity_V128);
5483         assign(arg_n, getQReg(nreg));
5484         switch(size) {
5485            case 1:
5486               dup = Iop_Dup16x8;
5487               get = Iop_GetElem16x4;
5488               index = mreg >> 3;
5489               mreg &= 7;
5490               break;
5491            case 2:
5492               dup = Iop_Dup32x4;
5493               get = Iop_GetElem32x2;
5494               index = mreg >> 4;
5495               mreg &= 0xf;
5496               break;
5497            case 0:
5498            case 3:
5499               return False;
5500            default:
5501               vassert(0);
5502         }
5503         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5504      } else {
5505         res = newTemp(Ity_I64);
5506         arg_m = newTemp(Ity_I64);
5507         arg_n = newTemp(Ity_I64);
5508         assign(arg_n, getDRegI64(nreg));
5509         switch(size) {
5510            case 1:
5511               dup = Iop_Dup16x4;
5512               get = Iop_GetElem16x4;
5513               index = mreg >> 3;
5514               mreg &= 7;
5515               break;
5516            case 2:
5517               dup = Iop_Dup32x2;
5518               get = Iop_GetElem32x2;
5519               index = mreg >> 4;
5520               mreg &= 0xf;
5521               break;
5522            case 0:
5523            case 3:
5524               return False;
5525            default:
5526               vassert(0);
5527         }
5528         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5529      }
5530      if (INSN(8,8)) {
5531         switch (size) {
5532            case 2:
5533               op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
5534               break;
5535            case 0:
5536            case 1:
5537            case 3:
5538               return False;
5539            default:
5540               vassert(0);
5541         }
5542      } else {
5543         switch (size) {
5544            case 1:
5545               op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
5546               break;
5547            case 2:
5548               op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
5549               break;
5550            case 0:
5551            case 3:
5552               return False;
5553            default:
5554               vassert(0);
5555         }
5556      }
5557      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5558      if (Q)
5559         putQReg(dreg, mkexpr(res), condT);
5560      else
5561         putDRegI64(dreg, mkexpr(res), condT);
5562      DIP("vmul.%c%u %c%u, %c%u, d%u[%u]\n", INSN(8,8) ? 'f' : 'i',
5563          8 << size, Q ? 'q' : 'd', dreg,
5564          Q ? 'q' : 'd', nreg, mreg, index);
5565      return True;
5566   }
5567
5568   /* VMULL (scalar) */
5569   if (INSN(11,8) == BITS4(1,0,1,0)) {
5570      IRTemp res, arg_m, arg_n;
5571      IROp dup, get, op;
5572      if (dreg & 1)
5573         return False;
5574      dreg >>= 1;
5575      res = newTemp(Ity_V128);
5576      arg_m = newTemp(Ity_I64);
5577      arg_n = newTemp(Ity_I64);
5578      assign(arg_n, getDRegI64(nreg));
5579      switch(size) {
5580         case 1:
5581            dup = Iop_Dup16x4;
5582            get = Iop_GetElem16x4;
5583            index = mreg >> 3;
5584            mreg &= 7;
5585            break;
5586         case 2:
5587            dup = Iop_Dup32x2;
5588            get = Iop_GetElem32x2;
5589            index = mreg >> 4;
5590            mreg &= 0xf;
5591            break;
5592         case 0:
5593         case 3:
5594            return False;
5595         default:
5596            vassert(0);
5597      }
5598      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5599      switch (size) {
5600         case 1: op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4; break;
5601         case 2: op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2; break;
5602         case 0: case 3: return False;
5603         default: vassert(0);
5604      }
5605      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5606      putQReg(dreg, mkexpr(res), condT);
5607      DIP("vmull.%c%u q%u, d%u, d%u[%u]\n", U ? 'u' : 's', 8 << size, dreg,
5608          nreg, mreg, index);
5609      return True;
5610   }
5611
5612   /* VQDMULL */
5613   if (INSN(11,8) == BITS4(1,0,1,1) && !U) {
5614      IROp op ,op2, dup, get;
5615      ULong imm;
5616      IRTemp arg_m, arg_n;
5617      if (dreg & 1)
5618         return False;
5619      dreg >>= 1;
5620      arg_m = newTemp(Ity_I64);
5621      arg_n = newTemp(Ity_I64);
5622      assign(arg_n, getDRegI64(nreg));
5623      switch(size) {
5624         case 1:
5625            dup = Iop_Dup16x4;
5626            get = Iop_GetElem16x4;
5627            index = mreg >> 3;
5628            mreg &= 7;
5629            break;
5630         case 2:
5631            dup = Iop_Dup32x2;
5632            get = Iop_GetElem32x2;
5633            index = mreg >> 4;
5634            mreg &= 0xf;
5635            break;
5636         case 0:
5637         case 3:
5638            return False;
5639         default:
5640            vassert(0);
5641      }
5642      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5643      switch (size) {
5644         case 0:
5645         case 3:
5646            return False;
5647         case 1:
5648            op = Iop_QDMulLong16Sx4;
5649            op2 = Iop_CmpEQ16x4;
5650            imm = 1LL << 15;
5651            imm = (imm << 16) | imm;
5652            imm = (imm << 32) | imm;
5653            break;
5654         case 2:
5655            op = Iop_QDMulLong32Sx2;
5656            op2 = Iop_CmpEQ32x2;
5657            imm = 1LL << 31;
5658            imm = (imm << 32) | imm;
5659            break;
5660         default:
5661            vassert(0);
5662      }
5663      putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5664            condT);
5665#ifndef DISABLE_QC_FLAG
5666      setFlag_QC(binop(Iop_And64,
5667                       binop(op2, mkexpr(arg_n), mkU64(imm)),
5668                       binop(op2, mkexpr(arg_m), mkU64(imm))),
5669                 mkU64(0),
5670                 False, condT);
5671#endif
5672      DIP("vqdmull.s%u q%u, d%u, d%u[%u]\n", 8 << size, dreg, nreg, mreg,
5673          index);
5674      return True;
5675   }
5676
5677   /* VQDMULH */
5678   if (INSN(11,8) == BITS4(1,1,0,0)) {
5679      IROp op ,op2, dup, get;
5680      ULong imm;
5681      IRTemp res, arg_m, arg_n;
5682      if (Q) {
5683         if ((dreg & 1) || (nreg & 1))
5684            return False;
5685         dreg >>= 1;
5686         nreg >>= 1;
5687         res = newTemp(Ity_V128);
5688         arg_m = newTemp(Ity_V128);
5689         arg_n = newTemp(Ity_V128);
5690         assign(arg_n, getQReg(nreg));
5691         switch(size) {
5692            case 1:
5693               dup = Iop_Dup16x8;
5694               get = Iop_GetElem16x4;
5695               index = mreg >> 3;
5696               mreg &= 7;
5697               break;
5698            case 2:
5699               dup = Iop_Dup32x4;
5700               get = Iop_GetElem32x2;
5701               index = mreg >> 4;
5702               mreg &= 0xf;
5703               break;
5704            case 0:
5705            case 3:
5706               return False;
5707            default:
5708               vassert(0);
5709         }
5710         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5711      } else {
5712         res = newTemp(Ity_I64);
5713         arg_m = newTemp(Ity_I64);
5714         arg_n = newTemp(Ity_I64);
5715         assign(arg_n, getDRegI64(nreg));
5716         switch(size) {
5717            case 1:
5718               dup = Iop_Dup16x4;
5719               get = Iop_GetElem16x4;
5720               index = mreg >> 3;
5721               mreg &= 7;
5722               break;
5723            case 2:
5724               dup = Iop_Dup32x2;
5725               get = Iop_GetElem32x2;
5726               index = mreg >> 4;
5727               mreg &= 0xf;
5728               break;
5729            case 0:
5730            case 3:
5731               return False;
5732            default:
5733               vassert(0);
5734         }
5735         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5736      }
5737      switch (size) {
5738         case 0:
5739         case 3:
5740            return False;
5741         case 1:
5742            op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
5743            op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
5744            imm = 1LL << 15;
5745            imm = (imm << 16) | imm;
5746            imm = (imm << 32) | imm;
5747            break;
5748         case 2:
5749            op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
5750            op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
5751            imm = 1LL << 31;
5752            imm = (imm << 32) | imm;
5753            break;
5754         default:
5755            vassert(0);
5756      }
5757      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5758#ifndef DISABLE_QC_FLAG
5759      setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
5760                       binop(op2, mkexpr(arg_n),
5761                                  Q ? mkU128(imm) : mkU64(imm)),
5762                       binop(op2, mkexpr(arg_m),
5763                             Q ? mkU128(imm) : mkU64(imm))),
5764                 Q ? mkU128(0) : mkU64(0),
5765                 Q, condT);
5766#endif
5767      if (Q)
5768         putQReg(dreg, mkexpr(res), condT);
5769      else
5770         putDRegI64(dreg, mkexpr(res), condT);
5771      DIP("vqdmulh.s%u %c%u, %c%u, d%u[%u]\n",
5772          8 << size, Q ? 'q' : 'd', dreg,
5773          Q ? 'q' : 'd', nreg, mreg, index);
5774      return True;
5775   }
5776
5777   /* VQRDMULH (scalar) */
5778   if (INSN(11,8) == BITS4(1,1,0,1)) {
5779      IROp op ,op2, dup, get;
5780      ULong imm;
5781      IRTemp res, arg_m, arg_n;
5782      if (Q) {
5783         if ((dreg & 1) || (nreg & 1))
5784            return False;
5785         dreg >>= 1;
5786         nreg >>= 1;
5787         res = newTemp(Ity_V128);
5788         arg_m = newTemp(Ity_V128);
5789         arg_n = newTemp(Ity_V128);
5790         assign(arg_n, getQReg(nreg));
5791         switch(size) {
5792            case 1:
5793               dup = Iop_Dup16x8;
5794               get = Iop_GetElem16x4;
5795               index = mreg >> 3;
5796               mreg &= 7;
5797               break;
5798            case 2:
5799               dup = Iop_Dup32x4;
5800               get = Iop_GetElem32x2;
5801               index = mreg >> 4;
5802               mreg &= 0xf;
5803               break;
5804            case 0:
5805            case 3:
5806               return False;
5807            default:
5808               vassert(0);
5809         }
5810         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5811      } else {
5812         res = newTemp(Ity_I64);
5813         arg_m = newTemp(Ity_I64);
5814         arg_n = newTemp(Ity_I64);
5815         assign(arg_n, getDRegI64(nreg));
5816         switch(size) {
5817            case 1:
5818               dup = Iop_Dup16x4;
5819               get = Iop_GetElem16x4;
5820               index = mreg >> 3;
5821               mreg &= 7;
5822               break;
5823            case 2:
5824               dup = Iop_Dup32x2;
5825               get = Iop_GetElem32x2;
5826               index = mreg >> 4;
5827               mreg &= 0xf;
5828               break;
5829            case 0:
5830            case 3:
5831               return False;
5832            default:
5833               vassert(0);
5834         }
5835         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5836      }
5837      switch (size) {
5838         case 0:
5839         case 3:
5840            return False;
5841         case 1:
5842            op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
5843            op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
5844            imm = 1LL << 15;
5845            imm = (imm << 16) | imm;
5846            imm = (imm << 32) | imm;
5847            break;
5848         case 2:
5849            op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
5850            op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
5851            imm = 1LL << 31;
5852            imm = (imm << 32) | imm;
5853            break;
5854         default:
5855            vassert(0);
5856      }
5857      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5858#ifndef DISABLE_QC_FLAG
5859      setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
5860                       binop(op2, mkexpr(arg_n),
5861                                  Q ? mkU128(imm) : mkU64(imm)),
5862                       binop(op2, mkexpr(arg_m),
5863                                  Q ? mkU128(imm) : mkU64(imm))),
5864                 Q ? mkU128(0) : mkU64(0),
5865                 Q, condT);
5866#endif
5867      if (Q)
5868         putQReg(dreg, mkexpr(res), condT);
5869      else
5870         putDRegI64(dreg, mkexpr(res), condT);
5871      DIP("vqrdmulh.s%u %c%u, %c%u, d%u[%u]\n",
5872          8 << size, Q ? 'q' : 'd', dreg,
5873          Q ? 'q' : 'd', nreg, mreg, index);
5874      return True;
5875   }
5876
5877   return False;
5878#  undef INSN
5879}
5880
5881/* A7.4.4 Two registers and a shift amount */
5882static
5883Bool dis_neon_data_2reg_and_shift ( UInt theInstr, IRTemp condT )
5884{
5885   UInt A = (theInstr >> 8) & 0xf;
5886   UInt B = (theInstr >> 6) & 1;
5887   UInt L = (theInstr >> 7) & 1;
5888   UInt U = (theInstr >> 24) & 1;
5889   UInt Q = B;
5890   UInt imm6 = (theInstr >> 16) & 0x3f;
5891   UInt shift_imm;
5892   UInt size = 4;
5893   UInt tmp;
5894   UInt mreg = get_neon_m_regno(theInstr);
5895   UInt dreg = get_neon_d_regno(theInstr);
5896   ULong imm = 0;
5897   IROp op, cvt, add = Iop_INVALID, cvt2, op_rev;
5898   IRTemp reg_m, res, mask;
5899
5900   if (L == 0 && ((theInstr >> 19) & 7) == 0)
5901      /* It is one reg and immediate */
5902      return False;
5903
5904   tmp = (L << 6) | imm6;
5905   if (tmp & 0x40) {
5906      size = 3;
5907      shift_imm = 64 - imm6;
5908   } else if (tmp & 0x20) {
5909      size = 2;
5910      shift_imm = 64 - imm6;
5911   } else if (tmp & 0x10) {
5912      size = 1;
5913      shift_imm = 32 - imm6;
5914   } else if (tmp & 0x8) {
5915      size = 0;
5916      shift_imm = 16 - imm6;
5917   } else {
5918      return False;
5919   }
5920
5921   switch (A) {
5922      case 3:
5923      case 2:
5924         /* VRSHR, VRSRA */
5925         if (shift_imm > 0) {
5926            IRExpr *imm_val;
5927            imm = 1L;
5928            switch (size) {
5929               case 0:
5930                  imm = (imm << 8) | imm;
5931                  /* fall through */
5932               case 1:
5933                  imm = (imm << 16) | imm;
5934                  /* fall through */
5935               case 2:
5936                  imm = (imm << 32) | imm;
5937                  /* fall through */
5938               case 3:
5939                  break;
5940               default:
5941                  vassert(0);
5942            }
5943            if (Q) {
5944               reg_m = newTemp(Ity_V128);
5945               res = newTemp(Ity_V128);
5946               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
5947               assign(reg_m, getQReg(mreg));
5948               switch (size) {
5949                  case 0:
5950                     add = Iop_Add8x16;
5951                     op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
5952                     break;
5953                  case 1:
5954                     add = Iop_Add16x8;
5955                     op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
5956                     break;
5957                  case 2:
5958                     add = Iop_Add32x4;
5959                     op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
5960                     break;
5961                  case 3:
5962                     add = Iop_Add64x2;
5963                     op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
5964                     break;
5965                  default:
5966                     vassert(0);
5967               }
5968            } else {
5969               reg_m = newTemp(Ity_I64);
5970               res = newTemp(Ity_I64);
5971               imm_val = mkU64(imm);
5972               assign(reg_m, getDRegI64(mreg));
5973               switch (size) {
5974                  case 0:
5975                     add = Iop_Add8x8;
5976                     op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
5977                     break;
5978                  case 1:
5979                     add = Iop_Add16x4;
5980                     op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
5981                     break;
5982                  case 2:
5983                     add = Iop_Add32x2;
5984                     op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
5985                     break;
5986                  case 3:
5987                     add = Iop_Add64;
5988                     op = U ? Iop_Shr64 : Iop_Sar64;
5989                     break;
5990                  default:
5991                     vassert(0);
5992               }
5993            }
5994            assign(res,
5995                   binop(add,
5996                         binop(op,
5997                               mkexpr(reg_m),
5998                               mkU8(shift_imm)),
5999                         binop(Q ? Iop_AndV128 : Iop_And64,
6000                               binop(op,
6001                                     mkexpr(reg_m),
6002                                     mkU8(shift_imm - 1)),
6003                               imm_val)));
6004         } else {
6005            if (Q) {
6006               res = newTemp(Ity_V128);
6007               assign(res, getQReg(mreg));
6008            } else {
6009               res = newTemp(Ity_I64);
6010               assign(res, getDRegI64(mreg));
6011            }
6012         }
6013         if (A == 3) {
6014            if (Q) {
6015               putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
6016                             condT);
6017            } else {
6018               putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
6019                                condT);
6020            }
6021            DIP("vrsra.%c%u %c%u, %c%u, #%u\n",
6022                U ? 'u' : 's', 8 << size,
6023                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6024         } else {
6025            if (Q) {
6026               putQReg(dreg, mkexpr(res), condT);
6027            } else {
6028               putDRegI64(dreg, mkexpr(res), condT);
6029            }
6030            DIP("vrshr.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6031                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6032         }
6033         return True;
6034      case 1:
6035      case 0:
6036         /* VSHR, VSRA */
6037         if (Q) {
6038            reg_m = newTemp(Ity_V128);
6039            assign(reg_m, getQReg(mreg));
6040            res = newTemp(Ity_V128);
6041         } else {
6042            reg_m = newTemp(Ity_I64);
6043            assign(reg_m, getDRegI64(mreg));
6044            res = newTemp(Ity_I64);
6045         }
6046         if (Q) {
6047            switch (size) {
6048               case 0:
6049                  op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
6050                  add = Iop_Add8x16;
6051                  break;
6052               case 1:
6053                  op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
6054                  add = Iop_Add16x8;
6055                  break;
6056               case 2:
6057                  op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
6058                  add = Iop_Add32x4;
6059                  break;
6060               case 3:
6061                  op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
6062                  add = Iop_Add64x2;
6063                  break;
6064               default:
6065                  vassert(0);
6066            }
6067         } else {
6068            switch (size) {
6069               case 0:
6070                  op =  U ? Iop_ShrN8x8 : Iop_SarN8x8;
6071                  add = Iop_Add8x8;
6072                  break;
6073               case 1:
6074                  op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
6075                  add = Iop_Add16x4;
6076                  break;
6077               case 2:
6078                  op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
6079                  add = Iop_Add32x2;
6080                  break;
6081               case 3:
6082                  op = U ? Iop_Shr64 : Iop_Sar64;
6083                  add = Iop_Add64;
6084                  break;
6085               default:
6086                  vassert(0);
6087            }
6088         }
6089         assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6090         if (A == 1) {
6091            if (Q) {
6092               putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
6093                             condT);
6094            } else {
6095               putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
6096                                condT);
6097            }
6098            DIP("vsra.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6099                  Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6100         } else {
6101            if (Q) {
6102               putQReg(dreg, mkexpr(res), condT);
6103            } else {
6104               putDRegI64(dreg, mkexpr(res), condT);
6105            }
6106            DIP("vshr.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6107                  Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6108         }
6109         return True;
6110      case 4:
6111         /* VSRI */
6112         if (!U)
6113            return False;
6114         if (Q) {
6115            res = newTemp(Ity_V128);
6116            mask = newTemp(Ity_V128);
6117         } else {
6118            res = newTemp(Ity_I64);
6119            mask = newTemp(Ity_I64);
6120         }
6121         switch (size) {
6122            case 0: op = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; break;
6123            case 1: op = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; break;
6124            case 2: op = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; break;
6125            case 3: op = Q ? Iop_ShrN64x2 : Iop_Shr64; break;
6126            default: vassert(0);
6127         }
6128         if (Q) {
6129            assign(mask, binop(op, binop(Iop_64HLtoV128,
6130                                         mkU64(0xFFFFFFFFFFFFFFFFLL),
6131                                         mkU64(0xFFFFFFFFFFFFFFFFLL)),
6132                               mkU8(shift_imm)));
6133            assign(res, binop(Iop_OrV128,
6134                              binop(Iop_AndV128,
6135                                    getQReg(dreg),
6136                                    unop(Iop_NotV128,
6137                                         mkexpr(mask))),
6138                              binop(op,
6139                                    getQReg(mreg),
6140                                    mkU8(shift_imm))));
6141            putQReg(dreg, mkexpr(res), condT);
6142         } else {
6143            assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
6144                               mkU8(shift_imm)));
6145            assign(res, binop(Iop_Or64,
6146                              binop(Iop_And64,
6147                                    getDRegI64(dreg),
6148                                    unop(Iop_Not64,
6149                                         mkexpr(mask))),
6150                              binop(op,
6151                                    getDRegI64(mreg),
6152                                    mkU8(shift_imm))));
6153            putDRegI64(dreg, mkexpr(res), condT);
6154         }
6155         DIP("vsri.%u %c%u, %c%u, #%u\n",
6156             8 << size, Q ? 'q' : 'd', dreg,
6157             Q ? 'q' : 'd', mreg, shift_imm);
6158         return True;
6159      case 5:
6160         if (U) {
6161            /* VSLI */
6162            shift_imm = 8 * (1 << size) - shift_imm;
6163            if (Q) {
6164               res = newTemp(Ity_V128);
6165               mask = newTemp(Ity_V128);
6166            } else {
6167               res = newTemp(Ity_I64);
6168               mask = newTemp(Ity_I64);
6169            }
6170            switch (size) {
6171               case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
6172               case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
6173               case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
6174               case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
6175               default: vassert(0);
6176            }
6177            if (Q) {
6178               assign(mask, binop(op, binop(Iop_64HLtoV128,
6179                                            mkU64(0xFFFFFFFFFFFFFFFFLL),
6180                                            mkU64(0xFFFFFFFFFFFFFFFFLL)),
6181                                  mkU8(shift_imm)));
6182               assign(res, binop(Iop_OrV128,
6183                                 binop(Iop_AndV128,
6184                                       getQReg(dreg),
6185                                       unop(Iop_NotV128,
6186                                            mkexpr(mask))),
6187                                 binop(op,
6188                                       getQReg(mreg),
6189                                       mkU8(shift_imm))));
6190               putQReg(dreg, mkexpr(res), condT);
6191            } else {
6192               assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
6193                                  mkU8(shift_imm)));
6194               assign(res, binop(Iop_Or64,
6195                                 binop(Iop_And64,
6196                                       getDRegI64(dreg),
6197                                       unop(Iop_Not64,
6198                                            mkexpr(mask))),
6199                                 binop(op,
6200                                       getDRegI64(mreg),
6201                                       mkU8(shift_imm))));
6202               putDRegI64(dreg, mkexpr(res), condT);
6203            }
6204            DIP("vsli.%u %c%u, %c%u, #%u\n",
6205                8 << size, Q ? 'q' : 'd', dreg,
6206                Q ? 'q' : 'd', mreg, shift_imm);
6207            return True;
6208         } else {
6209            /* VSHL #imm */
6210            shift_imm = 8 * (1 << size) - shift_imm;
6211            if (Q) {
6212               res = newTemp(Ity_V128);
6213            } else {
6214               res = newTemp(Ity_I64);
6215            }
6216            switch (size) {
6217               case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
6218               case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
6219               case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
6220               case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
6221               default: vassert(0);
6222            }
6223            assign(res, binop(op, Q ? getQReg(mreg) : getDRegI64(mreg),
6224                     mkU8(shift_imm)));
6225            if (Q) {
6226               putQReg(dreg, mkexpr(res), condT);
6227            } else {
6228               putDRegI64(dreg, mkexpr(res), condT);
6229            }
6230            DIP("vshl.i%u %c%u, %c%u, #%u\n",
6231                8 << size, Q ? 'q' : 'd', dreg,
6232                Q ? 'q' : 'd', mreg, shift_imm);
6233            return True;
6234         }
6235         break;
6236      case 6:
6237      case 7:
6238         /* VQSHL, VQSHLU */
6239         shift_imm = 8 * (1 << size) - shift_imm;
6240         if (U) {
6241            if (A & 1) {
6242               switch (size) {
6243                  case 0:
6244                     op = Q ? Iop_QShlN8x16 : Iop_QShlN8x8;
6245                     op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
6246                     break;
6247                  case 1:
6248                     op = Q ? Iop_QShlN16x8 : Iop_QShlN16x4;
6249                     op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
6250                     break;
6251                  case 2:
6252                     op = Q ? Iop_QShlN32x4 : Iop_QShlN32x2;
6253                     op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
6254                     break;
6255                  case 3:
6256                     op = Q ? Iop_QShlN64x2 : Iop_QShlN64x1;
6257                     op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
6258                     break;
6259                  default:
6260                     vassert(0);
6261               }
6262               DIP("vqshl.u%u %c%u, %c%u, #%u\n",
6263                   8 << size,
6264                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6265            } else {
6266               switch (size) {
6267                  case 0:
6268                     op = Q ? Iop_QShlN8Sx16 : Iop_QShlN8Sx8;
6269                     op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
6270                     break;
6271                  case 1:
6272                     op = Q ? Iop_QShlN16Sx8 : Iop_QShlN16Sx4;
6273                     op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
6274                     break;
6275                  case 2:
6276                     op = Q ? Iop_QShlN32Sx4 : Iop_QShlN32Sx2;
6277                     op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
6278                     break;
6279                  case 3:
6280                     op = Q ? Iop_QShlN64Sx2 : Iop_QShlN64Sx1;
6281                     op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
6282                     break;
6283                  default:
6284                     vassert(0);
6285               }
6286               DIP("vqshlu.s%u %c%u, %c%u, #%u\n",
6287                   8 << size,
6288                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6289            }
6290         } else {
6291            if (!(A & 1))
6292               return False;
6293            switch (size) {
6294               case 0:
6295                  op = Q ? Iop_QSalN8x16 : Iop_QSalN8x8;
6296                  op_rev = Q ? Iop_SarN8x16 : Iop_SarN8x8;
6297                  break;
6298               case 1:
6299                  op = Q ? Iop_QSalN16x8 : Iop_QSalN16x4;
6300                  op_rev = Q ? Iop_SarN16x8 : Iop_SarN16x4;
6301                  break;
6302               case 2:
6303                  op = Q ? Iop_QSalN32x4 : Iop_QSalN32x2;
6304                  op_rev = Q ? Iop_SarN32x4 : Iop_SarN32x2;
6305                  break;
6306               case 3:
6307                  op = Q ? Iop_QSalN64x2 : Iop_QSalN64x1;
6308                  op_rev = Q ? Iop_SarN64x2 : Iop_Sar64;
6309                  break;
6310               default:
6311                  vassert(0);
6312            }
6313            DIP("vqshl.s%u %c%u, %c%u, #%u\n",
6314                8 << size,
6315                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6316         }
6317         if (Q) {
6318            tmp = newTemp(Ity_V128);
6319            res = newTemp(Ity_V128);
6320            reg_m = newTemp(Ity_V128);
6321            assign(reg_m, getQReg(mreg));
6322         } else {
6323            tmp = newTemp(Ity_I64);
6324            res = newTemp(Ity_I64);
6325            reg_m = newTemp(Ity_I64);
6326            assign(reg_m, getDRegI64(mreg));
6327         }
6328         assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6329#ifndef DISABLE_QC_FLAG
6330         assign(tmp, binop(op_rev, mkexpr(res), mkU8(shift_imm)));
6331         setFlag_QC(mkexpr(tmp), mkexpr(reg_m), Q, condT);
6332#endif
6333         if (Q)
6334            putQReg(dreg, mkexpr(res), condT);
6335         else
6336            putDRegI64(dreg, mkexpr(res), condT);
6337         return True;
6338      case 8:
6339         if (!U) {
6340            if (L == 1)
6341               return False;
6342            size++;
6343            dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
6344            mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
6345            if (mreg & 1)
6346               return False;
6347            mreg >>= 1;
6348            if (!B) {
6349               /* VSHRN*/
6350               IROp narOp;
6351               reg_m = newTemp(Ity_V128);
6352               assign(reg_m, getQReg(mreg));
6353               res = newTemp(Ity_I64);
6354               switch (size) {
6355                  case 1:
6356                     op = Iop_ShrN16x8;
6357                     narOp = Iop_NarrowUn16to8x8;
6358                     break;
6359                  case 2:
6360                     op = Iop_ShrN32x4;
6361                     narOp = Iop_NarrowUn32to16x4;
6362                     break;
6363                  case 3:
6364                     op = Iop_ShrN64x2;
6365                     narOp = Iop_NarrowUn64to32x2;
6366                     break;
6367                  default:
6368                     vassert(0);
6369               }
6370               assign(res, unop(narOp,
6371                                binop(op,
6372                                      mkexpr(reg_m),
6373                                      mkU8(shift_imm))));
6374               putDRegI64(dreg, mkexpr(res), condT);
6375               DIP("vshrn.i%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6376                   shift_imm);
6377               return True;
6378            } else {
6379               /* VRSHRN   */
6380               IROp addOp, shOp, narOp;
6381               IRExpr *imm_val;
6382               reg_m = newTemp(Ity_V128);
6383               assign(reg_m, getQReg(mreg));
6384               res = newTemp(Ity_I64);
6385               imm = 1L;
6386               switch (size) {
6387                  case 0: imm = (imm <<  8) | imm; /* fall through */
6388                  case 1: imm = (imm << 16) | imm; /* fall through */
6389                  case 2: imm = (imm << 32) | imm; /* fall through */
6390                  case 3: break;
6391                  default: vassert(0);
6392               }
6393               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
6394               switch (size) {
6395                  case 1:
6396                     addOp = Iop_Add16x8;
6397                     shOp = Iop_ShrN16x8;
6398                     narOp = Iop_NarrowUn16to8x8;
6399                     break;
6400                  case 2:
6401                     addOp = Iop_Add32x4;
6402                     shOp = Iop_ShrN32x4;
6403                     narOp = Iop_NarrowUn32to16x4;
6404                     break;
6405                  case 3:
6406                     addOp = Iop_Add64x2;
6407                     shOp = Iop_ShrN64x2;
6408                     narOp = Iop_NarrowUn64to32x2;
6409                     break;
6410                  default:
6411                     vassert(0);
6412               }
6413               assign(res, unop(narOp,
6414                                binop(addOp,
6415                                      binop(shOp,
6416                                            mkexpr(reg_m),
6417                                            mkU8(shift_imm)),
6418                                      binop(Iop_AndV128,
6419                                            binop(shOp,
6420                                                  mkexpr(reg_m),
6421                                                  mkU8(shift_imm - 1)),
6422                                            imm_val))));
6423               putDRegI64(dreg, mkexpr(res), condT);
6424               if (shift_imm == 0) {
6425                  DIP("vmov%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6426                      shift_imm);
6427               } else {
6428                  DIP("vrshrn.i%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6429                      shift_imm);
6430               }
6431               return True;
6432            }
6433         } else {
6434            /* fall through */
6435         }
6436      case 9:
6437         dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
6438         mreg = ((theInstr >>  1) & 0x10) | (theInstr & 0xF);
6439         if (mreg & 1)
6440            return False;
6441         mreg >>= 1;
6442         size++;
6443         if ((theInstr >> 8) & 1) {
6444            switch (size) {
6445               case 1:
6446                  op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
6447                  cvt = U ? Iop_QNarrowUn16Uto8Ux8 : Iop_QNarrowUn16Sto8Sx8;
6448                  cvt2 = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
6449                  break;
6450               case 2:
6451                  op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
6452                  cvt = U ? Iop_QNarrowUn32Uto16Ux4 : Iop_QNarrowUn32Sto16Sx4;
6453                  cvt2 = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
6454                  break;
6455               case 3:
6456                  op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
6457                  cvt = U ? Iop_QNarrowUn64Uto32Ux2 : Iop_QNarrowUn64Sto32Sx2;
6458                  cvt2 = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
6459                  break;
6460               default:
6461                  vassert(0);
6462            }
6463            DIP("vq%sshrn.%c%u d%u, q%u, #%u\n", B ? "r" : "",
6464                U ? 'u' : 's', 8 << size, dreg, mreg, shift_imm);
6465         } else {
6466            vassert(U);
6467            switch (size) {
6468               case 1:
6469                  op = Iop_SarN16x8;
6470                  cvt = Iop_QNarrowUn16Sto8Ux8;
6471                  cvt2 = Iop_Widen8Uto16x8;
6472                  break;
6473               case 2:
6474                  op = Iop_SarN32x4;
6475                  cvt = Iop_QNarrowUn32Sto16Ux4;
6476                  cvt2 = Iop_Widen16Uto32x4;
6477                  break;
6478               case 3:
6479                  op = Iop_SarN64x2;
6480                  cvt = Iop_QNarrowUn64Sto32Ux2;
6481                  cvt2 = Iop_Widen32Uto64x2;
6482                  break;
6483               default:
6484                  vassert(0);
6485            }
6486            DIP("vq%sshrun.s%u d%u, q%u, #%u\n", B ? "r" : "",
6487                8 << size, dreg, mreg, shift_imm);
6488         }
6489         if (B) {
6490            if (shift_imm > 0) {
6491               imm = 1;
6492               switch (size) {
6493                  case 1: imm = (imm << 16) | imm; /* fall through */
6494                  case 2: imm = (imm << 32) | imm; /* fall through */
6495                  case 3: break;
6496                  case 0: default: vassert(0);
6497               }
6498               switch (size) {
6499                  case 1: add = Iop_Add16x8; break;
6500                  case 2: add = Iop_Add32x4; break;
6501                  case 3: add = Iop_Add64x2; break;
6502                  case 0: default: vassert(0);
6503               }
6504            }
6505         }
6506         reg_m = newTemp(Ity_V128);
6507         res = newTemp(Ity_V128);
6508         assign(reg_m, getQReg(mreg));
6509         if (B) {
6510            /* VQRSHRN, VQRSHRUN */
6511            assign(res, binop(add,
6512                              binop(op, mkexpr(reg_m), mkU8(shift_imm)),
6513                              binop(Iop_AndV128,
6514                                    binop(op,
6515                                          mkexpr(reg_m),
6516                                          mkU8(shift_imm - 1)),
6517                                    mkU128(imm))));
6518         } else {
6519            /* VQSHRN, VQSHRUN */
6520            assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6521         }
6522#ifndef DISABLE_QC_FLAG
6523         setFlag_QC(unop(cvt2, unop(cvt, mkexpr(res))), mkexpr(res),
6524                    True, condT);
6525#endif
6526         putDRegI64(dreg, unop(cvt, mkexpr(res)), condT);
6527         return True;
6528      case 10:
6529         /* VSHLL
6530            VMOVL ::= VSHLL #0 */
6531         if (B)
6532            return False;
6533         if (dreg & 1)
6534            return False;
6535         dreg >>= 1;
6536         shift_imm = (8 << size) - shift_imm;
6537         res = newTemp(Ity_V128);
6538         switch (size) {
6539            case 0:
6540               op = Iop_ShlN16x8;
6541               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
6542               break;
6543            case 1:
6544               op = Iop_ShlN32x4;
6545               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
6546               break;
6547            case 2:
6548               op = Iop_ShlN64x2;
6549               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
6550               break;
6551            case 3:
6552               return False;
6553            default:
6554               vassert(0);
6555         }
6556         assign(res, binop(op, unop(cvt, getDRegI64(mreg)), mkU8(shift_imm)));
6557         putQReg(dreg, mkexpr(res), condT);
6558         if (shift_imm == 0) {
6559            DIP("vmovl.%c%u q%u, d%u\n", U ? 'u' : 's', 8 << size,
6560                dreg, mreg);
6561         } else {
6562            DIP("vshll.%c%u q%u, d%u, #%u\n", U ? 'u' : 's', 8 << size,
6563                dreg, mreg, shift_imm);
6564         }
6565         return True;
6566      case 14:
6567      case 15:
6568         /* VCVT floating-point <-> fixed-point */
6569         if ((theInstr >> 8) & 1) {
6570            if (U) {
6571               op = Q ? Iop_F32ToFixed32Ux4_RZ : Iop_F32ToFixed32Ux2_RZ;
6572            } else {
6573               op = Q ? Iop_F32ToFixed32Sx4_RZ : Iop_F32ToFixed32Sx2_RZ;
6574            }
6575            DIP("vcvt.%c32.f32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
6576                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
6577                64 - ((theInstr >> 16) & 0x3f));
6578         } else {
6579            if (U) {
6580               op = Q ? Iop_Fixed32UToF32x4_RN : Iop_Fixed32UToF32x2_RN;
6581            } else {
6582               op = Q ? Iop_Fixed32SToF32x4_RN : Iop_Fixed32SToF32x2_RN;
6583            }
6584            DIP("vcvt.f32.%c32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
6585                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
6586                64 - ((theInstr >> 16) & 0x3f));
6587         }
6588         if (((theInstr >> 21) & 1) == 0)
6589            return False;
6590         if (Q) {
6591            putQReg(dreg, binop(op, getQReg(mreg),
6592                     mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
6593         } else {
6594            putDRegI64(dreg, binop(op, getDRegI64(mreg),
6595                       mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
6596         }
6597         return True;
6598      default:
6599         return False;
6600
6601   }
6602   return False;
6603}
6604
6605/* A7.4.5 Two registers, miscellaneous */
6606static
6607Bool dis_neon_data_2reg_misc ( UInt theInstr, IRTemp condT )
6608{
6609   UInt A = (theInstr >> 16) & 3;
6610   UInt B = (theInstr >> 6) & 0x1f;
6611   UInt Q = (theInstr >> 6) & 1;
6612   UInt U = (theInstr >> 24) & 1;
6613   UInt size = (theInstr >> 18) & 3;
6614   UInt dreg = get_neon_d_regno(theInstr);
6615   UInt mreg = get_neon_m_regno(theInstr);
6616   UInt F = (theInstr >> 10) & 1;
6617   IRTemp arg_d;
6618   IRTemp arg_m;
6619   IRTemp res;
6620   switch (A) {
6621      case 0:
6622         if (Q) {
6623            arg_m = newTemp(Ity_V128);
6624            res = newTemp(Ity_V128);
6625            assign(arg_m, getQReg(mreg));
6626         } else {
6627            arg_m = newTemp(Ity_I64);
6628            res = newTemp(Ity_I64);
6629            assign(arg_m, getDRegI64(mreg));
6630         }
6631         switch (B >> 1) {
6632            case 0: {
6633               /* VREV64 */
6634               IROp op;
6635               switch (size) {
6636                  case 0:
6637                     op = Q ? Iop_Reverse64_8x16 : Iop_Reverse64_8x8;
6638                     break;
6639                  case 1:
6640                     op = Q ? Iop_Reverse64_16x8 : Iop_Reverse64_16x4;
6641                     break;
6642                  case 2:
6643                     op = Q ? Iop_Reverse64_32x4 : Iop_Reverse64_32x2;
6644                     break;
6645                  case 3:
6646                     return False;
6647                  default:
6648                     vassert(0);
6649               }
6650               assign(res, unop(op, mkexpr(arg_m)));
6651               DIP("vrev64.%u %c%u, %c%u\n", 8 << size,
6652                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6653               break;
6654            }
6655            case 1: {
6656               /* VREV32 */
6657               IROp op;
6658               switch (size) {
6659                  case 0:
6660                     op = Q ? Iop_Reverse32_8x16 : Iop_Reverse32_8x8;
6661                     break;
6662                  case 1:
6663                     op = Q ? Iop_Reverse32_16x8 : Iop_Reverse32_16x4;
6664                     break;
6665                  case 2:
6666                  case 3:
6667                     return False;
6668                  default:
6669                     vassert(0);
6670               }
6671               assign(res, unop(op, mkexpr(arg_m)));
6672               DIP("vrev32.%u %c%u, %c%u\n", 8 << size,
6673                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6674               break;
6675            }
6676            case 2: {
6677               /* VREV16 */
6678               IROp op;
6679               switch (size) {
6680                  case 0:
6681                     op = Q ? Iop_Reverse16_8x16 : Iop_Reverse16_8x8;
6682                     break;
6683                  case 1:
6684                  case 2:
6685                  case 3:
6686                     return False;
6687                  default:
6688                     vassert(0);
6689               }
6690               assign(res, unop(op, mkexpr(arg_m)));
6691               DIP("vrev16.%u %c%u, %c%u\n", 8 << size,
6692                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6693               break;
6694            }
6695            case 3:
6696               return False;
6697            case 4:
6698            case 5: {
6699               /* VPADDL */
6700               IROp op;
6701               U = (theInstr >> 7) & 1;
6702               if (Q) {
6703                  switch (size) {
6704                     case 0: op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16; break;
6705                     case 1: op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8; break;
6706                     case 2: op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4; break;
6707                     case 3: return False;
6708                     default: vassert(0);
6709                  }
6710               } else {
6711                  switch (size) {
6712                     case 0: op = U ? Iop_PwAddL8Ux8  : Iop_PwAddL8Sx8;  break;
6713                     case 1: op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4; break;
6714                     case 2: op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2; break;
6715                     case 3: return False;
6716                     default: vassert(0);
6717                  }
6718               }
6719               assign(res, unop(op, mkexpr(arg_m)));
6720               DIP("vpaddl.%c%u %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
6721                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6722               break;
6723            }
6724            case 6:
6725            case 7:
6726               return False;
6727            case 8: {
6728               /* VCLS */
6729               IROp op;
6730               switch (size) {
6731                  case 0: op = Q ? Iop_Cls8Sx16 : Iop_Cls8Sx8; break;
6732                  case 1: op = Q ? Iop_Cls16Sx8 : Iop_Cls16Sx4; break;
6733                  case 2: op = Q ? Iop_Cls32Sx4 : Iop_Cls32Sx2; break;
6734                  case 3: return False;
6735                  default: vassert(0);
6736               }
6737               assign(res, unop(op, mkexpr(arg_m)));
6738               DIP("vcls.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6739                   Q ? 'q' : 'd', mreg);
6740               break;
6741            }
6742            case 9: {
6743               /* VCLZ */
6744               IROp op;
6745               switch (size) {
6746                  case 0: op = Q ? Iop_Clz8Sx16 : Iop_Clz8Sx8; break;
6747                  case 1: op = Q ? Iop_Clz16Sx8 : Iop_Clz16Sx4; break;
6748                  case 2: op = Q ? Iop_Clz32Sx4 : Iop_Clz32Sx2; break;
6749                  case 3: return False;
6750                  default: vassert(0);
6751               }
6752               assign(res, unop(op, mkexpr(arg_m)));
6753               DIP("vclz.i%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6754                   Q ? 'q' : 'd', mreg);
6755               break;
6756            }
6757            case 10:
6758               /* VCNT */
6759               assign(res, unop(Q ? Iop_Cnt8x16 : Iop_Cnt8x8, mkexpr(arg_m)));
6760               DIP("vcnt.8 %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
6761                   mreg);
6762               break;
6763            case 11:
6764               /* VMVN */
6765               if (Q)
6766                  assign(res, unop(Iop_NotV128, mkexpr(arg_m)));
6767               else
6768                  assign(res, unop(Iop_Not64, mkexpr(arg_m)));
6769               DIP("vmvn %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
6770                   mreg);
6771               break;
6772            case 12:
6773            case 13: {
6774               /* VPADAL */
6775               IROp op, add_op;
6776               U = (theInstr >> 7) & 1;
6777               if (Q) {
6778                  switch (size) {
6779                     case 0:
6780                        op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16;
6781                        add_op = Iop_Add16x8;
6782                        break;
6783                     case 1:
6784                        op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8;
6785                        add_op = Iop_Add32x4;
6786                        break;
6787                     case 2:
6788                        op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4;
6789                        add_op = Iop_Add64x2;
6790                        break;
6791                     case 3:
6792                        return False;
6793                     default:
6794                        vassert(0);
6795                  }
6796               } else {
6797                  switch (size) {
6798                     case 0:
6799                        op = U ? Iop_PwAddL8Ux8 : Iop_PwAddL8Sx8;
6800                        add_op = Iop_Add16x4;
6801                        break;
6802                     case 1:
6803                        op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4;
6804                        add_op = Iop_Add32x2;
6805                        break;
6806                     case 2:
6807                        op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2;
6808                        add_op = Iop_Add64;
6809                        break;
6810                     case 3:
6811                        return False;
6812                     default:
6813                        vassert(0);
6814                  }
6815               }
6816               if (Q) {
6817                  arg_d = newTemp(Ity_V128);
6818                  assign(arg_d, getQReg(dreg));
6819               } else {
6820                  arg_d = newTemp(Ity_I64);
6821                  assign(arg_d, getDRegI64(dreg));
6822               }
6823               assign(res, binop(add_op, unop(op, mkexpr(arg_m)),
6824                                         mkexpr(arg_d)));
6825               DIP("vpadal.%c%u %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
6826                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6827               break;
6828            }
6829            case 14: {
6830               /* VQABS */
6831               IROp op_sub, op_qsub, op_cmp;
6832               IRTemp mask, tmp;
6833               IRExpr *zero1, *zero2;
6834               IRExpr *neg, *neg2;
6835               if (Q) {
6836                  zero1 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6837                  zero2 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6838                  mask = newTemp(Ity_V128);
6839                  tmp = newTemp(Ity_V128);
6840               } else {
6841                  zero1 = mkU64(0);
6842                  zero2 = mkU64(0);
6843                  mask = newTemp(Ity_I64);
6844                  tmp = newTemp(Ity_I64);
6845               }
6846               switch (size) {
6847                  case 0:
6848                     op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
6849                     op_qsub = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
6850                     op_cmp = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
6851                     break;
6852                  case 1:
6853                     op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
6854                     op_qsub = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
6855                     op_cmp = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4;
6856                     break;
6857                  case 2:
6858                     op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
6859                     op_qsub = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
6860                     op_cmp = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2;
6861                     break;
6862                  case 3:
6863                     return False;
6864                  default:
6865                     vassert(0);
6866               }
6867               assign(mask, binop(op_cmp, mkexpr(arg_m), zero1));
6868               neg = binop(op_qsub, zero2, mkexpr(arg_m));
6869               neg2 = binop(op_sub, zero2, mkexpr(arg_m));
6870               assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
6871                                 binop(Q ? Iop_AndV128 : Iop_And64,
6872                                       mkexpr(mask),
6873                                       mkexpr(arg_m)),
6874                                 binop(Q ? Iop_AndV128 : Iop_And64,
6875                                       unop(Q ? Iop_NotV128 : Iop_Not64,
6876                                            mkexpr(mask)),
6877                                       neg)));
6878#ifndef DISABLE_QC_FLAG
6879               assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
6880                                 binop(Q ? Iop_AndV128 : Iop_And64,
6881                                       mkexpr(mask),
6882                                       mkexpr(arg_m)),
6883                                 binop(Q ? Iop_AndV128 : Iop_And64,
6884                                       unop(Q ? Iop_NotV128 : Iop_Not64,
6885                                            mkexpr(mask)),
6886                                       neg2)));
6887               setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
6888#endif
6889               DIP("vqabs.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6890                   Q ? 'q' : 'd', mreg);
6891               break;
6892            }
6893            case 15: {
6894               /* VQNEG */
6895               IROp op, op2;
6896               IRExpr *zero;
6897               if (Q) {
6898                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6899               } else {
6900                  zero = mkU64(0);
6901               }
6902               switch (size) {
6903                  case 0:
6904                     op = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
6905                     op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
6906                     break;
6907                  case 1:
6908                     op = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
6909                     op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
6910                     break;
6911                  case 2:
6912                     op = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
6913                     op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
6914                     break;
6915                  case 3:
6916                     return False;
6917                  default:
6918                     vassert(0);
6919               }
6920               assign(res, binop(op, zero, mkexpr(arg_m)));
6921#ifndef DISABLE_QC_FLAG
6922               setFlag_QC(mkexpr(res), binop(op2, zero, mkexpr(arg_m)),
6923                          Q, condT);
6924#endif
6925               DIP("vqneg.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6926                   Q ? 'q' : 'd', mreg);
6927               break;
6928            }
6929            default:
6930               vassert(0);
6931         }
6932         if (Q) {
6933            putQReg(dreg, mkexpr(res), condT);
6934         } else {
6935            putDRegI64(dreg, mkexpr(res), condT);
6936         }
6937         return True;
6938      case 1:
6939         if (Q) {
6940            arg_m = newTemp(Ity_V128);
6941            res = newTemp(Ity_V128);
6942            assign(arg_m, getQReg(mreg));
6943         } else {
6944            arg_m = newTemp(Ity_I64);
6945            res = newTemp(Ity_I64);
6946            assign(arg_m, getDRegI64(mreg));
6947         }
6948         switch ((B >> 1) & 0x7) {
6949            case 0: {
6950               /* VCGT #0 */
6951               IRExpr *zero;
6952               IROp op;
6953               if (Q) {
6954                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6955               } else {
6956                  zero = mkU64(0);
6957               }
6958               if (F) {
6959                  switch (size) {
6960                     case 0: case 1: case 3: return False;
6961                     case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
6962                     default: vassert(0);
6963                  }
6964               } else {
6965                  switch (size) {
6966                     case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
6967                     case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
6968                     case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
6969                     case 3: return False;
6970                     default: vassert(0);
6971                  }
6972               }
6973               assign(res, binop(op, mkexpr(arg_m), zero));
6974               DIP("vcgt.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
6975                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6976               break;
6977            }
6978            case 1: {
6979               /* VCGE #0 */
6980               IROp op;
6981               IRExpr *zero;
6982               if (Q) {
6983                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6984               } else {
6985                  zero = mkU64(0);
6986               }
6987               if (F) {
6988                  switch (size) {
6989                     case 0: case 1: case 3: return False;
6990                     case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
6991                     default: vassert(0);
6992                  }
6993                  assign(res, binop(op, mkexpr(arg_m), zero));
6994               } else {
6995                  switch (size) {
6996                     case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
6997                     case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
6998                     case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
6999                     case 3: return False;
7000                     default: vassert(0);
7001                  }
7002                  assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7003                                   binop(op, zero, mkexpr(arg_m))));
7004               }
7005               DIP("vcge.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7006                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7007               break;
7008            }
7009            case 2: {
7010               /* VCEQ #0 */
7011               IROp op;
7012               IRExpr *zero;
7013               if (F) {
7014                  if (Q) {
7015                     zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7016                  } else {
7017                     zero = mkU64(0);
7018                  }
7019                  switch (size) {
7020                     case 0: case 1: case 3: return False;
7021                     case 2: op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2; break;
7022                     default: vassert(0);
7023                  }
7024                  assign(res, binop(op, zero, mkexpr(arg_m)));
7025               } else {
7026                  switch (size) {
7027                     case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
7028                     case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
7029                     case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
7030                     case 3: return False;
7031                     default: vassert(0);
7032                  }
7033                  assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7034                                   unop(op, mkexpr(arg_m))));
7035               }
7036               DIP("vceq.%c%u %c%u, %c%u, #0\n", F ? 'f' : 'i', 8 << size,
7037                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7038               break;
7039            }
7040            case 3: {
7041               /* VCLE #0 */
7042               IRExpr *zero;
7043               IROp op;
7044               if (Q) {
7045                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7046               } else {
7047                  zero = mkU64(0);
7048               }
7049               if (F) {
7050                  switch (size) {
7051                     case 0: case 1: case 3: return False;
7052                     case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
7053                     default: vassert(0);
7054                  }
7055                  assign(res, binop(op, zero, mkexpr(arg_m)));
7056               } else {
7057                  switch (size) {
7058                     case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7059                     case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7060                     case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7061                     case 3: return False;
7062                     default: vassert(0);
7063                  }
7064                  assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7065                                   binop(op, mkexpr(arg_m), zero)));
7066               }
7067               DIP("vcle.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7068                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7069               break;
7070            }
7071            case 4: {
7072               /* VCLT #0 */
7073               IROp op;
7074               IRExpr *zero;
7075               if (Q) {
7076                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7077               } else {
7078                  zero = mkU64(0);
7079               }
7080               if (F) {
7081                  switch (size) {
7082                     case 0: case 1: case 3: return False;
7083                     case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
7084                     default: vassert(0);
7085                  }
7086                  assign(res, binop(op, zero, mkexpr(arg_m)));
7087               } else {
7088                  switch (size) {
7089                     case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7090                     case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7091                     case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7092                     case 3: return False;
7093                     default: vassert(0);
7094                  }
7095                  assign(res, binop(op, zero, mkexpr(arg_m)));
7096               }
7097               DIP("vclt.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7098                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7099               break;
7100            }
7101            case 5:
7102               return False;
7103            case 6: {
7104               /* VABS */
7105               if (!F) {
7106                  IROp op;
7107                  switch(size) {
7108                     case 0: op = Q ? Iop_Abs8x16 : Iop_Abs8x8; break;
7109                     case 1: op = Q ? Iop_Abs16x8 : Iop_Abs16x4; break;
7110                     case 2: op = Q ? Iop_Abs32x4 : Iop_Abs32x2; break;
7111                     case 3: return False;
7112                     default: vassert(0);
7113                  }
7114                  assign(res, unop(op, mkexpr(arg_m)));
7115               } else {
7116                  assign(res, unop(Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2,
7117                                   mkexpr(arg_m)));
7118               }
7119               DIP("vabs.%c%u %c%u, %c%u\n",
7120                   F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
7121                   Q ? 'q' : 'd', mreg);
7122               break;
7123            }
7124            case 7: {
7125               /* VNEG */
7126               IROp op;
7127               IRExpr *zero;
7128               if (F) {
7129                  switch (size) {
7130                     case 0: case 1: case 3: return False;
7131                     case 2: op = Q ? Iop_Neg32Fx4 : Iop_Neg32Fx2; break;
7132                     default: vassert(0);
7133                  }
7134                  assign(res, unop(op, mkexpr(arg_m)));
7135               } else {
7136                  if (Q) {
7137                     zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7138                  } else {
7139                     zero = mkU64(0);
7140                  }
7141                  switch (size) {
7142                     case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
7143                     case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
7144                     case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
7145                     case 3: return False;
7146                     default: vassert(0);
7147                  }
7148                  assign(res, binop(op, zero, mkexpr(arg_m)));
7149               }
7150               DIP("vneg.%c%u %c%u, %c%u\n",
7151                   F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
7152                   Q ? 'q' : 'd', mreg);
7153               break;
7154            }
7155            default:
7156               vassert(0);
7157         }
7158         if (Q) {
7159            putQReg(dreg, mkexpr(res), condT);
7160         } else {
7161            putDRegI64(dreg, mkexpr(res), condT);
7162         }
7163         return True;
7164      case 2:
7165         if ((B >> 1) == 0) {
7166            /* VSWP */
7167            if (Q) {
7168               arg_m = newTemp(Ity_V128);
7169               assign(arg_m, getQReg(mreg));
7170               putQReg(mreg, getQReg(dreg), condT);
7171               putQReg(dreg, mkexpr(arg_m), condT);
7172            } else {
7173               arg_m = newTemp(Ity_I64);
7174               assign(arg_m, getDRegI64(mreg));
7175               putDRegI64(mreg, getDRegI64(dreg), condT);
7176               putDRegI64(dreg, mkexpr(arg_m), condT);
7177            }
7178            DIP("vswp %c%u, %c%u\n",
7179                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7180            return True;
7181         } else if ((B >> 1) == 1) {
7182            /* VTRN */
7183            IROp op_lo, op_hi;
7184            IRTemp res1, res2;
7185            if (Q) {
7186               arg_m = newTemp(Ity_V128);
7187               arg_d = newTemp(Ity_V128);
7188               res1 = newTemp(Ity_V128);
7189               res2 = newTemp(Ity_V128);
7190               assign(arg_m, getQReg(mreg));
7191               assign(arg_d, getQReg(dreg));
7192            } else {
7193               res1 = newTemp(Ity_I64);
7194               res2 = newTemp(Ity_I64);
7195               arg_m = newTemp(Ity_I64);
7196               arg_d = newTemp(Ity_I64);
7197               assign(arg_m, getDRegI64(mreg));
7198               assign(arg_d, getDRegI64(dreg));
7199            }
7200            if (Q) {
7201               switch (size) {
7202                  case 0:
7203                     op_lo = Iop_InterleaveOddLanes8x16;
7204                     op_hi = Iop_InterleaveEvenLanes8x16;
7205                     break;
7206                  case 1:
7207                     op_lo = Iop_InterleaveOddLanes16x8;
7208                     op_hi = Iop_InterleaveEvenLanes16x8;
7209                     break;
7210                  case 2:
7211                     op_lo = Iop_InterleaveOddLanes32x4;
7212                     op_hi = Iop_InterleaveEvenLanes32x4;
7213                     break;
7214                  case 3:
7215                     return False;
7216                  default:
7217                     vassert(0);
7218               }
7219            } else {
7220               switch (size) {
7221                  case 0:
7222                     op_lo = Iop_InterleaveOddLanes8x8;
7223                     op_hi = Iop_InterleaveEvenLanes8x8;
7224                     break;
7225                  case 1:
7226                     op_lo = Iop_InterleaveOddLanes16x4;
7227                     op_hi = Iop_InterleaveEvenLanes16x4;
7228                     break;
7229                  case 2:
7230                     op_lo = Iop_InterleaveLO32x2;
7231                     op_hi = Iop_InterleaveHI32x2;
7232                     break;
7233                  case 3:
7234                     return False;
7235                  default:
7236                     vassert(0);
7237               }
7238            }
7239            assign(res1, binop(op_lo, mkexpr(arg_m), mkexpr(arg_d)));
7240            assign(res2, binop(op_hi, mkexpr(arg_m), mkexpr(arg_d)));
7241            if (Q) {
7242               putQReg(dreg, mkexpr(res1), condT);
7243               putQReg(mreg, mkexpr(res2), condT);
7244            } else {
7245               putDRegI64(dreg, mkexpr(res1), condT);
7246               putDRegI64(mreg, mkexpr(res2), condT);
7247            }
7248            DIP("vtrn.%u %c%u, %c%u\n",
7249                8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7250            return True;
7251         } else if ((B >> 1) == 2) {
7252            /* VUZP */
7253            IROp op_lo, op_hi;
7254            IRTemp res1, res2;
7255            if (!Q && size == 2)
7256               return False;
7257            if (Q) {
7258               arg_m = newTemp(Ity_V128);
7259               arg_d = newTemp(Ity_V128);
7260               res1 = newTemp(Ity_V128);
7261               res2 = newTemp(Ity_V128);
7262               assign(arg_m, getQReg(mreg));
7263               assign(arg_d, getQReg(dreg));
7264            } else {
7265               res1 = newTemp(Ity_I64);
7266               res2 = newTemp(Ity_I64);
7267               arg_m = newTemp(Ity_I64);
7268               arg_d = newTemp(Ity_I64);
7269               assign(arg_m, getDRegI64(mreg));
7270               assign(arg_d, getDRegI64(dreg));
7271            }
7272            switch (size) {
7273               case 0:
7274                  op_lo = Q ? Iop_CatOddLanes8x16 : Iop_CatOddLanes8x8;
7275                  op_hi = Q ? Iop_CatEvenLanes8x16 : Iop_CatEvenLanes8x8;
7276                  break;
7277               case 1:
7278                  op_lo = Q ? Iop_CatOddLanes16x8 : Iop_CatOddLanes16x4;
7279                  op_hi = Q ? Iop_CatEvenLanes16x8 : Iop_CatEvenLanes16x4;
7280                  break;
7281               case 2:
7282                  op_lo = Iop_CatOddLanes32x4;
7283                  op_hi = Iop_CatEvenLanes32x4;
7284                  break;
7285               case 3:
7286                  return False;
7287               default:
7288                  vassert(0);
7289            }
7290            assign(res1, binop(op_lo, mkexpr(arg_m), mkexpr(arg_d)));
7291            assign(res2, binop(op_hi, mkexpr(arg_m), mkexpr(arg_d)));
7292            if (Q) {
7293               putQReg(dreg, mkexpr(res1), condT);
7294               putQReg(mreg, mkexpr(res2), condT);
7295            } else {
7296               putDRegI64(dreg, mkexpr(res1), condT);
7297               putDRegI64(mreg, mkexpr(res2), condT);
7298            }
7299            DIP("vuzp.%u %c%u, %c%u\n",
7300                8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7301            return True;
7302         } else if ((B >> 1) == 3) {
7303            /* VZIP */
7304            IROp op_lo, op_hi;
7305            IRTemp res1, res2;
7306            if (!Q && size == 2)
7307               return False;
7308            if (Q) {
7309               arg_m = newTemp(Ity_V128);
7310               arg_d = newTemp(Ity_V128);
7311               res1 = newTemp(Ity_V128);
7312               res2 = newTemp(Ity_V128);
7313               assign(arg_m, getQReg(mreg));
7314               assign(arg_d, getQReg(dreg));
7315            } else {
7316               res1 = newTemp(Ity_I64);
7317               res2 = newTemp(Ity_I64);
7318               arg_m = newTemp(Ity_I64);
7319               arg_d = newTemp(Ity_I64);
7320               assign(arg_m, getDRegI64(mreg));
7321               assign(arg_d, getDRegI64(dreg));
7322            }
7323            switch (size) {
7324               case 0:
7325                  op_lo = Q ? Iop_InterleaveHI8x16 : Iop_InterleaveHI8x8;
7326                  op_hi = Q ? Iop_InterleaveLO8x16 : Iop_InterleaveLO8x8;
7327                  break;
7328               case 1:
7329                  op_lo = Q ? Iop_InterleaveHI16x8 : Iop_InterleaveHI16x4;
7330                  op_hi = Q ? Iop_InterleaveLO16x8 : Iop_InterleaveLO16x4;
7331                  break;
7332               case 2:
7333                  op_lo = Iop_InterleaveHI32x4;
7334                  op_hi = Iop_InterleaveLO32x4;
7335                  break;
7336               case 3:
7337                  return False;
7338               default:
7339                  vassert(0);
7340            }
7341            assign(res1, binop(op_lo, mkexpr(arg_m), mkexpr(arg_d)));
7342            assign(res2, binop(op_hi, mkexpr(arg_m), mkexpr(arg_d)));
7343            if (Q) {
7344               putQReg(dreg, mkexpr(res1), condT);
7345               putQReg(mreg, mkexpr(res2), condT);
7346            } else {
7347               putDRegI64(dreg, mkexpr(res1), condT);
7348               putDRegI64(mreg, mkexpr(res2), condT);
7349            }
7350            DIP("vzip.%u %c%u, %c%u\n",
7351                8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7352            return True;
7353         } else if (B == 8) {
7354            /* VMOVN */
7355            IROp op;
7356            mreg >>= 1;
7357            switch (size) {
7358               case 0: op = Iop_NarrowUn16to8x8;  break;
7359               case 1: op = Iop_NarrowUn32to16x4; break;
7360               case 2: op = Iop_NarrowUn64to32x2; break;
7361               case 3: return False;
7362               default: vassert(0);
7363            }
7364            putDRegI64(dreg, unop(op, getQReg(mreg)), condT);
7365            DIP("vmovn.i%u d%u, q%u\n", 16 << size, dreg, mreg);
7366            return True;
7367         } else if (B == 9 || (B >> 1) == 5) {
7368            /* VQMOVN, VQMOVUN */
7369            IROp op, op2;
7370            IRTemp tmp;
7371            dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
7372            mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
7373            if (mreg & 1)
7374               return False;
7375            mreg >>= 1;
7376            switch (size) {
7377               case 0: op2 = Iop_NarrowUn16to8x8;  break;
7378               case 1: op2 = Iop_NarrowUn32to16x4; break;
7379               case 2: op2 = Iop_NarrowUn64to32x2; break;
7380               case 3: return False;
7381               default: vassert(0);
7382            }
7383            switch (B & 3) {
7384               case 0:
7385                  vassert(0);
7386               case 1:
7387                  switch (size) {
7388                     case 0: op = Iop_QNarrowUn16Sto8Ux8;  break;
7389                     case 1: op = Iop_QNarrowUn32Sto16Ux4; break;
7390                     case 2: op = Iop_QNarrowUn64Sto32Ux2; break;
7391                     case 3: return False;
7392                     default: vassert(0);
7393                  }
7394                  DIP("vqmovun.s%u d%u, q%u\n", 16 << size, dreg, mreg);
7395                  break;
7396               case 2:
7397                  switch (size) {
7398                     case 0: op = Iop_QNarrowUn16Sto8Sx8;  break;
7399                     case 1: op = Iop_QNarrowUn32Sto16Sx4; break;
7400                     case 2: op = Iop_QNarrowUn64Sto32Sx2; break;
7401                     case 3: return False;
7402                     default: vassert(0);
7403                  }
7404                  DIP("vqmovn.s%u d%u, q%u\n", 16 << size, dreg, mreg);
7405                  break;
7406               case 3:
7407                  switch (size) {
7408                     case 0: op = Iop_QNarrowUn16Uto8Ux8;  break;
7409                     case 1: op = Iop_QNarrowUn32Uto16Ux4; break;
7410                     case 2: op = Iop_QNarrowUn64Uto32Ux2; break;
7411                     case 3: return False;
7412                     default: vassert(0);
7413                  }
7414                  DIP("vqmovn.u%u d%u, q%u\n", 16 << size, dreg, mreg);
7415                  break;
7416               default:
7417                  vassert(0);
7418            }
7419            res = newTemp(Ity_I64);
7420            tmp = newTemp(Ity_I64);
7421            assign(res, unop(op, getQReg(mreg)));
7422#ifndef DISABLE_QC_FLAG
7423            assign(tmp, unop(op2, getQReg(mreg)));
7424            setFlag_QC(mkexpr(res), mkexpr(tmp), False, condT);
7425#endif
7426            putDRegI64(dreg, mkexpr(res), condT);
7427            return True;
7428         } else if (B == 12) {
7429            /* VSHLL (maximum shift) */
7430            IROp op, cvt;
7431            UInt shift_imm;
7432            if (Q)
7433               return False;
7434            if (dreg & 1)
7435               return False;
7436            dreg >>= 1;
7437            shift_imm = 8 << size;
7438            res = newTemp(Ity_V128);
7439            switch (size) {
7440               case 0: op = Iop_ShlN16x8; cvt = Iop_Widen8Uto16x8;  break;
7441               case 1: op = Iop_ShlN32x4; cvt = Iop_Widen16Uto32x4; break;
7442               case 2: op = Iop_ShlN64x2; cvt = Iop_Widen32Uto64x2; break;
7443               case 3: return False;
7444               default: vassert(0);
7445            }
7446            assign(res, binop(op, unop(cvt, getDRegI64(mreg)),
7447                                  mkU8(shift_imm)));
7448            putQReg(dreg, mkexpr(res), condT);
7449            DIP("vshll.i%u q%u, d%u, #%u\n", 8 << size, dreg, mreg, 8 << size);
7450            return True;
7451         } else if ((B >> 3) == 3 && (B & 3) == 0) {
7452            /* VCVT (half<->single) */
7453            /* Half-precision extensions are needed to run this */
7454            vassert(0); // ATC
7455            if (((theInstr >> 18) & 3) != 1)
7456               return False;
7457            if ((theInstr >> 8) & 1) {
7458               if (dreg & 1)
7459                  return False;
7460               dreg >>= 1;
7461               putQReg(dreg, unop(Iop_F16toF32x4, getDRegI64(mreg)),
7462                     condT);
7463               DIP("vcvt.f32.f16 q%u, d%u\n", dreg, mreg);
7464            } else {
7465               if (mreg & 1)
7466                  return False;
7467               mreg >>= 1;
7468               putDRegI64(dreg, unop(Iop_F32toF16x4, getQReg(mreg)),
7469                                condT);
7470               DIP("vcvt.f16.f32 d%u, q%u\n", dreg, mreg);
7471            }
7472            return True;
7473         } else {
7474            return False;
7475         }
7476         vassert(0);
7477         return True;
7478      case 3:
7479         if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,0)) {
7480            /* VRECPE */
7481            IROp op;
7482            F = (theInstr >> 8) & 1;
7483            if (size != 2)
7484               return False;
7485            if (Q) {
7486               op = F ? Iop_Recip32Fx4 : Iop_Recip32x4;
7487               putQReg(dreg, unop(op, getQReg(mreg)), condT);
7488               DIP("vrecpe.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
7489            } else {
7490               op = F ? Iop_Recip32Fx2 : Iop_Recip32x2;
7491               putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7492               DIP("vrecpe.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
7493            }
7494            return True;
7495         } else if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,1)) {
7496            /* VRSQRTE */
7497            IROp op;
7498            F = (B >> 2) & 1;
7499            if (size != 2)
7500               return False;
7501            if (F) {
7502               /* fp */
7503               op = Q ? Iop_Rsqrte32Fx4 : Iop_Rsqrte32Fx2;
7504            } else {
7505               /* unsigned int */
7506               op = Q ? Iop_Rsqrte32x4 : Iop_Rsqrte32x2;
7507            }
7508            if (Q) {
7509               putQReg(dreg, unop(op, getQReg(mreg)), condT);
7510               DIP("vrsqrte.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
7511            } else {
7512               putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7513               DIP("vrsqrte.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
7514            }
7515            return True;
7516         } else if ((B >> 3) == 3) {
7517            /* VCVT (fp<->integer) */
7518            IROp op;
7519            if (size != 2)
7520               return False;
7521            switch ((B >> 1) & 3) {
7522               case 0:
7523                  op = Q ? Iop_I32StoFx4 : Iop_I32StoFx2;
7524                  DIP("vcvt.f32.s32 %c%u, %c%u\n",
7525                      Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7526                  break;
7527               case 1:
7528                  op = Q ? Iop_I32UtoFx4 : Iop_I32UtoFx2;
7529                  DIP("vcvt.f32.u32 %c%u, %c%u\n",
7530                      Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7531                  break;
7532               case 2:
7533                  op = Q ? Iop_FtoI32Sx4_RZ : Iop_FtoI32Sx2_RZ;
7534                  DIP("vcvt.s32.f32 %c%u, %c%u\n",
7535                      Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7536                  break;
7537               case 3:
7538                  op = Q ? Iop_FtoI32Ux4_RZ : Iop_FtoI32Ux2_RZ;
7539                  DIP("vcvt.u32.f32 %c%u, %c%u\n",
7540                      Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7541                  break;
7542               default:
7543                  vassert(0);
7544            }
7545            if (Q) {
7546               putQReg(dreg, unop(op, getQReg(mreg)), condT);
7547            } else {
7548               putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7549            }
7550            return True;
7551         } else {
7552            return False;
7553         }
7554         vassert(0);
7555         return True;
7556      default:
7557         vassert(0);
7558   }
7559   return False;
7560}
7561
7562/* A7.4.6 One register and a modified immediate value */
7563static
7564void ppNeonImm(UInt imm, UInt cmode, UInt op)
7565{
7566   int i;
7567   switch (cmode) {
7568      case 0: case 1: case 8: case 9:
7569         vex_printf("0x%x", imm);
7570         break;
7571      case 2: case 3: case 10: case 11:
7572         vex_printf("0x%x00", imm);
7573         break;
7574      case 4: case 5:
7575         vex_printf("0x%x0000", imm);
7576         break;
7577      case 6: case 7:
7578         vex_printf("0x%x000000", imm);
7579         break;
7580      case 12:
7581         vex_printf("0x%xff", imm);
7582         break;
7583      case 13:
7584         vex_printf("0x%xffff", imm);
7585         break;
7586      case 14:
7587         if (op) {
7588            vex_printf("0x");
7589            for (i = 7; i >= 0; i--)
7590               vex_printf("%s", (imm & (1 << i)) ? "ff" : "00");
7591         } else {
7592            vex_printf("0x%x", imm);
7593         }
7594         break;
7595      case 15:
7596         vex_printf("0x%x", imm);
7597         break;
7598   }
7599}
7600
7601static
7602const char *ppNeonImmType(UInt cmode, UInt op)
7603{
7604   switch (cmode) {
7605      case 0 ... 7:
7606      case 12: case 13:
7607         return "i32";
7608      case 8 ... 11:
7609         return "i16";
7610      case 14:
7611         if (op)
7612            return "i64";
7613         else
7614            return "i8";
7615      case 15:
7616         if (op)
7617            vassert(0);
7618         else
7619            return "f32";
7620      default:
7621         vassert(0);
7622   }
7623}
7624
7625static
7626void DIPimm(UInt imm, UInt cmode, UInt op,
7627            const char *instr, UInt Q, UInt dreg)
7628{
7629   if (vex_traceflags & VEX_TRACE_FE) {
7630      vex_printf("%s.%s %c%u, #", instr,
7631                 ppNeonImmType(cmode, op), Q ? 'q' : 'd', dreg);
7632      ppNeonImm(imm, cmode, op);
7633      vex_printf("\n");
7634   }
7635}
7636
7637static
7638Bool dis_neon_data_1reg_and_imm ( UInt theInstr, IRTemp condT )
7639{
7640   UInt dreg = get_neon_d_regno(theInstr);
7641   ULong imm_raw = ((theInstr >> 17) & 0x80) | ((theInstr >> 12) & 0x70) |
7642                  (theInstr & 0xf);
7643   ULong imm_raw_pp = imm_raw;
7644   UInt cmode = (theInstr >> 8) & 0xf;
7645   UInt op_bit = (theInstr >> 5) & 1;
7646   ULong imm = 0;
7647   UInt Q = (theInstr >> 6) & 1;
7648   int i, j;
7649   UInt tmp;
7650   IRExpr *imm_val;
7651   IRExpr *expr;
7652   IRTemp tmp_var;
7653   switch(cmode) {
7654      case 7: case 6:
7655         imm_raw = imm_raw << 8;
7656         /* fallthrough */
7657      case 5: case 4:
7658         imm_raw = imm_raw << 8;
7659         /* fallthrough */
7660      case 3: case 2:
7661         imm_raw = imm_raw << 8;
7662         /* fallthrough */
7663      case 0: case 1:
7664         imm = (imm_raw << 32) | imm_raw;
7665         break;
7666      case 11: case 10:
7667         imm_raw = imm_raw << 8;
7668         /* fallthrough */
7669      case 9: case 8:
7670         imm_raw = (imm_raw << 16) | imm_raw;
7671         imm = (imm_raw << 32) | imm_raw;
7672         break;
7673      case 13:
7674         imm_raw = (imm_raw << 8) | 0xff;
7675         /* fallthrough */
7676      case 12:
7677         imm_raw = (imm_raw << 8) | 0xff;
7678         imm = (imm_raw << 32) | imm_raw;
7679         break;
7680      case 14:
7681         if (! op_bit) {
7682            for(i = 0; i < 8; i++) {
7683               imm = (imm << 8) | imm_raw;
7684            }
7685         } else {
7686            for(i = 7; i >= 0; i--) {
7687               tmp = 0;
7688               for(j = 0; j < 8; j++) {
7689                  tmp = (tmp << 1) | ((imm_raw >> i) & 1);
7690               }
7691               imm = (imm << 8) | tmp;
7692            }
7693         }
7694         break;
7695      case 15:
7696         imm = (imm_raw & 0x80) << 5;
7697         imm |= ((~imm_raw & 0x40) << 5);
7698         for(i = 1; i <= 4; i++)
7699            imm |= (imm_raw & 0x40) << i;
7700         imm |= (imm_raw & 0x7f);
7701         imm = imm << 19;
7702         imm = (imm << 32) | imm;
7703         break;
7704      default:
7705         return False;
7706   }
7707   if (Q) {
7708      imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
7709   } else {
7710      imm_val = mkU64(imm);
7711   }
7712   if (((op_bit == 0) &&
7713      (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 12) == 12))) ||
7714      ((op_bit == 1) && (cmode == 14))) {
7715      /* VMOV (immediate) */
7716      if (Q) {
7717         putQReg(dreg, imm_val, condT);
7718      } else {
7719         putDRegI64(dreg, imm_val, condT);
7720      }
7721      DIPimm(imm_raw_pp, cmode, op_bit, "vmov", Q, dreg);
7722      return True;
7723   }
7724   if ((op_bit == 1) &&
7725      (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 14) == 12))) {
7726      /* VMVN (immediate) */
7727      if (Q) {
7728         putQReg(dreg, unop(Iop_NotV128, imm_val), condT);
7729      } else {
7730         putDRegI64(dreg, unop(Iop_Not64, imm_val), condT);
7731      }
7732      DIPimm(imm_raw_pp, cmode, op_bit, "vmvn", Q, dreg);
7733      return True;
7734   }
7735   if (Q) {
7736      tmp_var = newTemp(Ity_V128);
7737      assign(tmp_var, getQReg(dreg));
7738   } else {
7739      tmp_var = newTemp(Ity_I64);
7740      assign(tmp_var, getDRegI64(dreg));
7741   }
7742   if ((op_bit == 0) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
7743      /* VORR (immediate) */
7744      if (Q)
7745         expr = binop(Iop_OrV128, mkexpr(tmp_var), imm_val);
7746      else
7747         expr = binop(Iop_Or64, mkexpr(tmp_var), imm_val);
7748      DIPimm(imm_raw_pp, cmode, op_bit, "vorr", Q, dreg);
7749   } else if ((op_bit == 1) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
7750      /* VBIC (immediate) */
7751      if (Q)
7752         expr = binop(Iop_AndV128, mkexpr(tmp_var),
7753                                   unop(Iop_NotV128, imm_val));
7754      else
7755         expr = binop(Iop_And64, mkexpr(tmp_var), unop(Iop_Not64, imm_val));
7756      DIPimm(imm_raw_pp, cmode, op_bit, "vbic", Q, dreg);
7757   } else {
7758      return False;
7759   }
7760   if (Q)
7761      putQReg(dreg, expr, condT);
7762   else
7763      putDRegI64(dreg, expr, condT);
7764   return True;
7765}
7766
7767/* A7.4 Advanced SIMD data-processing instructions */
7768static
7769Bool dis_neon_data_processing ( UInt theInstr, IRTemp condT )
7770{
7771   UInt A = (theInstr >> 19) & 0x1F;
7772   UInt B = (theInstr >>  8) & 0xF;
7773   UInt C = (theInstr >>  4) & 0xF;
7774   UInt U = (theInstr >> 24) & 0x1;
7775
7776   if (! (A & 0x10)) {
7777      return dis_neon_data_3same(theInstr, condT);
7778   }
7779   if (((A & 0x17) == 0x10) && ((C & 0x9) == 0x1)) {
7780      return dis_neon_data_1reg_and_imm(theInstr, condT);
7781   }
7782   if ((C & 1) == 1) {
7783      return dis_neon_data_2reg_and_shift(theInstr, condT);
7784   }
7785   if (((C & 5) == 0) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
7786      return dis_neon_data_3diff(theInstr, condT);
7787   }
7788   if (((C & 5) == 4) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
7789      return dis_neon_data_2reg_and_scalar(theInstr, condT);
7790   }
7791   if ((A & 0x16) == 0x16) {
7792      if ((U == 0) && ((C & 1) == 0)) {
7793         return dis_neon_vext(theInstr, condT);
7794      }
7795      if ((U != 1) || ((C & 1) == 1))
7796         return False;
7797      if ((B & 8) == 0) {
7798         return dis_neon_data_2reg_misc(theInstr, condT);
7799      }
7800      if ((B & 12) == 8) {
7801         return dis_neon_vtb(theInstr, condT);
7802      }
7803      if ((B == 12) && ((C & 9) == 0)) {
7804         return dis_neon_vdup(theInstr, condT);
7805      }
7806   }
7807   return False;
7808}
7809
7810
7811/*------------------------------------------------------------*/
7812/*--- NEON loads and stores                                ---*/
7813/*------------------------------------------------------------*/
7814
7815/* For NEON memory operations, we use the standard scheme to handle
7816   conditionalisation: generate a jump around the instruction if the
7817   condition is false.  That's only necessary in Thumb mode, however,
7818   since in ARM mode NEON instructions are unconditional. */
7819
7820/* A helper function for what follows.  It assumes we already went
7821   uncond as per comments at the top of this section. */
7822static
7823void mk_neon_elem_load_to_one_lane( UInt rD, UInt inc, UInt index,
7824                                    UInt N, UInt size, IRTemp addr )
7825{
7826   UInt i;
7827   switch (size) {
7828      case 0:
7829         putDRegI64(rD, triop(Iop_SetElem8x8, getDRegI64(rD), mkU8(index),
7830                    loadLE(Ity_I8, mkexpr(addr))), IRTemp_INVALID);
7831         break;
7832      case 1:
7833         putDRegI64(rD, triop(Iop_SetElem16x4, getDRegI64(rD), mkU8(index),
7834                    loadLE(Ity_I16, mkexpr(addr))), IRTemp_INVALID);
7835         break;
7836      case 2:
7837         putDRegI64(rD, triop(Iop_SetElem32x2, getDRegI64(rD), mkU8(index),
7838                    loadLE(Ity_I32, mkexpr(addr))), IRTemp_INVALID);
7839         break;
7840      default:
7841         vassert(0);
7842   }
7843   for (i = 1; i <= N; i++) {
7844      switch (size) {
7845         case 0:
7846            putDRegI64(rD + i * inc,
7847                       triop(Iop_SetElem8x8,
7848                             getDRegI64(rD + i * inc),
7849                             mkU8(index),
7850                             loadLE(Ity_I8, binop(Iop_Add32,
7851                                                  mkexpr(addr),
7852                                                  mkU32(i * 1)))),
7853                       IRTemp_INVALID);
7854            break;
7855         case 1:
7856            putDRegI64(rD + i * inc,
7857                       triop(Iop_SetElem16x4,
7858                             getDRegI64(rD + i * inc),
7859                             mkU8(index),
7860                             loadLE(Ity_I16, binop(Iop_Add32,
7861                                                   mkexpr(addr),
7862                                                   mkU32(i * 2)))),
7863                       IRTemp_INVALID);
7864            break;
7865         case 2:
7866            putDRegI64(rD + i * inc,
7867                       triop(Iop_SetElem32x2,
7868                             getDRegI64(rD + i * inc),
7869                             mkU8(index),
7870                             loadLE(Ity_I32, binop(Iop_Add32,
7871                                                   mkexpr(addr),
7872                                                   mkU32(i * 4)))),
7873                       IRTemp_INVALID);
7874            break;
7875         default:
7876            vassert(0);
7877      }
7878   }
7879}
7880
7881/* A(nother) helper function for what follows.  It assumes we already
7882   went uncond as per comments at the top of this section. */
7883static
7884void mk_neon_elem_store_from_one_lane( UInt rD, UInt inc, UInt index,
7885                                       UInt N, UInt size, IRTemp addr )
7886{
7887   UInt i;
7888   switch (size) {
7889      case 0:
7890         storeLE(mkexpr(addr),
7891                 binop(Iop_GetElem8x8, getDRegI64(rD), mkU8(index)));
7892         break;
7893      case 1:
7894         storeLE(mkexpr(addr),
7895                 binop(Iop_GetElem16x4, getDRegI64(rD), mkU8(index)));
7896         break;
7897      case 2:
7898         storeLE(mkexpr(addr),
7899                 binop(Iop_GetElem32x2, getDRegI64(rD), mkU8(index)));
7900         break;
7901      default:
7902         vassert(0);
7903   }
7904   for (i = 1; i <= N; i++) {
7905      switch (size) {
7906         case 0:
7907            storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 1)),
7908                    binop(Iop_GetElem8x8, getDRegI64(rD + i * inc),
7909                                          mkU8(index)));
7910            break;
7911         case 1:
7912            storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 2)),
7913                    binop(Iop_GetElem16x4, getDRegI64(rD + i * inc),
7914                                           mkU8(index)));
7915            break;
7916         case 2:
7917            storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 4)),
7918                    binop(Iop_GetElem32x2, getDRegI64(rD + i * inc),
7919                                           mkU8(index)));
7920            break;
7921         default:
7922            vassert(0);
7923      }
7924   }
7925}
7926
7927/* A7.7 Advanced SIMD element or structure load/store instructions */
7928static
7929Bool dis_neon_load_or_store ( UInt theInstr,
7930                              Bool isT, IRTemp condT )
7931{
7932#  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
7933   UInt bA = INSN(23,23);
7934   UInt fB = INSN(11,8);
7935   UInt bL = INSN(21,21);
7936   UInt rD = (INSN(22,22) << 4) | INSN(15,12);
7937   UInt rN = INSN(19,16);
7938   UInt rM = INSN(3,0);
7939   UInt N, size, i, j;
7940   UInt inc;
7941   UInt regs = 1;
7942
7943   if (isT) {
7944      vassert(condT != IRTemp_INVALID);
7945   } else {
7946      vassert(condT == IRTemp_INVALID);
7947   }
7948   /* So now, if condT is not IRTemp_INVALID, we know we're
7949      dealing with Thumb code. */
7950
7951   if (INSN(20,20) != 0)
7952      return False;
7953
7954   IRTemp initialRn = newTemp(Ity_I32);
7955   assign(initialRn, isT ? getIRegT(rN) : getIRegA(rN));
7956
7957   IRTemp initialRm = newTemp(Ity_I32);
7958   assign(initialRm, isT ? getIRegT(rM) : getIRegA(rM));
7959
7960   /* There are 3 cases:
7961      (1) VSTn / VLDn (n-element structure from/to one lane)
7962      (2) VLDn (single element to all lanes)
7963      (3) VSTn / VLDn (multiple n-element structures)
7964   */
7965   if (bA) {
7966      N = fB & 3;
7967      if ((fB >> 2) < 3) {
7968         /* ------------ Case (1) ------------
7969            VSTn / VLDn (n-element structure from/to one lane) */
7970
7971         size = fB >> 2;
7972
7973         switch (size) {
7974            case 0: i = INSN(7,5); inc = 1; break;
7975            case 1: i = INSN(7,6); inc = INSN(5,5) ? 2 : 1; break;
7976            case 2: i = INSN(7,7); inc = INSN(6,6) ? 2 : 1; break;
7977            case 3: return False;
7978            default: vassert(0);
7979         }
7980
7981         IRTemp addr = newTemp(Ity_I32);
7982         assign(addr, mkexpr(initialRn));
7983
7984         // go uncond
7985         if (condT != IRTemp_INVALID)
7986            mk_skip_over_T32_if_cond_is_false(condT);
7987         // now uncond
7988
7989         if (bL)
7990            mk_neon_elem_load_to_one_lane(rD, inc, i, N, size, addr);
7991         else
7992            mk_neon_elem_store_from_one_lane(rD, inc, i, N, size, addr);
7993         DIP("v%s%u.%u {", bL ? "ld" : "st", N + 1, 8 << size);
7994         for (j = 0; j <= N; j++) {
7995            if (j)
7996               DIP(", ");
7997            DIP("d%u[%u]", rD + j * inc, i);
7998         }
7999         DIP("}, [r%u]", rN);
8000         if (rM != 13 && rM != 15) {
8001            DIP(", r%u\n", rM);
8002         } else {
8003            DIP("%s\n", (rM != 15) ? "!" : "");
8004         }
8005      } else {
8006         /* ------------ Case (2) ------------
8007            VLDn (single element to all lanes) */
8008         UInt r;
8009         if (bL == 0)
8010            return False;
8011
8012         inc = INSN(5,5) + 1;
8013         size = INSN(7,6);
8014
8015         /* size == 3 and size == 2 cases differ in alignment constraints */
8016         if (size == 3 && N == 3 && INSN(4,4) == 1)
8017            size = 2;
8018
8019         if (size == 0 && N == 0 && INSN(4,4) == 1)
8020            return False;
8021         if (N == 2 && INSN(4,4) == 1)
8022            return False;
8023         if (size == 3)
8024            return False;
8025
8026         // go uncond
8027         if (condT != IRTemp_INVALID)
8028            mk_skip_over_T32_if_cond_is_false(condT);
8029         // now uncond
8030
8031         IRTemp addr = newTemp(Ity_I32);
8032         assign(addr, mkexpr(initialRn));
8033
8034         if (N == 0 && INSN(5,5))
8035            regs = 2;
8036
8037         for (r = 0; r < regs; r++) {
8038            switch (size) {
8039               case 0:
8040                  putDRegI64(rD + r, unop(Iop_Dup8x8,
8041                                          loadLE(Ity_I8, mkexpr(addr))),
8042                             IRTemp_INVALID);
8043                  break;
8044               case 1:
8045                  putDRegI64(rD + r, unop(Iop_Dup16x4,
8046                                          loadLE(Ity_I16, mkexpr(addr))),
8047                             IRTemp_INVALID);
8048                  break;
8049               case 2:
8050                  putDRegI64(rD + r, unop(Iop_Dup32x2,
8051                                          loadLE(Ity_I32, mkexpr(addr))),
8052                             IRTemp_INVALID);
8053                  break;
8054               default:
8055                  vassert(0);
8056            }
8057            for (i = 1; i <= N; i++) {
8058               switch (size) {
8059                  case 0:
8060                     putDRegI64(rD + r + i * inc,
8061                                unop(Iop_Dup8x8,
8062                                     loadLE(Ity_I8, binop(Iop_Add32,
8063                                                          mkexpr(addr),
8064                                                          mkU32(i * 1)))),
8065                                IRTemp_INVALID);
8066                     break;
8067                  case 1:
8068                     putDRegI64(rD + r + i * inc,
8069                                unop(Iop_Dup16x4,
8070                                     loadLE(Ity_I16, binop(Iop_Add32,
8071                                                           mkexpr(addr),
8072                                                           mkU32(i * 2)))),
8073                                IRTemp_INVALID);
8074                     break;
8075                  case 2:
8076                     putDRegI64(rD + r + i * inc,
8077                                unop(Iop_Dup32x2,
8078                                     loadLE(Ity_I32, binop(Iop_Add32,
8079                                                           mkexpr(addr),
8080                                                           mkU32(i * 4)))),
8081                                IRTemp_INVALID);
8082                     break;
8083                  default:
8084                     vassert(0);
8085               }
8086            }
8087         }
8088         DIP("vld%u.%u {", N + 1, 8 << size);
8089         for (r = 0; r < regs; r++) {
8090            for (i = 0; i <= N; i++) {
8091               if (i || r)
8092                  DIP(", ");
8093               DIP("d%u[]", rD + r + i * inc);
8094            }
8095         }
8096         DIP("}, [r%u]", rN);
8097         if (rM != 13 && rM != 15) {
8098            DIP(", r%u\n", rM);
8099         } else {
8100            DIP("%s\n", (rM != 15) ? "!" : "");
8101         }
8102      }
8103      /* Writeback.  We're uncond here, so no condT-ing. */
8104      if (rM != 15) {
8105         if (rM == 13) {
8106            IRExpr* e = binop(Iop_Add32,
8107                              mkexpr(initialRn),
8108                              mkU32((1 << size) * (N + 1)));
8109            if (isT)
8110               putIRegT(rN, e, IRTemp_INVALID);
8111            else
8112               putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8113         } else {
8114            IRExpr* e = binop(Iop_Add32,
8115                              mkexpr(initialRn),
8116                              mkexpr(initialRm));
8117            if (isT)
8118               putIRegT(rN, e, IRTemp_INVALID);
8119            else
8120               putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8121         }
8122      }
8123      return True;
8124   } else {
8125      /* ------------ Case (3) ------------
8126         VSTn / VLDn (multiple n-element structures) */
8127      IRTemp tmp;
8128      UInt r, elems;
8129      if (fB == BITS4(0,0,1,0) || fB == BITS4(0,1,1,0)
8130          || fB == BITS4(0,1,1,1) || fB == BITS4(1,0,1,0)) {
8131         N = 0;
8132      } else if (fB == BITS4(0,0,1,1) || fB == BITS4(1,0,0,0)
8133                 || fB == BITS4(1,0,0,1)) {
8134         N = 1;
8135      } else if (fB == BITS4(0,1,0,0) || fB == BITS4(0,1,0,1)) {
8136         N = 2;
8137      } else if (fB == BITS4(0,0,0,0) || fB == BITS4(0,0,0,1)) {
8138         N = 3;
8139      } else {
8140         return False;
8141      }
8142      inc = (fB & 1) + 1;
8143      if (N == 1 && fB == BITS4(0,0,1,1)) {
8144         regs = 2;
8145      } else if (N == 0) {
8146         if (fB == BITS4(1,0,1,0)) {
8147            regs = 2;
8148         } else if (fB == BITS4(0,1,1,0)) {
8149            regs = 3;
8150         } else if (fB == BITS4(0,0,1,0)) {
8151            regs = 4;
8152         }
8153      }
8154
8155      size = INSN(7,6);
8156      if (N == 0 && size == 3)
8157         size = 2;
8158      if (size == 3)
8159         return False;
8160
8161      elems = 8 / (1 << size);
8162
8163      // go uncond
8164      if (condT != IRTemp_INVALID)
8165         mk_skip_over_T32_if_cond_is_false(condT);
8166      // now uncond
8167
8168      IRTemp addr = newTemp(Ity_I32);
8169      assign(addr, mkexpr(initialRn));
8170
8171      for (r = 0; r < regs; r++) {
8172         for (i = 0; i < elems; i++) {
8173            if (bL)
8174               mk_neon_elem_load_to_one_lane(rD + r, inc, i, N, size, addr);
8175            else
8176               mk_neon_elem_store_from_one_lane(rD + r, inc, i, N, size, addr);
8177            tmp = newTemp(Ity_I32);
8178            assign(tmp, binop(Iop_Add32, mkexpr(addr),
8179                                         mkU32((1 << size) * (N + 1))));
8180            addr = tmp;
8181         }
8182      }
8183      /* Writeback */
8184      if (rM != 15) {
8185         if (rM == 13) {
8186            IRExpr* e = binop(Iop_Add32,
8187                              mkexpr(initialRn),
8188                              mkU32(8 * (N + 1) * regs));
8189            if (isT)
8190               putIRegT(rN, e, IRTemp_INVALID);
8191            else
8192               putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8193         } else {
8194            IRExpr* e = binop(Iop_Add32,
8195                              mkexpr(initialRn),
8196                              mkexpr(initialRm));
8197            if (isT)
8198               putIRegT(rN, e, IRTemp_INVALID);
8199            else
8200               putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8201         }
8202      }
8203      DIP("v%s%u.%u {", bL ? "ld" : "st", N + 1, 8 << INSN(7,6));
8204      if ((inc == 1 && regs * (N + 1) > 1)
8205          || (inc == 2 && regs > 1 && N > 0)) {
8206         DIP("d%u-d%u", rD, rD + regs * (N + 1) - 1);
8207      } else {
8208         for (r = 0; r < regs; r++) {
8209            for (i = 0; i <= N; i++) {
8210               if (i || r)
8211                  DIP(", ");
8212               DIP("d%u", rD + r + i * inc);
8213            }
8214         }
8215      }
8216      DIP("}, [r%u]", rN);
8217      if (rM != 13 && rM != 15) {
8218         DIP(", r%u\n", rM);
8219      } else {
8220         DIP("%s\n", (rM != 15) ? "!" : "");
8221      }
8222      return True;
8223   }
8224#  undef INSN
8225}
8226
8227
8228/*------------------------------------------------------------*/
8229/*--- NEON, top level control                              ---*/
8230/*------------------------------------------------------------*/
8231
8232/* Both ARM and Thumb */
8233
8234/* Translate a NEON instruction.    If successful, returns
8235   True and *dres may or may not be updated.  If failure, returns
8236   False and doesn't change *dres nor create any IR.
8237
8238   The Thumb and ARM encodings are similar for the 24 bottom bits, but
8239   the top 8 bits are slightly different.  In both cases, the caller
8240   must pass the entire 32 bits.  Callers may pass any instruction;
8241   this ignores non-NEON ones.
8242
8243   Caller must supply an IRTemp 'condT' holding the gating condition,
8244   or IRTemp_INVALID indicating the insn is always executed.  In ARM
8245   code, this must always be IRTemp_INVALID because NEON insns are
8246   unconditional for ARM.
8247
8248   Finally, the caller must indicate whether this occurs in ARM or in
8249   Thumb code.
8250*/
8251static Bool decode_NEON_instruction (
8252               /*MOD*/DisResult* dres,
8253               UInt              insn32,
8254               IRTemp            condT,
8255               Bool              isT
8256            )
8257{
8258#  define INSN(_bMax,_bMin)  SLICE_UInt(insn32, (_bMax), (_bMin))
8259
8260   /* There are two kinds of instruction to deal with: load/store and
8261      data processing.  In each case, in ARM mode we merely identify
8262      the kind, and pass it on to the relevant sub-handler.  In Thumb
8263      mode we identify the kind, swizzle the bits around to make it
8264      have the same encoding as in ARM, and hand it on to the
8265      sub-handler.
8266   */
8267
8268   /* In ARM mode, NEON instructions can't be conditional. */
8269   if (!isT)
8270      vassert(condT == IRTemp_INVALID);
8271
8272   /* Data processing:
8273      Thumb: 111U 1111 AAAA Axxx xxxx BBBB CCCC xxxx
8274      ARM:   1111 001U AAAA Axxx xxxx BBBB CCCC xxxx
8275   */
8276   if (!isT && INSN(31,25) == BITS7(1,1,1,1,0,0,1)) {
8277      // ARM, DP
8278      return dis_neon_data_processing(INSN(31,0), condT);
8279   }
8280   if (isT && INSN(31,29) == BITS3(1,1,1)
8281       && INSN(27,24) == BITS4(1,1,1,1)) {
8282      // Thumb, DP
8283      UInt reformatted = INSN(23,0);
8284      reformatted |= (INSN(28,28) << 24); // U bit
8285      reformatted |= (BITS7(1,1,1,1,0,0,1) << 25);
8286      return dis_neon_data_processing(reformatted, condT);
8287   }
8288
8289   /* Load/store:
8290      Thumb: 1111 1001 AxL0 xxxx xxxx BBBB xxxx xxxx
8291      ARM:   1111 0100 AxL0 xxxx xxxx BBBB xxxx xxxx
8292   */
8293   if (!isT && INSN(31,24) == BITS8(1,1,1,1,0,1,0,0)) {
8294      // ARM, memory
8295      return dis_neon_load_or_store(INSN(31,0), isT, condT);
8296   }
8297   if (isT && INSN(31,24) == BITS8(1,1,1,1,1,0,0,1)) {
8298      UInt reformatted = INSN(23,0);
8299      reformatted |= (BITS8(1,1,1,1,0,1,0,0) << 24);
8300      return dis_neon_load_or_store(reformatted, isT, condT);
8301   }
8302
8303   /* Doesn't match. */
8304   return False;
8305
8306#  undef INSN
8307}
8308
8309
8310/*------------------------------------------------------------*/
8311/*--- V6 MEDIA instructions                                ---*/
8312/*------------------------------------------------------------*/
8313
8314/* Both ARM and Thumb */
8315
8316/* Translate a V6 media instruction.    If successful, returns
8317   True and *dres may or may not be updated.  If failure, returns
8318   False and doesn't change *dres nor create any IR.
8319
8320   The Thumb and ARM encodings are completely different.  In Thumb
8321   mode, the caller must pass the entire 32 bits.  In ARM mode it must
8322   pass the lower 28 bits.  Apart from that, callers may pass any
8323   instruction; this function ignores anything it doesn't recognise.
8324
8325   Caller must supply an IRTemp 'condT' holding the gating condition,
8326   or IRTemp_INVALID indicating the insn is always executed.
8327
   Caller must also supply an ARMCondcode 'conq'.  This is only used
8329   for debug printing, no other purpose.  For ARM, this is simply the
8330   top 4 bits of the original instruction.  For Thumb, the condition
8331   is not (really) known until run time, and so ARMCondAL should be
8332   passed, only so that printing of these instructions does not show
8333   any condition.
8334
8335   Finally, the caller must indicate whether this occurs in ARM or in
8336   Thumb code.
8337*/
8338static Bool decode_V6MEDIA_instruction (
8339               /*MOD*/DisResult* dres,
8340               UInt              insnv6m,
8341               IRTemp            condT,
8342               ARMCondcode       conq,
8343               Bool              isT
8344            )
8345{
8346#  define INSNA(_bMax,_bMin)   SLICE_UInt(insnv6m, (_bMax), (_bMin))
8347#  define INSNT0(_bMax,_bMin)  SLICE_UInt( ((insnv6m >> 16) & 0xFFFF), \
8348                                           (_bMax), (_bMin) )
8349#  define INSNT1(_bMax,_bMin)  SLICE_UInt( ((insnv6m >> 0)  & 0xFFFF), \
8350                                           (_bMax), (_bMin) )
8351   HChar dis_buf[128];
8352   dis_buf[0] = 0;
8353
8354   if (isT) {
8355      vassert(conq == ARMCondAL);
8356   } else {
8357      vassert(INSNA(31,28) == BITS4(0,0,0,0)); // caller's obligation
8358      vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
8359   }
8360
8361   /* ----------- smulbb, smulbt, smultb, smultt ----------- */
8362   {
8363     UInt regD = 99, regM = 99, regN = 99, bitM = 0, bitN = 0;
8364     Bool gate = False;
8365
8366     if (isT) {
8367        if (INSNT0(15,4) == 0xFB1 && INSNT1(15,12) == BITS4(1,1,1,1)
8368            && INSNT1(7,6) == BITS2(0,0)) {
8369           regD = INSNT1(11,8);
8370           regM = INSNT1(3,0);
8371           regN = INSNT0(3,0);
8372           bitM = INSNT1(4,4);
8373           bitN = INSNT1(5,5);
8374           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
8375              gate = True;
8376        }
8377     } else {
8378        if (BITS8(0,0,0,1,0,1,1,0) == INSNA(27,20) &&
8379            BITS4(0,0,0,0)         == INSNA(15,12) &&
8380            BITS4(1,0,0,0)         == (INSNA(7,4) & BITS4(1,0,0,1)) ) {
8381           regD = INSNA(19,16);
8382           regM = INSNA(11,8);
8383           regN = INSNA(3,0);
8384           bitM = INSNA(6,6);
8385           bitN = INSNA(5,5);
8386           if (regD != 15 && regN != 15 && regM != 15)
8387              gate = True;
8388        }
8389     }
8390
8391     if (gate) {
8392        IRTemp srcN = newTemp(Ity_I32);
8393        IRTemp srcM = newTemp(Ity_I32);
8394        IRTemp res  = newTemp(Ity_I32);
8395
8396        assign( srcN, binop(Iop_Sar32,
8397                            binop(Iop_Shl32,
8398                                  isT ? getIRegT(regN) : getIRegA(regN),
8399                                  mkU8(bitN ? 0 : 16)), mkU8(16)) );
8400        assign( srcM, binop(Iop_Sar32,
8401                            binop(Iop_Shl32,
8402                                  isT ? getIRegT(regM) : getIRegA(regM),
8403                                  mkU8(bitM ? 0 : 16)), mkU8(16)) );
8404        assign( res, binop(Iop_Mul32, mkexpr(srcN), mkexpr(srcM)) );
8405
8406        if (isT)
8407           putIRegT( regD, mkexpr(res), condT );
8408        else
8409           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
8410
8411        DIP( "smul%c%c%s r%u, r%u, r%u\n", bitN ? 't' : 'b', bitM ? 't' : 'b',
8412             nCC(conq), regD, regN, regM );
8413        return True;
8414     }
8415     /* fall through */
8416   }
8417
8418   /* ------------ smulwb<y><c> <Rd>,<Rn>,<Rm> ------------- */
8419   /* ------------ smulwt<y><c> <Rd>,<Rn>,<Rm> ------------- */
8420   {
8421     UInt regD = 99, regN = 99, regM = 99, bitM = 0;
8422     Bool gate = False;
8423
8424     if (isT) {
8425        if (INSNT0(15,4) == 0xFB3 && INSNT1(15,12) == BITS4(1,1,1,1)
8426            && INSNT1(7,5) == BITS3(0,0,0)) {
8427          regN = INSNT0(3,0);
8428          regD = INSNT1(11,8);
8429          regM = INSNT1(3,0);
8430          bitM = INSNT1(4,4);
8431          if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
8432             gate = True;
8433        }
8434     } else {
8435        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
8436            INSNA(15,12) == BITS4(0,0,0,0)         &&
8437            (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,1,0)) {
8438           regD = INSNA(19,16);
8439           regN = INSNA(3,0);
8440           regM = INSNA(11,8);
8441           bitM = INSNA(6,6);
8442           if (regD != 15 && regN != 15 && regM != 15)
8443              gate = True;
8444        }
8445     }
8446
8447     if (gate) {
8448        IRTemp irt_prod = newTemp(Ity_I64);
8449
8450        assign( irt_prod,
8451                binop(Iop_MullS32,
8452                      isT ? getIRegT(regN) : getIRegA(regN),
8453                      binop(Iop_Sar32,
8454                            binop(Iop_Shl32,
8455                                  isT ? getIRegT(regM) : getIRegA(regM),
8456                                  mkU8(bitM ? 0 : 16)),
8457                            mkU8(16))) );
8458
8459        IRExpr* ire_result = binop(Iop_Or32,
8460                                   binop( Iop_Shl32,
8461                                          unop(Iop_64HIto32, mkexpr(irt_prod)),
8462                                          mkU8(16) ),
8463                                   binop( Iop_Shr32,
8464                                          unop(Iop_64to32, mkexpr(irt_prod)),
8465                                          mkU8(16) ) );
8466
8467        if (isT)
8468           putIRegT( regD, ire_result, condT );
8469        else
8470           putIRegA( regD, ire_result, condT, Ijk_Boring );
8471
8472        DIP("smulw%c%s r%u, r%u, r%u\n",
8473            bitM ? 't' : 'b', nCC(conq),regD,regN,regM);
8474        return True;
8475     }
8476     /* fall through */
8477   }
8478
8479   /* ------------ pkhbt<c> Rd, Rn, Rm {,LSL #imm} ------------- */
8480   /* ------------ pkhtb<c> Rd, Rn, Rm {,ASR #imm} ------------- */
8481   {
8482     UInt regD = 99, regN = 99, regM = 99, imm5 = 99, shift_type = 99;
8483     Bool tbform = False;
8484     Bool gate = False;
8485
8486     if (isT) {
8487        if (INSNT0(15,4) == 0xEAC
8488            && INSNT1(15,15) == 0 && INSNT1(4,4) == 0) {
8489           regN = INSNT0(3,0);
8490           regD = INSNT1(11,8);
8491           regM = INSNT1(3,0);
8492           imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
8493           shift_type = (INSNT1(5,5) << 1) | 0;
8494           tbform = (INSNT1(5,5) == 0) ? False : True;
8495           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
8496              gate = True;
8497        }
8498     } else {
8499        if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
8500            INSNA(5,4)   == BITS2(0,1)             &&
8501            (INSNA(6,6)  == 0 || INSNA(6,6) == 1) ) {
8502           regD = INSNA(15,12);
8503           regN = INSNA(19,16);
8504           regM = INSNA(3,0);
8505           imm5 = INSNA(11,7);
8506           shift_type = (INSNA(6,6) << 1) | 0;
8507           tbform = (INSNA(6,6) == 0) ? False : True;
8508           if (regD != 15 && regN != 15 && regM != 15)
8509              gate = True;
8510        }
8511     }
8512
8513     if (gate) {
8514        IRTemp irt_regM       = newTemp(Ity_I32);
8515        IRTemp irt_regM_shift = newTemp(Ity_I32);
8516        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
8517        compute_result_and_C_after_shift_by_imm5(
8518           dis_buf, &irt_regM_shift, NULL, irt_regM, shift_type, imm5, regM );
8519
8520        UInt mask = (tbform == True) ? 0x0000FFFF : 0xFFFF0000;
8521        IRExpr* ire_result
8522          = binop( Iop_Or32,
8523                   binop(Iop_And32, mkexpr(irt_regM_shift), mkU32(mask)),
8524                   binop(Iop_And32, isT ? getIRegT(regN) : getIRegA(regN),
8525                                    unop(Iop_Not32, mkU32(mask))) );
8526
8527        if (isT)
8528           putIRegT( regD, ire_result, condT );
8529        else
8530           putIRegA( regD, ire_result, condT, Ijk_Boring );
8531
8532        DIP( "pkh%s%s r%u, r%u, r%u %s\n", tbform ? "tb" : "bt",
8533             nCC(conq), regD, regN, regM, dis_buf );
8534
8535        return True;
8536     }
8537     /* fall through */
8538   }
8539
8540   /* ---------- usat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
8541   {
8542     UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
8543     Bool gate = False;
8544
8545     if (isT) {
8546        if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,1,0)
8547            && INSNT0(4,4) == 0
8548            && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
8549           regD       = INSNT1(11,8);
8550           regN       = INSNT0(3,0);
8551           shift_type = (INSNT0(5,5) << 1) | 0;
8552           imm5       = (INSNT1(14,12) << 2) | INSNT1(7,6);
8553           sat_imm    = INSNT1(4,0);
8554           if (!isBadRegT(regD) && !isBadRegT(regN))
8555              gate = True;
8556           if (shift_type == BITS2(1,0) && imm5 == 0)
8557              gate = False;
8558        }
8559     } else {
8560        if (INSNA(27,21) == BITS7(0,1,1,0,1,1,1) &&
8561            INSNA(5,4)   == BITS2(0,1)) {
8562           regD       = INSNA(15,12);
8563           regN       = INSNA(3,0);
8564           shift_type = (INSNA(6,6) << 1) | 0;
8565           imm5       = INSNA(11,7);
8566           sat_imm    = INSNA(20,16);
8567           if (regD != 15 && regN != 15)
8568              gate = True;
8569        }
8570     }
8571
8572     if (gate) {
8573        IRTemp irt_regN       = newTemp(Ity_I32);
8574        IRTemp irt_regN_shift = newTemp(Ity_I32);
8575        IRTemp irt_sat_Q      = newTemp(Ity_I32);
8576        IRTemp irt_result     = newTemp(Ity_I32);
8577
8578        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
8579        compute_result_and_C_after_shift_by_imm5(
8580                dis_buf, &irt_regN_shift, NULL,
8581                irt_regN, shift_type, imm5, regN );
8582
8583        armUnsignedSatQ( &irt_result, &irt_sat_Q, irt_regN_shift, sat_imm );
8584        or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
8585
8586        if (isT)
8587           putIRegT( regD, mkexpr(irt_result), condT );
8588        else
8589           putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
8590
8591        DIP("usat%s r%u, #0x%04x, %s\n",
8592            nCC(conq), regD, imm5, dis_buf);
8593        return True;
8594     }
8595     /* fall through */
8596   }
8597
8598  /* ----------- ssat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
8599   {
8600     UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
8601     Bool gate = False;
8602
8603     if (isT) {
8604        if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
8605            && INSNT0(4,4) == 0
8606            && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
8607           regD       = INSNT1(11,8);
8608           regN       = INSNT0(3,0);
8609           shift_type = (INSNT0(5,5) << 1) | 0;
8610           imm5       = (INSNT1(14,12) << 2) | INSNT1(7,6);
8611           sat_imm    = INSNT1(4,0) + 1;
8612           if (!isBadRegT(regD) && !isBadRegT(regN))
8613              gate = True;
8614           if (shift_type == BITS2(1,0) && imm5 == 0)
8615              gate = False;
8616        }
8617     } else {
8618        if (INSNA(27,21) == BITS7(0,1,1,0,1,0,1) &&
8619            INSNA(5,4)   == BITS2(0,1)) {
8620           regD       = INSNA(15,12);
8621           regN       = INSNA(3,0);
8622           shift_type = (INSNA(6,6) << 1) | 0;
8623           imm5       = INSNA(11,7);
8624           sat_imm    = INSNA(20,16) + 1;
8625           if (regD != 15 && regN != 15)
8626              gate = True;
8627        }
8628     }
8629
8630     if (gate) {
8631        IRTemp irt_regN       = newTemp(Ity_I32);
8632        IRTemp irt_regN_shift = newTemp(Ity_I32);
8633        IRTemp irt_sat_Q      = newTemp(Ity_I32);
8634        IRTemp irt_result     = newTemp(Ity_I32);
8635
8636        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
8637        compute_result_and_C_after_shift_by_imm5(
8638                dis_buf, &irt_regN_shift, NULL,
8639                irt_regN, shift_type, imm5, regN );
8640
8641        armSignedSatQ( irt_regN_shift, sat_imm, &irt_result, &irt_sat_Q );
8642        or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
8643
8644        if (isT)
8645           putIRegT( regD, mkexpr(irt_result), condT );
8646        else
8647           putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
8648
8649        DIP( "ssat%s r%u, #0x%04x, %s\n",
8650             nCC(conq), regD, imm5, dis_buf);
8651        return True;
8652    }
8653    /* fall through */
8654  }
8655
8656   /* -------------- usat16<c> <Rd>,#<imm4>,<Rn> --------------- */
8657   {
8658     UInt regD = 99, regN = 99, sat_imm = 99;
8659     Bool gate = False;
8660
8661     if (isT) {
8662        if (INSNT0(15,4) == 0xF3A && (INSNT1(15,0) & 0xF0F0) == 0x0000) {
8663           regN = INSNT0(3,0);
8664           regD = INSNT1(11,8);
8665           sat_imm = INSNT1(3,0);
8666           if (!isBadRegT(regD) && !isBadRegT(regN))
8667              gate = True;
8668       }
8669     } else {
8670        if (INSNA(27,20) == BITS8(0,1,1,0,1,1,1,0) &&
8671            INSNA(11,8)  == BITS4(1,1,1,1)         &&
8672            INSNA(7,4)   == BITS4(0,0,1,1)) {
8673           regD    = INSNA(15,12);
8674           regN    = INSNA(3,0);
8675           sat_imm = INSNA(19,16);
8676           if (regD != 15 && regN != 15)
8677              gate = True;
8678        }
8679     }
8680
8681     if (gate) {
8682        IRTemp irt_regN    = newTemp(Ity_I32);
8683        IRTemp irt_regN_lo = newTemp(Ity_I32);
8684        IRTemp irt_regN_hi = newTemp(Ity_I32);
8685        IRTemp irt_Q_lo    = newTemp(Ity_I32);
8686        IRTemp irt_Q_hi    = newTemp(Ity_I32);
8687        IRTemp irt_res_lo  = newTemp(Ity_I32);
8688        IRTemp irt_res_hi  = newTemp(Ity_I32);
8689
8690        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
8691        assign( irt_regN_lo, binop( Iop_Sar32,
8692                                    binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
8693                                    mkU8(16)) );
8694        assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
8695
8696        armUnsignedSatQ( &irt_res_lo, &irt_Q_lo, irt_regN_lo, sat_imm );
8697        or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
8698
8699        armUnsignedSatQ( &irt_res_hi, &irt_Q_hi, irt_regN_hi, sat_imm );
8700        or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
8701
8702        IRExpr* ire_result = binop( Iop_Or32,
8703                                    binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)),
8704                                    mkexpr(irt_res_lo) );
8705
8706        if (isT)
8707           putIRegT( regD, ire_result, condT );
8708        else
8709           putIRegA( regD, ire_result, condT, Ijk_Boring );
8710
8711        DIP( "usat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
8712        return True;
8713     }
8714     /* fall through */
8715   }
8716
8717   /* -------------- uadd16<c> <Rd>,<Rn>,<Rm> -------------- */
8718   {
8719     UInt regD = 99, regN = 99, regM = 99;
8720     Bool gate = False;
8721
8722     if (isT) {
8723        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
8724           regN = INSNT0(3,0);
8725           regD = INSNT1(11,8);
8726           regM = INSNT1(3,0);
8727           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
8728              gate = True;
8729        }
8730     } else {
8731        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
8732            INSNA(11,8)  == BITS4(1,1,1,1)         &&
8733            INSNA(7,4)   == BITS4(0,0,0,1)) {
8734           regD = INSNA(15,12);
8735           regN = INSNA(19,16);
8736           regM = INSNA(3,0);
8737           if (regD != 15 && regN != 15 && regM != 15)
8738              gate = True;
8739        }
8740     }
8741
8742     if (gate) {
8743        IRTemp rNt  = newTemp(Ity_I32);
8744        IRTemp rMt  = newTemp(Ity_I32);
8745        IRTemp res  = newTemp(Ity_I32);
8746        IRTemp reso = newTemp(Ity_I32);
8747
8748        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
8749        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
8750
8751        assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
8752        if (isT)
8753           putIRegT( regD, mkexpr(res), condT );
8754        else
8755           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
8756
8757        assign(reso, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
8758        set_GE_32_10_from_bits_31_15(reso, condT);
8759
8760        DIP("uadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
8761        return True;
8762     }
8763     /* fall through */
8764   }
8765
8766   /* -------------- sadd16<c> <Rd>,<Rn>,<Rm> -------------- */
8767   {
8768     UInt regD = 99, regN = 99, regM = 99;
8769     Bool gate = False;
8770
8771     if (isT) {
8772        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
8773           regN = INSNT0(3,0);
8774           regD = INSNT1(11,8);
8775           regM = INSNT1(3,0);
8776           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
8777              gate = True;
8778        }
8779     } else {
8780        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
8781            INSNA(11,8)  == BITS4(1,1,1,1)         &&
8782            INSNA(7,4)   == BITS4(0,0,0,1)) {
8783           regD = INSNA(15,12);
8784           regN = INSNA(19,16);
8785           regM = INSNA(3,0);
8786           if (regD != 15 && regN != 15 && regM != 15)
8787              gate = True;
8788        }
8789     }
8790
8791     if (gate) {
8792        IRTemp rNt  = newTemp(Ity_I32);
8793        IRTemp rMt  = newTemp(Ity_I32);
8794        IRTemp res  = newTemp(Ity_I32);
8795        IRTemp reso = newTemp(Ity_I32);
8796
8797        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
8798        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
8799
8800        assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
8801        if (isT)
8802           putIRegT( regD, mkexpr(res), condT );
8803        else
8804           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
8805
8806        assign(reso, unop(Iop_Not32,
8807                          binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt))));
8808        set_GE_32_10_from_bits_31_15(reso, condT);
8809
8810        DIP("sadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
8811        return True;
8812     }
8813     /* fall through */
8814   }
8815
8816   /* ---------------- usub16<c> <Rd>,<Rn>,<Rm> ---------------- */
8817   {
8818     UInt regD = 99, regN = 99, regM = 99;
8819     Bool gate = False;
8820
8821     if (isT) {
8822        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
8823           regN = INSNT0(3,0);
8824           regD = INSNT1(11,8);
8825           regM = INSNT1(3,0);
8826           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
8827              gate = True;
8828        }
8829     } else {
8830        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
8831            INSNA(11,8)  == BITS4(1,1,1,1)         &&
8832            INSNA(7,4)   == BITS4(0,1,1,1)) {
8833           regD = INSNA(15,12);
8834           regN = INSNA(19,16);
8835           regM = INSNA(3,0);
8836           if (regD != 15 && regN != 15 && regM != 15)
8837             gate = True;
8838        }
8839     }
8840
8841     if (gate) {
8842        IRTemp rNt  = newTemp(Ity_I32);
8843        IRTemp rMt  = newTemp(Ity_I32);
8844        IRTemp res  = newTemp(Ity_I32);
8845        IRTemp reso = newTemp(Ity_I32);
8846
8847        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
8848        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
8849
8850        assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
8851        if (isT)
8852           putIRegT( regD, mkexpr(res), condT );
8853        else
8854           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
8855
8856        assign(reso, unop(Iop_Not32,
8857                          binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt))));
8858        set_GE_32_10_from_bits_31_15(reso, condT);
8859
8860        DIP("usub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
8861        return True;
8862     }
8863     /* fall through */
8864   }
8865
8866   /* -------------- ssub16<c> <Rd>,<Rn>,<Rm> -------------- */
8867   {
8868     UInt regD = 99, regN = 99, regM = 99;
8869     Bool gate = False;
8870
8871     if (isT) {
8872        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
8873           regN = INSNT0(3,0);
8874           regD = INSNT1(11,8);
8875           regM = INSNT1(3,0);
8876           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
8877              gate = True;
8878        }
8879     } else {
8880        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
8881            INSNA(11,8)  == BITS4(1,1,1,1)         &&
8882            INSNA(7,4)   == BITS4(0,1,1,1)) {
8883           regD = INSNA(15,12);
8884           regN = INSNA(19,16);
8885           regM = INSNA(3,0);
8886           if (regD != 15 && regN != 15 && regM != 15)
8887              gate = True;
8888        }
8889     }
8890
8891     if (gate) {
8892        IRTemp rNt  = newTemp(Ity_I32);
8893        IRTemp rMt  = newTemp(Ity_I32);
8894        IRTemp res  = newTemp(Ity_I32);
8895        IRTemp reso = newTemp(Ity_I32);
8896
8897        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
8898        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
8899
8900        assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
8901        if (isT)
8902           putIRegT( regD, mkexpr(res), condT );
8903        else
8904           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
8905
8906        assign(reso, unop(Iop_Not32,
8907                          binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt))));
8908        set_GE_32_10_from_bits_31_15(reso, condT);
8909
8910        DIP("ssub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
8911        return True;
8912     }
8913     /* fall through */
8914   }
8915
8916   /* ----------------- uadd8<c> <Rd>,<Rn>,<Rm> ---------------- */
8917   {
8918     UInt regD = 99, regN = 99, regM = 99;
8919     Bool gate = False;
8920
8921     if (isT) {
8922        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
8923           regN = INSNT0(3,0);
8924           regD = INSNT1(11,8);
8925           regM = INSNT1(3,0);
8926           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
8927              gate = True;
8928        }
8929     } else {
8930        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
8931            INSNA(11,8)  == BITS4(1,1,1,1)         &&
8932            (INSNA(7,4)  == BITS4(1,0,0,1))) {
8933           regD = INSNA(15,12);
8934           regN = INSNA(19,16);
8935           regM = INSNA(3,0);
8936           if (regD != 15 && regN != 15 && regM != 15)
8937              gate = True;
8938        }
8939     }
8940
8941     if (gate) {
8942        IRTemp rNt  = newTemp(Ity_I32);
8943        IRTemp rMt  = newTemp(Ity_I32);
8944        IRTemp res  = newTemp(Ity_I32);
8945        IRTemp reso = newTemp(Ity_I32);
8946
8947        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
8948        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
8949
8950        assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
8951        if (isT)
8952           putIRegT( regD, mkexpr(res), condT );
8953        else
8954           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
8955
8956        assign(reso, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
8957        set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
8958
8959        DIP("uadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
8960        return True;
8961     }
8962     /* fall through */
8963   }
8964
8965   /* ------------------- sadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
8966   {
8967     UInt regD = 99, regN = 99, regM = 99;
8968     Bool gate = False;
8969
8970     if (isT) {
8971        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
8972           regN = INSNT0(3,0);
8973           regD = INSNT1(11,8);
8974           regM = INSNT1(3,0);
8975           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
8976              gate = True;
8977        }
8978     } else {
8979        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
8980            INSNA(11,8)  == BITS4(1,1,1,1)         &&
8981            (INSNA(7,4)  == BITS4(1,0,0,1))) {
8982           regD = INSNA(15,12);
8983           regN = INSNA(19,16);
8984           regM = INSNA(3,0);
8985           if (regD != 15 && regN != 15 && regM != 15)
8986              gate = True;
8987        }
8988     }
8989
8990     if (gate) {
8991        IRTemp rNt  = newTemp(Ity_I32);
8992        IRTemp rMt  = newTemp(Ity_I32);
8993        IRTemp res  = newTemp(Ity_I32);
8994        IRTemp reso = newTemp(Ity_I32);
8995
8996        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
8997        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
8998
8999        assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
9000        if (isT)
9001           putIRegT( regD, mkexpr(res), condT );
9002        else
9003           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9004
9005        assign(reso, unop(Iop_Not32,
9006                          binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt))));
9007        set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9008
9009        DIP("sadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9010        return True;
9011     }
9012     /* fall through */
9013   }
9014
9015   /* ------------------- usub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9016   {
9017     UInt regD = 99, regN = 99, regM = 99;
9018     Bool gate = False;
9019
9020     if (isT) {
9021        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9022           regN = INSNT0(3,0);
9023           regD = INSNT1(11,8);
9024           regM = INSNT1(3,0);
9025           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9026              gate = True;
9027        }
9028     } else {
9029        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9030            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9031            (INSNA(7,4)  == BITS4(1,1,1,1))) {
9032           regD = INSNA(15,12);
9033           regN = INSNA(19,16);
9034           regM = INSNA(3,0);
9035           if (regD != 15 && regN != 15 && regM != 15)
9036             gate = True;
9037        }
9038     }
9039
9040     if (gate) {
9041        IRTemp rNt  = newTemp(Ity_I32);
9042        IRTemp rMt  = newTemp(Ity_I32);
9043        IRTemp res  = newTemp(Ity_I32);
9044        IRTemp reso = newTemp(Ity_I32);
9045
9046        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9047        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9048
9049        assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
9050        if (isT)
9051           putIRegT( regD, mkexpr(res), condT );
9052        else
9053           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9054
9055        assign(reso, unop(Iop_Not32,
9056                          binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt))));
9057        set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9058
9059        DIP("usub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9060        return True;
9061     }
9062     /* fall through */
9063   }
9064
9065   /* ------------------- ssub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9066   {
9067     UInt regD = 99, regN = 99, regM = 99;
9068     Bool gate = False;
9069
9070     if (isT) {
9071        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9072           regN = INSNT0(3,0);
9073           regD = INSNT1(11,8);
9074           regM = INSNT1(3,0);
9075           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9076              gate = True;
9077        }
9078     } else {
9079        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9080            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9081            INSNA(7,4)   == BITS4(1,1,1,1)) {
9082           regD = INSNA(15,12);
9083           regN = INSNA(19,16);
9084           regM = INSNA(3,0);
9085           if (regD != 15 && regN != 15 && regM != 15)
9086              gate = True;
9087        }
9088     }
9089
9090     if (gate) {
9091        IRTemp rNt  = newTemp(Ity_I32);
9092        IRTemp rMt  = newTemp(Ity_I32);
9093        IRTemp res  = newTemp(Ity_I32);
9094        IRTemp reso = newTemp(Ity_I32);
9095
9096        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9097        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9098
9099        assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
9100        if (isT)
9101           putIRegT( regD, mkexpr(res), condT );
9102        else
9103           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9104
9105        assign(reso, unop(Iop_Not32,
9106                          binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt))));
9107        set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9108
9109        DIP("ssub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9110        return True;
9111     }
9112     /* fall through */
9113   }
9114
9115   /* ------------------ qadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
9116   {
9117     UInt regD = 99, regN = 99, regM = 99;
9118     Bool gate = False;
9119
9120     if (isT) {
9121        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9122           regN = INSNT0(3,0);
9123           regD = INSNT1(11,8);
9124           regM = INSNT1(3,0);
9125           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9126              gate = True;
9127        }
9128     } else {
9129        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9130            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9131            INSNA(7,4)   == BITS4(1,0,0,1)) {
9132           regD = INSNA(15,12);
9133           regN = INSNA(19,16);
9134           regM = INSNA(3,0);
9135           if (regD != 15 && regN != 15 && regM != 15)
9136              gate = True;
9137        }
9138     }
9139
9140     if (gate) {
9141        IRTemp rNt   = newTemp(Ity_I32);
9142        IRTemp rMt   = newTemp(Ity_I32);
9143        IRTemp res_q = newTemp(Ity_I32);
9144
9145        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9146        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9147
9148        assign(res_q, binop(Iop_QAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
9149        if (isT)
9150           putIRegT( regD, mkexpr(res_q), condT );
9151        else
9152           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9153
9154        DIP("qadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9155        return True;
9156     }
9157     /* fall through */
9158   }
9159
9160   /* ------------------ qsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
9161   {
9162     UInt regD = 99, regN = 99, regM = 99;
9163     Bool gate = False;
9164
9165     if (isT) {
9166        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9167           regN = INSNT0(3,0);
9168           regD = INSNT1(11,8);
9169           regM = INSNT1(3,0);
9170           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9171              gate = True;
9172        }
9173     } else {
9174        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9175            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9176            INSNA(7,4)   == BITS4(1,1,1,1)) {
9177           regD = INSNA(15,12);
9178           regN = INSNA(19,16);
9179           regM = INSNA(3,0);
9180           if (regD != 15 && regN != 15 && regM != 15)
9181              gate = True;
9182        }
9183     }
9184
9185     if (gate) {
9186        IRTemp rNt   = newTemp(Ity_I32);
9187        IRTemp rMt   = newTemp(Ity_I32);
9188        IRTemp res_q = newTemp(Ity_I32);
9189
9190        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9191        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9192
9193        assign(res_q, binop(Iop_QSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
9194        if (isT)
9195           putIRegT( regD, mkexpr(res_q), condT );
9196        else
9197           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9198
9199        DIP("qsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9200        return True;
9201     }
9202     /* fall through */
9203   }
9204
9205   /* ------------------ uqadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
9206   {
9207     UInt regD = 99, regN = 99, regM = 99;
9208     Bool gate = False;
9209
9210     if (isT) {
9211        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
9212           regN = INSNT0(3,0);
9213           regD = INSNT1(11,8);
9214           regM = INSNT1(3,0);
9215           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9216              gate = True;
9217        }
9218     } else {
9219        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
9220            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9221            (INSNA(7,4)  == BITS4(1,0,0,1))) {
9222           regD = INSNA(15,12);
9223           regN = INSNA(19,16);
9224           regM = INSNA(3,0);
9225           if (regD != 15 && regN != 15 && regM != 15)
9226              gate = True;
9227        }
9228     }
9229
9230     if (gate) {
9231        IRTemp rNt   = newTemp(Ity_I32);
9232        IRTemp rMt   = newTemp(Ity_I32);
9233        IRTemp res_q = newTemp(Ity_I32);
9234
9235        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9236        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9237
9238        assign(res_q, binop(Iop_QAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9239        if (isT)
9240           putIRegT( regD, mkexpr(res_q), condT );
9241        else
9242           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9243
9244        DIP("uqadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9245        return True;
9246     }
9247     /* fall through */
9248   }
9249
9250   /* ------------------ uqsub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9251   {
9252     UInt regD = 99, regN = 99, regM = 99;
9253     Bool gate = False;
9254
9255     if (isT) {
9256        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
9257           regN = INSNT0(3,0);
9258           regD = INSNT1(11,8);
9259           regM = INSNT1(3,0);
9260           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9261              gate = True;
9262        }
9263     } else {
9264        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
9265            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9266            (INSNA(7,4)  == BITS4(1,1,1,1))) {
9267           regD = INSNA(15,12);
9268           regN = INSNA(19,16);
9269           regM = INSNA(3,0);
9270           if (regD != 15 && regN != 15 && regM != 15)
9271             gate = True;
9272        }
9273     }
9274
9275     if (gate) {
9276        IRTemp rNt   = newTemp(Ity_I32);
9277        IRTemp rMt   = newTemp(Ity_I32);
9278        IRTemp res_q = newTemp(Ity_I32);
9279
9280        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9281        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9282
9283        assign(res_q, binop(Iop_QSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
9284        if (isT)
9285           putIRegT( regD, mkexpr(res_q), condT );
9286        else
9287           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9288
9289        DIP("uqsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9290        return True;
9291     }
9292     /* fall through */
9293   }
9294
9295   /* ----------------- uhadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
9296   {
9297     UInt regD = 99, regN = 99, regM = 99;
9298     Bool gate = False;
9299
9300     if (isT) {
9301        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
9302           regN = INSNT0(3,0);
9303           regD = INSNT1(11,8);
9304           regM = INSNT1(3,0);
9305           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9306              gate = True;
9307        }
9308     } else {
9309        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
9310            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9311            INSNA(7,4)   == BITS4(1,0,0,1)) {
9312           regD = INSNA(15,12);
9313           regN = INSNA(19,16);
9314           regM = INSNA(3,0);
9315           if (regD != 15 && regN != 15 && regM != 15)
9316              gate = True;
9317        }
9318     }
9319
9320     if (gate) {
9321        IRTemp rNt   = newTemp(Ity_I32);
9322        IRTemp rMt   = newTemp(Ity_I32);
9323        IRTemp res_q = newTemp(Ity_I32);
9324
9325        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9326        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9327
9328        assign(res_q, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9329        if (isT)
9330           putIRegT( regD, mkexpr(res_q), condT );
9331        else
9332           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9333
9334        DIP("uhadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9335        return True;
9336     }
9337     /* fall through */
9338   }
9339
9340   /* ----------------- shadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
9341   {
9342     UInt regD = 99, regN = 99, regM = 99;
9343     Bool gate = False;
9344
9345     if (isT) {
9346        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
9347           regN = INSNT0(3,0);
9348           regD = INSNT1(11,8);
9349           regM = INSNT1(3,0);
9350           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9351              gate = True;
9352        }
9353     } else {
9354        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
9355            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9356            INSNA(7,4)   == BITS4(1,0,0,1)) {
9357           regD = INSNA(15,12);
9358           regN = INSNA(19,16);
9359           regM = INSNA(3,0);
9360           if (regD != 15 && regN != 15 && regM != 15)
9361              gate = True;
9362        }
9363     }
9364
9365     if (gate) {
9366        IRTemp rNt   = newTemp(Ity_I32);
9367        IRTemp rMt   = newTemp(Ity_I32);
9368        IRTemp res_q = newTemp(Ity_I32);
9369
9370        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9371        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9372
9373        assign(res_q, binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
9374        if (isT)
9375           putIRegT( regD, mkexpr(res_q), condT );
9376        else
9377           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9378
9379        DIP("shadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9380        return True;
9381     }
9382     /* fall through */
9383   }
9384
9385   /* ------------------ qadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
9386   {
9387     UInt regD = 99, regN = 99, regM = 99;
9388     Bool gate = False;
9389
9390     if (isT) {
9391        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9392           regN = INSNT0(3,0);
9393           regD = INSNT1(11,8);
9394           regM = INSNT1(3,0);
9395           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9396              gate = True;
9397        }
9398     } else {
9399        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9400            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9401            INSNA(7,4)   == BITS4(0,0,0,1)) {
9402           regD = INSNA(15,12);
9403           regN = INSNA(19,16);
9404           regM = INSNA(3,0);
9405           if (regD != 15 && regN != 15 && regM != 15)
9406              gate = True;
9407        }
9408     }
9409
9410     if (gate) {
9411        IRTemp rNt   = newTemp(Ity_I32);
9412        IRTemp rMt   = newTemp(Ity_I32);
9413        IRTemp res_q = newTemp(Ity_I32);
9414
9415        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9416        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9417
9418        assign(res_q, binop(Iop_QAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
9419        if (isT)
9420           putIRegT( regD, mkexpr(res_q), condT );
9421        else
9422           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9423
9424        DIP("qadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9425        return True;
9426     }
9427     /* fall through */
9428   }
9429
9430   /* ------------------ qsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
9431   {
9432     UInt regD = 99, regN = 99, regM = 99;
9433     Bool gate = False;
9434
9435      if (isT) {
9436        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9437           regN = INSNT0(3,0);
9438           regD = INSNT1(11,8);
9439           regM = INSNT1(3,0);
9440           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9441              gate = True;
9442        }
9443     } else {
9444        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9445            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9446            INSNA(7,4)   == BITS4(0,1,1,1)) {
9447           regD = INSNA(15,12);
9448           regN = INSNA(19,16);
9449           regM = INSNA(3,0);
9450           if (regD != 15 && regN != 15 && regM != 15)
9451             gate = True;
9452        }
9453     }
9454
9455     if (gate) {
9456        IRTemp rNt   = newTemp(Ity_I32);
9457        IRTemp rMt   = newTemp(Ity_I32);
9458        IRTemp res_q = newTemp(Ity_I32);
9459
9460        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9461        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9462
9463        assign(res_q, binop(Iop_QSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
9464        if (isT)
9465           putIRegT( regD, mkexpr(res_q), condT );
9466        else
9467           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9468
9469        DIP("qsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9470        return True;
9471     }
9472     /* fall through */
9473   }
9474
9475   /////////////////////////////////////////////////////////////////
9476   /////////////////////////////////////////////////////////////////
9477   /////////////////////////////////////////////////////////////////
9478   /////////////////////////////////////////////////////////////////
9479   /////////////////////////////////////////////////////////////////
9480
9481   /* ------------------- qsax<c> <Rd>,<Rn>,<Rm> ------------------- */
   /* note: the hardware seems to construct the result differently
      from what the manual says. */
9484   {
9485     UInt regD = 99, regN = 99, regM = 99;
9486     Bool gate = False;
9487
9488     if (isT) {
9489        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9490           regN = INSNT0(3,0);
9491           regD = INSNT1(11,8);
9492           regM = INSNT1(3,0);
9493           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9494              gate = True;
9495        }
9496     } else {
9497        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9498            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9499            INSNA(7,4)   == BITS4(0,1,0,1)) {
9500           regD = INSNA(15,12);
9501           regN = INSNA(19,16);
9502           regM = INSNA(3,0);
9503           if (regD != 15 && regN != 15 && regM != 15)
9504              gate = True;
9505        }
9506     }
9507
9508     if (gate) {
9509        IRTemp irt_regN     = newTemp(Ity_I32);
9510        IRTemp irt_regM     = newTemp(Ity_I32);
9511        IRTemp irt_sum      = newTemp(Ity_I32);
9512        IRTemp irt_diff     = newTemp(Ity_I32);
9513        IRTemp irt_sum_res  = newTemp(Ity_I32);
9514        IRTemp irt_diff_res = newTemp(Ity_I32);
9515
9516        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9517        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
9518
9519        assign( irt_diff,
9520                binop( Iop_Sub32,
9521                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
9522                       binop( Iop_Sar32,
9523                              binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
9524                              mkU8(16) ) ) );
9525        armSignedSatQ( irt_diff, 0x10, &irt_diff_res, NULL);
9526
9527        assign( irt_sum,
9528                binop( Iop_Add32,
9529                       binop( Iop_Sar32,
9530                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
9531                              mkU8(16) ),
9532                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) )) );
9533        armSignedSatQ( irt_sum, 0x10, &irt_sum_res, NULL );
9534
9535        IRExpr* ire_result = binop( Iop_Or32,
9536                                    binop( Iop_Shl32, mkexpr(irt_diff_res),
9537                                           mkU8(16) ),
9538                                    binop( Iop_And32, mkexpr(irt_sum_res),
9539                                           mkU32(0xFFFF)) );
9540
9541        if (isT)
9542           putIRegT( regD, ire_result, condT );
9543        else
9544           putIRegA( regD, ire_result, condT, Ijk_Boring );
9545
9546        DIP( "qsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
9547        return True;
9548     }
9549     /* fall through */
9550   }
9551
9552   /* ------------------- qasx<c> <Rd>,<Rn>,<Rm> ------------------- */
9553   {
9554     UInt regD = 99, regN = 99, regM = 99;
9555     Bool gate = False;
9556
9557     if (isT) {
9558        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9559           regN = INSNT0(3,0);
9560           regD = INSNT1(11,8);
9561           regM = INSNT1(3,0);
9562           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9563              gate = True;
9564        }
9565     } else {
9566        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9567            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9568            INSNA(7,4)   == BITS4(0,0,1,1)) {
9569           regD = INSNA(15,12);
9570           regN = INSNA(19,16);
9571           regM = INSNA(3,0);
9572           if (regD != 15 && regN != 15 && regM != 15)
9573              gate = True;
9574        }
9575     }
9576
9577     if (gate) {
9578        IRTemp irt_regN     = newTemp(Ity_I32);
9579        IRTemp irt_regM     = newTemp(Ity_I32);
9580        IRTemp irt_sum      = newTemp(Ity_I32);
9581        IRTemp irt_diff     = newTemp(Ity_I32);
9582        IRTemp irt_res_sum  = newTemp(Ity_I32);
9583        IRTemp irt_res_diff = newTemp(Ity_I32);
9584
9585        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9586        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
9587
9588        assign( irt_diff,
9589                binop( Iop_Sub32,
9590                       binop( Iop_Sar32,
9591                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
9592                              mkU8(16) ),
9593                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
9594        armSignedSatQ( irt_diff, 0x10, &irt_res_diff, NULL );
9595
9596        assign( irt_sum,
9597                binop( Iop_Add32,
9598                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
9599                       binop( Iop_Sar32,
9600                              binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
9601                              mkU8(16) ) ) );
9602        armSignedSatQ( irt_sum, 0x10, &irt_res_sum, NULL );
9603
9604        IRExpr* ire_result
9605          = binop( Iop_Or32,
9606                   binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
9607                   binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
9608
9609        if (isT)
9610           putIRegT( regD, ire_result, condT );
9611        else
9612           putIRegA( regD, ire_result, condT, Ijk_Boring );
9613
9614        DIP( "qasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
9615        return True;
9616     }
9617     /* fall through */
9618   }
9619
9620   /* ------------------- sasx<c> <Rd>,<Rn>,<Rm> ------------------- */
9621   {
9622     UInt regD = 99, regN = 99, regM = 99;
9623     Bool gate = False;
9624
9625     if (isT) {
9626        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9627           regN = INSNT0(3,0);
9628           regD = INSNT1(11,8);
9629           regM = INSNT1(3,0);
9630           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9631              gate = True;
9632        }
9633     } else {
9634        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9635            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9636            INSNA(7,4)   == BITS4(0,0,1,1)) {
9637           regD = INSNA(15,12);
9638           regN = INSNA(19,16);
9639           regM = INSNA(3,0);
9640           if (regD != 15 && regN != 15 && regM != 15)
9641              gate = True;
9642        }
9643     }
9644
9645     if (gate) {
9646        IRTemp irt_regN = newTemp(Ity_I32);
9647        IRTemp irt_regM = newTemp(Ity_I32);
9648        IRTemp irt_sum  = newTemp(Ity_I32);
9649        IRTemp irt_diff = newTemp(Ity_I32);
9650
9651        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9652        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
9653
9654        assign( irt_diff,
9655                binop( Iop_Sub32,
9656                       binop( Iop_Sar32,
9657                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
9658                              mkU8(16) ),
9659                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
9660
9661        assign( irt_sum,
9662                binop( Iop_Add32,
9663                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
9664                       binop( Iop_Sar32,
9665                              binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
9666                              mkU8(16) ) ) );
9667
9668        IRExpr* ire_result
9669          = binop( Iop_Or32,
9670                   binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
9671                   binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
9672
9673        IRTemp ge10 = newTemp(Ity_I32);
9674        assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
9675        put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
9676        put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
9677
9678        IRTemp ge32 = newTemp(Ity_I32);
9679        assign(ge32, unop(Iop_Not32, mkexpr(irt_sum)));
9680        put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
9681        put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
9682
9683        if (isT)
9684           putIRegT( regD, ire_result, condT );
9685        else
9686           putIRegA( regD, ire_result, condT, Ijk_Boring );
9687
9688        DIP( "sasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
9689        return True;
9690     }
9691     /* fall through */
9692   }
9693
9694   /* --------------- smuad, smuadx<c><Rd>,<Rn>,<Rm> --------------- */
   /* --------------- smusd, smusdx<c><Rd>,<Rn>,<Rm> --------------- */
9696   {
9697     UInt regD = 99, regN = 99, regM = 99, bitM = 99;
9698     Bool gate = False, isAD = False;
9699
9700     if (isT) {
9701        if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
9702            && (INSNT1(15,0) & 0xF0E0) == 0xF000) {
9703           regN = INSNT0(3,0);
9704           regD = INSNT1(11,8);
9705           regM = INSNT1(3,0);
9706           bitM = INSNT1(4,4);
9707           isAD = INSNT0(15,4) == 0xFB2;
9708           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9709              gate = True;
9710        }
9711     } else {
9712        if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
9713            INSNA(15,12) == BITS4(1,1,1,1)         &&
9714            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1) ) {
9715           regD = INSNA(19,16);
9716           regN = INSNA(3,0);
9717           regM = INSNA(11,8);
9718           bitM = INSNA(5,5);
9719           isAD = INSNA(6,6) == 0;
9720           if (regD != 15 && regN != 15 && regM != 15)
9721              gate = True;
9722        }
9723     }
9724
9725     if (gate) {
9726        IRTemp irt_regN    = newTemp(Ity_I32);
9727        IRTemp irt_regM    = newTemp(Ity_I32);
9728        IRTemp irt_prod_lo = newTemp(Ity_I32);
9729        IRTemp irt_prod_hi = newTemp(Ity_I32);
9730        IRTemp tmpM        = newTemp(Ity_I32);
9731
9732        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9733
9734        assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
9735        assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
9736
9737        assign( irt_prod_lo,
9738                binop( Iop_Mul32,
9739                       binop( Iop_Sar32,
9740                              binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
9741                              mkU8(16) ),
9742                       binop( Iop_Sar32,
9743                              binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
9744                              mkU8(16) ) ) );
9745        assign( irt_prod_hi, binop(Iop_Mul32,
9746                                   binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)),
9747                                   binop(Iop_Sar32, mkexpr(irt_regM), mkU8(16))) );
9748        IRExpr* ire_result
9749           = binop( isAD ? Iop_Add32 : Iop_Sub32,
9750                    mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) );
9751
9752        if (isT)
9753           putIRegT( regD, ire_result, condT );
9754        else
9755           putIRegA( regD, ire_result, condT, Ijk_Boring );
9756
9757        if (isAD) {
9758           or_into_QFLAG32(
9759              signed_overflow_after_Add32( ire_result,
9760                                           irt_prod_lo, irt_prod_hi ),
9761              condT
9762           );
9763        }
9764
9765        DIP("smu%cd%s%s r%u, r%u, r%u\n",
9766            isAD ? 'a' : 's',
9767            bitM ? "x" : "", nCC(conq), regD, regN, regM);
9768        return True;
9769     }
9770     /* fall through */
9771   }
9772
9773   /* --------------- smlad{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
9774   /* --------------- smlsd{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
9775   {
9776     UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
9777     Bool gate = False, isAD = False;
9778
9779     if (isT) {
9780       if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
9781           && INSNT1(7,5) == BITS3(0,0,0)) {
9782           regN = INSNT0(3,0);
9783           regD = INSNT1(11,8);
9784           regM = INSNT1(3,0);
9785           regA = INSNT1(15,12);
9786           bitM = INSNT1(4,4);
9787           isAD = INSNT0(15,4) == 0xFB2;
9788           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
9789               && !isBadRegT(regA))
9790              gate = True;
9791        }
9792     } else {
9793        if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
9794            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
9795           regD = INSNA(19,16);
9796           regA = INSNA(15,12);
9797           regN = INSNA(3,0);
9798           regM = INSNA(11,8);
9799           bitM = INSNA(5,5);
9800           isAD = INSNA(6,6) == 0;
9801           if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
9802              gate = True;
9803        }
9804     }
9805
9806     if (gate) {
9807        IRTemp irt_regN    = newTemp(Ity_I32);
9808        IRTemp irt_regM    = newTemp(Ity_I32);
9809        IRTemp irt_regA    = newTemp(Ity_I32);
9810        IRTemp irt_prod_lo = newTemp(Ity_I32);
9811        IRTemp irt_prod_hi = newTemp(Ity_I32);
9812        IRTemp irt_sum     = newTemp(Ity_I32);
9813        IRTemp tmpM        = newTemp(Ity_I32);
9814
9815        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9816        assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
9817
9818        assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
9819        assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
9820
9821        assign( irt_prod_lo,
9822                binop(Iop_Mul32,
9823                      binop(Iop_Sar32,
9824                            binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
9825                            mkU8(16)),
9826                      binop(Iop_Sar32,
9827                            binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
9828                            mkU8(16))) );
9829        assign( irt_prod_hi,
9830                binop( Iop_Mul32,
9831                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
9832                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
9833        assign( irt_sum, binop( isAD ? Iop_Add32 : Iop_Sub32,
9834                                mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) ) );
9835
9836        IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_sum), mkexpr(irt_regA));
9837
9838        if (isT)
9839           putIRegT( regD, ire_result, condT );
9840        else
9841           putIRegA( regD, ire_result, condT, Ijk_Boring );
9842
9843        if (isAD) {
9844           or_into_QFLAG32(
9845              signed_overflow_after_Add32( mkexpr(irt_sum),
9846                                           irt_prod_lo, irt_prod_hi ),
9847              condT
9848           );
9849        }
9850
9851        or_into_QFLAG32(
9852           signed_overflow_after_Add32( ire_result, irt_sum, irt_regA ),
9853           condT
9854        );
9855
9856        DIP("sml%cd%s%s r%u, r%u, r%u, r%u\n",
9857            isAD ? 'a' : 's',
9858            bitM ? "x" : "", nCC(conq), regD, regN, regM, regA);
9859        return True;
9860     }
9861     /* fall through */
9862   }
9863
9864   /* ----- smlabb, smlabt, smlatb, smlatt <Rd>,<Rn>,<Rm>,<Ra> ----- */
9865   {
9866     UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99, bitN = 99;
9867     Bool gate = False;
9868
9869     if (isT) {
9870        if (INSNT0(15,4) == 0xFB1 && INSNT1(7,6) == BITS2(0,0)) {
9871           regN = INSNT0(3,0);
9872           regD = INSNT1(11,8);
9873           regM = INSNT1(3,0);
9874           regA = INSNT1(15,12);
9875           bitM = INSNT1(4,4);
9876           bitN = INSNT1(5,5);
9877           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
9878               && !isBadRegT(regA))
9879              gate = True;
9880        }
9881     } else {
9882        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
9883            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
9884           regD = INSNA(19,16);
9885           regN = INSNA(3,0);
9886           regM = INSNA(11,8);
9887           regA = INSNA(15,12);
9888           bitM = INSNA(6,6);
9889           bitN = INSNA(5,5);
9890           if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
9891              gate = True;
9892        }
9893     }
9894
9895     if (gate) {
9896        IRTemp irt_regA = newTemp(Ity_I32);
9897        IRTemp irt_prod = newTemp(Ity_I32);
9898
9899        assign( irt_prod,
9900                binop(Iop_Mul32,
9901                      binop(Iop_Sar32,
9902                            binop(Iop_Shl32,
9903                                  isT ? getIRegT(regN) : getIRegA(regN),
9904                                  mkU8(bitN ? 0 : 16)),
9905                            mkU8(16)),
9906                      binop(Iop_Sar32,
9907                            binop(Iop_Shl32,
9908                                  isT ? getIRegT(regM) : getIRegA(regM),
9909                                  mkU8(bitM ? 0 : 16)),
9910                            mkU8(16))) );
9911
9912        assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
9913
9914        IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_prod), mkexpr(irt_regA));
9915
9916        if (isT)
9917           putIRegT( regD, ire_result, condT );
9918        else
9919           putIRegA( regD, ire_result, condT, Ijk_Boring );
9920
9921        or_into_QFLAG32(
9922           signed_overflow_after_Add32( ire_result, irt_prod, irt_regA ),
9923           condT
9924        );
9925
9926        DIP( "smla%c%c%s r%u, r%u, r%u, r%u\n",
9927             bitN ? 't' : 'b', bitM ? 't' : 'b',
9928             nCC(conq), regD, regN, regM, regA );
9929        return True;
9930     }
9931     /* fall through */
9932   }
9933
9934   /* ----- smlawb, smlawt <Rd>,<Rn>,<Rm>,<Ra> ----- */
9935   {
9936     UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
9937     Bool gate = False;
9938
9939     if (isT) {
9940        if (INSNT0(15,4) == 0xFB3 && INSNT1(7,5) == BITS3(0,0,0)) {
9941           regN = INSNT0(3,0);
9942           regD = INSNT1(11,8);
9943           regM = INSNT1(3,0);
9944           regA = INSNT1(15,12);
9945           bitM = INSNT1(4,4);
9946           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
9947               && !isBadRegT(regA))
9948              gate = True;
9949        }
9950     } else {
9951        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
9952            (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,0,0)) {
9953           regD = INSNA(19,16);
9954           regN = INSNA(3,0);
9955           regM = INSNA(11,8);
9956           regA = INSNA(15,12);
9957           bitM = INSNA(6,6);
9958           if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
9959              gate = True;
9960        }
9961     }
9962
9963     if (gate) {
9964        IRTemp irt_regA = newTemp(Ity_I32);
9965        IRTemp irt_prod = newTemp(Ity_I64);
9966
9967        assign( irt_prod,
9968                binop(Iop_MullS32,
9969                      isT ? getIRegT(regN) : getIRegA(regN),
9970                      binop(Iop_Sar32,
9971                            binop(Iop_Shl32,
9972                                  isT ? getIRegT(regM) : getIRegA(regM),
9973                                  mkU8(bitM ? 0 : 16)),
9974                            mkU8(16))) );
9975
9976        assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
9977
9978        IRTemp prod32 = newTemp(Ity_I32);
9979        assign(prod32,
9980               binop(Iop_Or32,
9981                     binop(Iop_Shl32, unop(Iop_64HIto32, mkexpr(irt_prod)), mkU8(16)),
9982                     binop(Iop_Shr32, unop(Iop_64to32, mkexpr(irt_prod)), mkU8(16))
9983        ));
9984
9985        IRExpr* ire_result = binop(Iop_Add32, mkexpr(prod32), mkexpr(irt_regA));
9986
9987        if (isT)
9988           putIRegT( regD, ire_result, condT );
9989        else
9990           putIRegA( regD, ire_result, condT, Ijk_Boring );
9991
9992        or_into_QFLAG32(
9993           signed_overflow_after_Add32( ire_result, prod32, irt_regA ),
9994           condT
9995        );
9996
9997        DIP( "smlaw%c%s r%u, r%u, r%u, r%u\n",
9998             bitM ? 't' : 'b',
9999             nCC(conq), regD, regN, regM, regA );
10000        return True;
10001     }
10002     /* fall through */
10003   }
10004
10005   /* ------------------- sel<c> <Rd>,<Rn>,<Rm> -------------------- */
10006   /* fixme: fix up the test in v6media.c so that we can pass the ge
10007      flags as part of the test. */
10008   {
10009     UInt regD = 99, regN = 99, regM = 99;
10010     Bool gate = False;
10011
10012     if (isT) {
10013        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
10014           regN = INSNT0(3,0);
10015           regD = INSNT1(11,8);
10016           regM = INSNT1(3,0);
10017           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10018              gate = True;
10019        }
10020     } else {
10021        if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
10022            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10023            INSNA(7,4)   == BITS4(1,0,1,1)) {
10024           regD = INSNA(15,12);
10025           regN = INSNA(19,16);
10026           regM = INSNA(3,0);
10027           if (regD != 15 && regN != 15 && regM != 15)
10028              gate = True;
10029        }
10030     }
10031
10032     if (gate) {
10033        IRTemp irt_ge_flag0 = newTemp(Ity_I32);
10034        IRTemp irt_ge_flag1 = newTemp(Ity_I32);
10035        IRTemp irt_ge_flag2 = newTemp(Ity_I32);
10036        IRTemp irt_ge_flag3 = newTemp(Ity_I32);
10037
10038        assign( irt_ge_flag0, get_GEFLAG32(0) );
10039        assign( irt_ge_flag1, get_GEFLAG32(1) );
10040        assign( irt_ge_flag2, get_GEFLAG32(2) );
10041        assign( irt_ge_flag3, get_GEFLAG32(3) );
10042
10043        IRExpr* ire_ge_flag0_or
10044          = binop(Iop_Or32, mkexpr(irt_ge_flag0),
10045                  binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag0)));
10046        IRExpr* ire_ge_flag1_or
10047          = binop(Iop_Or32, mkexpr(irt_ge_flag1),
10048                  binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag1)));
10049        IRExpr* ire_ge_flag2_or
10050          = binop(Iop_Or32, mkexpr(irt_ge_flag2),
10051                  binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag2)));
10052        IRExpr* ire_ge_flag3_or
10053          = binop(Iop_Or32, mkexpr(irt_ge_flag3),
10054                  binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag3)));
10055
10056        IRExpr* ire_ge_flags
10057          = binop( Iop_Or32,
10058                   binop(Iop_Or32,
10059                         binop(Iop_And32,
10060                               binop(Iop_Sar32, ire_ge_flag0_or, mkU8(31)),
10061                               mkU32(0x000000ff)),
10062                         binop(Iop_And32,
10063                               binop(Iop_Sar32, ire_ge_flag1_or, mkU8(31)),
10064                               mkU32(0x0000ff00))),
10065                   binop(Iop_Or32,
10066                         binop(Iop_And32,
10067                               binop(Iop_Sar32, ire_ge_flag2_or, mkU8(31)),
10068                               mkU32(0x00ff0000)),
10069                         binop(Iop_And32,
10070                               binop(Iop_Sar32, ire_ge_flag3_or, mkU8(31)),
10071                               mkU32(0xff000000))) );
10072
10073        IRExpr* ire_result
10074          = binop(Iop_Or32,
10075                  binop(Iop_And32,
10076                        isT ? getIRegT(regN) : getIRegA(regN),
10077                        ire_ge_flags ),
10078                  binop(Iop_And32,
10079                        isT ? getIRegT(regM) : getIRegA(regM),
10080                        unop(Iop_Not32, ire_ge_flags)));
10081
10082        if (isT)
10083           putIRegT( regD, ire_result, condT );
10084        else
10085           putIRegA( regD, ire_result, condT, Ijk_Boring );
10086
10087        DIP("sel%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10088        return True;
10089     }
10090     /* fall through */
10091   }
10092
10093   /* ----------------- uxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
10094   {
10095     UInt regD = 99, regN = 99, regM = 99, rotate = 99;
10096     Bool gate = False;
10097
10098     if (isT) {
10099        if (INSNT0(15,4) == 0xFA3 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
10100           regN   = INSNT0(3,0);
10101           regD   = INSNT1(11,8);
10102           regM   = INSNT1(3,0);
10103           rotate = INSNT1(5,4);
10104           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10105              gate = True;
10106        }
10107     } else {
10108        if (INSNA(27,20) == BITS8(0,1,1,0,1,1,0,0) &&
10109            INSNA(9,4)   == BITS6(0,0,0,1,1,1) ) {
10110           regD   = INSNA(15,12);
10111           regN   = INSNA(19,16);
10112           regM   = INSNA(3,0);
10113           rotate = INSNA(11,10);
10114           if (regD != 15 && regN != 15 && regM != 15)
10115             gate = True;
10116        }
10117     }
10118
10119     if (gate) {
10120        IRTemp irt_regN = newTemp(Ity_I32);
10121        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10122
10123        IRTemp irt_regM = newTemp(Ity_I32);
10124        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10125
10126        IRTemp irt_rot = newTemp(Ity_I32);
10127        assign( irt_rot, binop(Iop_And32,
10128                               genROR32(irt_regM, 8 * rotate),
10129                               mkU32(0x00FF00FF)) );
10130
10131        IRExpr* resLo
10132           = binop(Iop_And32,
10133                   binop(Iop_Add32, mkexpr(irt_regN), mkexpr(irt_rot)),
10134                   mkU32(0x0000FFFF));
10135
10136        IRExpr* resHi
10137           = binop(Iop_Add32,
10138                   binop(Iop_And32, mkexpr(irt_regN), mkU32(0xFFFF0000)),
10139                   binop(Iop_And32, mkexpr(irt_rot),  mkU32(0xFFFF0000)));
10140
10141        IRExpr* ire_result
10142           = binop( Iop_Or32, resHi, resLo );
10143
10144        if (isT)
10145           putIRegT( regD, ire_result, condT );
10146        else
10147           putIRegA( regD, ire_result, condT, Ijk_Boring );
10148
10149        DIP( "uxtab16%s r%u, r%u, r%u, ROR #%u\n",
10150             nCC(conq), regD, regN, regM, 8 * rotate );
10151        return True;
10152     }
10153     /* fall through */
10154   }
10155
10156   /* --------------- usad8  Rd,Rn,Rm    ---------------- */
10157   /* --------------- usada8 Rd,Rn,Rm,Ra ---------------- */
10158   {
10159     UInt rD = 99, rN = 99, rM = 99, rA = 99;
10160     Bool gate = False;
10161
10162     if (isT) {
10163       if (INSNT0(15,4) == 0xFB7 && INSNT1(7,4) == BITS4(0,0,0,0)) {
10164           rN = INSNT0(3,0);
10165           rA = INSNT1(15,12);
10166           rD = INSNT1(11,8);
10167           rM = INSNT1(3,0);
10168           if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && rA != 13)
10169              gate = True;
10170        }
10171     } else {
10172        if (INSNA(27,20) == BITS8(0,1,1,1,1,0,0,0) &&
10173            INSNA(7,4)   == BITS4(0,0,0,1) ) {
10174           rD = INSNA(19,16);
10175           rA = INSNA(15,12);
10176           rM = INSNA(11,8);
10177           rN = INSNA(3,0);
10178           if (rD != 15 && rN != 15 && rM != 15 /* but rA can be 15 */)
10179              gate = True;
10180        }
10181     }
10182     /* We allow rA == 15, to denote the usad8 (no accumulator) case. */
10183
10184     if (gate) {
10185        IRExpr* rNe = isT ? getIRegT(rN) : getIRegA(rN);
10186        IRExpr* rMe = isT ? getIRegT(rM) : getIRegA(rM);
10187        IRExpr* rAe = rA == 15 ? mkU32(0)
10188                               : (isT ? getIRegT(rA) : getIRegA(rA));
10189        IRExpr* res = binop(Iop_Add32,
10190                            binop(Iop_Sad8Ux4, rNe, rMe),
10191                            rAe);
10192        if (isT)
10193           putIRegT( rD, res, condT );
10194        else
10195           putIRegA( rD, res, condT, Ijk_Boring );
10196
10197        if (rA == 15) {
10198           DIP( "usad8%s r%u, r%u, r%u\n",
10199                nCC(conq), rD, rN, rM );
10200        } else {
10201           DIP( "usada8%s r%u, r%u, r%u, r%u\n",
10202                nCC(conq), rD, rN, rM, rA );
10203        }
10204        return True;
10205     }
10206     /* fall through */
10207   }
10208
10209   /* ---------- Doesn't match anything. ---------- */
10210   return False;
10211
10212#  undef INSNA
10213#  undef INSNT0
10214#  undef INSNT1
10215}
10216
10217
10218/*------------------------------------------------------------*/
10219/*--- LDMxx/STMxx helper (both ARM and Thumb32)            ---*/
10220/*------------------------------------------------------------*/
10221
10222/* Generate IR for LDMxx and STMxx.  This is complex.  Assumes it's
10223   unconditional, so the caller must produce a jump-around before
10224   calling this, if the insn is to be conditional.  Caller is
10225   responsible for all validation of parameters.  For LDMxx, if PC is
10226   amongst the values loaded, caller is also responsible for
10227   generating the jump. */
10228static void mk_ldm_stm ( Bool arm,     /* True: ARM, False: Thumb */
10229                         UInt rN,      /* base reg */
10230                         UInt bINC,    /* 1: inc,  0: dec */
10231                         UInt bBEFORE, /* 1: inc/dec before, 0: after */
10232                         UInt bW,      /* 1: writeback to Rn */
10233                         UInt bL,      /* 1: load, 0: store */
10234                         UInt regList )
10235{
10236   Int i, r, m, nRegs;
10237   IRTemp jk = Ijk_Boring;
10238
10239   /* Get hold of the old Rn value.  We might need to write its value
10240      to memory during a store, and if it's also the writeback
10241      register then we need to get its value now.  We can't treat it
10242      exactly like the other registers we're going to transfer,
10243      because for xxMDA and xxMDB writeback forms, the generated IR
10244      updates Rn in the guest state before any transfers take place.
10245      We have to do this as per comments below, in order that if Rn is
10246      the stack pointer then it always has a value is below or equal
10247      to any of the transfer addresses.  Ick. */
10248   IRTemp oldRnT = newTemp(Ity_I32);
10249   assign(oldRnT, arm ? getIRegA(rN) : getIRegT(rN));
10250
10251   IRTemp anchorT = newTemp(Ity_I32);
10252   /* The old (Addison-Wesley) ARM ARM seems to say that LDMxx/STMxx
10253      ignore the bottom two bits of the address.  However, Cortex-A8
10254      doesn't seem to care.  Hence: */
10255   /* No .. don't force alignment .. */
10256   /* assign(anchorT, binop(Iop_And32, mkexpr(oldRnT), mkU32(~3U))); */
10257   /* Instead, use the potentially misaligned address directly. */
10258   assign(anchorT, mkexpr(oldRnT));
10259
10260   IROp opADDorSUB = bINC ? Iop_Add32 : Iop_Sub32;
10261   // bINC == 1:  xxMIA, xxMIB
10262   // bINC == 0:  xxMDA, xxMDB
10263
10264   // For xxMDA and xxMDB, update Rn first if necessary.  We have
10265   // to do this first so that, for the common idiom of the transfers
10266   // faulting because we're pushing stuff onto a stack and the stack
10267   // is growing down onto allocate-on-fault pages (as Valgrind simulates),
10268   // we need to have the SP up-to-date "covering" (pointing below) the
10269   // transfer area.  For the same reason, if we are doing xxMIA or xxMIB,
10270   // do the transfer first, and then update rN afterwards.
10271   nRegs = 0;
10272   for (i = 0; i < 16; i++) {
10273     if ((regList & (1 << i)) != 0)
10274         nRegs++;
10275   }
10276   if (bW == 1 && !bINC) {
10277      IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
10278      if (arm)
10279         putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
10280      else
10281         putIRegT( rN, e, IRTemp_INVALID );
10282   }
10283
10284   // Make up a list of the registers to transfer, and their offsets
10285   // in memory relative to the anchor.  If the base reg (Rn) is part
10286   // of the transfer, then do it last for a load and first for a store.
10287   UInt xReg[16], xOff[16];
10288   Int  nX = 0;
10289   m = 0;
10290   for (i = 0; i < 16; i++) {
10291      r = bINC ? i : (15-i);
10292      if (0 == (regList & (1<<r)))
10293         continue;
10294      if (bBEFORE)
10295         m++;
10296      /* paranoia: check we aren't transferring the writeback
10297         register during a load. Should be assured by decode-point
10298         check above. */
10299      if (bW == 1 && bL == 1)
10300         vassert(r != rN);
10301
10302      xOff[nX] = 4 * m;
10303      xReg[nX] = r;
10304      nX++;
10305
10306      if (!bBEFORE)
10307         m++;
10308   }
10309   vassert(m == nRegs);
10310   vassert(nX == nRegs);
10311   vassert(nX <= 16);
10312
10313   if (bW == 0 && (regList & (1<<rN)) != 0) {
10314      /* Non-writeback, and basereg is to be transferred.  Do its
10315         transfer last for a load and first for a store.  Requires
10316         reordering xOff/xReg. */
10317      if (0) {
10318         vex_printf("\nREG_LIST_PRE: (rN=%d)\n", rN);
10319         for (i = 0; i < nX; i++)
10320            vex_printf("reg %d   off %d\n", xReg[i], xOff[i]);
10321         vex_printf("\n");
10322      }
10323
10324      vassert(nX > 0);
10325      for (i = 0; i < nX; i++) {
10326         if (xReg[i] == rN)
10327             break;
10328      }
10329      vassert(i < nX); /* else we didn't find it! */
10330      UInt tReg = xReg[i];
10331      UInt tOff = xOff[i];
10332      if (bL == 1) {
10333         /* load; make this transfer happen last */
10334         if (i < nX-1) {
10335            for (m = i+1; m < nX; m++) {
10336               xReg[m-1] = xReg[m];
10337               xOff[m-1] = xOff[m];
10338            }
10339            vassert(m == nX);
10340            xReg[m-1] = tReg;
10341            xOff[m-1] = tOff;
10342         }
10343      } else {
10344         /* store; make this transfer happen first */
10345         if (i > 0) {
10346            for (m = i-1; m >= 0; m--) {
10347               xReg[m+1] = xReg[m];
10348               xOff[m+1] = xOff[m];
10349            }
10350            vassert(m == -1);
10351            xReg[0] = tReg;
10352            xOff[0] = tOff;
10353         }
10354      }
10355
10356      if (0) {
10357         vex_printf("REG_LIST_POST:\n");
10358         for (i = 0; i < nX; i++)
10359            vex_printf("reg %d   off %d\n", xReg[i], xOff[i]);
10360         vex_printf("\n");
10361      }
10362   }
10363
10364   /* According to the Cortex A8 TRM Sec. 5.2.1, LDM(1) with r13 as the base
10365       register and PC in the register list is a return for purposes of branch
10366       prediction.
10367      The ARM ARM Sec. C9.10.1 further specifies that writeback must be enabled
10368       to be counted in event 0x0E (Procedure return).*/
10369   if (rN == 13 && bL == 1 && bINC && !bBEFORE && bW == 1) {
10370      jk = Ijk_Ret;
10371   }
10372
10373   /* Actually generate the transfers */
10374   for (i = 0; i < nX; i++) {
10375      r = xReg[i];
10376      if (bL == 1) {
10377         IRExpr* e = loadLE(Ity_I32,
10378                            binop(opADDorSUB, mkexpr(anchorT),
10379                                  mkU32(xOff[i])));
10380         if (arm) {
10381            putIRegA( r, e, IRTemp_INVALID, jk );
10382         } else {
10383            // no: putIRegT( r, e, IRTemp_INVALID );
10384            // putIRegT refuses to write to R15.  But that might happen.
10385            // Since this is uncond, and we need to be able to
10386            // write the PC, just use the low level put:
10387            llPutIReg( r, e );
10388         }
10389      } else {
10390         /* if we're storing Rn, make sure we use the correct
10391            value, as per extensive comments above */
10392         storeLE( binop(opADDorSUB, mkexpr(anchorT), mkU32(xOff[i])),
10393                  r == rN ? mkexpr(oldRnT)
10394                          : (arm ? getIRegA(r) : getIRegT(r) ) );
10395      }
10396   }
10397
10398   // If we are doing xxMIA or xxMIB,
10399   // do the transfer first, and then update rN afterwards.
10400   if (bW == 1 && bINC) {
10401      IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
10402      if (arm)
10403         putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
10404      else
10405         putIRegT( rN, e, IRTemp_INVALID );
10406   }
10407}
10408
10409
10410/*------------------------------------------------------------*/
10411/*--- VFP (CP 10 and 11) instructions                      ---*/
10412/*------------------------------------------------------------*/
10413
10414/* Both ARM and Thumb */
10415
10416/* Translate a CP10 or CP11 instruction.  If successful, returns
10417   True and *dres may or may not be updated.  If failure, returns
10418   False and doesn't change *dres nor create any IR.
10419
10420   The ARM and Thumb encodings are identical for the low 28 bits of
   the insn (yay!) and that's what the caller must supply, iow, insn28
10422   has the top 4 bits masked out.  Caller is responsible for
10423   determining whether the masked-out bits are valid for a CP10/11
10424   insn.  The rules for the top 4 bits are:
10425
10426     ARM: 0000 to 1110 allowed, and this is the gating condition.
10427     1111 (NV) is not allowed.
10428
10429     Thumb: must be 1110.  The gating condition is taken from
10430     ITSTATE in the normal way.
10431
10432   Conditionalisation:
10433
10434   Caller must supply an IRTemp 'condT' holding the gating condition,
10435   or IRTemp_INVALID indicating the insn is always executed.
10436
10437   Caller must also supply an ARMCondcode 'cond'.  This is only used
10438   for debug printing, no other purpose.  For ARM, this is simply the
10439   top 4 bits of the original instruction.  For Thumb, the condition
10440   is not (really) known until run time, and so ARMCondAL should be
10441   passed, only so that printing of these instructions does not show
10442   any condition.
10443
10444   Finally, the caller must indicate whether this occurs in ARM or
10445   Thumb code.
10446*/
10447static Bool decode_CP10_CP11_instruction (
10448               /*MOD*/DisResult* dres,
10449               UInt              insn28,
10450               IRTemp            condT,
10451               ARMCondcode       conq,
10452               Bool              isT
10453            )
10454{
10455#  define INSN(_bMax,_bMin)  SLICE_UInt(insn28, (_bMax), (_bMin))
10456
10457   vassert(INSN(31,28) == BITS4(0,0,0,0)); // caller's obligation
10458
10459   if (isT) {
10460      vassert(conq == ARMCondAL);
10461   } else {
10462      vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
10463   }
10464
10465   /* ----------------------------------------------------------- */
10466   /* -- VFP instructions -- double precision (mostly)         -- */
10467   /* ----------------------------------------------------------- */
10468
10469   /* --------------------- fldmx, fstmx --------------------- */
10470   /*
10471                                 31   27   23   19 15 11   7   0
10472                                         P U WL
10473      C4-100, C5-26  1  FSTMX    cond 1100 1000 Rn Dd 1011 offset
10474      C4-100, C5-28  2  FSTMIAX  cond 1100 1010 Rn Dd 1011 offset
10475      C4-100, C5-30  3  FSTMDBX  cond 1101 0010 Rn Dd 1011 offset
10476
10477      C4-42, C5-26   1  FLDMX    cond 1100 1001 Rn Dd 1011 offset
10478      C4-42, C5-28   2  FLDMIAX  cond 1100 1011 Rn Dd 1011 offset
10479      C4-42, C5-30   3  FLDMDBX  cond 1101 0011 Rn Dd 1011 offset
10480
10481      Regs transferred: Dd .. D(d + (offset-3)/2)
10482      offset must be odd, must not imply a reg > 15
10483      IA/DB: Rn is changed by (4 + 8 x # regs transferred)
10484
10485      case coding:
10486         1  at-Rn   (access at Rn)
10487         2  ia-Rn   (access at Rn, then Rn += 4+8n)
10488         3  db-Rn   (Rn -= 4+8n,   then access at Rn)
10489   */
10490   if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
10491       && INSN(11,8) == BITS4(1,0,1,1)) {
10492      UInt bP      = (insn28 >> 24) & 1;
10493      UInt bU      = (insn28 >> 23) & 1;
10494      UInt bW      = (insn28 >> 21) & 1;
10495      UInt bL      = (insn28 >> 20) & 1;
10496      UInt offset  = (insn28 >> 0) & 0xFF;
10497      UInt rN      = INSN(19,16);
10498      UInt dD      = (INSN(22,22) << 4) | INSN(15,12);
10499      UInt nRegs   = (offset - 1) / 2;
10500      UInt summary = 0;
10501      Int  i;
10502
10503      /**/ if (bP == 0 && bU == 1 && bW == 0) {
10504         summary = 1;
10505      }
10506      else if (bP == 0 && bU == 1 && bW == 1) {
10507         summary = 2;
10508      }
10509      else if (bP == 1 && bU == 0 && bW == 1) {
10510         summary = 3;
10511      }
10512      else goto after_vfp_fldmx_fstmx;
10513
10514      /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
10515      if (rN == 15 && (summary == 2 || summary == 3 || isT))
10516         goto after_vfp_fldmx_fstmx;
10517
10518      /* offset must be odd, and specify at least one register */
10519      if (0 == (offset & 1) || offset < 3)
10520         goto after_vfp_fldmx_fstmx;
10521
      /* can't transfer regs after D31 */
10523      if (dD + nRegs - 1 >= 32)
10524         goto after_vfp_fldmx_fstmx;
10525
10526      /* Now, we can't do a conditional load or store, since that very
10527         likely will generate an exception.  So we have to take a side
10528         exit at this point if the condition is false. */
10529      if (condT != IRTemp_INVALID) {
10530         if (isT)
10531            mk_skip_over_T32_if_cond_is_false( condT );
10532         else
10533            mk_skip_over_A32_if_cond_is_false( condT );
10534         condT = IRTemp_INVALID;
10535      }
10536      /* Ok, now we're unconditional.  Do the load or store. */
10537
10538      /* get the old Rn value */
10539      IRTemp rnT = newTemp(Ity_I32);
10540      assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
10541                           rN == 15));
10542
10543      /* make a new value for Rn, post-insn */
10544      IRTemp rnTnew = IRTemp_INVALID;
10545      if (summary == 2 || summary == 3) {
10546         rnTnew = newTemp(Ity_I32);
10547         assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
10548                              mkexpr(rnT),
10549                              mkU32(4 + 8 * nRegs)));
10550      }
10551
10552      /* decide on the base transfer address */
10553      IRTemp taT = newTemp(Ity_I32);
10554      assign(taT,  summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
10555
10556      /* update Rn if necessary -- in case 3, we're moving it down, so
10557         update before any memory reference, in order to keep Memcheck
10558         and V's stack-extending logic (on linux) happy */
10559      if (summary == 3) {
10560         if (isT)
10561            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
10562         else
10563            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
10564      }
10565
10566      /* generate the transfers */
10567      for (i = 0; i < nRegs; i++) {
10568         IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
10569         if (bL) {
10570            putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
10571         } else {
10572            storeLE(addr, getDReg(dD + i));
10573         }
10574      }
10575
10576      /* update Rn if necessary -- in case 2, we're moving it up, so
10577         update after any memory reference, in order to keep Memcheck
10578         and V's stack-extending logic (on linux) happy */
10579      if (summary == 2) {
10580         if (isT)
10581            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
10582         else
10583            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
10584      }
10585
10586      HChar* nm = bL==1 ? "ld" : "st";
10587      switch (summary) {
10588         case 1:  DIP("f%smx%s r%u, {d%u-d%u}\n",
10589                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
10590                  break;
10591         case 2:  DIP("f%smiax%s r%u!, {d%u-d%u}\n",
10592                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
10593                  break;
10594         case 3:  DIP("f%smdbx%s r%u!, {d%u-d%u}\n",
10595                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
10596                  break;
10597         default: vassert(0);
10598      }
10599
10600      goto decode_success_vfp;
10601      /* FIXME alignment constraints? */
10602   }
10603
10604  after_vfp_fldmx_fstmx:
10605
10606   /* --------------------- fldmd, fstmd --------------------- */
10607   /*
10608                                 31   27   23   19 15 11   7   0
10609                                         P U WL
10610      C4-96, C5-26   1  FSTMD    cond 1100 1000 Rn Dd 1011 offset
10611      C4-96, C5-28   2  FSTMDIA  cond 1100 1010 Rn Dd 1011 offset
10612      C4-96, C5-30   3  FSTMDDB  cond 1101 0010 Rn Dd 1011 offset
10613
10614      C4-38, C5-26   1  FLDMD    cond 1100 1001 Rn Dd 1011 offset
10615      C4-38, C5-28   2  FLDMIAD  cond 1100 1011 Rn Dd 1011 offset
10616      C4-38, C5-30   3  FLDMDBD  cond 1101 0011 Rn Dd 1011 offset
10617
10618      Regs transferred: Dd .. D(d + (offset-2)/2)
10619      offset must be even, must not imply a reg > 15
10620      IA/DB: Rn is changed by (8 x # regs transferred)
10621
10622      case coding:
10623         1  at-Rn   (access at Rn)
10624         2  ia-Rn   (access at Rn, then Rn += 8n)
10625         3  db-Rn   (Rn -= 8n,     then access at Rn)
10626   */
10627   if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
10628       && INSN(11,8) == BITS4(1,0,1,1)) {
10629      UInt bP      = (insn28 >> 24) & 1;
10630      UInt bU      = (insn28 >> 23) & 1;
10631      UInt bW      = (insn28 >> 21) & 1;
10632      UInt bL      = (insn28 >> 20) & 1;
10633      UInt offset  = (insn28 >> 0) & 0xFF;
10634      UInt rN      = INSN(19,16);
10635      UInt dD      = (INSN(22,22) << 4) | INSN(15,12);
10636      UInt nRegs   = offset / 2;
10637      UInt summary = 0;
10638      Int  i;
10639
10640      /**/ if (bP == 0 && bU == 1 && bW == 0) {
10641         summary = 1;
10642      }
10643      else if (bP == 0 && bU == 1 && bW == 1) {
10644         summary = 2;
10645      }
10646      else if (bP == 1 && bU == 0 && bW == 1) {
10647         summary = 3;
10648      }
10649      else goto after_vfp_fldmd_fstmd;
10650
10651      /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
10652      if (rN == 15 && (summary == 2 || summary == 3 || isT))
10653         goto after_vfp_fldmd_fstmd;
10654
10655      /* offset must be even, and specify at least one register */
10656      if (1 == (offset & 1) || offset < 2)
10657         goto after_vfp_fldmd_fstmd;
10658
10659      /* can't transfer regs after D15 */
10660      if (dD + nRegs - 1 >= 32)
10661         goto after_vfp_fldmd_fstmd;
10662
10663      /* Now, we can't do a conditional load or store, since that very
10664         likely will generate an exception.  So we have to take a side
10665         exit at this point if the condition is false. */
10666      if (condT != IRTemp_INVALID) {
10667         if (isT)
10668            mk_skip_over_T32_if_cond_is_false( condT );
10669         else
10670            mk_skip_over_A32_if_cond_is_false( condT );
10671         condT = IRTemp_INVALID;
10672      }
10673      /* Ok, now we're unconditional.  Do the load or store. */
10674
10675      /* get the old Rn value */
10676      IRTemp rnT = newTemp(Ity_I32);
10677      assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
10678                           rN == 15));
10679
10680      /* make a new value for Rn, post-insn */
10681      IRTemp rnTnew = IRTemp_INVALID;
10682      if (summary == 2 || summary == 3) {
10683         rnTnew = newTemp(Ity_I32);
10684         assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
10685                              mkexpr(rnT),
10686                              mkU32(8 * nRegs)));
10687      }
10688
10689      /* decide on the base transfer address */
10690      IRTemp taT = newTemp(Ity_I32);
10691      assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
10692
10693      /* update Rn if necessary -- in case 3, we're moving it down, so
10694         update before any memory reference, in order to keep Memcheck
10695         and V's stack-extending logic (on linux) happy */
10696      if (summary == 3) {
10697         if (isT)
10698            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
10699         else
10700            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
10701      }
10702
10703      /* generate the transfers */
10704      for (i = 0; i < nRegs; i++) {
10705         IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
10706         if (bL) {
10707            putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
10708         } else {
10709            storeLE(addr, getDReg(dD + i));
10710         }
10711      }
10712
10713      /* update Rn if necessary -- in case 2, we're moving it up, so
10714         update after any memory reference, in order to keep Memcheck
10715         and V's stack-extending logic (on linux) happy */
10716      if (summary == 2) {
10717         if (isT)
10718            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
10719         else
10720            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
10721      }
10722
10723      HChar* nm = bL==1 ? "ld" : "st";
10724      switch (summary) {
10725         case 1:  DIP("f%smd%s r%u, {d%u-d%u}\n",
10726                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
10727                  break;
10728         case 2:  DIP("f%smiad%s r%u!, {d%u-d%u}\n",
10729                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
10730                  break;
10731         case 3:  DIP("f%smdbd%s r%u!, {d%u-d%u}\n",
10732                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
10733                  break;
10734         default: vassert(0);
10735      }
10736
10737      goto decode_success_vfp;
10738      /* FIXME alignment constraints? */
10739   }
10740
10741  after_vfp_fldmd_fstmd:
10742
10743   /* ------------------- fmrx, fmxr ------------------- */
10744   if (BITS8(1,1,1,0,1,1,1,1) == INSN(27,20)
10745       && BITS4(1,0,1,0) == INSN(11,8)
10746       && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
10747      UInt rD  = INSN(15,12);
10748      UInt reg = INSN(19,16);
10749      if (reg == BITS4(0,0,0,1)) {
10750         if (rD == 15) {
10751            IRTemp nzcvT = newTemp(Ity_I32);
10752            /* When rD is 15, we are copying the top 4 bits of FPSCR
10753               into CPSR.  That is, set the flags thunk to COPY and
10754               install FPSCR[31:28] as the value to copy. */
10755            assign(nzcvT, binop(Iop_And32,
10756                                IRExpr_Get(OFFB_FPSCR, Ity_I32),
10757                                mkU32(0xF0000000)));
10758            setFlags_D1(ARMG_CC_OP_COPY, nzcvT, condT);
10759            DIP("fmstat%s\n", nCC(conq));
10760         } else {
10761            /* Otherwise, merely transfer FPSCR to r0 .. r14. */
10762            IRExpr* e = IRExpr_Get(OFFB_FPSCR, Ity_I32);
10763            if (isT)
10764               putIRegT(rD, e, condT);
10765            else
10766               putIRegA(rD, e, condT, Ijk_Boring);
10767            DIP("fmrx%s r%u, fpscr\n", nCC(conq), rD);
10768         }
10769         goto decode_success_vfp;
10770      }
10771      /* fall through */
10772   }
10773
10774   if (BITS8(1,1,1,0,1,1,1,0) == INSN(27,20)
10775       && BITS4(1,0,1,0) == INSN(11,8)
10776       && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
10777      UInt rD  = INSN(15,12);
10778      UInt reg = INSN(19,16);
10779      if (reg == BITS4(0,0,0,1)) {
10780         putMiscReg32(OFFB_FPSCR,
10781                      isT ? getIRegT(rD) : getIRegA(rD), condT);
10782         DIP("fmxr%s fpscr, r%u\n", nCC(conq), rD);
10783         goto decode_success_vfp;
10784      }
10785      /* fall through */
10786   }
10787
10788   /* --------------------- vmov --------------------- */
10789   // VMOV dM, rD, rN
10790   if (0x0C400B10 == (insn28 & 0x0FF00FD0)) {
10791      UInt dM = INSN(3,0) | (INSN(5,5) << 4);
10792      UInt rD = INSN(15,12); /* lo32 */
10793      UInt rN = INSN(19,16); /* hi32 */
10794      if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))) {
10795         /* fall through */
10796      } else {
10797         putDReg(dM,
10798                 unop(Iop_ReinterpI64asF64,
10799                      binop(Iop_32HLto64,
10800                            isT ? getIRegT(rN) : getIRegA(rN),
10801                            isT ? getIRegT(rD) : getIRegA(rD))),
10802                 condT);
10803         DIP("vmov%s d%u, r%u, r%u\n", nCC(conq), dM, rD, rN);
10804         goto decode_success_vfp;
10805      }
10806      /* fall through */
10807   }
10808
10809   // VMOV rD, rN, dM
10810   if (0x0C500B10 == (insn28 & 0x0FF00FD0)) {
10811      UInt dM = INSN(3,0) | (INSN(5,5) << 4);
10812      UInt rD = INSN(15,12); /* lo32 */
10813      UInt rN = INSN(19,16); /* hi32 */
10814      if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))
10815          || rD == rN) {
10816         /* fall through */
10817      } else {
10818         IRTemp i64 = newTemp(Ity_I64);
10819         assign(i64, unop(Iop_ReinterpF64asI64, getDReg(dM)));
10820         IRExpr* hi32 = unop(Iop_64HIto32, mkexpr(i64));
10821         IRExpr* lo32 = unop(Iop_64to32,   mkexpr(i64));
10822         if (isT) {
10823            putIRegT(rN, hi32, condT);
10824            putIRegT(rD, lo32, condT);
10825         } else {
10826            putIRegA(rN, hi32, condT, Ijk_Boring);
10827            putIRegA(rD, lo32, condT, Ijk_Boring);
10828         }
10829         DIP("vmov%s r%u, r%u, d%u\n", nCC(conq), rD, rN, dM);
10830         goto decode_success_vfp;
10831      }
10832      /* fall through */
10833   }
10834
10835   // VMOV sD, sD+1, rN, rM
10836   if (0x0C400A10 == (insn28 & 0x0FF00FD0)) {
10837      UInt sD = (INSN(3,0) << 1) | INSN(5,5);
10838      UInt rN = INSN(15,12);
10839      UInt rM = INSN(19,16);
10840      if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
10841          || sD == 31) {
10842         /* fall through */
10843      } else {
10844         putFReg(sD,
10845                 unop(Iop_ReinterpI32asF32, isT ? getIRegT(rN) : getIRegA(rN)),
10846                 condT);
10847         putFReg(sD+1,
10848                 unop(Iop_ReinterpI32asF32, isT ? getIRegT(rM) : getIRegA(rM)),
10849                 condT);
10850         DIP("vmov%s, s%u, s%u, r%u, r%u\n",
10851              nCC(conq), sD, sD + 1, rN, rM);
10852         goto decode_success_vfp;
10853      }
10854   }
10855
10856   // VMOV rN, rM, sD, sD+1
10857   if (0x0C500A10 == (insn28 & 0x0FF00FD0)) {
10858      UInt sD = (INSN(3,0) << 1) | INSN(5,5);
10859      UInt rN = INSN(15,12);
10860      UInt rM = INSN(19,16);
10861      if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
10862          || sD == 31 || rN == rM) {
10863         /* fall through */
10864      } else {
10865         IRExpr* res0 = unop(Iop_ReinterpF32asI32, getFReg(sD));
10866         IRExpr* res1 = unop(Iop_ReinterpF32asI32, getFReg(sD+1));
10867         if (isT) {
10868            putIRegT(rN, res0, condT);
10869            putIRegT(rM, res1, condT);
10870         } else {
10871            putIRegA(rN, res0, condT, Ijk_Boring);
10872            putIRegA(rM, res1, condT, Ijk_Boring);
10873         }
10874         DIP("vmov%s, r%u, r%u, s%u, s%u\n",
10875             nCC(conq), rN, rM, sD, sD + 1);
10876         goto decode_success_vfp;
10877      }
10878   }
10879
10880   // VMOV rD[x], rT  (ARM core register to scalar)
10881   if (0x0E000B10 == (insn28 & 0x0F900F1F)) {
10882      UInt rD  = (INSN(7,7) << 4) | INSN(19,16);
10883      UInt rT  = INSN(15,12);
10884      UInt opc = (INSN(22,21) << 2) | INSN(6,5);
10885      UInt index;
10886      if (rT == 15 || (isT && rT == 13)) {
10887         /* fall through */
10888      } else {
10889         if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
10890            index = opc & 7;
10891            putDRegI64(rD, triop(Iop_SetElem8x8,
10892                                 getDRegI64(rD),
10893                                 mkU8(index),
10894                                 unop(Iop_32to8,
10895                                      isT ? getIRegT(rT) : getIRegA(rT))),
10896                           condT);
10897            DIP("vmov%s.8 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
10898            goto decode_success_vfp;
10899         }
10900         else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
10901            index = (opc >> 1) & 3;
10902            putDRegI64(rD, triop(Iop_SetElem16x4,
10903                                 getDRegI64(rD),
10904                                 mkU8(index),
10905                                 unop(Iop_32to16,
10906                                      isT ? getIRegT(rT) : getIRegA(rT))),
10907                           condT);
10908            DIP("vmov%s.16 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
10909            goto decode_success_vfp;
10910         }
10911         else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0)) {
10912            index = (opc >> 2) & 1;
10913            putDRegI64(rD, triop(Iop_SetElem32x2,
10914                                 getDRegI64(rD),
10915                                 mkU8(index),
10916                                 isT ? getIRegT(rT) : getIRegA(rT)),
10917                           condT);
10918            DIP("vmov%s.32 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
10919            goto decode_success_vfp;
10920         } else {
10921            /* fall through */
10922         }
10923      }
10924   }
10925
10926   // VMOV (scalar to ARM core register)
10927   // VMOV rT, rD[x]
10928   if (0x0E100B10 == (insn28 & 0x0F100F1F)) {
10929      UInt rN  = (INSN(7,7) << 4) | INSN(19,16);
10930      UInt rT  = INSN(15,12);
10931      UInt U   = INSN(23,23);
10932      UInt opc = (INSN(22,21) << 2) | INSN(6,5);
10933      UInt index;
10934      if (rT == 15 || (isT && rT == 13)) {
10935         /* fall through */
10936      } else {
10937         if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
10938            index = opc & 7;
10939            IRExpr* e = unop(U ? Iop_8Uto32 : Iop_8Sto32,
10940                             binop(Iop_GetElem8x8,
10941                                   getDRegI64(rN),
10942                                   mkU8(index)));
10943            if (isT)
10944               putIRegT(rT, e, condT);
10945            else
10946               putIRegA(rT, e, condT, Ijk_Boring);
10947            DIP("vmov%s.%c8 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
10948                  rT, rN, index);
10949            goto decode_success_vfp;
10950         }
10951         else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
10952            index = (opc >> 1) & 3;
10953            IRExpr* e = unop(U ? Iop_16Uto32 : Iop_16Sto32,
10954                             binop(Iop_GetElem16x4,
10955                                   getDRegI64(rN),
10956                                   mkU8(index)));
10957            if (isT)
10958               putIRegT(rT, e, condT);
10959            else
10960               putIRegA(rT, e, condT, Ijk_Boring);
10961            DIP("vmov%s.%c16 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
10962                  rT, rN, index);
10963            goto decode_success_vfp;
10964         }
10965         else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0) && U == 0) {
10966            index = (opc >> 2) & 1;
10967            IRExpr* e = binop(Iop_GetElem32x2, getDRegI64(rN), mkU8(index));
10968            if (isT)
10969               putIRegT(rT, e, condT);
10970            else
10971               putIRegA(rT, e, condT, Ijk_Boring);
10972            DIP("vmov%s.32 r%u, d%u[%u]\n", nCC(conq), rT, rN, index);
10973            goto decode_success_vfp;
10974         } else {
10975            /* fall through */
10976         }
10977      }
10978   }
10979
10980   // VMOV.F32 sD, #imm
10981   // FCONSTS sD, #imm
10982   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
10983       && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,0)) {
10984      UInt rD   = (INSN(15,12) << 1) | INSN(22,22);
10985      UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
10986      UInt b    = (imm8 >> 6) & 1;
10987      UInt imm;
10988      imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,(imm8 >> 5) & 1) << 8)
10989             | ((imm8 & 0x1f) << 3);
10990      imm <<= 16;
10991      putFReg(rD, unop(Iop_ReinterpI32asF32, mkU32(imm)), condT);
10992      DIP("fconsts%s s%u #%u", nCC(conq), rD, imm8);
10993      goto decode_success_vfp;
10994   }
10995
10996   // VMOV.F64 dD, #imm
10997   // FCONSTD dD, #imm
10998   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
10999       && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,1)) {
11000      UInt rD   = INSN(15,12) | (INSN(22,22) << 4);
11001      UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
11002      UInt b    = (imm8 >> 6) & 1;
11003      ULong imm;
11004      imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,b) << 8)
11005             | BITS8(b,b,0,0,0,0,0,0) | (imm8 & 0x3f);
11006      imm <<= 48;
11007      putDReg(rD, unop(Iop_ReinterpI64asF64, mkU64(imm)), condT);
11008      DIP("fconstd%s d%u #%u", nCC(conq), rD, imm8);
11009      goto decode_success_vfp;
11010   }
11011
11012   /* ---------------------- vdup ------------------------- */
11013   // VDUP dD, rT
11014   // VDUP qD, rT
11015   if (BITS8(1,1,1,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,1))
11016       && BITS4(1,0,1,1) == INSN(11,8) && INSN(6,6) == 0 && INSN(4,4) == 1) {
11017      UInt rD   = (INSN(7,7) << 4) | INSN(19,16);
11018      UInt rT   = INSN(15,12);
11019      UInt Q    = INSN(21,21);
11020      UInt size = (INSN(22,22) << 1) | INSN(5,5);
11021      if (rT == 15 || (isT && rT == 13) || size == 3 || (Q && (rD & 1))) {
11022         /* fall through */
11023      } else {
11024         IRExpr* e = isT ? getIRegT(rT) : getIRegA(rT);
11025         if (Q) {
11026            rD >>= 1;
11027            switch (size) {
11028               case 0:
11029                  putQReg(rD, unop(Iop_Dup32x4, e), condT);
11030                  break;
11031               case 1:
11032                  putQReg(rD, unop(Iop_Dup16x8, unop(Iop_32to16, e)),
11033                              condT);
11034                  break;
11035               case 2:
11036                  putQReg(rD, unop(Iop_Dup8x16, unop(Iop_32to8, e)),
11037                              condT);
11038                  break;
11039               default:
11040                  vassert(0);
11041            }
11042            DIP("vdup.%u q%u, r%u\n", 32 / (1<<size), rD, rT);
11043         } else {
11044            switch (size) {
11045               case 0:
11046                  putDRegI64(rD, unop(Iop_Dup32x2, e), condT);
11047                  break;
11048               case 1:
11049                  putDRegI64(rD, unop(Iop_Dup16x4, unop(Iop_32to16, e)),
11050                               condT);
11051                  break;
11052               case 2:
11053                  putDRegI64(rD, unop(Iop_Dup8x8, unop(Iop_32to8, e)),
11054                               condT);
11055                  break;
11056               default:
11057                  vassert(0);
11058            }
11059            DIP("vdup.%u d%u, r%u\n", 32 / (1<<size), rD, rT);
11060         }
11061         goto decode_success_vfp;
11062      }
11063   }
11064
11065   /* --------------------- f{ld,st}d --------------------- */
11066   // FLDD, FSTD
11067   if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
11068       && BITS4(1,0,1,1) == INSN(11,8)) {
11069      UInt dD     = INSN(15,12) | (INSN(22,22) << 4);
11070      UInt rN     = INSN(19,16);
11071      UInt offset = (insn28 & 0xFF) << 2;
11072      UInt bU     = (insn28 >> 23) & 1; /* 1: +offset  0: -offset */
11073      UInt bL     = (insn28 >> 20) & 1; /* 1: load  0: store */
11074      /* make unconditional */
11075      if (condT != IRTemp_INVALID) {
11076         if (isT)
11077            mk_skip_over_T32_if_cond_is_false( condT );
11078         else
11079            mk_skip_over_A32_if_cond_is_false( condT );
11080         condT = IRTemp_INVALID;
11081      }
11082      IRTemp ea = newTemp(Ity_I32);
11083      assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
11084                       align4if(isT ? getIRegT(rN) : getIRegA(rN),
11085                                rN == 15),
11086                       mkU32(offset)));
11087      if (bL) {
11088         putDReg(dD, loadLE(Ity_F64,mkexpr(ea)), IRTemp_INVALID);
11089      } else {
11090         storeLE(mkexpr(ea), getDReg(dD));
11091      }
11092      DIP("f%sd%s d%u, [r%u, %c#%u]\n",
11093          bL ? "ld" : "st", nCC(conq), dD, rN,
11094          bU ? '+' : '-', offset);
11095      goto decode_success_vfp;
11096   }
11097
11098   /* --------------------- dp insns (D) --------------------- */
11099   if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
11100       && BITS4(1,0,1,1) == INSN(11,8)
11101       && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
11102      UInt    dM  = INSN(3,0)   | (INSN(5,5) << 4);       /* argR */
11103      UInt    dD  = INSN(15,12) | (INSN(22,22) << 4);   /* dst/acc */
11104      UInt    dN  = INSN(19,16) | (INSN(7,7) << 4);     /* argL */
11105      UInt    bP  = (insn28 >> 23) & 1;
11106      UInt    bQ  = (insn28 >> 21) & 1;
11107      UInt    bR  = (insn28 >> 20) & 1;
11108      UInt    bS  = (insn28 >> 6) & 1;
11109      UInt    opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
11110      IRExpr* rm  = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
11111      switch (opc) {
11112         case BITS4(0,0,0,0): /* MAC: d + n * m */
11113            putDReg(dD, triop(Iop_AddF64, rm,
11114                              getDReg(dD),
11115                              triop(Iop_MulF64, rm, getDReg(dN),
11116                                                    getDReg(dM))),
11117                        condT);
11118            DIP("fmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
11119            goto decode_success_vfp;
11120         case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
11121            putDReg(dD, triop(Iop_AddF64, rm,
11122                              getDReg(dD),
11123                              unop(Iop_NegF64,
11124                                   triop(Iop_MulF64, rm, getDReg(dN),
11125                                                         getDReg(dM)))),
11126                        condT);
11127            DIP("fnmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
11128            goto decode_success_vfp;
11129         case BITS4(0,0,1,0): /* MSC: - d + n * m */
11130            putDReg(dD, triop(Iop_AddF64, rm,
11131                              unop(Iop_NegF64, getDReg(dD)),
11132                              triop(Iop_MulF64, rm, getDReg(dN),
11133                                                    getDReg(dM))),
11134                        condT);
11135            DIP("fmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
11136            goto decode_success_vfp;
11137         case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
11138            putDReg(dD, triop(Iop_AddF64, rm,
11139                              unop(Iop_NegF64, getDReg(dD)),
11140                              unop(Iop_NegF64,
11141                                   triop(Iop_MulF64, rm, getDReg(dN),
11142                                                         getDReg(dM)))),
11143                        condT);
11144            DIP("fnmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
11145            goto decode_success_vfp;
11146         case BITS4(0,1,0,0): /* MUL: n * m */
11147            putDReg(dD, triop(Iop_MulF64, rm, getDReg(dN), getDReg(dM)),
11148                        condT);
11149            DIP("fmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
11150            goto decode_success_vfp;
11151         case BITS4(0,1,0,1): /* NMUL: - n * m */
11152            putDReg(dD, unop(Iop_NegF64,
11153                             triop(Iop_MulF64, rm, getDReg(dN),
11154                                                   getDReg(dM))),
11155                    condT);
11156            DIP("fnmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
11157            goto decode_success_vfp;
11158         case BITS4(0,1,1,0): /* ADD: n + m */
11159            putDReg(dD, triop(Iop_AddF64, rm, getDReg(dN), getDReg(dM)),
11160                        condT);
11161            DIP("faddd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
11162            goto decode_success_vfp;
11163         case BITS4(0,1,1,1): /* SUB: n - m */
11164            putDReg(dD, triop(Iop_SubF64, rm, getDReg(dN), getDReg(dM)),
11165                        condT);
11166            DIP("fsubd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
11167            goto decode_success_vfp;
11168         case BITS4(1,0,0,0): /* DIV: n / m */
11169            putDReg(dD, triop(Iop_DivF64, rm, getDReg(dN), getDReg(dM)),
11170                        condT);
11171            DIP("fdivd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
11172            goto decode_success_vfp;
11173         default:
11174            break;
11175      }
11176   }
11177
11178   /* --------------------- compares (D) --------------------- */
11179   /*          31   27   23   19   15 11   7    3
11180                 28   24   20   16 12    8    4    0
11181      FCMPD    cond 1110 1D11 0100 Dd 1011 0100 Dm
11182      FCMPED   cond 1110 1D11 0100 Dd 1011 1100 Dm
11183      FCMPZD   cond 1110 1D11 0101 Dd 1011 0100 0000
11184      FCMPZED  cond 1110 1D11 0101 Dd 1011 1100 0000
11185                                 Z         N
11186
11187      Z=0 Compare Dd vs Dm     and set FPSCR 31:28 accordingly
11188      Z=1 Compare Dd vs zero
11189
11190      N=1 generates Invalid Operation exn if either arg is any kind of NaN
11191      N=0 generates Invalid Operation exn if either arg is a signalling NaN
11192      (Not that we pay any attention to N here)
11193   */
11194   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
11195       && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
11196       && BITS4(1,0,1,1) == INSN(11,8)
11197       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
11198      UInt bZ = (insn28 >> 16) & 1;
11199      UInt bN = (insn28 >> 7) & 1;
11200      UInt dD = INSN(15,12) | (INSN(22,22) << 4);
11201      UInt dM = INSN(3,0) | (INSN(5,5) << 4);
11202      if (bZ && INSN(3,0) != 0) {
11203         /* does not decode; fall through */
11204      } else {
11205         IRTemp argL = newTemp(Ity_F64);
11206         IRTemp argR = newTemp(Ity_F64);
11207         IRTemp irRes = newTemp(Ity_I32);
11208         assign(argL, getDReg(dD));
11209         assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0)) : getDReg(dM));
11210         assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
11211
11212         IRTemp nzcv     = IRTemp_INVALID;
11213         IRTemp oldFPSCR = newTemp(Ity_I32);
11214         IRTemp newFPSCR = newTemp(Ity_I32);
11215
11216         /* This is where the fun starts.  We have to convert 'irRes'
11217            from an IR-convention return result (IRCmpF64Result) to an
11218            ARM-encoded (N,Z,C,V) group.  The final result is in the
11219            bottom 4 bits of 'nzcv'. */
11220         /* Map compare result from IR to ARM(nzcv) */
11221         /*
11222            FP cmp result | IR   | ARM(nzcv)
11223            --------------------------------
11224            UN              0x45   0011
11225            LT              0x01   1000
11226            GT              0x00   0010
11227            EQ              0x40   0110
11228         */
11229         nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
11230
11231         /* And update FPSCR accordingly */
11232         assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
11233         assign(newFPSCR,
11234                binop(Iop_Or32,
11235                      binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
11236                      binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
11237
11238         putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
11239
11240         if (bZ) {
11241            DIP("fcmpz%sd%s d%u\n", bN ? "e" : "", nCC(conq), dD);
11242         } else {
11243            DIP("fcmp%sd%s d%u, d%u\n", bN ? "e" : "", nCC(conq), dD, dM);
11244         }
11245         goto decode_success_vfp;
11246      }
11247      /* fall through */
11248   }
11249
11250   /* --------------------- unary (D) --------------------- */
11251   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
11252       && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
11253       && BITS4(1,0,1,1) == INSN(11,8)
11254       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
11255      UInt dD  = INSN(15,12) | (INSN(22,22) << 4);
11256      UInt dM  = INSN(3,0) | (INSN(5,5) << 4);
11257      UInt b16 = (insn28 >> 16) & 1;
11258      UInt b7  = (insn28 >> 7) & 1;
11259      /**/ if (b16 == 0 && b7 == 0) {
11260         // FCPYD
11261         putDReg(dD, getDReg(dM), condT);
11262         DIP("fcpyd%s d%u, d%u\n", nCC(conq), dD, dM);
11263         goto decode_success_vfp;
11264      }
11265      else if (b16 == 0 && b7 == 1) {
11266         // FABSD
11267         putDReg(dD, unop(Iop_AbsF64, getDReg(dM)), condT);
11268         DIP("fabsd%s d%u, d%u\n", nCC(conq), dD, dM);
11269         goto decode_success_vfp;
11270      }
11271      else if (b16 == 1 && b7 == 0) {
11272         // FNEGD
11273         putDReg(dD, unop(Iop_NegF64, getDReg(dM)), condT);
11274         DIP("fnegd%s d%u, d%u\n", nCC(conq), dD, dM);
11275         goto decode_success_vfp;
11276      }
11277      else if (b16 == 1 && b7 == 1) {
11278         // FSQRTD
11279         IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
11280         putDReg(dD, binop(Iop_SqrtF64, rm, getDReg(dM)), condT);
11281         DIP("fsqrtd%s d%u, d%u\n", nCC(conq), dD, dM);
11282         goto decode_success_vfp;
11283      }
11284      else
11285         vassert(0);
11286
11287      /* fall through */
11288   }
11289
11290   /* ----------------- I <-> D conversions ----------------- */
11291
11292   // F{S,U}ITOD dD, fM
11293   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
11294       && BITS4(1,0,0,0) == (INSN(19,16) & BITS4(1,1,1,1))
11295       && BITS4(1,0,1,1) == INSN(11,8)
11296       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
11297      UInt bM    = (insn28 >> 5) & 1;
11298      UInt fM    = (INSN(3,0) << 1) | bM;
11299      UInt dD    = INSN(15,12) | (INSN(22,22) << 4);
11300      UInt syned = (insn28 >> 7) & 1;
11301      if (syned) {
11302         // FSITOD
11303         putDReg(dD, unop(Iop_I32StoF64,
11304                          unop(Iop_ReinterpF32asI32, getFReg(fM))),
11305                 condT);
11306         DIP("fsitod%s d%u, s%u\n", nCC(conq), dD, fM);
11307      } else {
11308         // FUITOD
11309         putDReg(dD, unop(Iop_I32UtoF64,
11310                          unop(Iop_ReinterpF32asI32, getFReg(fM))),
11311                 condT);
11312         DIP("fuitod%s d%u, s%u\n", nCC(conq), dD, fM);
11313      }
11314      goto decode_success_vfp;
11315   }
11316
11317   // FTO{S,U}ID fD, dM
11318   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
11319       && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
11320       && BITS4(1,0,1,1) == INSN(11,8)
11321       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
11322      UInt   bD    = (insn28 >> 22) & 1;
11323      UInt   fD    = (INSN(15,12) << 1) | bD;
11324      UInt   dM    = INSN(3,0) | (INSN(5,5) << 4);
11325      UInt   bZ    = (insn28 >> 7) & 1;
11326      UInt   syned = (insn28 >> 16) & 1;
11327      IRTemp rmode = newTemp(Ity_I32);
11328      assign(rmode, bZ ? mkU32(Irrm_ZERO)
11329                       : mkexpr(mk_get_IR_rounding_mode()));
11330      if (syned) {
11331         // FTOSID
11332         putFReg(fD, unop(Iop_ReinterpI32asF32,
11333                          binop(Iop_F64toI32S, mkexpr(rmode),
11334                                getDReg(dM))),
11335                 condT);
11336         DIP("ftosi%sd%s s%u, d%u\n", bZ ? "z" : "",
11337             nCC(conq), fD, dM);
11338      } else {
11339         // FTOUID
11340         putFReg(fD, unop(Iop_ReinterpI32asF32,
11341                          binop(Iop_F64toI32U, mkexpr(rmode),
11342                                getDReg(dM))),
11343                 condT);
11344         DIP("ftoui%sd%s s%u, d%u\n", bZ ? "z" : "",
11345             nCC(conq), fD, dM);
11346      }
11347      goto decode_success_vfp;
11348   }
11349
11350   /* ----------------------------------------------------------- */
11351   /* -- VFP instructions -- single precision                  -- */
11352   /* ----------------------------------------------------------- */
11353
11354   /* --------------------- fldms, fstms --------------------- */
11355   /*
11356                                 31   27   23   19 15 11   7   0
11357                                         P UDWL
      C4-98, C5-26   1  FSTMS    cond 1100 1x00 Rn Fd 1010 offset
      C4-98, C5-28   2  FSTMIAS  cond 1100 1x10 Rn Fd 1010 offset
      C4-98, C5-30   3  FSTMDBS  cond 1101 0x10 Rn Fd 1010 offset

      C4-40, C5-26   1  FLDMS    cond 1100 1x01 Rn Fd 1010 offset
      C4-40, C5-26   2  FLDMIAS  cond 1100 1x11 Rn Fd 1010 offset
      C4-40, C5-26   3  FLDMDBS  cond 1101 0x11 Rn Fd 1010 offset
11365
11366      Regs transferred: F(Fd:D) .. F(Fd:d + offset)
11367      offset must not imply a reg > 15
11368      IA/DB: Rn is changed by (4 x # regs transferred)
11369
11370      case coding:
11371         1  at-Rn   (access at Rn)
11372         2  ia-Rn   (access at Rn, then Rn += 4n)
11373         3  db-Rn   (Rn -= 4n,     then access at Rn)
11374   */
11375   if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
11376       && INSN(11,8) == BITS4(1,0,1,0)) {
11377      UInt bP      = (insn28 >> 24) & 1;
11378      UInt bU      = (insn28 >> 23) & 1;
11379      UInt bW      = (insn28 >> 21) & 1;
11380      UInt bL      = (insn28 >> 20) & 1;
11381      UInt bD      = (insn28 >> 22) & 1;
11382      UInt offset  = (insn28 >> 0) & 0xFF;
11383      UInt rN      = INSN(19,16);
11384      UInt fD      = (INSN(15,12) << 1) | bD;
11385      UInt nRegs   = offset;
11386      UInt summary = 0;
11387      Int  i;
11388
11389      /**/ if (bP == 0 && bU == 1 && bW == 0) {
11390         summary = 1;
11391      }
11392      else if (bP == 0 && bU == 1 && bW == 1) {
11393         summary = 2;
11394      }
11395      else if (bP == 1 && bU == 0 && bW == 1) {
11396         summary = 3;
11397      }
11398      else goto after_vfp_fldms_fstms;
11399
11400      /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
11401      if (rN == 15 && (summary == 2 || summary == 3 || isT))
11402         goto after_vfp_fldms_fstms;
11403
11404      /* offset must specify at least one register */
11405      if (offset < 1)
11406         goto after_vfp_fldms_fstms;
11407
11408      /* can't transfer regs after S31 */
11409      if (fD + nRegs - 1 >= 32)
11410         goto after_vfp_fldms_fstms;
11411
11412      /* Now, we can't do a conditional load or store, since that very
11413         likely will generate an exception.  So we have to take a side
11414         exit at this point if the condition is false. */
11415      if (condT != IRTemp_INVALID) {
11416         if (isT)
11417            mk_skip_over_T32_if_cond_is_false( condT );
11418         else
11419            mk_skip_over_A32_if_cond_is_false( condT );
11420         condT = IRTemp_INVALID;
11421      }
11422      /* Ok, now we're unconditional.  Do the load or store. */
11423
11424      /* get the old Rn value */
11425      IRTemp rnT = newTemp(Ity_I32);
11426      assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
11427                           rN == 15));
11428
11429      /* make a new value for Rn, post-insn */
11430      IRTemp rnTnew = IRTemp_INVALID;
11431      if (summary == 2 || summary == 3) {
11432         rnTnew = newTemp(Ity_I32);
11433         assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
11434                              mkexpr(rnT),
11435                              mkU32(4 * nRegs)));
11436      }
11437
11438      /* decide on the base transfer address */
11439      IRTemp taT = newTemp(Ity_I32);
11440      assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
11441
11442      /* update Rn if necessary -- in case 3, we're moving it down, so
11443         update before any memory reference, in order to keep Memcheck
11444         and V's stack-extending logic (on linux) happy */
11445      if (summary == 3) {
11446         if (isT)
11447            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
11448         else
11449            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
11450      }
11451
11452      /* generate the transfers */
11453      for (i = 0; i < nRegs; i++) {
11454         IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(4*i));
11455         if (bL) {
11456            putFReg(fD + i, loadLE(Ity_F32, addr), IRTemp_INVALID);
11457         } else {
11458            storeLE(addr, getFReg(fD + i));
11459         }
11460      }
11461
11462      /* update Rn if necessary -- in case 2, we're moving it up, so
11463         update after any memory reference, in order to keep Memcheck
11464         and V's stack-extending logic (on linux) happy */
11465      if (summary == 2) {
11466         if (isT)
11467            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
11468         else
11469            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
11470      }
11471
11472      HChar* nm = bL==1 ? "ld" : "st";
11473      switch (summary) {
11474         case 1:  DIP("f%sms%s r%u, {s%u-s%u}\n",
11475                      nm, nCC(conq), rN, fD, fD + nRegs - 1);
11476                  break;
11477         case 2:  DIP("f%smias%s r%u!, {s%u-s%u}\n",
11478                      nm, nCC(conq), rN, fD, fD + nRegs - 1);
11479                  break;
11480         case 3:  DIP("f%smdbs%s r%u!, {s%u-s%u}\n",
11481                      nm, nCC(conq), rN, fD, fD + nRegs - 1);
11482                  break;
11483         default: vassert(0);
11484      }
11485
11486      goto decode_success_vfp;
11487      /* FIXME alignment constraints? */
11488   }
11489
11490  after_vfp_fldms_fstms:
11491
11492   /* --------------------- fmsr, fmrs --------------------- */
11493   if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
11494       && BITS4(1,0,1,0) == INSN(11,8)
11495       && BITS4(0,0,0,0) == INSN(3,0)
11496       && BITS4(0,0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
11497      UInt rD  = INSN(15,12);
11498      UInt b7  = (insn28 >> 7) & 1;
11499      UInt fN  = (INSN(19,16) << 1) | b7;
11500      UInt b20 = (insn28 >> 20) & 1;
11501      if (rD == 15) {
11502         /* fall through */
11503         /* Let's assume that no sane person would want to do
11504            floating-point transfers to or from the program counter,
11505            and simply decline to decode the instruction.  The ARM ARM
11506            doesn't seem to explicitly disallow this case, though. */
11507      } else {
11508         if (b20) {
11509            IRExpr* res = unop(Iop_ReinterpF32asI32, getFReg(fN));
11510            if (isT)
11511               putIRegT(rD, res, condT);
11512            else
11513               putIRegA(rD, res, condT, Ijk_Boring);
11514            DIP("fmrs%s r%u, s%u\n", nCC(conq), rD, fN);
11515         } else {
11516            putFReg(fN, unop(Iop_ReinterpI32asF32,
11517                             isT ? getIRegT(rD) : getIRegA(rD)),
11518                        condT);
11519            DIP("fmsr%s s%u, r%u\n", nCC(conq), fN, rD);
11520         }
11521         goto decode_success_vfp;
11522      }
11523      /* fall through */
11524   }
11525
11526   /* --------------------- f{ld,st}s --------------------- */
11527   // FLDS, FSTS
11528   if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
11529       && BITS4(1,0,1,0) == INSN(11,8)) {
11530      UInt bD     = (insn28 >> 22) & 1;
11531      UInt fD     = (INSN(15,12) << 1) | bD;
11532      UInt rN     = INSN(19,16);
11533      UInt offset = (insn28 & 0xFF) << 2;
11534      UInt bU     = (insn28 >> 23) & 1; /* 1: +offset  0: -offset */
11535      UInt bL     = (insn28 >> 20) & 1; /* 1: load  0: store */
11536      /* make unconditional */
11537      if (condT != IRTemp_INVALID) {
11538         if (isT)
11539            mk_skip_over_T32_if_cond_is_false( condT );
11540         else
11541            mk_skip_over_A32_if_cond_is_false( condT );
11542         condT = IRTemp_INVALID;
11543      }
11544      IRTemp ea = newTemp(Ity_I32);
11545      assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
11546                       align4if(isT ? getIRegT(rN) : getIRegA(rN),
11547                                rN == 15),
11548                       mkU32(offset)));
11549      if (bL) {
11550         putFReg(fD, loadLE(Ity_F32,mkexpr(ea)), IRTemp_INVALID);
11551      } else {
11552         storeLE(mkexpr(ea), getFReg(fD));
11553      }
11554      DIP("f%ss%s s%u, [r%u, %c#%u]\n",
11555          bL ? "ld" : "st", nCC(conq), fD, rN,
11556          bU ? '+' : '-', offset);
11557      goto decode_success_vfp;
11558   }
11559
11560   /* --------------------- dp insns (F) --------------------- */
11561   if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
11562       && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
11563       && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
11564      UInt    bM  = (insn28 >> 5) & 1;
11565      UInt    bD  = (insn28 >> 22) & 1;
11566      UInt    bN  = (insn28 >> 7) & 1;
11567      UInt    fM  = (INSN(3,0) << 1) | bM;   /* argR */
11568      UInt    fD  = (INSN(15,12) << 1) | bD; /* dst/acc */
11569      UInt    fN  = (INSN(19,16) << 1) | bN; /* argL */
11570      UInt    bP  = (insn28 >> 23) & 1;
11571      UInt    bQ  = (insn28 >> 21) & 1;
11572      UInt    bR  = (insn28 >> 20) & 1;
11573      UInt    bS  = (insn28 >> 6) & 1;
11574      UInt    opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
11575      IRExpr* rm  = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
11576      switch (opc) {
11577         case BITS4(0,0,0,0): /* MAC: d + n * m */
11578            putFReg(fD, triop(Iop_AddF32, rm,
11579                              getFReg(fD),
11580                              triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
11581                        condT);
11582            DIP("fmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
11583            goto decode_success_vfp;
11584         case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
11585            putFReg(fD, triop(Iop_AddF32, rm,
11586                              getFReg(fD),
11587                              unop(Iop_NegF32,
11588                                   triop(Iop_MulF32, rm, getFReg(fN),
11589                                                         getFReg(fM)))),
11590                        condT);
11591            DIP("fnmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
11592            goto decode_success_vfp;
11593         case BITS4(0,0,1,0): /* MSC: - d + n * m */
11594            putFReg(fD, triop(Iop_AddF32, rm,
11595                              unop(Iop_NegF32, getFReg(fD)),
11596                              triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
11597                        condT);
11598            DIP("fmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
11599            goto decode_success_vfp;
11600         case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
11601            putFReg(fD, triop(Iop_AddF32, rm,
11602                              unop(Iop_NegF32, getFReg(fD)),
11603                              unop(Iop_NegF32,
11604                                   triop(Iop_MulF32, rm,
11605                                                     getFReg(fN),
11606                                                    getFReg(fM)))),
11607                        condT);
11608            DIP("fnmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
11609            goto decode_success_vfp;
11610         case BITS4(0,1,0,0): /* MUL: n * m */
11611            putFReg(fD, triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM)),
11612                        condT);
11613            DIP("fmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
11614            goto decode_success_vfp;
11615         case BITS4(0,1,0,1): /* NMUL: - n * m */
11616            putFReg(fD, unop(Iop_NegF32,
11617                             triop(Iop_MulF32, rm, getFReg(fN),
11618                                                   getFReg(fM))),
11619                    condT);
11620            DIP("fnmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
11621            goto decode_success_vfp;
11622         case BITS4(0,1,1,0): /* ADD: n + m */
11623            putFReg(fD, triop(Iop_AddF32, rm, getFReg(fN), getFReg(fM)),
11624                        condT);
11625            DIP("fadds%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
11626            goto decode_success_vfp;
11627         case BITS4(0,1,1,1): /* SUB: n - m */
11628            putFReg(fD, triop(Iop_SubF32, rm, getFReg(fN), getFReg(fM)),
11629                        condT);
11630            DIP("fsubs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
11631            goto decode_success_vfp;
11632         case BITS4(1,0,0,0): /* DIV: n / m */
11633            putFReg(fD, triop(Iop_DivF32, rm, getFReg(fN), getFReg(fM)),
11634                        condT);
11635            DIP("fdivs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
11636            goto decode_success_vfp;
11637         default:
11638            break;
11639      }
11640   }
11641
11642   /* --------------------- compares (S) --------------------- */
11643   /*          31   27   23   19   15 11   7    3
11644                 28   24   20   16 12    8    4    0
11645      FCMPS    cond 1110 1D11 0100 Fd 1010 01M0 Fm
11646      FCMPES   cond 1110 1D11 0100 Fd 1010 11M0 Fm
11647      FCMPZS   cond 1110 1D11 0101 Fd 1010 0100 0000
      FCMPZES  cond 1110 1D11 0101 Fd 1010 1100 0000
11649                                 Z         N
11650
11651      Z=0 Compare Fd:D vs Fm:M     and set FPSCR 31:28 accordingly
11652      Z=1 Compare Fd:D vs zero
11653
11654      N=1 generates Invalid Operation exn if either arg is any kind of NaN
11655      N=0 generates Invalid Operation exn if either arg is a signalling NaN
11656      (Not that we pay any attention to N here)
11657   */
11658   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
11659       && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
11660       && BITS4(1,0,1,0) == INSN(11,8)
11661       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
11662      UInt bZ = (insn28 >> 16) & 1;
11663      UInt bN = (insn28 >> 7) & 1;
11664      UInt bD = (insn28 >> 22) & 1;
11665      UInt bM = (insn28 >> 5) & 1;
11666      UInt fD = (INSN(15,12) << 1) | bD;
11667      UInt fM = (INSN(3,0) << 1) | bM;
11668      if (bZ && (INSN(3,0) != 0 || (INSN(7,4) & 3) != 0)) {
11669         /* does not decode; fall through */
11670      } else {
11671         IRTemp argL = newTemp(Ity_F64);
11672         IRTemp argR = newTemp(Ity_F64);
11673         IRTemp irRes = newTemp(Ity_I32);
11674
11675         assign(argL, unop(Iop_F32toF64, getFReg(fD)));
11676         assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0))
11677                         : unop(Iop_F32toF64, getFReg(fM)));
11678         assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
11679
11680         IRTemp nzcv     = IRTemp_INVALID;
11681         IRTemp oldFPSCR = newTemp(Ity_I32);
11682         IRTemp newFPSCR = newTemp(Ity_I32);
11683
11684         /* This is where the fun starts.  We have to convert 'irRes'
11685            from an IR-convention return result (IRCmpF64Result) to an
11686            ARM-encoded (N,Z,C,V) group.  The final result is in the
11687            bottom 4 bits of 'nzcv'. */
11688         /* Map compare result from IR to ARM(nzcv) */
11689         /*
11690            FP cmp result | IR   | ARM(nzcv)
11691            --------------------------------
11692            UN              0x45   0011
11693            LT              0x01   1000
11694            GT              0x00   0010
11695            EQ              0x40   0110
11696         */
11697         nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
11698
11699         /* And update FPSCR accordingly */
11700         assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
11701         assign(newFPSCR,
11702                binop(Iop_Or32,
11703                      binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
11704                      binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
11705
11706         putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
11707
11708         if (bZ) {
11709            DIP("fcmpz%ss%s s%u\n", bN ? "e" : "", nCC(conq), fD);
11710         } else {
11711            DIP("fcmp%ss%s s%u, s%u\n", bN ? "e" : "",
11712                nCC(conq), fD, fM);
11713         }
11714         goto decode_success_vfp;
11715      }
11716      /* fall through */
11717   }
11718
11719   /* --------------------- unary (S) --------------------- */
11720   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
11721       && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
11722       && BITS4(1,0,1,0) == INSN(11,8)
11723       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
11724      UInt bD = (insn28 >> 22) & 1;
11725      UInt bM = (insn28 >> 5) & 1;
11726      UInt fD  = (INSN(15,12) << 1) | bD;
11727      UInt fM  = (INSN(3,0) << 1) | bM;
11728      UInt b16 = (insn28 >> 16) & 1;
11729      UInt b7  = (insn28 >> 7) & 1;
11730      /**/ if (b16 == 0 && b7 == 0) {
11731         // FCPYS
11732         putFReg(fD, getFReg(fM), condT);
11733         DIP("fcpys%s s%u, s%u\n", nCC(conq), fD, fM);
11734         goto decode_success_vfp;
11735      }
11736      else if (b16 == 0 && b7 == 1) {
11737         // FABSS
11738         putFReg(fD, unop(Iop_AbsF32, getFReg(fM)), condT);
11739         DIP("fabss%s s%u, s%u\n", nCC(conq), fD, fM);
11740         goto decode_success_vfp;
11741      }
11742      else if (b16 == 1 && b7 == 0) {
11743         // FNEGS
11744         putFReg(fD, unop(Iop_NegF32, getFReg(fM)), condT);
11745         DIP("fnegs%s s%u, s%u\n", nCC(conq), fD, fM);
11746         goto decode_success_vfp;
11747      }
11748      else if (b16 == 1 && b7 == 1) {
11749         // FSQRTS
11750         IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
11751         putFReg(fD, binop(Iop_SqrtF32, rm, getFReg(fM)), condT);
11752         DIP("fsqrts%s s%u, s%u\n", nCC(conq), fD, fM);
11753         goto decode_success_vfp;
11754      }
11755      else
11756         vassert(0);
11757
11758      /* fall through */
11759   }
11760
11761   /* ----------------- I <-> S conversions ----------------- */
11762
11763   // F{S,U}ITOS fD, fM
11764   /* These are more complex than FSITOD/FUITOD.  In the D cases, a 32
11765      bit int will always fit within the 53 bit mantissa, so there's
11766      no possibility of a loss of precision, but that's obviously not
11767      the case here.  Hence this case possibly requires rounding, and
11768      so it drags in the current rounding mode. */
11769   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
11770       && BITS4(1,0,0,0) == INSN(19,16)
11771       && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
11772       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
11773      UInt bM    = (insn28 >> 5) & 1;
11774      UInt bD    = (insn28 >> 22) & 1;
11775      UInt fM    = (INSN(3,0) << 1) | bM;
11776      UInt fD    = (INSN(15,12) << 1) | bD;
11777      UInt syned = (insn28 >> 7) & 1;
11778      IRTemp rmode = newTemp(Ity_I32);
11779      assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
11780      if (syned) {
11781         // FSITOS
11782         putFReg(fD, binop(Iop_F64toF32,
11783                           mkexpr(rmode),
11784                           unop(Iop_I32StoF64,
11785                                unop(Iop_ReinterpF32asI32, getFReg(fM)))),
11786                 condT);
11787         DIP("fsitos%s s%u, s%u\n", nCC(conq), fD, fM);
11788      } else {
11789         // FUITOS
11790         putFReg(fD, binop(Iop_F64toF32,
11791                           mkexpr(rmode),
11792                           unop(Iop_I32UtoF64,
11793                                unop(Iop_ReinterpF32asI32, getFReg(fM)))),
11794                 condT);
11795         DIP("fuitos%s s%u, s%u\n", nCC(conq), fD, fM);
11796      }
11797      goto decode_success_vfp;
11798   }
11799
11800   // FTO{S,U}IS fD, fM
11801   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
11802       && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
11803       && BITS4(1,0,1,0) == INSN(11,8)
11804       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
11805      UInt   bM    = (insn28 >> 5) & 1;
11806      UInt   bD    = (insn28 >> 22) & 1;
11807      UInt   fD    = (INSN(15,12) << 1) | bD;
11808      UInt   fM    = (INSN(3,0) << 1) | bM;
11809      UInt   bZ    = (insn28 >> 7) & 1;
11810      UInt   syned = (insn28 >> 16) & 1;
11811      IRTemp rmode = newTemp(Ity_I32);
11812      assign(rmode, bZ ? mkU32(Irrm_ZERO)
11813                       : mkexpr(mk_get_IR_rounding_mode()));
11814      if (syned) {
11815         // FTOSIS
11816         putFReg(fD, unop(Iop_ReinterpI32asF32,
11817                          binop(Iop_F64toI32S, mkexpr(rmode),
11818                                unop(Iop_F32toF64, getFReg(fM)))),
11819                 condT);
11820         DIP("ftosi%ss%s s%u, d%u\n", bZ ? "z" : "",
11821             nCC(conq), fD, fM);
11822         goto decode_success_vfp;
11823      } else {
11824         // FTOUIS
11825         putFReg(fD, unop(Iop_ReinterpI32asF32,
11826                          binop(Iop_F64toI32U, mkexpr(rmode),
11827                                unop(Iop_F32toF64, getFReg(fM)))),
11828                 condT);
11829         DIP("ftoui%ss%s s%u, d%u\n", bZ ? "z" : "",
11830             nCC(conq), fD, fM);
11831         goto decode_success_vfp;
11832      }
11833   }
11834
11835   /* ----------------- S <-> D conversions ----------------- */
11836
11837   // FCVTDS
11838   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
11839       && BITS4(0,1,1,1) == INSN(19,16)
11840       && BITS4(1,0,1,0) == INSN(11,8)
11841       && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
11842      UInt dD = INSN(15,12) | (INSN(22,22) << 4);
11843      UInt bM = (insn28 >> 5) & 1;
11844      UInt fM = (INSN(3,0) << 1) | bM;
11845      putDReg(dD, unop(Iop_F32toF64, getFReg(fM)), condT);
11846      DIP("fcvtds%s d%u, s%u\n", nCC(conq), dD, fM);
11847      goto decode_success_vfp;
11848   }
11849
11850   // FCVTSD
11851   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
11852       && BITS4(0,1,1,1) == INSN(19,16)
11853       && BITS4(1,0,1,1) == INSN(11,8)
11854       && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
11855      UInt   bD    = (insn28 >> 22) & 1;
11856      UInt   fD    = (INSN(15,12) << 1) | bD;
11857      UInt   dM    = INSN(3,0) | (INSN(5,5) << 4);
11858      IRTemp rmode = newTemp(Ity_I32);
11859      assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
11860      putFReg(fD, binop(Iop_F64toF32, mkexpr(rmode), getDReg(dM)),
11861                  condT);
11862      DIP("fcvtsd%s s%u, d%u\n", nCC(conq), fD, dM);
11863      goto decode_success_vfp;
11864   }
11865
11866   /* FAILURE */
11867   return False;
11868
11869  decode_success_vfp:
11870   /* Check that any accepted insn really is a CP10 or CP11 insn, iow,
11871      assert that we aren't accepting, in this fn, insns that actually
11872      should be handled somewhere else. */
11873   vassert(INSN(11,9) == BITS3(1,0,1)); // 11:8 = 1010 or 1011
11874   return True;
11875
11876#  undef INSN
11877}
11878
11879
11880/*------------------------------------------------------------*/
11881/*--- Instructions in NV (never) space                     ---*/
11882/*------------------------------------------------------------*/
11883
11884/* ARM only */
11885/* Translate a NV space instruction.  If successful, returns True and
11886   *dres may or may not be updated.  If failure, returns False and
11887   doesn't change *dres nor create any IR.
11888
11889   Note that all NEON instructions (in ARM mode) are handled through
11890   here, since they are all in NV space.
11891*/
11892static Bool decode_NV_instruction ( /*MOD*/DisResult* dres,
11893                                    VexArchInfo* archinfo,
11894                                    UInt insn )
11895{
11896#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
11897#  define INSN_COND          SLICE_UInt(insn, 31, 28)
11898
11899   HChar dis_buf[128];
11900
11901   // Should only be called for NV instructions
11902   vassert(BITS4(1,1,1,1) == INSN_COND);
11903
11904   /* ------------------------ pld ------------------------ */
11905   if (BITS8(0,1,0,1, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
11906       && BITS4(1,1,1,1) == INSN(15,12)) {
11907      UInt rN    = INSN(19,16);
11908      UInt imm12 = INSN(11,0);
11909      UInt bU    = INSN(23,23);
11910      DIP("pld [r%u, #%c%u]\n", rN, bU ? '+' : '-', imm12);
11911      return True;
11912   }
11913
11914   if (BITS8(0,1,1,1, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
11915       && BITS4(1,1,1,1) == INSN(15,12)
11916       && 0 == INSN(4,4)) {
11917      UInt rN   = INSN(19,16);
11918      UInt rM   = INSN(3,0);
11919      UInt imm5 = INSN(11,7);
11920      UInt sh2  = INSN(6,5);
11921      UInt bU   = INSN(23,23);
11922      if (rM != 15) {
11923         IRExpr* eaE = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
11924                                                       sh2, imm5, dis_buf);
11925         IRTemp eaT = newTemp(Ity_I32);
11926         /* Bind eaE to a temp merely for debugging-vex purposes, so we
11927            can check it's a plausible decoding.  It will get removed
11928            by iropt a little later on. */
11929         vassert(eaE);
11930         assign(eaT, eaE);
11931         DIP("pld %s\n", dis_buf);
11932         return True;
11933      }
11934      /* fall through */
11935   }
11936
11937   /* ------------------------ pli ------------------------ */
11938   if (BITS8(0,1,0,0, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
11939       && BITS4(1,1,1,1) == INSN(15,12)) {
11940      UInt rN    = INSN(19,16);
11941      UInt imm12 = INSN(11,0);
11942      UInt bU    = INSN(23,23);
11943      DIP("pli [r%u, #%c%u]\n", rN, bU ? '+' : '-', imm12);
11944      return True;
11945   }
11946
11947   /* --------------------- Interworking branches --------------------- */
11948
11949   // BLX (1), viz, unconditional branch and link to R15+simm24
11950   // and set CPSR.T = 1, that is, switch to Thumb mode
11951   if (INSN(31,25) == BITS7(1,1,1,1,1,0,1)) {
11952      UInt bitH   = INSN(24,24);
11953      Int  uimm24 = INSN(23,0);
11954      Int  simm24 = (((uimm24 << 8) >> 8) << 2) + (bitH << 1);
11955      /* Now this is a bit tricky.  Since we're decoding an ARM insn,
11956         it is implies that CPSR.T == 0.  Hence the current insn's
11957         address is guaranteed to be of the form X--(30)--X00.  So, no
11958         need to mask any bits off it.  But need to set the lowest bit
11959         to 1 to denote we're in Thumb mode after this, since
11960         guest_R15T has CPSR.T as the lowest bit.  And we can't chase
11961         into the call, so end the block at this point. */
11962      UInt dst = guest_R15_curr_instr_notENC + 8 + (simm24 | 1);
11963      putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
11964                    IRTemp_INVALID/*because AL*/, Ijk_Boring );
11965      irsb->next     = mkU32(dst);
11966      irsb->jumpkind = Ijk_Call;
11967      dres->whatNext = Dis_StopHere;
11968      DIP("blx 0x%x (and switch to Thumb mode)\n", dst - 1);
11969      return True;
11970   }
11971
11972   /* ------------------- v7 barrier insns ------------------- */
11973   switch (insn) {
11974      case 0xF57FF06F: /* ISB */
11975         stmt( IRStmt_MBE(Imbe_Fence) );
11976         DIP("ISB\n");
11977         return True;
11978      case 0xF57FF04F: /* DSB sy */
11979      case 0xF57FF04E: /* DSB st */
11980      case 0xF57FF04B: /* DSB ish */
11981      case 0xF57FF04A: /* DSB ishst */
11982      case 0xF57FF047: /* DSB nsh */
11983      case 0xF57FF046: /* DSB nshst */
11984      case 0xF57FF043: /* DSB osh */
11985      case 0xF57FF042: /* DSB oshst */
11986         stmt( IRStmt_MBE(Imbe_Fence) );
11987         DIP("DSB\n");
11988         return True;
11989      case 0xF57FF05F: /* DMB sy */
11990      case 0xF57FF05E: /* DMB st */
11991      case 0xF57FF05B: /* DMB ish */
11992      case 0xF57FF05A: /* DMB ishst */
11993      case 0xF57FF057: /* DMB nsh */
11994      case 0xF57FF056: /* DMB nshst */
11995      case 0xF57FF053: /* DMB osh */
11996      case 0xF57FF052: /* DMB oshst */
11997         stmt( IRStmt_MBE(Imbe_Fence) );
11998         DIP("DMB\n");
11999         return True;
12000      default:
12001         break;
12002   }
12003
12004   /* ------------------- CLREX ------------------ */
12005   if (insn == 0xF57FF01F) {
12006      /* AFAICS, this simply cancels a (all?) reservations made by a
12007         (any?) preceding LDREX(es).  Arrange to hand it through to
12008         the back end. */
12009      stmt( IRStmt_MBE(Imbe_CancelReservation) );
12010      DIP("clrex\n");
12011      return True;
12012   }
12013
12014   /* ------------------- NEON ------------------- */
12015   if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
12016      Bool ok_neon = decode_NEON_instruction(
12017                        dres, insn, IRTemp_INVALID/*unconditional*/,
12018                        False/*!isT*/
12019                     );
12020      if (ok_neon)
12021         return True;
12022   }
12023
12024   // unrecognised
12025   return False;
12026
12027#  undef INSN_COND
12028#  undef INSN
12029}
12030
12031
12032/*------------------------------------------------------------*/
12033/*--- Disassemble a single ARM instruction                 ---*/
12034/*------------------------------------------------------------*/
12035
12036/* Disassemble a single ARM instruction into IR.  The instruction is
12037   located in host memory at guest_instr, and has (decoded) guest IP
12038   of guest_R15_curr_instr_notENC, which will have been set before the
12039   call here. */
12040
12041static
12042DisResult disInstr_ARM_WRK (
12043             Bool         put_IP,
12044             Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
12045             Bool         resteerCisOk,
12046             void*        callback_opaque,
12047             UChar*       guest_instr,
12048             VexArchInfo* archinfo,
12049             VexAbiInfo*  abiinfo
12050          )
12051{
12052   // A macro to fish bits out of 'insn'.
12053#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
12054#  define INSN_COND          SLICE_UInt(insn, 31, 28)
12055
12056   DisResult dres;
12057   UInt      insn;
12058   //Bool      allow_VFP = False;
12059   //UInt      hwcaps = archinfo->hwcaps;
12060   IRTemp    condT; /* :: Ity_I32 */
12061   UInt      summary;
12062   HChar     dis_buf[128];  // big enough to hold LDMIA etc text
12063
12064   /* What insn variants are we supporting today? */
12065   //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
12066   // etc etc
12067
12068   /* Set result defaults. */
12069   dres.whatNext   = Dis_Continue;
12070   dres.len        = 4;
12071   dres.continueAt = 0;
12072
12073   /* Set default actions for post-insn handling of writes to r15, if
12074      required. */
12075   r15written = False;
12076   r15guard   = IRTemp_INVALID; /* unconditional */
12077   r15kind    = Ijk_Boring;
12078
12079   /* At least this is simple on ARM: insns are all 4 bytes long, and
12080      4-aligned.  So just fish the whole thing out of memory right now
12081      and have done. */
12082   insn = getUIntLittleEndianly( guest_instr );
12083
12084   if (0) vex_printf("insn: 0x%x\n", insn);
12085
12086   DIP("\t(arm) 0x%x:  ", (UInt)guest_R15_curr_instr_notENC);
12087
12088   /* We may be asked to update the guest R15 before going further. */
12089   vassert(0 == (guest_R15_curr_instr_notENC & 3));
12090   if (put_IP) {
12091      llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) );
12092   }
12093
12094   /* ----------------------------------------------------------- */
12095
12096   /* Spot "Special" instructions (see comment at top of file). */
12097   {
12098      UChar* code = (UChar*)guest_instr;
12099      /* Spot the 16-byte preamble:
12100
12101         e1a0c1ec  mov r12, r12, ROR #3
12102         e1a0c6ec  mov r12, r12, ROR #13
12103         e1a0ceec  mov r12, r12, ROR #29
12104         e1a0c9ec  mov r12, r12, ROR #19
12105      */
12106      UInt word1 = 0xE1A0C1EC;
12107      UInt word2 = 0xE1A0C6EC;
12108      UInt word3 = 0xE1A0CEEC;
12109      UInt word4 = 0xE1A0C9EC;
12110      if (getUIntLittleEndianly(code+ 0) == word1 &&
12111          getUIntLittleEndianly(code+ 4) == word2 &&
12112          getUIntLittleEndianly(code+ 8) == word3 &&
12113          getUIntLittleEndianly(code+12) == word4) {
12114         /* Got a "Special" instruction preamble.  Which one is it? */
12115         if (getUIntLittleEndianly(code+16) == 0xE18AA00A
12116                                               /* orr r10,r10,r10 */) {
12117            /* R3 = client_request ( R4 ) */
12118            DIP("r3 = client_request ( %%r4 )\n");
12119            irsb->next     = mkU32( guest_R15_curr_instr_notENC + 20 );
12120            irsb->jumpkind = Ijk_ClientReq;
12121            dres.whatNext  = Dis_StopHere;
12122            goto decode_success;
12123         }
12124         else
12125         if (getUIntLittleEndianly(code+16) == 0xE18BB00B
12126                                               /* orr r11,r11,r11 */) {
12127            /* R3 = guest_NRADDR */
12128            DIP("r3 = guest_NRADDR\n");
12129            dres.len = 20;
12130            llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
12131            goto decode_success;
12132         }
12133         else
12134         if (getUIntLittleEndianly(code+16) == 0xE18CC00C
12135                                               /* orr r12,r12,r12 */) {
12136            /*  branch-and-link-to-noredir R4 */
12137            DIP("branch-and-link-to-noredir r4\n");
12138            llPutIReg(14, mkU32( guest_R15_curr_instr_notENC + 20) );
12139            irsb->next     = llGetIReg(4);
12140            irsb->jumpkind = Ijk_NoRedir;
12141            dres.whatNext  = Dis_StopHere;
12142            goto decode_success;
12143         }
12144         /* We don't know what it is.  Load the word following the
12145            Special-insn preamble into insn so decode_failure can print it. */
12146         insn = getUIntLittleEndianly(code+16);
12147         goto decode_failure;
12148         /*NOTREACHED*/
12149      }
12150
12151   }
12152
12153   /* ----------------------------------------------------------- */
12154
12155   /* Main ARM instruction decoder starts here. */
12156
12157   /* Deal with the condition.  Strategy is to merely generate a
12158      condition temporary at this point (or IRTemp_INVALID, meaning
12159      unconditional).  We leave it to lower-level instruction decoders
12160      to decide whether they can generate straight-line code, or
12161      whether they must generate a side exit before the instruction.
12162      condT :: Ity_I32 and is always either zero or one. */
12163   condT = IRTemp_INVALID;
12164   switch ( (ARMCondcode)INSN_COND ) {
12165      case ARMCondNV: {
12166         // Illegal instruction prior to v5 (see ARM ARM A3-5), but
12167         // some cases are acceptable
12168         Bool ok = decode_NV_instruction(&dres, archinfo, insn);
12169         if (ok)
12170            goto decode_success;
12171         else
12172            goto decode_failure;
12173      }
12174      case ARMCondAL: // Always executed
12175         break;
12176      case ARMCondEQ: case ARMCondNE: case ARMCondHS: case ARMCondLO:
12177      case ARMCondMI: case ARMCondPL: case ARMCondVS: case ARMCondVC:
12178      case ARMCondHI: case ARMCondLS: case ARMCondGE: case ARMCondLT:
12179      case ARMCondGT: case ARMCondLE:
12180         condT = newTemp(Ity_I32);
12181         assign( condT, mk_armg_calculate_condition( INSN_COND ));
12182         break;
12183   }
12184
12185   /* ----------------------------------------------------------- */
12186   /* -- ARMv5 integer instructions                            -- */
12187   /* ----------------------------------------------------------- */
12188
12189   /* ---------------- Data processing ops ------------------- */
12190
12191   if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0))
12192       && !(INSN(25,25) == 0 && INSN(7,7) == 1 && INSN(4,4) == 1)) {
12193      IRTemp  shop = IRTemp_INVALID; /* shifter operand */
12194      IRTemp  shco = IRTemp_INVALID; /* shifter carry out */
12195      UInt    rD   = (insn >> 12) & 0xF; /* 15:12 */
12196      UInt    rN   = (insn >> 16) & 0xF; /* 19:16 */
12197      UInt    bitS = (insn >> 20) & 1; /* 20:20 */
12198      IRTemp  rNt  = IRTemp_INVALID;
12199      IRTemp  res  = IRTemp_INVALID;
12200      IRTemp  oldV = IRTemp_INVALID;
12201      IRTemp  oldC = IRTemp_INVALID;
12202      HChar*  name = NULL;
12203      IROp    op   = Iop_INVALID;
12204      Bool    ok;
12205
12206      switch (INSN(24,21)) {
12207
12208         /* --------- ADD, SUB, AND, OR --------- */
12209         case BITS4(0,1,0,0): /* ADD:  Rd = Rn + shifter_operand */
12210            name = "add"; op = Iop_Add32; goto rd_eq_rn_op_SO;
12211         case BITS4(0,0,1,0): /* SUB:  Rd = Rn - shifter_operand */
12212            name = "sub"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
12213         case BITS4(0,0,1,1): /* RSB:  Rd = shifter_operand - Rn */
12214            name = "rsb"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
12215         case BITS4(0,0,0,0): /* AND:  Rd = Rn & shifter_operand */
12216            name = "and"; op = Iop_And32; goto rd_eq_rn_op_SO;
12217         case BITS4(1,1,0,0): /* OR:   Rd = Rn | shifter_operand */
12218            name = "orr"; op = Iop_Or32; goto rd_eq_rn_op_SO;
12219         case BITS4(0,0,0,1): /* EOR:  Rd = Rn ^ shifter_operand */
12220            name = "eor"; op = Iop_Xor32; goto rd_eq_rn_op_SO;
12221         case BITS4(1,1,1,0): /* BIC:  Rd = Rn & ~shifter_operand */
12222            name = "bic"; op = Iop_And32; goto rd_eq_rn_op_SO;
12223         rd_eq_rn_op_SO: {
12224            Bool isRSB = False;
12225            Bool isBIC = False;
12226            switch (INSN(24,21)) {
12227               case BITS4(0,0,1,1):
12228                  vassert(op == Iop_Sub32); isRSB = True; break;
12229               case BITS4(1,1,1,0):
12230                  vassert(op == Iop_And32); isBIC = True; break;
12231               default:
12232                  break;
12233            }
12234            rNt = newTemp(Ity_I32);
12235            assign(rNt, getIRegA(rN));
12236            ok = mk_shifter_operand(
12237                    INSN(25,25), INSN(11,0),
12238                    &shop, bitS ? &shco : NULL, dis_buf
12239                 );
12240            if (!ok)
12241               break;
12242            res = newTemp(Ity_I32);
12243            // compute the main result
12244            if (isRSB) {
12245               // reverse-subtract: shifter_operand - Rn
12246               vassert(op == Iop_Sub32);
12247               assign(res, binop(op, mkexpr(shop), mkexpr(rNt)) );
12248            } else if (isBIC) {
12249               // bit-clear: Rn & ~shifter_operand
12250               vassert(op == Iop_And32);
12251               assign(res, binop(op, mkexpr(rNt),
12252                                     unop(Iop_Not32, mkexpr(shop))) );
12253            } else {
12254               // normal: Rn op shifter_operand
12255               assign(res, binop(op, mkexpr(rNt), mkexpr(shop)) );
12256            }
12257            // but don't commit it until after we've finished
12258            // all necessary reads from the guest state
12259            if (bitS
12260                && (op == Iop_And32 || op == Iop_Or32 || op == Iop_Xor32)) {
12261               oldV = newTemp(Ity_I32);
12262               assign( oldV, mk_armg_calculate_flag_v() );
12263            }
12264            // can't safely read guest state after here
12265            // now safe to put the main result
12266            putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
12267            // XXXX!! not safe to read any guest state after
12268            // this point (I think the code below doesn't do that).
12269            if (!bitS)
12270               vassert(shco == IRTemp_INVALID);
12271            /* Update the flags thunk if necessary */
12272            if (bitS) {
12273               vassert(shco != IRTemp_INVALID);
12274               switch (op) {
12275                  case Iop_Add32:
12276                     setFlags_D1_D2( ARMG_CC_OP_ADD, rNt, shop, condT );
12277                     break;
12278                  case Iop_Sub32:
12279                     if (isRSB) {
12280                        setFlags_D1_D2( ARMG_CC_OP_SUB, shop, rNt, condT );
12281                     } else {
12282                        setFlags_D1_D2( ARMG_CC_OP_SUB, rNt, shop, condT );
12283                     }
12284                     break;
12285                  case Iop_And32: /* BIC and AND set the flags the same */
12286                  case Iop_Or32:
12287                  case Iop_Xor32:
12288                     // oldV has been read just above
12289                     setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
12290                                        res, shco, oldV, condT );
12291                     break;
12292                  default:
12293                     vassert(0);
12294               }
12295            }
12296            DIP("%s%s%s r%u, r%u, %s\n",
12297                name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
12298            goto decode_success;
12299         }
12300
12301         /* --------- MOV, MVN --------- */
12302         case BITS4(1,1,0,1):   /* MOV: Rd = shifter_operand */
12303         case BITS4(1,1,1,1): { /* MVN: Rd = not(shifter_operand) */
12304            Bool isMVN = INSN(24,21) == BITS4(1,1,1,1);
12305            IRTemp jk = Ijk_Boring;
12306            if (rN != 0)
12307               break; /* rN must be zero */
12308            ok = mk_shifter_operand(
12309                    INSN(25,25), INSN(11,0),
12310                    &shop, bitS ? &shco : NULL, dis_buf
12311                 );
12312            if (!ok)
12313               break;
12314            res = newTemp(Ity_I32);
12315            assign( res, isMVN ? unop(Iop_Not32, mkexpr(shop))
12316                               : mkexpr(shop) );
12317            if (bitS) {
12318               vassert(shco != IRTemp_INVALID);
12319               oldV = newTemp(Ity_I32);
12320               assign( oldV, mk_armg_calculate_flag_v() );
12321            } else {
12322               vassert(shco == IRTemp_INVALID);
12323            }
12324            /* According to the Cortex A8 TRM Sec. 5.2.1, MOV PC, r14 is a
12325                return for purposes of branch prediction. */
12326            if (!isMVN && INSN(11,0) == 14) {
12327              jk = Ijk_Ret;
12328            }
12329            // can't safely read guest state after here
12330            putIRegA( rD, mkexpr(res), condT, jk );
12331            /* Update the flags thunk if necessary */
12332            if (bitS) {
12333               setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
12334                                  res, shco, oldV, condT );
12335            }
12336            DIP("%s%s%s r%u, %s\n",
12337                isMVN ? "mvn" : "mov",
12338                nCC(INSN_COND), bitS ? "s" : "", rD, dis_buf );
12339            goto decode_success;
12340         }
12341
12342         /* --------- CMP --------- */
12343         case BITS4(1,0,1,0):   /* CMP:  (void) Rn - shifter_operand */
12344         case BITS4(1,0,1,1): { /* CMN:  (void) Rn + shifter_operand */
12345            Bool isCMN = INSN(24,21) == BITS4(1,0,1,1);
12346            if (rD != 0)
12347               break; /* rD must be zero */
12348            if (bitS == 0)
12349               break; /* if S (bit 20) is not set, it's not CMP/CMN */
12350            rNt = newTemp(Ity_I32);
12351            assign(rNt, getIRegA(rN));
12352            ok = mk_shifter_operand(
12353                    INSN(25,25), INSN(11,0),
12354                    &shop, NULL, dis_buf
12355                 );
12356            if (!ok)
12357               break;
12358            // can't safely read guest state after here
12359            /* Update the flags thunk. */
12360            setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
12361                            rNt, shop, condT );
12362            DIP("%s%s r%u, %s\n",
12363                isCMN ? "cmn" : "cmp",
12364                nCC(INSN_COND), rN, dis_buf );
12365            goto decode_success;
12366         }
12367
12368         /* --------- TST --------- */
12369         case BITS4(1,0,0,0):   /* TST:  (void) Rn & shifter_operand */
12370         case BITS4(1,0,0,1): { /* TEQ:  (void) Rn ^ shifter_operand */
12371            Bool isTEQ = INSN(24,21) == BITS4(1,0,0,1);
12372            if (rD != 0)
12373               break; /* rD must be zero */
12374            if (bitS == 0)
12375               break; /* if S (bit 20) is not set, it's not TST/TEQ */
12376            rNt = newTemp(Ity_I32);
12377            assign(rNt, getIRegA(rN));
12378            ok = mk_shifter_operand(
12379                    INSN(25,25), INSN(11,0),
12380                    &shop, &shco, dis_buf
12381                 );
12382            if (!ok)
12383               break;
12384            /* Update the flags thunk. */
12385            res = newTemp(Ity_I32);
12386            assign( res, binop(isTEQ ? Iop_Xor32 : Iop_And32,
12387                               mkexpr(rNt), mkexpr(shop)) );
12388            oldV = newTemp(Ity_I32);
12389            assign( oldV, mk_armg_calculate_flag_v() );
12390            // can't safely read guest state after here
12391            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
12392                               res, shco, oldV, condT );
12393            DIP("%s%s r%u, %s\n",
12394                isTEQ ? "teq" : "tst",
12395                nCC(INSN_COND), rN, dis_buf );
12396            goto decode_success;
12397         }
12398
12399         /* --------- ADC, SBC, RSC --------- */
12400         case BITS4(0,1,0,1): /* ADC:  Rd = Rn + shifter_operand + oldC */
12401            name = "adc"; goto rd_eq_rn_op_SO_op_oldC;
12402         case BITS4(0,1,1,0): /* SBC:  Rd = Rn - shifter_operand - (oldC ^ 1) */
12403            name = "sbc"; goto rd_eq_rn_op_SO_op_oldC;
12404         case BITS4(0,1,1,1): /* RSC:  Rd = shifter_operand - Rn - (oldC ^ 1) */
12405            name = "rsc"; goto rd_eq_rn_op_SO_op_oldC;
12406         rd_eq_rn_op_SO_op_oldC: {
12407            // FIXME: shco isn't used for anything.  Get rid of it.
12408            rNt = newTemp(Ity_I32);
12409            assign(rNt, getIRegA(rN));
12410            ok = mk_shifter_operand(
12411                    INSN(25,25), INSN(11,0),
12412                    &shop, bitS ? &shco : NULL, dis_buf
12413                 );
12414            if (!ok)
12415               break;
12416            oldC = newTemp(Ity_I32);
12417            assign( oldC, mk_armg_calculate_flag_c() );
12418            res = newTemp(Ity_I32);
12419            // compute the main result
12420            switch (INSN(24,21)) {
12421               case BITS4(0,1,0,1): /* ADC */
12422                  assign(res,
12423                         binop(Iop_Add32,
12424                               binop(Iop_Add32, mkexpr(rNt), mkexpr(shop)),
12425                               mkexpr(oldC) ));
12426                  break;
12427               case BITS4(0,1,1,0): /* SBC */
12428                  assign(res,
12429                         binop(Iop_Sub32,
12430                               binop(Iop_Sub32, mkexpr(rNt), mkexpr(shop)),
12431                               binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
12432                  break;
12433               case BITS4(0,1,1,1): /* RSC */
12434                  assign(res,
12435                         binop(Iop_Sub32,
12436                               binop(Iop_Sub32, mkexpr(shop), mkexpr(rNt)),
12437                               binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
12438                  break;
12439               default:
12440                  vassert(0);
12441            }
12442            // but don't commit it until after we've finished
12443            // all necessary reads from the guest state
12444            // now safe to put the main result
12445            putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
12446            // XXXX!! not safe to read any guest state after
12447            // this point (I think the code below doesn't do that).
12448            if (!bitS)
12449               vassert(shco == IRTemp_INVALID);
12450            /* Update the flags thunk if necessary */
12451            if (bitS) {
12452               vassert(shco != IRTemp_INVALID);
12453               switch (INSN(24,21)) {
12454                  case BITS4(0,1,0,1): /* ADC */
12455                     setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
12456                                        rNt, shop, oldC, condT );
12457                     break;
12458                  case BITS4(0,1,1,0): /* SBC */
12459                     setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
12460                                        rNt, shop, oldC, condT );
12461                     break;
12462                  case BITS4(0,1,1,1): /* RSC */
12463                     setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
12464                                        shop, rNt, oldC, condT );
12465                     break;
12466                  default:
12467                     vassert(0);
12468               }
12469            }
12470            DIP("%s%s%s r%u, r%u, %s\n",
12471                name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
12472            goto decode_success;
12473         }
12474
12475         /* --------- ??? --------- */
12476         default:
12477            break;
12478      }
12479   } /* if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0)) */
12480
12481   /* --------------------- Load/store (ubyte & word) -------- */
12482   // LDR STR LDRB STRB
12483   /*                 31   27   23   19 15 11    6   4 3  # highest bit
12484                        28   24   20 16 12
12485      A5-20   1 | 16  cond 0101 UB0L Rn Rd imm12
12486      A5-22   1 | 32  cond 0111 UB0L Rn Rd imm5  sh2 0 Rm
12487      A5-24   2 | 16  cond 0101 UB1L Rn Rd imm12
12488      A5-26   2 | 32  cond 0111 UB1L Rn Rd imm5  sh2 0 Rm
12489      A5-28   3 | 16  cond 0100 UB0L Rn Rd imm12
12490      A5-32   3 | 32  cond 0110 UB0L Rn Rd imm5  sh2 0 Rm
12491   */
12492   /* case coding:
12493             1   at-ea               (access at ea)
12494             2   at-ea-then-upd      (access at ea, then Rn = ea)
12495             3   at-Rn-then-upd      (access at Rn, then Rn = ea)
12496      ea coding
12497             16  Rn +/- imm12
12498             32  Rn +/- Rm sh2 imm5
12499   */
12500   /* Quickly skip over all of this for hopefully most instructions */
12501   if ((INSN(27,24) & BITS4(1,1,0,0)) != BITS4(0,1,0,0))
12502      goto after_load_store_ubyte_or_word;
12503
12504   summary = 0;
12505
12506   /**/ if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 0) {
12507      summary = 1 | 16;
12508   }
12509   else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 0
12510                                          && INSN(4,4) == 0) {
12511      summary = 1 | 32;
12512   }
12513   else if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 1) {
12514      summary = 2 | 16;
12515   }
12516   else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 1
12517                                          && INSN(4,4) == 0) {
12518      summary = 2 | 32;
12519   }
12520   else if (INSN(27,24) == BITS4(0,1,0,0) && INSN(21,21) == 0) {
12521      summary = 3 | 16;
12522   }
12523   else if (INSN(27,24) == BITS4(0,1,1,0) && INSN(21,21) == 0
12524                                          && INSN(4,4) == 0) {
12525      summary = 3 | 32;
12526   }
12527   else goto after_load_store_ubyte_or_word;
12528
12529   { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
12530     UInt rD = (insn >> 12) & 0xF; /* 15:12 */
12531     UInt rM = (insn >> 0)  & 0xF; /*  3:0  */
12532     UInt bU = (insn >> 23) & 1;      /* 23 */
12533     UInt bB = (insn >> 22) & 1;      /* 22 */
12534     UInt bL = (insn >> 20) & 1;      /* 20 */
12535     UInt imm12 = (insn >> 0) & 0xFFF; /* 11:0 */
12536     UInt imm5  = (insn >> 7) & 0x1F;  /* 11:7 */
12537     UInt sh2   = (insn >> 5) & 3;     /* 6:5 */
12538
12539     /* Skip some invalid cases, which would lead to two competing
12540        updates to the same register, or which are otherwise
12541        disallowed by the spec. */
12542     switch (summary) {
12543        case 1 | 16:
12544           break;
12545        case 1 | 32:
12546           if (rM == 15) goto after_load_store_ubyte_or_word;
12547           break;
12548        case 2 | 16: case 3 | 16:
12549           if (rN == 15) goto after_load_store_ubyte_or_word;
12550           if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
12551           break;
12552        case 2 | 32: case 3 | 32:
12553           if (rM == 15) goto after_load_store_ubyte_or_word;
12554           if (rN == 15) goto after_load_store_ubyte_or_word;
12555           if (rN == rM) goto after_load_store_ubyte_or_word;
12556           if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
12557           break;
12558        default:
12559           vassert(0);
12560     }
12561
12562     /* Now, we can't do a conditional load or store, since that very
12563        likely will generate an exception.  So we have to take a side
12564        exit at this point if the condition is false. */
12565     if (condT != IRTemp_INVALID) {
12566        mk_skip_over_A32_if_cond_is_false( condT );
12567        condT = IRTemp_INVALID;
12568     }
12569     /* Ok, now we're unconditional.  Do the load or store. */
12570
12571     /* compute the effective address.  Bind it to a tmp since we
12572        may need to use it twice. */
12573     IRExpr* eaE = NULL;
12574     switch (summary & 0xF0) {
12575        case 16:
12576           eaE = mk_EA_reg_plusminus_imm12( rN, bU, imm12, dis_buf );
12577           break;
12578        case 32:
12579           eaE = mk_EA_reg_plusminus_shifted_reg( rN, bU, rM, sh2, imm5,
12580                                                  dis_buf );
12581           break;
12582     }
12583     vassert(eaE);
12584     IRTemp eaT = newTemp(Ity_I32);
12585     assign(eaT, eaE);
12586
12587     /* get the old Rn value */
12588     IRTemp rnT = newTemp(Ity_I32);
12589     assign(rnT, getIRegA(rN));
12590
12591     /* decide on the transfer address */
12592     IRTemp taT = IRTemp_INVALID;
12593     switch (summary & 0x0F) {
12594        case 1: case 2: taT = eaT; break;
12595        case 3:         taT = rnT; break;
12596     }
12597     vassert(taT != IRTemp_INVALID);
12598
12599     if (bL == 0) {
12600       /* Store.  If necessary, update the base register before the
12601          store itself, so that the common idiom of "str rX, [sp,
12602          #-4]!" (store rX at sp-4, then do new sp = sp-4, a.k.a "push
12603          rX") doesn't cause Memcheck to complain that the access is
12604          below the stack pointer.  Also, not updating sp before the
12605          store confuses Valgrind's dynamic stack-extending logic.  So
12606          do it before the store.  Hence we need to snarf the store
12607          data before doing the basereg update. */
12608
12609        /* get hold of the data to be stored */
12610        IRTemp rDt = newTemp(Ity_I32);
12611        assign(rDt, getIRegA(rD));
12612
12613        /* Update Rn if necessary. */
12614        switch (summary & 0x0F) {
12615           case 2: case 3:
12616              putIRegA( rN, mkexpr(eaT), IRTemp_INVALID, Ijk_Boring );
12617              break;
12618        }
12619
12620        /* generate the transfer */
12621        if (bB == 0) { // word store
12622           storeLE( mkexpr(taT), mkexpr(rDt) );
12623        } else { // byte store
12624           vassert(bB == 1);
12625           storeLE( mkexpr(taT), unop(Iop_32to8, mkexpr(rDt)) );
12626        }
12627
12628     } else {
12629        /* Load */
12630        vassert(bL == 1);
12631
12632        /* generate the transfer */
12633        if (bB == 0) { // word load
12634           IRTemp jk = Ijk_Boring;
12635           /* According to the Cortex A8 TRM Sec. 5.2.1, LDR(1) with r13 as the
12636               base register and PC as the destination register is a return for
12637               purposes of branch prediction.
12638              The ARM ARM Sec. C9.10.1 further specifies that it must use a
12639               post-increment by immediate addressing mode to be counted in
12640               event 0x0E (Procedure return).*/
12641           if (rN == 13 && summary == (3 | 16) && bB == 0) {
12642              jk = Ijk_Ret;
12643           }
12644           putIRegA( rD, loadLE(Ity_I32, mkexpr(taT)),
12645                     IRTemp_INVALID, jk );
12646        } else { // byte load
12647           vassert(bB == 1);
12648           putIRegA( rD, unop(Iop_8Uto32, loadLE(Ity_I8, mkexpr(taT))),
12649                     IRTemp_INVALID, Ijk_Boring );
12650        }
12651
12652        /* Update Rn if necessary. */
12653        switch (summary & 0x0F) {
12654           case 2: case 3:
12655              // should be assured by logic above:
12656              if (bL == 1)
12657                 vassert(rD != rN); /* since we just wrote rD */
12658              putIRegA( rN, mkexpr(eaT), IRTemp_INVALID, Ijk_Boring );
12659              break;
12660        }
12661     }
12662
12663     switch (summary & 0x0F) {
12664        case 1:  DIP("%sr%s%s r%u, %s\n",
12665                     bL == 0 ? "st" : "ld",
12666                     bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
12667                 break;
12668        case 2:  DIP("%sr%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
12669                     bL == 0 ? "st" : "ld",
12670                     bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
12671                 break;
12672        case 3:  DIP("%sr%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
12673                     bL == 0 ? "st" : "ld",
12674                     bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
12675                 break;
12676        default: vassert(0);
12677     }
12678
12679     /* XXX deal with alignment constraints */
12680
12681     goto decode_success;
12682
12683     /* Complications:
12684
12685        For all loads: if the Amode specifies base register
12686        writeback, and the same register is specified for Rd and Rn,
12687        the results are UNPREDICTABLE.
12688
12689        For all loads and stores: if R15 is written, branch to
12690        that address afterwards.
12691
12692        STRB: straightforward
12693        LDRB: loaded data is zero extended
12694        STR:  lowest 2 bits of address are ignored
12695        LDR:  if the lowest 2 bits of the address are nonzero
12696              then the loaded value is rotated right by 8 * the lowest 2 bits
12697     */
12698   }
12699
12700  after_load_store_ubyte_or_word:
12701
12702   /* --------------------- Load/store (sbyte & hword) -------- */
12703   // LDRH LDRSH STRH LDRSB
12704   /*                 31   27   23   19 15 11   7    3     # highest bit
12705                        28   24   20 16 12    8    4    0
12706      A5-36   1 | 16  cond 0001 U10L Rn Rd im4h 1SH1 im4l
12707      A5-38   1 | 32  cond 0001 U00L Rn Rd 0000 1SH1 Rm
12708      A5-40   2 | 16  cond 0001 U11L Rn Rd im4h 1SH1 im4l
12709      A5-42   2 | 32  cond 0001 U01L Rn Rd 0000 1SH1 Rm
12710      A5-44   3 | 16  cond 0000 U10L Rn Rd im4h 1SH1 im4l
12711      A5-46   3 | 32  cond 0000 U00L Rn Rd 0000 1SH1 Rm
12712   */
12713   /* case coding:
12714             1   at-ea               (access at ea)
12715             2   at-ea-then-upd      (access at ea, then Rn = ea)
12716             3   at-Rn-then-upd      (access at Rn, then Rn = ea)
12717      ea coding
12718             16  Rn +/- imm8
12719             32  Rn +/- Rm
12720   */
12721   /* Quickly skip over all of this for hopefully most instructions */
12722   if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
12723      goto after_load_store_sbyte_or_hword;
12724
12725   /* Check the "1SH1" thing. */
12726   if ((INSN(7,4) & BITS4(1,0,0,1)) != BITS4(1,0,0,1))
12727      goto after_load_store_sbyte_or_hword;
12728
12729   summary = 0;
12730
12731   /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,0)) {
12732      summary = 1 | 16;
12733   }
12734   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,0)) {
12735      summary = 1 | 32;
12736   }
12737   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,1)) {
12738      summary = 2 | 16;
12739   }
12740   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,1)) {
12741      summary = 2 | 32;
12742   }
12743   else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(1,0)) {
12744      summary = 3 | 16;
12745   }
12746   else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(0,0)) {
12747      summary = 3 | 32;
12748   }
12749   else goto after_load_store_sbyte_or_hword;
12750
12751   { UInt rN   = (insn >> 16) & 0xF; /* 19:16 */
12752     UInt rD   = (insn >> 12) & 0xF; /* 15:12 */
12753     UInt rM   = (insn >> 0)  & 0xF; /*  3:0  */
12754     UInt bU   = (insn >> 23) & 1;   /* 23 U=1 offset+, U=0 offset- */
12755     UInt bL   = (insn >> 20) & 1;   /* 20 L=1 load, L=0 store */
12756     UInt bH   = (insn >> 5) & 1;    /* H=1 halfword, H=0 byte */
12757     UInt bS   = (insn >> 6) & 1;    /* S=1 signed, S=0 unsigned */
12758     UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
12759
12760     /* Skip combinations that are either meaningless or already
12761        handled by main word-or-unsigned-byte load-store
12762        instructions. */
12763     if (bS == 0 && bH == 0) /* "unsigned byte" */
12764        goto after_load_store_sbyte_or_hword;
12765     if (bS == 1 && bL == 0) /* "signed store" */
12766        goto after_load_store_sbyte_or_hword;
12767
12768     /* Require 11:8 == 0 for Rn +/- Rm cases */
12769     if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
12770        goto after_load_store_sbyte_or_hword;
12771
12772     /* Skip some invalid cases, which would lead to two competing
12773        updates to the same register, or which are otherwise
12774        disallowed by the spec. */
12775     switch (summary) {
12776        case 1 | 16:
12777           break;
12778        case 1 | 32:
12779           if (rM == 15) goto after_load_store_sbyte_or_hword;
12780           break;
12781        case 2 | 16: case 3 | 16:
12782           if (rN == 15) goto after_load_store_sbyte_or_hword;
12783           if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
12784           break;
12785        case 2 | 32: case 3 | 32:
12786           if (rM == 15) goto after_load_store_sbyte_or_hword;
12787           if (rN == 15) goto after_load_store_sbyte_or_hword;
12788           if (rN == rM) goto after_load_store_sbyte_or_hword;
12789           if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
12790           break;
12791        default:
12792           vassert(0);
12793     }
12794
12795     /* Now, we can't do a conditional load or store, since that very
12796        likely will generate an exception.  So we have to take a side
12797        exit at this point if the condition is false. */
12798     if (condT != IRTemp_INVALID) {
12799        mk_skip_over_A32_if_cond_is_false( condT );
12800        condT = IRTemp_INVALID;
12801     }
12802     /* Ok, now we're unconditional.  Do the load or store. */
12803
12804     /* compute the effective address.  Bind it to a tmp since we
12805        may need to use it twice. */
12806     IRExpr* eaE = NULL;
12807     switch (summary & 0xF0) {
12808        case 16:
12809           eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
12810           break;
12811        case 32:
12812           eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
12813           break;
12814     }
12815     vassert(eaE);
12816     IRTemp eaT = newTemp(Ity_I32);
12817     assign(eaT, eaE);
12818
12819     /* get the old Rn value */
12820     IRTemp rnT = newTemp(Ity_I32);
12821     assign(rnT, getIRegA(rN));
12822
12823     /* decide on the transfer address */
12824     IRTemp taT = IRTemp_INVALID;
12825     switch (summary & 0x0F) {
12826        case 1: case 2: taT = eaT; break;
12827        case 3:         taT = rnT; break;
12828     }
12829     vassert(taT != IRTemp_INVALID);
12830
12831     /* halfword store  H 1  L 0  S 0
12832        uhalf load      H 1  L 1  S 0
12833        shalf load      H 1  L 1  S 1
12834        sbyte load      H 0  L 1  S 1
12835     */
12836     HChar* name = NULL;
12837     /* generate the transfer */
12838     /**/ if (bH == 1 && bL == 0 && bS == 0) { // halfword store
12839        storeLE( mkexpr(taT), unop(Iop_32to16, getIRegA(rD)) );
12840        name = "strh";
12841     }
12842     else if (bH == 1 && bL == 1 && bS == 0) { // uhalf load
12843        putIRegA( rD, unop(Iop_16Uto32, loadLE(Ity_I16, mkexpr(taT))),
12844                  IRTemp_INVALID, Ijk_Boring );
12845        name = "ldrh";
12846     }
12847     else if (bH == 1 && bL == 1 && bS == 1) { // shalf load
12848        putIRegA( rD, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(taT))),
12849                  IRTemp_INVALID, Ijk_Boring );
12850        name = "ldrsh";
12851     }
12852     else if (bH == 0 && bL == 1 && bS == 1) { // sbyte load
12853        putIRegA( rD, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(taT))),
12854                  IRTemp_INVALID, Ijk_Boring );
12855        name = "ldrsb";
12856     }
12857     else
12858        vassert(0); // should be assured by logic above
12859
12860     /* Update Rn if necessary. */
12861     switch (summary & 0x0F) {
12862        case 2: case 3:
12863           // should be assured by logic above:
12864           if (bL == 1)
12865              vassert(rD != rN); /* since we just wrote rD */
12866           putIRegA( rN, mkexpr(eaT), IRTemp_INVALID, Ijk_Boring );
12867           break;
12868     }
12869
12870     switch (summary & 0x0F) {
12871        case 1:  DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
12872                 break;
12873        case 2:  DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
12874                     name, nCC(INSN_COND), rD, dis_buf);
12875                 break;
12876        case 3:  DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
12877                     name, nCC(INSN_COND), rD, dis_buf);
12878                 break;
12879        default: vassert(0);
12880     }
12881
12882     /* XXX deal with alignment constraints */
12883
12884     goto decode_success;
12885
12886     /* Complications:
12887
12888        For all loads: if the Amode specifies base register
12889        writeback, and the same register is specified for Rd and Rn,
12890        the results are UNPREDICTABLE.
12891
12892        For all loads and stores: if R15 is written, branch to
12893        that address afterwards.
12894
12895        Misaligned halfword stores => Unpredictable
12896        Misaligned halfword loads  => Unpredictable
12897     */
12898   }
12899
12900  after_load_store_sbyte_or_hword:
12901
12902   /* --------------------- Load/store multiple -------------- */
12903   // LD/STMIA LD/STMIB LD/STMDA LD/STMDB
12904   // Remarkably complex and difficult to get right
12905   // match 27:20 as 100XX0WL
12906   if (BITS8(1,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,1,0,0))) {
12907      // A5-50 LD/STMIA  cond 1000 10WL Rn RegList
12908      // A5-51 LD/STMIB  cond 1001 10WL Rn RegList
12909      // A5-53 LD/STMDA  cond 1000 00WL Rn RegList
12910      // A5-53 LD/STMDB  cond 1001 00WL Rn RegList
12911      //                   28   24   20 16       0
12912
12913      UInt bINC    = (insn >> 23) & 1;
12914      UInt bBEFORE = (insn >> 24) & 1;
12915
12916      UInt bL      = (insn >> 20) & 1;  /* load=1, store=0 */
12917      UInt bW      = (insn >> 21) & 1;  /* Rn wback=1, no wback=0 */
12918      UInt rN      = (insn >> 16) & 0xF;
12919      UInt regList = insn & 0xFFFF;
12920      /* Skip some invalid cases, which would lead to two competing
12921         updates to the same register, or which are otherwise
12922         disallowed by the spec.  Note the test above has required
12923         that S == 0, since that looks like a kernel-mode only thing.
12924         Done by forcing the real pattern, viz 100XXSWL to actually be
12925         100XX0WL. */
12926      if (rN == 15) goto after_load_store_multiple;
12927      // reglist can't be empty
12928      if (regList == 0) goto after_load_store_multiple;
12929      // if requested to writeback Rn, and this is a load instruction,
12930      // then Rn can't appear in RegList, since we'd have two competing
12931      // new values for Rn.  We do however accept this case for store
12932      // instructions.
12933      if (bW == 1 && bL == 1 && ((1 << rN) & regList) > 0)
12934         goto after_load_store_multiple;
12935
12936      /* Now, we can't do a conditional load or store, since that very
12937         likely will generate an exception.  So we have to take a side
12938         exit at this point if the condition is false. */
12939      if (condT != IRTemp_INVALID) {
12940         mk_skip_over_A32_if_cond_is_false( condT );
12941         condT = IRTemp_INVALID;
12942      }
12943
12944      /* Ok, now we're unconditional.  Generate the IR. */
12945      mk_ldm_stm( True/*arm*/, rN, bINC, bBEFORE, bW, bL, regList );
12946
12947      DIP("%sm%c%c%s r%u%s, {0x%04x}\n",
12948          bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
12949          nCC(INSN_COND),
12950          rN, bW ? "!" : "", regList);
12951
12952      goto decode_success;
12953   }
12954
12955  after_load_store_multiple:
12956
12957   /* --------------------- Control flow --------------------- */
12958   // B, BL (Branch, or Branch-and-Link, to immediate offset)
12959   //
12960   if (BITS8(1,0,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))) {
12961      UInt link   = (insn >> 24) & 1;
12962      UInt uimm24 = insn & ((1<<24)-1);
12963      Int  simm24 = (Int)uimm24;
12964      UInt dst    = guest_R15_curr_instr_notENC + 8
12965                    + (((simm24 << 8) >> 8) << 2);
12966      IRJumpKind jk = link ? Ijk_Call : Ijk_Boring;
12967      if (link) {
12968         putIRegA(14, mkU32(guest_R15_curr_instr_notENC + 4),
12969                      condT, Ijk_Boring);
12970      }
12971      if (condT == IRTemp_INVALID) {
12972         /* unconditional transfer to 'dst'.  See if we can simply
12973            continue tracing at the destination. */
12974         if (resteerOkFn( callback_opaque, (Addr64)dst )) {
12975            /* yes */
12976            dres.whatNext   = Dis_ResteerU;
12977            dres.continueAt = (Addr64)dst;
12978         } else {
12979            /* no; terminate the SB at this point. */
12980            irsb->next     = mkU32(dst);
12981            irsb->jumpkind = jk;
12982            dres.whatNext  = Dis_StopHere;
12983         }
12984         DIP("b%s 0x%x\n", link ? "l" : "", dst);
12985      } else {
12986         /* conditional transfer to 'dst' */
12987         HChar* comment = "";
12988
12989         /* First see if we can do some speculative chasing into one
12990            arm or the other.  Be conservative and only chase if
12991            !link, that is, this is a normal conditional branch to a
12992            known destination. */
12993         if (!link
12994             && resteerCisOk
12995             && vex_control.guest_chase_cond
12996             && dst < guest_R15_curr_instr_notENC
12997             && resteerOkFn( callback_opaque, (Addr64)(Addr32)dst) ) {
12998            /* Speculation: assume this backward branch is taken.  So
12999               we need to emit a side-exit to the insn following this
13000               one, on the negation of the condition, and continue at
13001               the branch target address (dst). */
13002            stmt( IRStmt_Exit( unop(Iop_Not1,
13003                                    unop(Iop_32to1, mkexpr(condT))),
13004                               Ijk_Boring,
13005                               IRConst_U32(guest_R15_curr_instr_notENC+4) ));
13006            dres.whatNext   = Dis_ResteerC;
13007            dres.continueAt = (Addr64)(Addr32)dst;
13008            comment = "(assumed taken)";
13009         }
13010         else
13011         if (!link
13012             && resteerCisOk
13013             && vex_control.guest_chase_cond
13014             && dst >= guest_R15_curr_instr_notENC
13015             && resteerOkFn( callback_opaque,
13016                             (Addr64)(Addr32)
13017                                     (guest_R15_curr_instr_notENC+4)) ) {
13018            /* Speculation: assume this forward branch is not taken.
13019               So we need to emit a side-exit to dst (the dest) and
13020               continue disassembling at the insn immediately
13021               following this one. */
13022            stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
13023                               Ijk_Boring,
13024                               IRConst_U32(dst) ));
13025            dres.whatNext   = Dis_ResteerC;
13026            dres.continueAt = (Addr64)(Addr32)
13027                                      (guest_R15_curr_instr_notENC+4);
13028            comment = "(assumed not taken)";
13029         }
13030         else {
13031            /* Conservative default translation - end the block at
13032               this point. */
13033            stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
13034                               jk, IRConst_U32(dst) ));
13035            irsb->next     = mkU32(guest_R15_curr_instr_notENC + 4);
13036            irsb->jumpkind = Ijk_Boring;
13037            dres.whatNext  = Dis_StopHere;
13038         }
13039         DIP("b%s%s 0x%x %s\n", link ? "l" : "", nCC(INSN_COND),
13040             dst, comment);
13041      }
13042      goto decode_success;
13043   }
13044
   // BX, BLX (Branch-and-exchange, optionally with link, to a register)
   // NB: interworking branch
13047   if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
13048       && INSN(19,12) == BITS8(1,1,1,1,1,1,1,1)
13049       && (INSN(11,4) == BITS8(1,1,1,1,0,0,1,1)
13050           || INSN(11,4) == BITS8(1,1,1,1,0,0,0,1))) {
13051      IRTemp  dst = newTemp(Ity_I32);
13052      UInt    link = (INSN(11,4) >> 1) & 1;
13053      UInt    rM   = INSN(3,0);
13054      // we don't decode the case (link && rM == 15), as that's
13055      // Unpredictable.
13056      if (!(link && rM == 15)) {
13057         if (condT != IRTemp_INVALID) {
13058            mk_skip_over_A32_if_cond_is_false( condT );
13059         }
13060         // rM contains an interworking address exactly as we require
13061         // (with continuation CPSR.T in bit 0), so we can use it
13062         // as-is, with no masking.
13063         assign( dst, getIRegA(rM) );
13064         if (link) {
13065            putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
13066                      IRTemp_INVALID/*because AL*/, Ijk_Boring );
13067         }
13068         irsb->next     = mkexpr(dst);
13069         irsb->jumpkind = link ? Ijk_Call
13070                               : (rM == 14 ? Ijk_Ret : Ijk_Boring);
13071         dres.whatNext  = Dis_StopHere;
13072         if (condT == IRTemp_INVALID) {
13073            DIP("b%sx r%u\n", link ? "l" : "", rM);
13074         } else {
13075            DIP("b%sx%s r%u\n", link ? "l" : "", nCC(INSN_COND), rM);
13076         }
13077         goto decode_success;
13078      }
13079      /* else: (link && rM == 15): just fall through */
13080   }
13081
13082   /* --- NB: ARM interworking branches are in NV space, hence
13083      are handled elsewhere by decode_NV_instruction.
13084      ---
13085   */
13086
13087   /* --------------------- Clz --------------------- */
13088   // CLZ
13089   if (INSN(27,20) == BITS8(0,0,0,1,0,1,1,0)
13090       && INSN(19,16) == BITS4(1,1,1,1)
13091       && INSN(11,4) == BITS8(1,1,1,1,0,0,0,1)) {
13092      UInt rD = INSN(15,12);
13093      UInt rM = INSN(3,0);
13094      IRTemp arg = newTemp(Ity_I32);
13095      IRTemp res = newTemp(Ity_I32);
13096      assign(arg, getIRegA(rM));
13097      assign(res, IRExpr_Mux0X(
13098                     unop(Iop_1Uto8,binop(Iop_CmpEQ32, mkexpr(arg),
13099                                                       mkU32(0))),
13100                     unop(Iop_Clz32, mkexpr(arg)),
13101                     mkU32(32)
13102            ));
13103      putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
13104      DIP("clz%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
13105      goto decode_success;
13106   }
13107
13108   /* --------------------- Mul etc --------------------- */
13109   // MUL
13110   if (BITS8(0,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
13111       && INSN(15,12) == BITS4(0,0,0,0)
13112       && INSN(7,4) == BITS4(1,0,0,1)) {
13113      UInt bitS = (insn >> 20) & 1; /* 20:20 */
13114      UInt rD = INSN(19,16);
13115      UInt rS = INSN(11,8);
13116      UInt rM = INSN(3,0);
13117      if (rD == 15 || rM == 15 || rS == 15) {
13118         /* Unpredictable; don't decode; fall through */
13119      } else {
13120         IRTemp argL = newTemp(Ity_I32);
13121         IRTemp argR = newTemp(Ity_I32);
13122         IRTemp res  = newTemp(Ity_I32);
13123         IRTemp oldC = IRTemp_INVALID;
13124         IRTemp oldV = IRTemp_INVALID;
13125         assign( argL, getIRegA(rM));
13126         assign( argR, getIRegA(rS));
13127         assign( res, binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) );
13128         if (bitS) {
13129            oldC = newTemp(Ity_I32);
13130            assign(oldC, mk_armg_calculate_flag_c());
13131            oldV = newTemp(Ity_I32);
13132            assign(oldV, mk_armg_calculate_flag_v());
13133         }
13134         // now update guest state
13135         putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
13136         if (bitS) {
13137            IRTemp pair = newTemp(Ity_I32);
13138            assign( pair, binop(Iop_Or32,
13139                                binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
13140                                mkexpr(oldV)) );
13141            setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
13142         }
13143         DIP("mul%c%s r%u, r%u, r%u\n",
13144             bitS ? 's' : ' ', nCC(INSN_COND), rD, rM, rS);
13145         goto decode_success;
13146      }
13147      /* fall through */
13148   }
13149
13150   // MLA, MLS
13151   if (BITS8(0,0,0,0,0,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
13152       && INSN(7,4) == BITS4(1,0,0,1)) {
13153      UInt bitS  = (insn >> 20) & 1; /* 20:20 */
13154      UInt isMLS = (insn >> 22) & 1; /* 22:22 */
13155      UInt rD = INSN(19,16);
13156      UInt rN = INSN(15,12);
13157      UInt rS = INSN(11,8);
13158      UInt rM = INSN(3,0);
13159      if (bitS == 1 && isMLS == 1) {
13160         /* This isn't allowed (MLS that sets flags).  don't decode;
13161            fall through */
13162      }
13163      else
13164      if (rD == 15 || rM == 15 || rS == 15 || rN == 15) {
13165         /* Unpredictable; don't decode; fall through */
13166      } else {
13167         IRTemp argL = newTemp(Ity_I32);
13168         IRTemp argR = newTemp(Ity_I32);
13169         IRTemp argP = newTemp(Ity_I32);
13170         IRTemp res  = newTemp(Ity_I32);
13171         IRTemp oldC = IRTemp_INVALID;
13172         IRTemp oldV = IRTemp_INVALID;
13173         assign( argL, getIRegA(rM));
13174         assign( argR, getIRegA(rS));
13175         assign( argP, getIRegA(rN));
13176         assign( res, binop(isMLS ? Iop_Sub32 : Iop_Add32,
13177                            mkexpr(argP),
13178                            binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) ));
13179         if (bitS) {
13180            vassert(!isMLS); // guaranteed above
13181            oldC = newTemp(Ity_I32);
13182            assign(oldC, mk_armg_calculate_flag_c());
13183            oldV = newTemp(Ity_I32);
13184            assign(oldV, mk_armg_calculate_flag_v());
13185         }
13186         // now update guest state
13187         putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
13188         if (bitS) {
13189            IRTemp pair = newTemp(Ity_I32);
13190            assign( pair, binop(Iop_Or32,
13191                                binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
13192                                mkexpr(oldV)) );
13193            setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
13194         }
13195         DIP("ml%c%c%s r%u, r%u, r%u, r%u\n",
13196             isMLS ? 's' : 'a', bitS ? 's' : ' ',
13197             nCC(INSN_COND), rD, rM, rS, rN);
13198         goto decode_success;
13199      }
13200      /* fall through */
13201   }
13202
13203   // SMULL, UMULL
13204   if (BITS8(0,0,0,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
13205       && INSN(7,4) == BITS4(1,0,0,1)) {
13206      UInt bitS = (insn >> 20) & 1; /* 20:20 */
13207      UInt rDhi = INSN(19,16);
13208      UInt rDlo = INSN(15,12);
13209      UInt rS   = INSN(11,8);
13210      UInt rM   = INSN(3,0);
13211      UInt isS  = (INSN(27,20) >> 2) & 1; /* 22:22 */
13212      if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo)  {
13213         /* Unpredictable; don't decode; fall through */
13214      } else {
13215         IRTemp argL  = newTemp(Ity_I32);
13216         IRTemp argR  = newTemp(Ity_I32);
13217         IRTemp res   = newTemp(Ity_I64);
13218         IRTemp resHi = newTemp(Ity_I32);
13219         IRTemp resLo = newTemp(Ity_I32);
13220         IRTemp oldC  = IRTemp_INVALID;
13221         IRTemp oldV  = IRTemp_INVALID;
13222         IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
13223         assign( argL, getIRegA(rM));
13224         assign( argR, getIRegA(rS));
13225         assign( res, binop(mulOp, mkexpr(argL), mkexpr(argR)) );
13226         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
13227         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
13228         if (bitS) {
13229            oldC = newTemp(Ity_I32);
13230            assign(oldC, mk_armg_calculate_flag_c());
13231            oldV = newTemp(Ity_I32);
13232            assign(oldV, mk_armg_calculate_flag_v());
13233         }
13234         // now update guest state
13235         putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
13236         putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
13237         if (bitS) {
13238            IRTemp pair = newTemp(Ity_I32);
13239            assign( pair, binop(Iop_Or32,
13240                                binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
13241                                mkexpr(oldV)) );
13242            setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
13243         }
13244         DIP("%cmull%c%s r%u, r%u, r%u, r%u\n",
13245             isS ? 's' : 'u', bitS ? 's' : ' ',
13246             nCC(INSN_COND), rDlo, rDhi, rM, rS);
13247         goto decode_success;
13248      }
13249      /* fall through */
13250   }
13251
13252   // SMLAL, UMLAL
13253   if (BITS8(0,0,0,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
13254       && INSN(7,4) == BITS4(1,0,0,1)) {
13255      UInt bitS = (insn >> 20) & 1; /* 20:20 */
13256      UInt rDhi = INSN(19,16);
13257      UInt rDlo = INSN(15,12);
13258      UInt rS   = INSN(11,8);
13259      UInt rM   = INSN(3,0);
13260      UInt isS  = (INSN(27,20) >> 2) & 1; /* 22:22 */
13261      if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo)  {
13262         /* Unpredictable; don't decode; fall through */
13263      } else {
13264         IRTemp argL  = newTemp(Ity_I32);
13265         IRTemp argR  = newTemp(Ity_I32);
13266         IRTemp old   = newTemp(Ity_I64);
13267         IRTemp res   = newTemp(Ity_I64);
13268         IRTemp resHi = newTemp(Ity_I32);
13269         IRTemp resLo = newTemp(Ity_I32);
13270         IRTemp oldC  = IRTemp_INVALID;
13271         IRTemp oldV  = IRTemp_INVALID;
13272         IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
13273         assign( argL, getIRegA(rM));
13274         assign( argR, getIRegA(rS));
13275         assign( old, binop(Iop_32HLto64, getIRegA(rDhi), getIRegA(rDlo)) );
13276         assign( res, binop(Iop_Add64,
13277                            mkexpr(old),
13278                            binop(mulOp, mkexpr(argL), mkexpr(argR))) );
13279         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
13280         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
13281         if (bitS) {
13282            oldC = newTemp(Ity_I32);
13283            assign(oldC, mk_armg_calculate_flag_c());
13284            oldV = newTemp(Ity_I32);
13285            assign(oldV, mk_armg_calculate_flag_v());
13286         }
13287         // now update guest state
13288         putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
13289         putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
13290         if (bitS) {
13291            IRTemp pair = newTemp(Ity_I32);
13292            assign( pair, binop(Iop_Or32,
13293                                binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
13294                                mkexpr(oldV)) );
13295            setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
13296         }
13297         DIP("%cmlal%c%s r%u, r%u, r%u, r%u\n",
13298             isS ? 's' : 'u', bitS ? 's' : ' ', nCC(INSN_COND),
13299             rDlo, rDhi, rM, rS);
13300         goto decode_success;
13301      }
13302      /* fall through */
13303   }
13304
13305   /* --------------------- Msr etc --------------------- */
13306
13307   // MSR apsr, #imm
13308   if (INSN(27,20) == BITS8(0,0,1,1,0,0,1,0)
13309       && INSN(17,12) == BITS6(0,0,1,1,1,1)) {
13310      UInt write_ge    = INSN(18,18);
13311      UInt write_nzcvq = INSN(19,19);
13312      if (write_nzcvq || write_ge) {
13313         UInt   imm = (INSN(11,0) >> 0) & 0xFF;
13314         UInt   rot = 2 * ((INSN(11,0) >> 8) & 0xF);
13315         IRTemp immT = newTemp(Ity_I32);
13316         vassert(rot <= 30);
13317         imm = ROR32(imm, rot);
13318         assign(immT, mkU32(imm));
13319         desynthesise_APSR( write_nzcvq, write_ge, immT, condT );
13320         DIP("msr%s cpsr%s%sf, #0x%08x\n", nCC(INSN_COND),
13321             write_nzcvq ? "f" : "", write_ge ? "g" : "", imm);
13322         goto decode_success;
13323      }
13324      /* fall through */
13325   }
13326
13327   // MSR apsr, reg
13328   if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
13329       && INSN(17,12) == BITS6(0,0,1,1,1,1)
13330       && INSN(11,4) == BITS8(0,0,0,0,0,0,0,0)) {
13331      UInt rN          = INSN(3,0);
13332      UInt write_ge    = INSN(18,18);
13333      UInt write_nzcvq = INSN(19,19);
13334      if (rN != 15 && (write_nzcvq || write_ge)) {
13335         IRTemp rNt = newTemp(Ity_I32);
13336         assign(rNt, getIRegA(rN));
13337         desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
13338         DIP("msr%s cpsr_%s%s, r%u\n", nCC(INSN_COND),
13339             write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
13340         goto decode_success;
13341      }
13342      /* fall through */
13343   }
13344
13345   // MRS rD, cpsr
13346   if ((insn & 0x0FFF0FFF) == 0x010F0000) {
13347      UInt rD   = INSN(15,12);
13348      if (rD != 15) {
13349         IRTemp apsr = synthesise_APSR();
13350         putIRegA( rD, mkexpr(apsr), condT, Ijk_Boring );
13351         DIP("mrs%s r%u, cpsr\n", nCC(INSN_COND), rD);
13352         goto decode_success;
13353      }
13354      /* fall through */
13355   }
13356
13357   /* --------------------- Svc --------------------- */
13358   if (BITS8(1,1,1,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))) {
13359      UInt imm24 = (insn >> 0) & 0xFFFFFF;
13360      if (imm24 == 0) {
13361         /* A syscall.  We can't do this conditionally, hence: */
13362         if (condT != IRTemp_INVALID) {
13363            mk_skip_over_A32_if_cond_is_false( condT );
13364         }
13365         // AL after here
13366         irsb->next     = mkU32( guest_R15_curr_instr_notENC + 4 );
13367         irsb->jumpkind = Ijk_Sys_syscall;
13368         dres.whatNext  = Dis_StopHere;
13369         DIP("svc%s #0x%08x\n", nCC(INSN_COND), imm24);
13370         goto decode_success;
13371      }
13372      /* fall through */
13373   }
13374
13375   /* ------------------------ swp ------------------------ */
13376
13377   // SWP, SWPB
13378   if (BITS8(0,0,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
13379       && BITS4(0,0,0,0) == INSN(11,8)
13380       && BITS4(1,0,0,1) == INSN(7,4)) {
13381      UInt   rN   = INSN(19,16);
13382      UInt   rD   = INSN(15,12);
13383      UInt   rM   = INSN(3,0);
13384      IRTemp tRn  = newTemp(Ity_I32);
13385      IRTemp tNew = newTemp(Ity_I32);
13386      IRTemp tOld = IRTemp_INVALID;
13387      IRTemp tSC1 = newTemp(Ity_I1);
13388      UInt   isB  = (insn >> 22) & 1;
13389
13390      if (rD == 15 || rN == 15 || rM == 15 || rN == rM || rN == rD) {
13391         /* undecodable; fall through */
13392      } else {
13393         /* make unconditional */
13394         if (condT != IRTemp_INVALID) {
13395            mk_skip_over_A32_if_cond_is_false( condT );
13396            condT = IRTemp_INVALID;
13397         }
13398         /* Ok, now we're unconditional.  Generate a LL-SC loop. */
13399         assign(tRn, getIRegA(rN));
13400         assign(tNew, getIRegA(rM));
13401         if (isB) {
13402            /* swpb */
13403            tOld = newTemp(Ity_I8);
13404            stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
13405                              NULL/*=>isLL*/) );
13406            stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
13407                              unop(Iop_32to8, mkexpr(tNew))) );
13408         } else {
13409            /* swp */
13410            tOld = newTemp(Ity_I32);
13411            stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
13412                              NULL/*=>isLL*/) );
13413            stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
13414                              mkexpr(tNew)) );
13415         }
13416         stmt( IRStmt_Exit(unop(Iop_Not1, mkexpr(tSC1)),
13417                           /*Ijk_NoRedir*/Ijk_Boring,
13418                           IRConst_U32(guest_R15_curr_instr_notENC)) );
13419         putIRegA(rD, isB ? unop(Iop_8Uto32, mkexpr(tOld)) : mkexpr(tOld),
13420                      IRTemp_INVALID, Ijk_Boring);
13421         DIP("swp%s%s r%u, r%u, [r%u]\n",
13422             isB ? "b" : "", nCC(INSN_COND), rD, rM, rN);
13423         goto decode_success;
13424      }
13425      /* fall through */
13426   }
13427
13428   /* ----------------------------------------------------------- */
13429   /* -- ARMv6 instructions                                    -- */
13430   /* ----------------------------------------------------------- */
13431
13432   /* ------------------- {ldr,str}ex{,b,h,d} ------------------- */
13433
13434   // LDREXD, LDREX, LDREXH, LDREXB
13435   if (0x01900F9F == (insn & 0x0F900FFF)) {
13436      UInt   rT    = INSN(15,12);
13437      UInt   rN    = INSN(19,16);
13438      IRType ty    = Ity_INVALID;
13439      IROp   widen = Iop_INVALID;
13440      HChar* nm    = NULL;
13441      Bool   valid = True;
13442      switch (INSN(22,21)) {
13443         case 0: nm = "";  ty = Ity_I32; break;
13444         case 1: nm = "d"; ty = Ity_I64; break;
13445         case 2: nm = "b"; ty = Ity_I8;  widen = Iop_8Uto32; break;
13446         case 3: nm = "h"; ty = Ity_I16; widen = Iop_16Uto32; break;
13447         default: vassert(0);
13448      }
13449      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
13450         if (rT == 15 || rN == 15)
13451            valid = False;
13452      } else {
13453         vassert(ty == Ity_I64);
13454         if ((rT & 1) == 1 || rT == 14 || rN == 15)
13455            valid = False;
13456      }
13457      if (valid) {
13458         IRTemp res;
13459         /* make unconditional */
13460         if (condT != IRTemp_INVALID) {
13461           mk_skip_over_A32_if_cond_is_false( condT );
13462           condT = IRTemp_INVALID;
13463         }
13464         /* Ok, now we're unconditional.  Do the load. */
13465         res = newTemp(ty);
13466         // FIXME: assumes little-endian guest
13467         stmt( IRStmt_LLSC(Iend_LE, res, getIRegA(rN),
13468                           NULL/*this is a load*/) );
13469         if (ty == Ity_I64) {
13470            // FIXME: assumes little-endian guest
13471            putIRegA(rT+0, unop(Iop_64to32, mkexpr(res)),
13472                           IRTemp_INVALID, Ijk_Boring);
13473            putIRegA(rT+1, unop(Iop_64HIto32, mkexpr(res)),
13474                           IRTemp_INVALID, Ijk_Boring);
13475            DIP("ldrex%s%s r%u, r%u, [r%u]\n",
13476                nm, nCC(INSN_COND), rT+0, rT+1, rN);
13477         } else {
13478            putIRegA(rT, widen == Iop_INVALID
13479                            ? mkexpr(res) : unop(widen, mkexpr(res)),
13480                     IRTemp_INVALID, Ijk_Boring);
13481            DIP("ldrex%s%s r%u, [r%u]\n", nm, nCC(INSN_COND), rT, rN);
13482         }
13483         goto decode_success;
13484      }
13485      /* undecodable; fall through */
13486   }
13487
13488   // STREXD, STREX, STREXH, STREXB
13489   if (0x01800F90 == (insn & 0x0F900FF0)) {
13490      UInt   rT     = INSN(3,0);
13491      UInt   rN     = INSN(19,16);
13492      UInt   rD     = INSN(15,12);
13493      IRType ty     = Ity_INVALID;
13494      IROp   narrow = Iop_INVALID;
13495      HChar* nm     = NULL;
13496      Bool   valid  = True;
13497      switch (INSN(22,21)) {
13498         case 0: nm = "";  ty = Ity_I32; break;
13499         case 1: nm = "d"; ty = Ity_I64; break;
13500         case 2: nm = "b"; ty = Ity_I8;  narrow = Iop_32to8; break;
13501         case 3: nm = "h"; ty = Ity_I16; narrow = Iop_32to16; break;
13502         default: vassert(0);
13503      }
13504      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
13505         if (rD == 15 || rN == 15 || rT == 15
13506             || rD == rN || rD == rT)
13507            valid = False;
13508      } else {
13509         vassert(ty == Ity_I64);
13510         if (rD == 15 || (rT & 1) == 1 || rT == 14 || rN == 15
13511             || rD == rN || rD == rT || rD == rT+1)
13512            valid = False;
13513      }
13514      if (valid) {
13515         IRTemp resSC1, resSC32, data;
13516         /* make unconditional */
13517         if (condT != IRTemp_INVALID) {
13518            mk_skip_over_A32_if_cond_is_false( condT );
13519            condT = IRTemp_INVALID;
13520         }
13521         /* Ok, now we're unconditional.  Do the store. */
13522         data = newTemp(ty);
13523         assign(data,
13524                ty == Ity_I64
13525                   // FIXME: assumes little-endian guest
13526                   ? binop(Iop_32HLto64, getIRegA(rT+1), getIRegA(rT+0))
13527                   : narrow == Iop_INVALID
13528                      ? getIRegA(rT)
13529                      : unop(narrow, getIRegA(rT)));
13530         resSC1 = newTemp(Ity_I1);
13531         // FIXME: assumes little-endian guest
13532         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegA(rN), mkexpr(data)) );
13533
13534         /* Set rD to 1 on failure, 0 on success.  Currently we have
13535            resSC1 == 0 on failure, 1 on success. */
13536         resSC32 = newTemp(Ity_I32);
13537         assign(resSC32,
13538                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
13539
13540         putIRegA(rD, mkexpr(resSC32),
13541                      IRTemp_INVALID, Ijk_Boring);
13542         if (ty == Ity_I64) {
13543            DIP("strex%s%s r%u, r%u, r%u, [r%u]\n",
13544                nm, nCC(INSN_COND), rD, rT, rT+1, rN);
13545         } else {
13546            DIP("strex%s%s r%u, r%u, [r%u]\n",
13547                nm, nCC(INSN_COND), rD, rT, rN);
13548         }
13549         goto decode_success;
13550      }
13551      /* fall through */
13552   }
13553
13554   /* --------------------- movw, movt --------------------- */
13555   if (0x03000000 == (insn & 0x0FF00000)
13556       || 0x03400000 == (insn & 0x0FF00000)) /* pray for CSE */ {
13557      UInt rD    = INSN(15,12);
13558      UInt imm16 = (insn & 0xFFF) | ((insn >> 4) & 0x0000F000);
13559      UInt isT   = (insn >> 22) & 1;
13560      if (rD == 15) {
13561         /* forget it */
13562      } else {
13563         if (isT) {
13564            putIRegA(rD,
13565                     binop(Iop_Or32,
13566                           binop(Iop_And32, getIRegA(rD), mkU32(0xFFFF)),
13567                           mkU32(imm16 << 16)),
13568                     condT, Ijk_Boring);
13569            DIP("movt%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
13570            goto decode_success;
13571         } else {
13572            putIRegA(rD, mkU32(imm16), condT, Ijk_Boring);
13573            DIP("movw%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
13574            goto decode_success;
13575         }
13576      }
13577      /* fall through */
13578   }
13579
13580   /* ----------- uxtb, sxtb, uxth, sxth, uxtb16, sxtb16 ----------- */
13581   /* FIXME: this is an exact duplicate of the Thumb version.  They
13582      should be commoned up. */
13583   if (BITS8(0,1,1,0,1, 0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,0))
13584       && BITS4(1,1,1,1) == INSN(19,16)
13585       && BITS4(0,1,1,1) == INSN(7,4)
13586       && BITS4(0,0, 0,0) == (INSN(11,8) & BITS4(0,0,1,1))) {
13587      UInt subopc = INSN(27,20) & BITS8(0,0,0,0,0, 1,1,1);
13588      if (subopc != BITS4(0,0,0,1) && subopc != BITS4(0,1,0,1)) {
13589         Int    rot  = (INSN(11,8) >> 2) & 3;
13590         UInt   rM   = INSN(3,0);
13591         UInt   rD   = INSN(15,12);
13592         IRTemp srcT = newTemp(Ity_I32);
13593         IRTemp rotT = newTemp(Ity_I32);
13594         IRTemp dstT = newTemp(Ity_I32);
13595         HChar* nm   = "???";
13596         assign(srcT, getIRegA(rM));
13597         assign(rotT, genROR32(srcT, 8 * rot)); /* 0, 8, 16 or 24 only */
13598         switch (subopc) {
13599            case BITS4(0,1,1,0): // UXTB
13600               assign(dstT, unop(Iop_8Uto32, unop(Iop_32to8, mkexpr(rotT))));
13601               nm = "uxtb";
13602               break;
13603            case BITS4(0,0,1,0): // SXTB
13604               assign(dstT, unop(Iop_8Sto32, unop(Iop_32to8, mkexpr(rotT))));
13605               nm = "sxtb";
13606               break;
13607            case BITS4(0,1,1,1): // UXTH
13608               assign(dstT, unop(Iop_16Uto32, unop(Iop_32to16, mkexpr(rotT))));
13609               nm = "uxth";
13610               break;
13611            case BITS4(0,0,1,1): // SXTH
13612               assign(dstT, unop(Iop_16Sto32, unop(Iop_32to16, mkexpr(rotT))));
13613               nm = "sxth";
13614               break;
13615            case BITS4(0,1,0,0): // UXTB16
13616               assign(dstT, binop(Iop_And32, mkexpr(rotT), mkU32(0x00FF00FF)));
13617               nm = "uxtb16";
13618               break;
13619            case BITS4(0,0,0,0): { // SXTB16
13620               IRTemp lo32 = newTemp(Ity_I32);
13621               IRTemp hi32 = newTemp(Ity_I32);
13622               assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
13623               assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
13624               assign(
13625                  dstT,
13626                  binop(Iop_Or32,
13627                        binop(Iop_And32,
13628                              unop(Iop_8Sto32,
13629                                   unop(Iop_32to8, mkexpr(lo32))),
13630                              mkU32(0xFFFF)),
13631                        binop(Iop_Shl32,
13632                              unop(Iop_8Sto32,
13633                                   unop(Iop_32to8, mkexpr(hi32))),
13634                              mkU8(16))
13635               ));
13636               nm = "sxtb16";
13637               break;
13638            }
13639            default:
13640               vassert(0); // guarded by "if" above
13641         }
13642         putIRegA(rD, mkexpr(dstT), condT, Ijk_Boring);
13643         DIP("%s%s r%u, r%u, ROR #%u\n", nm, nCC(INSN_COND), rD, rM, rot);
13644         goto decode_success;
13645      }
13646      /* fall through */
13647   }
13648
13649   /* ------------------- bfi, bfc ------------------- */
13650   if (BITS8(0,1,1,1,1,1,0, 0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
13651       && BITS4(0, 0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
13652      UInt rD  = INSN(15,12);
13653      UInt rN  = INSN(3,0);
13654      UInt msb = (insn >> 16) & 0x1F; /* 20:16 */
13655      UInt lsb = (insn >> 7) & 0x1F;  /* 11:7 */
13656      if (rD == 15 || msb < lsb) {
13657         /* undecodable; fall through */
13658      } else {
13659         IRTemp src    = newTemp(Ity_I32);
13660         IRTemp olddst = newTemp(Ity_I32);
13661         IRTemp newdst = newTemp(Ity_I32);
13662         UInt   mask = 1 << (msb - lsb);
13663         mask = (mask - 1) + mask;
13664         vassert(mask != 0); // guaranteed by "msb < lsb" check above
13665         mask <<= lsb;
13666
13667         assign(src, rN == 15 ? mkU32(0) : getIRegA(rN));
13668         assign(olddst, getIRegA(rD));
13669         assign(newdst,
13670                binop(Iop_Or32,
13671                   binop(Iop_And32,
13672                         binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
13673                         mkU32(mask)),
13674                   binop(Iop_And32,
13675                         mkexpr(olddst),
13676                         mkU32(~mask)))
13677               );
13678
13679         putIRegA(rD, mkexpr(newdst), condT, Ijk_Boring);
13680
13681         if (rN == 15) {
13682            DIP("bfc%s r%u, #%u, #%u\n",
13683                nCC(INSN_COND), rD, lsb, msb-lsb+1);
13684         } else {
13685            DIP("bfi%s r%u, r%u, #%u, #%u\n",
13686                nCC(INSN_COND), rD, rN, lsb, msb-lsb+1);
13687         }
13688         goto decode_success;
13689      }
13690      /* fall through */
13691   }
13692
13693   /* ------------------- {u,s}bfx ------------------- */
13694   if (BITS8(0,1,1,1,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
13695       && BITS4(0,1,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
13696      UInt rD  = INSN(15,12);
13697      UInt rN  = INSN(3,0);
13698      UInt wm1 = (insn >> 16) & 0x1F; /* 20:16 */
13699      UInt lsb = (insn >> 7) & 0x1F;  /* 11:7 */
13700      UInt msb = lsb + wm1;
13701      UInt isU = (insn >> 22) & 1;    /* 22:22 */
13702      if (rD == 15 || rN == 15 || msb >= 32) {
13703         /* undecodable; fall through */
13704      } else {
13705         IRTemp src  = newTemp(Ity_I32);
13706         IRTemp tmp  = newTemp(Ity_I32);
13707         IRTemp res  = newTemp(Ity_I32);
13708         UInt   mask = ((1 << wm1) - 1) + (1 << wm1);
13709         vassert(msb >= 0 && msb <= 31);
13710         vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
13711
13712         assign(src, getIRegA(rN));
13713         assign(tmp, binop(Iop_And32,
13714                           binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
13715                           mkU32(mask)));
13716         assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
13717                           binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
13718                           mkU8(31-wm1)));
13719
13720         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
13721
13722         DIP("%s%s r%u, r%u, #%u, #%u\n",
13723             isU ? "ubfx" : "sbfx",
13724             nCC(INSN_COND), rD, rN, lsb, wm1 + 1);
13725         goto decode_success;
13726      }
13727      /* fall through */
13728   }
13729
13730   /* --------------------- Load/store doubleword ------------- */
13731   // LDRD STRD
13732   /*                 31   27   23   19 15 11   7    3     # highest bit
13733                        28   24   20 16 12    8    4    0
13734      A5-36   1 | 16  cond 0001 U100 Rn Rd im4h 11S1 im4l
13735      A5-38   1 | 32  cond 0001 U000 Rn Rd 0000 11S1 Rm
13736      A5-40   2 | 16  cond 0001 U110 Rn Rd im4h 11S1 im4l
13737      A5-42   2 | 32  cond 0001 U010 Rn Rd 0000 11S1 Rm
13738      A5-44   3 | 16  cond 0000 U100 Rn Rd im4h 11S1 im4l
13739      A5-46   3 | 32  cond 0000 U000 Rn Rd 0000 11S1 Rm
13740   */
13741   /* case coding:
13742             1   at-ea               (access at ea)
13743             2   at-ea-then-upd      (access at ea, then Rn = ea)
13744             3   at-Rn-then-upd      (access at Rn, then Rn = ea)
13745      ea coding
13746             16  Rn +/- imm8
13747             32  Rn +/- Rm
13748   */
13749   /* Quickly skip over all of this for hopefully most instructions */
13750   if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
13751      goto after_load_store_doubleword;
13752
13753   /* Check the "11S1" thing. */
13754   if ((INSN(7,4) & BITS4(1,1,0,1)) != BITS4(1,1,0,1))
13755      goto after_load_store_doubleword;
13756
13757   summary = 0;
13758
13759   /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,0,0)) {
13760      summary = 1 | 16;
13761   }
13762   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,0,0)) {
13763      summary = 1 | 32;
13764   }
13765   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,1,0)) {
13766      summary = 2 | 16;
13767   }
13768   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,1,0)) {
13769      summary = 2 | 32;
13770   }
13771   else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(1,0,0)) {
13772      summary = 3 | 16;
13773   }
13774   else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(0,0,0)) {
13775      summary = 3 | 32;
13776   }
13777   else goto after_load_store_doubleword;
13778
13779   { UInt rN   = (insn >> 16) & 0xF; /* 19:16 */
13780     UInt rD   = (insn >> 12) & 0xF; /* 15:12 */
13781     UInt rM   = (insn >> 0)  & 0xF; /*  3:0  */
13782     UInt bU   = (insn >> 23) & 1;   /* 23 U=1 offset+, U=0 offset- */
13783     UInt bS   = (insn >> 5) & 1;    /* S=1 store, S=0 load */
13784     UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
13785
13786     /* Require rD to be an even numbered register */
13787     if ((rD & 1) != 0)
13788        goto after_load_store_doubleword;
13789
13790     /* Require 11:8 == 0 for Rn +/- Rm cases */
13791     if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
13792        goto after_load_store_doubleword;
13793
13794     /* Skip some invalid cases, which would lead to two competing
13795        updates to the same register, or which are otherwise
13796        disallowed by the spec. */
13797     switch (summary) {
13798        case 1 | 16:
13799           break;
13800        case 1 | 32:
13801           if (rM == 15) goto after_load_store_doubleword;
13802           break;
13803        case 2 | 16: case 3 | 16:
13804           if (rN == 15) goto after_load_store_doubleword;
13805           if (bS == 0 && (rN == rD || rN == rD+1))
13806              goto after_load_store_doubleword;
13807           break;
13808        case 2 | 32: case 3 | 32:
13809           if (rM == 15) goto after_load_store_doubleword;
13810           if (rN == 15) goto after_load_store_doubleword;
13811           if (rN == rM) goto after_load_store_doubleword;
13812           if (bS == 0 && (rN == rD || rN == rD+1))
13813              goto after_load_store_doubleword;
13814           break;
13815        default:
13816           vassert(0);
13817     }
13818
13819     /* Now, we can't do a conditional load or store, since that very
13820        likely will generate an exception.  So we have to take a side
13821        exit at this point if the condition is false. */
13822     if (condT != IRTemp_INVALID) {
13823        mk_skip_over_A32_if_cond_is_false( condT );
13824        condT = IRTemp_INVALID;
13825     }
13826     /* Ok, now we're unconditional.  Do the load or store. */
13827
13828     /* compute the effective address.  Bind it to a tmp since we
13829        may need to use it twice. */
13830     IRExpr* eaE = NULL;
13831     switch (summary & 0xF0) {
13832        case 16:
13833           eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
13834           break;
13835        case 32:
13836           eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
13837           break;
13838     }
13839     vassert(eaE);
13840     IRTemp eaT = newTemp(Ity_I32);
13841     assign(eaT, eaE);
13842
13843     /* get the old Rn value */
13844     IRTemp rnT = newTemp(Ity_I32);
13845     assign(rnT, getIRegA(rN));
13846
13847     /* decide on the transfer address */
13848     IRTemp taT = IRTemp_INVALID;
13849     switch (summary & 0x0F) {
13850        case 1: case 2: taT = eaT; break;
13851        case 3:         taT = rnT; break;
13852     }
13853     vassert(taT != IRTemp_INVALID);
13854
13855     /* XXX deal with alignment constraints */
13856     /* XXX: but the A8 doesn't seem to trap for misaligned loads, so,
13857        ignore alignment issues for the time being. */
13858
13859     /* doubleword store  S 1
13860        doubleword load   S 0
13861     */
13862     HChar* name = NULL;
13863     /* generate the transfers */
13864     if (bS == 1) { // doubleword store
13865        storeLE( binop(Iop_Add32, mkexpr(taT), mkU32(0)), getIRegA(rD+0) );
13866        storeLE( binop(Iop_Add32, mkexpr(taT), mkU32(4)), getIRegA(rD+1) );
13867        name = "strd";
13868     } else { // doubleword load
13869        putIRegA( rD+0,
13870                  loadLE(Ity_I32, binop(Iop_Add32, mkexpr(taT), mkU32(0))),
13871                  IRTemp_INVALID, Ijk_Boring );
13872        putIRegA( rD+1,
13873                  loadLE(Ity_I32, binop(Iop_Add32, mkexpr(taT), mkU32(4))),
13874                  IRTemp_INVALID, Ijk_Boring );
13875        name = "ldrd";
13876     }
13877
13878     /* Update Rn if necessary. */
13879     switch (summary & 0x0F) {
13880        case 2: case 3:
13881           // should be assured by logic above:
13882           if (bS == 0) {
13883              vassert(rD+0 != rN); /* since we just wrote rD+0 */
13884              vassert(rD+1 != rN); /* since we just wrote rD+1 */
13885           }
13886           putIRegA( rN, mkexpr(eaT), IRTemp_INVALID, Ijk_Boring );
13887           break;
13888     }
13889
13890     switch (summary & 0x0F) {
13891        case 1:  DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
13892                 break;
13893        case 2:  DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
13894                     name, nCC(INSN_COND), rD, dis_buf);
13895                 break;
13896        case 3:  DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
13897                     name, nCC(INSN_COND), rD, dis_buf);
13898                 break;
13899        default: vassert(0);
13900     }
13901
13902     goto decode_success;
13903   }
13904
13905  after_load_store_doubleword:
13906
13907   /* ------------------- {s,u}xtab ------------- */
13908   if (BITS8(0,1,1,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
13909       && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
13910       && BITS4(0,1,1,1) == INSN(7,4)) {
13911      UInt rN  = INSN(19,16);
13912      UInt rD  = INSN(15,12);
13913      UInt rM  = INSN(3,0);
13914      UInt rot = (insn >> 10) & 3;
13915      UInt isU = INSN(22,22);
13916      if (rN == 15/*it's {S,U}XTB*/ || rD == 15 || rM == 15) {
13917         /* undecodable; fall through */
13918      } else {
13919         IRTemp srcL = newTemp(Ity_I32);
13920         IRTemp srcR = newTemp(Ity_I32);
13921         IRTemp res  = newTemp(Ity_I32);
13922         assign(srcR, getIRegA(rM));
13923         assign(srcL, getIRegA(rN));
13924         assign(res,  binop(Iop_Add32,
13925                            mkexpr(srcL),
13926                            unop(isU ? Iop_8Uto32 : Iop_8Sto32,
13927                                 unop(Iop_32to8,
13928                                      genROR32(srcR, 8 * rot)))));
13929         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
13930         DIP("%cxtab%s r%u, r%u, r%u, ror #%u\n",
13931             isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
13932         goto decode_success;
13933      }
13934      /* fall through */
13935   }
13936
13937   /* ------------------- {s,u}xtah ------------- */
13938   if (BITS8(0,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
13939       && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
13940       && BITS4(0,1,1,1) == INSN(7,4)) {
13941      UInt rN  = INSN(19,16);
13942      UInt rD  = INSN(15,12);
13943      UInt rM  = INSN(3,0);
13944      UInt rot = (insn >> 10) & 3;
13945      UInt isU = INSN(22,22);
13946      if (rN == 15/*it's {S,U}XTH*/ || rD == 15 || rM == 15) {
13947         /* undecodable; fall through */
13948      } else {
13949         IRTemp srcL = newTemp(Ity_I32);
13950         IRTemp srcR = newTemp(Ity_I32);
13951         IRTemp res  = newTemp(Ity_I32);
13952         assign(srcR, getIRegA(rM));
13953         assign(srcL, getIRegA(rN));
13954         assign(res,  binop(Iop_Add32,
13955                            mkexpr(srcL),
13956                            unop(isU ? Iop_16Uto32 : Iop_16Sto32,
13957                                 unop(Iop_32to16,
13958                                      genROR32(srcR, 8 * rot)))));
13959         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
13960
13961         DIP("%cxtah%s r%u, r%u, r%u, ror #%u\n",
13962             isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
13963         goto decode_success;
13964      }
13965      /* fall through */
13966   }
13967
13968   /* ------------------- rev16, rev ------------------ */
13969   if (INSN(27,16) == 0x6BF
13970       && (INSN(11,4) == 0xFB/*rev16*/ || INSN(11,4) == 0xF3/*rev*/)) {
13971      Bool isREV = INSN(11,4) == 0xF3;
13972      UInt rM    = INSN(3,0);
13973      UInt rD    = INSN(15,12);
13974      if (rM != 15 && rD != 15) {
13975         IRTemp rMt = newTemp(Ity_I32);
13976         assign(rMt, getIRegA(rM));
13977         IRTemp res = isREV ? gen_REV(rMt) : gen_REV16(rMt);
13978         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
13979         DIP("rev%s%s r%u, r%u\n", isREV ? "" : "16",
13980             nCC(INSN_COND), rD, rM);
13981         goto decode_success;
13982      }
13983   }
13984
13985   /* ------------------- rbit ------------------ */
13986   if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xF3) {
13987      UInt rD = INSN(15,12);
13988      UInt rM = INSN(3,0);
13989      if (rD != 15 && rM != 15) {
13990         IRTemp arg = newTemp(Ity_I32);
13991         assign(arg, getIRegA(rM));
13992         IRTemp res = gen_BITREV(arg);
13993         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
13994         DIP("rbit r%u, r%u\n", rD, rM);
13995         goto decode_success;
13996      }
13997   }
13998
13999   /* ------------------- smmul ------------------ */
14000   if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
14001       && INSN(15,12) == BITS4(1,1,1,1)
14002       && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
14003      UInt bitR = INSN(5,5);
14004      UInt rD = INSN(19,16);
14005      UInt rM = INSN(11,8);
14006      UInt rN = INSN(3,0);
14007      if (rD != 15 && rM != 15 && rN != 15) {
14008         IRExpr* res
14009         = unop(Iop_64HIto32,
14010                binop(Iop_Add64,
14011                      binop(Iop_MullS32, getIRegA(rN), getIRegA(rM)),
14012                      mkU64(bitR ? 0x80000000ULL : 0ULL)));
14013         putIRegA(rD, res, condT, Ijk_Boring);
14014         DIP("smmul%s%s r%u, r%u, r%u\n",
14015             nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM);
14016         goto decode_success;
14017      }
14018   }
14019
14020   /* ------------------- NOP ------------------ */
14021   if (0x0320F000 == (insn & 0x0FFFFFFF)) {
14022      DIP("nop%s\n", nCC(INSN_COND));
14023      goto decode_success;
14024   }
14025
14026   /* ----------------------------------------------------------- */
14027   /* -- ARMv7 instructions                                    -- */
14028   /* ----------------------------------------------------------- */
14029
14030   /* -------------- read CP15 TPIDRURO register ------------- */
14031   /* mrc     p15, 0, r0, c13, c0, 3  up to
14032      mrc     p15, 0, r14, c13, c0, 3
14033   */
14034   /* I don't know whether this is really v7-only.  But anyway, we
14035      have to support it since arm-linux uses TPIDRURO as a thread
14036      state register. */
14037   if (0x0E1D0F70 == (insn & 0x0FFF0FFF)) {
14038      UInt rD = INSN(15,12);
14039      if (rD <= 14) {
14040         /* skip r15, that's too stupid to handle */
14041         putIRegA(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32),
14042                      condT, Ijk_Boring);
14043         DIP("mrc%s p15,0, r%u, c13, c0, 3\n", nCC(INSN_COND), rD);
14044         goto decode_success;
14045      }
14046      /* fall through */
14047   }
14048
14049   /* Handle various kinds of barriers.  This is rather indiscriminate
14050      in the sense that they are all turned into an IR Fence, which
14051      means we don't know which they are, so the back end has to
14052      re-emit them all when it comes across an IR Fence.
14053   */
14054   if (0xEE070F9A == (insn & 0xFFFF0FFF)) { /* v6 */
14055      /* mcr 15, 0, r0, c7, c10, 4 (v6) equiv to DSB (v7).  Data
14056         Synch Barrier -- ensures completion of memory accesses. */
14057      stmt( IRStmt_MBE(Imbe_Fence) );
14058      DIP("mcr 15, 0, rX, c7, c10, 4 (data synch barrier)\n");
14059      goto decode_success;
14060   }
14061   if (0xEE070FBA == (insn & 0xFFFF0FFF)) { /* v6 */
14062      /* mcr 15, 0, r0, c7, c10, 5 (v6) equiv to DMB (v7).  Data
14063         Memory Barrier -- ensures ordering of memory accesses. */
14064      stmt( IRStmt_MBE(Imbe_Fence) );
14065      DIP("mcr 15, 0, rX, c7, c10, 5 (data memory barrier)\n");
14066      goto decode_success;
14067   }
14068   if (0xEE070F95 == (insn & 0xFFFF0FFF)) { /* v6 */
14069      /* mcr 15, 0, r0, c7, c5, 4 (v6) equiv to ISB (v7).
14070         Instruction Synchronisation Barrier (or Flush Prefetch
14071         Buffer) -- a pipe flush, I think.  I suspect we could
14072         ignore those, but to be on the safe side emit a fence
14073         anyway. */
14074      stmt( IRStmt_MBE(Imbe_Fence) );
14075      DIP("mcr 15, 0, rX, c7, c5, 4 (insn synch barrier)\n");
14076      goto decode_success;
14077   }
14078
14079   /* ----------------------------------------------------------- */
14080   /* -- VFP (CP 10, CP 11) instructions (in ARM mode)         -- */
14081   /* ----------------------------------------------------------- */
14082
14083   if (INSN_COND != ARMCondNV) {
14084      Bool ok_vfp = decode_CP10_CP11_instruction (
14085                       &dres, INSN(27,0), condT, INSN_COND,
14086                       False/*!isT*/
14087                    );
14088      if (ok_vfp)
14089         goto decode_success;
14090   }
14091
14092   /* ----------------------------------------------------------- */
14093   /* -- NEON instructions (in ARM mode)                       -- */
14094   /* ----------------------------------------------------------- */
14095
14096   /* These are all in NV space, and so are taken care of (far) above,
14097      by a call from this function to decode_NV_instruction(). */
14098
14099   /* ----------------------------------------------------------- */
14100   /* -- v6 media instructions (in ARM mode)                   -- */
14101   /* ----------------------------------------------------------- */
14102
14103   { Bool ok_v6m = decode_V6MEDIA_instruction(
14104                       &dres, INSN(27,0), condT, INSN_COND,
14105                       False/*!isT*/
14106                   );
14107     if (ok_v6m)
14108        goto decode_success;
14109   }
14110
14111   /* ----------------------------------------------------------- */
14112   /* -- Undecodable                                           -- */
14113   /* ----------------------------------------------------------- */
14114
14115   goto decode_failure;
14116   /*NOTREACHED*/
14117
14118  decode_failure:
14119   /* All decode failures end up here. */
14120   vex_printf("disInstr(arm): unhandled instruction: "
14121              "0x%x\n", insn);
14122   vex_printf("                 cond=%d(0x%x) 27:20=%u(0x%02x) "
14123                                "4:4=%d "
14124                                "3:0=%u(0x%x)\n",
14125              (Int)INSN_COND, (UInt)INSN_COND,
14126              (Int)INSN(27,20), (UInt)INSN(27,20),
14127              (Int)INSN(4,4),
14128              (Int)INSN(3,0), (UInt)INSN(3,0) );
14129
14130   /* Tell the dispatcher that this insn cannot be decoded, and so has
14131      not been executed, and (is currently) the next to be executed.
14132      R15 should be up-to-date since it is made so at the start of each
14133      insn, but nevertheless be paranoid and update it again right
14134      now. */
14135   vassert(0 == (guest_R15_curr_instr_notENC & 3));
14136   llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) );
14137   irsb->next     = mkU32(guest_R15_curr_instr_notENC);
14138   irsb->jumpkind = Ijk_NoDecode;
14139   dres.whatNext  = Dis_StopHere;
14140   dres.len       = 0;
14141   return dres;
14142
14143  decode_success:
14144   /* All decode successes end up here. */
14145   DIP("\n");
14146
14147   vassert(dres.len == 4 || dres.len == 20);
14148
14149   /* Now then.  Do we have an implicit jump to r15 to deal with? */
14150   if (r15written) {
14151      /* If we have a jump to deal with, we assume that there's been no
14152         other competing branch stuff previously generated for this
14153         insn.  That's reasonable, in the sense that the ARM insn set
14154         appears to declare as "Unpredictable" any instruction which
14155         generates more than one possible new value for r15.  Hence
14156         just assert.  The decoders themselves should check against
14157         all such instructions which are thusly Unpredictable, and
14158         decline to decode them.  Hence we should never get here if we
14159         have competing new values for r15, and hence it is safe to
14160         assert here. */
14161      vassert(dres.whatNext == Dis_Continue);
14162      vassert(irsb->next == NULL);
14163      vassert(irsb->jumpkind == Ijk_Boring);
14164      /* If r15 is unconditionally written, terminate the block by
14165         jumping to it.  If it's conditionally written, still
14166         terminate the block (a shame, but we can't do side exits to
14167         arbitrary destinations), but first jump to the next
14168         instruction if the condition doesn't hold. */
14169      /* We can't use getIReg(15) to get the destination, since that
14170         will produce r15+8, which isn't what we want.  Must use
14171         llGetIReg(15) instead. */
14172      if (r15guard == IRTemp_INVALID) {
14173         /* unconditional */
14174      } else {
14175         /* conditional */
14176         stmt( IRStmt_Exit(
14177                  unop(Iop_32to1,
14178                       binop(Iop_Xor32,
14179                             mkexpr(r15guard), mkU32(1))),
14180                  r15kind,
14181                  IRConst_U32(guest_R15_curr_instr_notENC + 4)
14182         ));
14183      }
14184      irsb->next     = llGetIReg(15);
14185      irsb->jumpkind = r15kind;
14186      dres.whatNext  = Dis_StopHere;
14187   }
14188
14189   return dres;
14190
14191#  undef INSN_COND
14192#  undef INSN
14193}
14194
14195
14196/*------------------------------------------------------------*/
14197/*--- Disassemble a single Thumb2 instruction              ---*/
14198/*------------------------------------------------------------*/
14199
14200static const UChar it_length_table[256]; /* forward declaration only; the table contents are expected to be defined later in this file */
14201
14202/* NB: in Thumb mode we do fetches of regs with getIRegT, which
14203   automagically adds 4 to fetches of r15.  However, writes to regs
14204   are done with putIRegT, which disallows writes to r15.  Hence any
14205   r15 writes and associated jumps have to be done "by hand". */
14206
14207/* Disassemble a single Thumb instruction into IR.  The instruction is
14208   located in host memory at guest_instr, and has (decoded) guest IP
14209   of guest_R15_curr_instr_notENC, which will have been set before the
14210   call here. */
14211
14212static
14213DisResult disInstr_THUMB_WRK (
14214             Bool         put_IP,
14215             Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
14216             Bool         resteerCisOk,
14217             void*        callback_opaque,
14218             UChar*       guest_instr,
14219             VexArchInfo* archinfo,
14220             VexAbiInfo*  abiinfo
14221          )
14222{
14223   /* A macro to fish bits out of insn0.  There's also INSN1, to fish
14224      bits out of insn1, but that's defined only after the end of the
14225      16-bit insn decoder, so as to stop it mistakenly being used
14226      therein. */
14227#  define INSN0(_bMax,_bMin)  SLICE_UInt(((UInt)insn0), (_bMax), (_bMin))
14228
14229   DisResult dres;
14230   UShort    insn0; /* first 16 bits of the insn */
14231   //Bool      allow_VFP = False;
14232   //UInt      hwcaps = archinfo->hwcaps;
14233   HChar     dis_buf[128];  // big enough to hold LDMIA etc text
14234
14235   /* Summary result of the ITxxx backwards analysis: False == safe
14236      but suboptimal. */
14237   Bool guaranteedUnconditional = False;
14238
14239   /* What insn variants are we supporting today? */
14240   //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
14241   // etc etc
14242
14243   /* Set result defaults. */
14244   dres.whatNext   = Dis_Continue;
14245   dres.len        = 2;
14246   dres.continueAt = 0;
14247
14248   /* Set default actions for post-insn handling of writes to r15, if
14249      required. */
14250   r15written = False;
14251   r15guard   = IRTemp_INVALID; /* unconditional */
14252   r15kind    = Ijk_Boring;
14253
14254   /* Insns could be 2 or 4 bytes long.  Just get the first 16 bits at
14255      this point.  If we need the second 16, get them later.  We can't
14256      get them both out immediately because it risks a fault (very
14257      unlikely, but ..) if the second 16 bits aren't actually
14258      necessary. */
14259   insn0 = getUShortLittleEndianly( guest_instr );
14260
14261   if (0) vex_printf("insn: 0x%x\n", insn0);
14262
14263   DIP("\t(thumb) 0x%x:  ", (UInt)guest_R15_curr_instr_notENC);
14264
14265   /* We may be asked to update the guest R15 before going further. */
14266   vassert(0 == (guest_R15_curr_instr_notENC & 1));
14267   if (put_IP) {
14268      llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) );
14269   }
14270
14271   /* ----------------------------------------------------------- */
14272   /* Spot "Special" instructions (see comment at top of file). */
14273   {
14274      UChar* code = (UChar*)guest_instr;
14275      /* Spot the 16-byte preamble:
14276
14277         ea4f 0cfc  mov.w   ip, ip, ror #3
14278         ea4f 3c7c  mov.w   ip, ip, ror #13
14279         ea4f 7c7c  mov.w   ip, ip, ror #29
14280         ea4f 4cfc  mov.w   ip, ip, ror #19
14281      */
14282      UInt word1 = 0x0CFCEA4F;
14283      UInt word2 = 0x3C7CEA4F;
14284      UInt word3 = 0x7C7CEA4F;
14285      UInt word4 = 0x4CFCEA4F;
14286      if (getUIntLittleEndianly(code+ 0) == word1 &&
14287          getUIntLittleEndianly(code+ 4) == word2 &&
14288          getUIntLittleEndianly(code+ 8) == word3 &&
14289          getUIntLittleEndianly(code+12) == word4) {
14290         /* Got a "Special" instruction preamble.  Which one is it? */
14291         // 0x 0A 0A EA 4A
14292         if (getUIntLittleEndianly(code+16) == 0x0A0AEA4A
14293                                               /* orr.w r10,r10,r10 */) {
14294            /* R3 = client_request ( R4 ) */
14295            DIP("r3 = client_request ( %%r4 )\n");
14296            irsb->next     = mkU32( (guest_R15_curr_instr_notENC + 20) | 1 );
14297            irsb->jumpkind = Ijk_ClientReq;
14298            dres.whatNext  = Dis_StopHere;
14299            goto decode_success;
14300         }
14301         else
14302         // 0x 0B 0B EA 4B
14303         if (getUIntLittleEndianly(code+16) == 0x0B0BEA4B
14304                                               /* orr r11,r11,r11 */) {
14305            /* R3 = guest_NRADDR */
14306            DIP("r3 = guest_NRADDR\n");
14307            dres.len = 20;
14308            llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
14309            goto decode_success;
14310         }
14311         else
14312         // 0x 0C 0C EA 4C
14313         if (getUIntLittleEndianly(code+16) == 0x0C0CEA4C
14314                                               /* orr r12,r12,r12 */) {
14315            /*  branch-and-link-to-noredir R4 */
14316            DIP("branch-and-link-to-noredir r4\n");
14317            llPutIReg(14, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
14318            irsb->next     = getIRegT(4);
14319            irsb->jumpkind = Ijk_NoRedir;
14320            dres.whatNext  = Dis_StopHere;
14321            goto decode_success;
14322         }
14323         /* We don't know what it is.  Set insn0 so decode_failure
14324            can print the insn following the Special-insn preamble. */
14325         insn0 = getUShortLittleEndianly(code+16);
14326         goto decode_failure;
14327         /*NOTREACHED*/
14328      }
14329
14330   }
14331
14332   /* ----------------------------------------------------------- */
14333
14334   /* Main Thumb instruction decoder starts here.  It's a series of
14335      switches which examine ever shorter bit sequences at the MSB of
14336      the instruction word, first for 16-bit insns, then for 32-bit
14337      insns. */
14338
14339   /* --- BEGIN ITxxx optimisation analysis --- */
14340   /* This is a crucial optimisation for the ITState boilerplate that
14341      follows.  Examine the 9 halfwords preceding this instruction,
14342      and if we are absolutely sure that none of them constitute an
14343      'it' instruction, then we can be sure that this instruction is
14344      not under the control of any 'it' instruction, and so
14345      guest_ITSTATE must be zero.  So write zero into ITSTATE right
14346      now, so that iropt can fold out almost all of the resulting
14347      junk.
14348
14349      If we aren't sure, we can always safely skip this step.  So be a
14350      bit conservative about it: only poke around in the same page as
14351      this instruction, lest we get a fault from the previous page
14352      that would not otherwise have happened.  The saving grace is
14353      that such skipping is pretty rare -- it only happens,
14354      statistically, 18/4096ths of the time, so is judged unlikely to
14355      be a performance problem.
14356
14357      FIXME: do better.  Take into account the number of insns covered
14358      by any IT insns we find, to rule out cases where an IT clearly
14359      cannot cover this instruction.  This would improve behaviour for
14360      branch targets immediately following an IT-guarded group that is
14361      not of full length.  Eg, (and completely ignoring issues of 16-
14362      vs 32-bit insn length):
14363
14364             ite cond
14365             insn1
14366             insn2
14367      label: insn3
14368             insn4
14369
14370      The 'it' only conditionalises insn1 and insn2.  However, the
14371      current analysis is conservative and considers insn3 and insn4
14372      also possibly guarded.  Hence if 'label:' is the start of a hot
14373      loop we will get a big performance hit.
14374   */
14375   {
14376      /* Summary result of this analysis: False == safe but
14377         suboptimal. */
14378      vassert(guaranteedUnconditional == False);
14379
14380      UInt pc = guest_R15_curr_instr_notENC;
14381      vassert(0 == (pc & 1));
14382
14383      UInt pageoff = pc & 0xFFF;
14384      if (pageoff >= 18) {
14385         /* It's safe to poke about in the 9 halfwords preceding this
14386            insn.  So, have a look at them. */
14387         guaranteedUnconditional = True; /* assume no 'it' insn found,
14388                                            till we do */
14389         UShort* hwp = (UShort*)(HWord)pc;
14390         Int i;
14391         for (i = -1; i >= -9; i--) {
14392            /* We're in the same page.  (True, but commented out due
14393               to expense.) */
14394            /*
14395            vassert( ( ((UInt)(&hwp[i])) & 0xFFFFF000 )
14396                      == ( pc & 0xFFFFF000 ) );
14397            */
14398            /* All valid IT instructions must have the form 0xBFxy,
14399               where x can be anything, but y must be nonzero.  Find
14400               the number of insns covered by it (1 .. 4) and check to
14401               see if it can possibly reach up to the instruction in
14402               question.  Some (x,y) combinations mean UNPREDICTABLE,
14403               and the table is constructed to be conservative by
14404               returning 4 for those cases, so the analysis is safe
14405               even if the code uses unpredictable IT instructions (in
14406               which case its authors are nuts, but hey.)  */
14407            UShort hwp_i = hwp[i];
14408            if (UNLIKELY((hwp_i & 0xFF00) == 0xBF00 && (hwp_i & 0xF) != 0)) {
14409               /* might be an 'it' insn. */
14410               /* # guarded insns */
14411               Int n_guarded = (Int)it_length_table[hwp_i & 0xFF];
14412               vassert(n_guarded >= 1 && n_guarded <= 4);
14413               if (n_guarded * 2 /* # guarded HWs, worst case */
14414                   > (-(i+1)))   /* -(i+1): # remaining HWs after the IT */
14415                   /* -(i+0) also seems to work, even though I think
14416                      it's wrong.  I don't understand that. */
14417                  guaranteedUnconditional = False;
14418               break;
14419            }
14420         }
14421      }
14422   }
14423   /* --- END ITxxx optimisation analysis --- */
14424
14425   /* Generate the guarding condition for this insn, by examining
14426      ITSTATE.  Assign it to condT.  Also, generate new
14427      values for ITSTATE ready for stuffing back into the
14428      guest state, but don't actually do the Put yet, since it will
14429      need to be stuffed back in only after the instruction gets to a
14430      point where it is sure to complete.  Mostly we let the code at
14431      decode_success handle this, but in cases where the insn contains
14432      a side exit, we have to update them before the exit. */
14433
14434   /* If the ITxxx optimisation analysis above could not prove that
14435      this instruction is guaranteed unconditional, we insert a
14436      lengthy IR preamble to compute the guarding condition at
14437      runtime.  If it can prove it (which obviously we hope is the
14438      normal case) then we insert a minimal preamble, which is
14439      equivalent to setting guest_ITSTATE to zero and then folding
14440      that through the full preamble (which completely disappears). */
14441
14442   IRTemp condT              = IRTemp_INVALID;
14443   IRTemp old_itstate        = IRTemp_INVALID;
14444   IRTemp new_itstate        = IRTemp_INVALID;
14445   IRTemp cond_AND_notInIT_T = IRTemp_INVALID;
14446
14447   if (guaranteedUnconditional) {
14448      /* BEGIN "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
14449
14450      // ITSTATE = 0 :: I32
14451      IRTemp z32 = newTemp(Ity_I32);
14452      assign(z32, mkU32(0));
14453      put_ITSTATE(z32);
14454
14455      // old_itstate = 0 :: I32
14456      //
14457      // old_itstate = get_ITSTATE();
14458      old_itstate = z32; /* 0 :: I32 */
14459
14460      // new_itstate = old_itstate >> 8
14461      //             = 0 >> 8
14462      //             = 0 :: I32
14463      //
14464      // new_itstate = newTemp(Ity_I32);
14465      // assign(new_itstate,
14466      //        binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
14467      new_itstate = z32;
14468
14469      // ITSTATE = 0 :: I32(again)
14470      //
14471      // put_ITSTATE(new_itstate);
14472
14473      // condT1 = calc_cond_dyn( xor(and(old_itstate,0xF0), 0xE0) )
14474      //        = calc_cond_dyn( xor(0,0xE0) )
14475      //        = calc_cond_dyn ( 0xE0 )
14476      //        = 1 :: I32
14477      // Not that this matters, since the computed value is not used:
14478      // see condT folding below
14479      //
14480      // IRTemp condT1 = newTemp(Ity_I32);
14481      // assign(condT1,
14482      //        mk_armg_calculate_condition_dyn(
14483      //           binop(Iop_Xor32,
14484      //                 binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
14485      //                 mkU32(0xE0))
14486      //       )
14487      // );
14488
14489      // condT = 32to8(and32(old_itstate,0xF0)) == 0  ? 1  : condT1
14490      //       = 32to8(and32(0,0xF0)) == 0  ? 1  : condT1
14491      //       = 32to8(0) == 0  ? 1  : condT1
14492      //       = 0 == 0  ? 1  : condT1
14493      //       = 1
14494      //
14495      // condT = newTemp(Ity_I32);
14496      // assign(condT, IRExpr_Mux0X(
14497      //                  unop(Iop_32to8, binop(Iop_And32,
14498      //                                        mkexpr(old_itstate),
14499      //                                        mkU32(0xF0))),
14500      //                  mkU32(1),
14501      //                  mkexpr(condT1)
14502      //       ));
14503      condT = newTemp(Ity_I32);
14504      assign(condT, mkU32(1));
14505
14506      // notInITt = xor32(and32(old_itstate, 1), 1)
14507      //          = xor32(and32(0, 1), 1)
14508      //          = xor32(0, 1)
14509      //          = 1 :: I32
14510      //
14511      // IRTemp notInITt = newTemp(Ity_I32);
14512      // assign(notInITt,
14513      //        binop(Iop_Xor32,
14514      //              binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
14515      //              mkU32(1)));
14516
14517      // cond_AND_notInIT_T = and32(notInITt, condT)
14518      //                    = and32(1, 1)
14519      //                    = 1
14520      //
14521      // cond_AND_notInIT_T = newTemp(Ity_I32);
14522      // assign(cond_AND_notInIT_T,
14523      //        binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
14524      cond_AND_notInIT_T = condT; /* 1 :: I32 */
14525
14526      /* END "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
14527   } else {
14528      /* BEGIN { STANDARD PREAMBLE; } */
14529
14530      old_itstate = get_ITSTATE();
14531
14532      new_itstate = newTemp(Ity_I32);
14533      assign(new_itstate,
14534             binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
14535
14536      put_ITSTATE(new_itstate);
14537
14538      /* Same strategy as for ARM insns: generate a condition
14539         temporary at this point (or IRTemp_INVALID, meaning
14540         unconditional).  We leave it to lower-level instruction
14541         decoders to decide whether they can generate straight-line
14542         code, or whether they must generate a side exit before the
14543         instruction.  condT :: Ity_I32 and is always either zero or
14544         one. */
14545      IRTemp condT1 = newTemp(Ity_I32);
14546      assign(condT1,
14547             mk_armg_calculate_condition_dyn(
14548                binop(Iop_Xor32,
14549                      binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
14550                      mkU32(0xE0))
14551            )
14552      );
14553
14554      /* This is a bit complex, but needed to make Memcheck understand
14555         that, if the condition in old_itstate[7:4] denotes AL (that
14556         is, if this instruction is to be executed unconditionally),
14557         then condT does not depend on the results of calling the
14558         helper.
14559
14560         We test explicitly for old_itstate[7:4] == AL ^ 0xE, and in
14561         that case set condT directly to 1.  Else we use the results
14562         of the helper.  Since old_itstate is always defined and
14563         because Memcheck does lazy V-bit propagation through Mux0X,
14564         this will cause condT to always be a defined 1 if the
14565         condition is 'AL'.  From an execution semantics point of view
14566         this is irrelevant since we're merely duplicating part of the
14567         behaviour of the helper.  But it makes it clear to Memcheck,
14568         in this case, that condT does not in fact depend on the
14569         contents of the condition code thunk.  Without it, we get
14570         quite a lot of false errors.
14571
14572         So, just to clarify: from a straight semantics point of view,
14573         we can simply do "assign(condT, mkexpr(condT1))", and the
14574         simulator still runs fine.  It's just that we get loads of
14575         false errors from Memcheck. */
14576      condT = newTemp(Ity_I32);
14577      assign(condT, IRExpr_Mux0X(
14578                       unop(Iop_32to8, binop(Iop_And32,
14579                                             mkexpr(old_itstate),
14580                                             mkU32(0xF0))),
14581                       mkU32(1),
14582                       mkexpr(condT1)
14583            ));
14584
14585      /* Something we don't have in ARM: generate a 0 or 1 value
14586         indicating whether or not we are in an IT block (NB: 0 = in
14587         IT block, 1 = not in IT block).  This is used to gate
14588         condition code updates in 16-bit Thumb instructions. */
14589      IRTemp notInITt = newTemp(Ity_I32);
14590      assign(notInITt,
14591             binop(Iop_Xor32,
14592                   binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
14593                   mkU32(1)));
14594
14595      /* Compute 'condT && notInITt' -- that is, the instruction is
14596         going to execute, and we're not in an IT block.  This is the
14597         gating condition for updating condition codes in 16-bit Thumb
14598         instructions, except for CMP, CMN and TST. */
14599      cond_AND_notInIT_T = newTemp(Ity_I32);
14600      assign(cond_AND_notInIT_T,
14601             binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
14602      /* END { STANDARD PREAMBLE; } */
14603   }
14604
14605
14606   /* At this point:
14607      * ITSTATE has been updated
14608      * condT holds the guarding condition for this instruction (0 or 1),
14609      * notInITt is 1 if we're in "normal" code, 0 if in an IT block
14610      * cond_AND_notInIT_T is the AND of the above two.
14611
14612      If the instruction proper can't trap, then there's nothing else
14613      to do w.r.t. ITSTATE -- just go and generate IR for the
14614      insn, taking into account the guarding condition.
14615
14616      If, however, the instruction might trap, then we must back up
14617      ITSTATE to the old value, and re-update it after the potentially
14618      trapping IR section.  A trap can happen either via a memory
14619      reference or because we need to throw SIGILL.
14620
14621      If an instruction has a side exit, we need to be sure that any
14622      ITSTATE backup is re-updated before the side exit.
14623   */
14624
14625   /* ----------------------------------------------------------- */
14626   /* --                                                       -- */
14627   /* -- Thumb 16-bit integer instructions                     -- */
14628   /* --                                                       -- */
14629   /* -- IMPORTANT: references to insn1 or INSN1 are           -- */
14630   /* --            not allowed in this section                -- */
14631   /* --                                                       -- */
14632   /* ----------------------------------------------------------- */
14633
14634   /* 16-bit instructions inside an IT block, apart from CMP, CMN and
14635      TST, do not set the condition codes.  Hence we must dynamically
14636      test for this case for every condition code update. */
14637
14638   IROp   anOp   = Iop_INVALID;
14639   HChar* anOpNm = NULL;
14640
14641   /* ================ 16-bit 15:6 cases ================ */
14642
14643   switch (INSN0(15,6)) {
14644
14645   case 0x10a:   // CMP
14646   case 0x10b: { // CMN
14647      /* ---------------- CMP Rn, Rm ---------------- */
14648      Bool   isCMN = INSN0(15,6) == 0x10b;
14649      UInt   rN    = INSN0(2,0);
14650      UInt   rM    = INSN0(5,3);
14651      IRTemp argL  = newTemp(Ity_I32);
14652      IRTemp argR  = newTemp(Ity_I32);
14653      assign( argL, getIRegT(rN) );
14654      assign( argR, getIRegT(rM) );
14655      /* Update flags regardless of whether in an IT block or not. */
14656      setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
14657                      argL, argR, condT );
14658      DIP("%s r%u, r%u\n", isCMN ? "cmn" : "cmp", rN, rM);
14659      goto decode_success;
14660   }
14661
14662   case 0x108: {
14663      /* ---------------- TST Rn, Rm ---------------- */
14664      UInt   rN   = INSN0(2,0);
14665      UInt   rM   = INSN0(5,3);
14666      IRTemp oldC = newTemp(Ity_I32);
14667      IRTemp oldV = newTemp(Ity_I32);
14668      IRTemp res  = newTemp(Ity_I32);
14669      assign( oldC, mk_armg_calculate_flag_c() );
14670      assign( oldV, mk_armg_calculate_flag_v() );
14671      assign( res,  binop(Iop_And32, getIRegT(rN), getIRegT(rM)) );
14672      /* Update flags regardless of whether in an IT block or not. */
14673      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
14674      DIP("tst r%u, r%u\n", rN, rM);
14675      goto decode_success;
14676   }
14677
14678   case 0x109: {
14679      /* ---------------- NEGS Rd, Rm ---------------- */
14680      /* Rd = -Rm */
14681      UInt   rM   = INSN0(5,3);
14682      UInt   rD   = INSN0(2,0);
14683      IRTemp arg  = newTemp(Ity_I32);
14684      IRTemp zero = newTemp(Ity_I32);
14685      assign(arg, getIRegT(rM));
14686      assign(zero, mkU32(0));
14687      // rD can never be r15
14688      putIRegT(rD, binop(Iop_Sub32, mkexpr(zero), mkexpr(arg)), condT);
14689      setFlags_D1_D2( ARMG_CC_OP_SUB, zero, arg, cond_AND_notInIT_T);
14690      DIP("negs r%u, r%u\n", rD, rM);
14691      goto decode_success;
14692   }
14693
14694   case 0x10F: {
14695      /* ---------------- MVNS Rd, Rm ---------------- */
14696      /* Rd = ~Rm */
14697      UInt   rM   = INSN0(5,3);
14698      UInt   rD   = INSN0(2,0);
14699      IRTemp oldV = newTemp(Ity_I32);
14700      IRTemp oldC = newTemp(Ity_I32);
14701      IRTemp res  = newTemp(Ity_I32);
14702      assign( oldV, mk_armg_calculate_flag_v() );
14703      assign( oldC, mk_armg_calculate_flag_c() );
14704      assign(res, unop(Iop_Not32, getIRegT(rM)));
14705      // rD can never be r15
14706      putIRegT(rD, mkexpr(res), condT);
14707      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
14708                         cond_AND_notInIT_T );
14709      DIP("mvns r%u, r%u\n", rD, rM);
14710      goto decode_success;
14711   }
14712
14713   case 0x10C:
14714      /* ---------------- ORRS Rd, Rm ---------------- */
14715      anOp = Iop_Or32; anOpNm = "orr"; goto and_orr_eor_mul;
14716   case 0x100:
14717      /* ---------------- ANDS Rd, Rm ---------------- */
14718      anOp = Iop_And32; anOpNm = "and"; goto and_orr_eor_mul;
14719   case 0x101:
14720      /* ---------------- EORS Rd, Rm ---------------- */
14721      anOp = Iop_Xor32; anOpNm = "eor"; goto and_orr_eor_mul;
14722   case 0x10d:
14723      /* ---------------- MULS Rd, Rm ---------------- */
14724      anOp = Iop_Mul32; anOpNm = "mul"; goto and_orr_eor_mul;
14725   and_orr_eor_mul: {
14726      /* Rd = Rd `op` Rm */
14727      UInt   rM   = INSN0(5,3);
14728      UInt   rD   = INSN0(2,0);
14729      IRTemp res  = newTemp(Ity_I32);
14730      IRTemp oldV = newTemp(Ity_I32);
14731      IRTemp oldC = newTemp(Ity_I32);
14732      assign( oldV, mk_armg_calculate_flag_v() );
14733      assign( oldC, mk_armg_calculate_flag_c() );
14734      assign( res, binop(anOp, getIRegT(rD), getIRegT(rM) ));
14735      // not safe to read guest state after here
14736      // rD can never be r15
14737      putIRegT(rD, mkexpr(res), condT);
14738      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
14739                         cond_AND_notInIT_T );
14740      DIP("%s r%u, r%u\n", anOpNm, rD, rM);
14741      goto decode_success;
14742   }
14743
14744   case 0x10E: {
14745      /* ---------------- BICS Rd, Rm ---------------- */
14746      /* Rd = Rd & ~Rm */
14747      UInt   rM   = INSN0(5,3);
14748      UInt   rD   = INSN0(2,0);
14749      IRTemp res  = newTemp(Ity_I32);
14750      IRTemp oldV = newTemp(Ity_I32);
14751      IRTemp oldC = newTemp(Ity_I32);
14752      assign( oldV, mk_armg_calculate_flag_v() );
14753      assign( oldC, mk_armg_calculate_flag_c() );
14754      assign( res, binop(Iop_And32, getIRegT(rD),
14755                                    unop(Iop_Not32, getIRegT(rM) )));
14756      // not safe to read guest state after here
14757      // rD can never be r15
14758      putIRegT(rD, mkexpr(res), condT);
14759      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
14760                         cond_AND_notInIT_T );
14761      DIP("bics r%u, r%u\n", rD, rM);
14762      goto decode_success;
14763   }
14764
14765   case 0x105: {
14766      /* ---------------- ADCS Rd, Rm ---------------- */
14767      /* Rd = Rd + Rm + oldC */
14768      UInt   rM   = INSN0(5,3);
14769      UInt   rD   = INSN0(2,0);
14770      IRTemp argL = newTemp(Ity_I32);
14771      IRTemp argR = newTemp(Ity_I32);
14772      IRTemp oldC = newTemp(Ity_I32);
14773      IRTemp res  = newTemp(Ity_I32);
14774      assign(argL, getIRegT(rD));
14775      assign(argR, getIRegT(rM));
14776      assign(oldC, mk_armg_calculate_flag_c());
14777      assign(res, binop(Iop_Add32,
14778                        binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
14779                        mkexpr(oldC)));
14780      // rD can never be r15
14781      putIRegT(rD, mkexpr(res), condT);
14782      setFlags_D1_D2_ND( ARMG_CC_OP_ADC, argL, argR, oldC,
14783                         cond_AND_notInIT_T );
14784      DIP("adcs r%u, r%u\n", rD, rM);
14785      goto decode_success;
14786   }
14787
14788   case 0x106: {
14789      /* ---------------- SBCS Rd, Rm ---------------- */
14790      /* Rd = Rd - Rm - (oldC ^ 1) */
14791      UInt   rM   = INSN0(5,3);
14792      UInt   rD   = INSN0(2,0);
14793      IRTemp argL = newTemp(Ity_I32);
14794      IRTemp argR = newTemp(Ity_I32);
14795      IRTemp oldC = newTemp(Ity_I32);
14796      IRTemp res  = newTemp(Ity_I32);
14797      assign(argL, getIRegT(rD));
14798      assign(argR, getIRegT(rM));
14799      assign(oldC, mk_armg_calculate_flag_c());
14800      assign(res, binop(Iop_Sub32,
14801                        binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
14802                        binop(Iop_Xor32, mkexpr(oldC), mkU32(1))));
14803      // rD can never be r15
14804      putIRegT(rD, mkexpr(res), condT);
14805      setFlags_D1_D2_ND( ARMG_CC_OP_SBB, argL, argR, oldC,
14806                         cond_AND_notInIT_T );
14807      DIP("sbcs r%u, r%u\n", rD, rM);
14808      goto decode_success;
14809   }
14810
14811   case 0x2CB: {
14812      /* ---------------- UXTB Rd, Rm ---------------- */
14813      /* Rd = 8Uto32(Rm) */
14814      UInt rM = INSN0(5,3);
14815      UInt rD = INSN0(2,0);
14816      putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFF)),
14817                   condT);
14818      DIP("uxtb r%u, r%u\n", rD, rM);
14819      goto decode_success;
14820   }
14821
14822   case 0x2C9: {
14823      /* ---------------- SXTB Rd, Rm ---------------- */
14824      /* Rd = 8Sto32(Rm) */
14825      UInt rM = INSN0(5,3);
14826      UInt rD = INSN0(2,0);
14827      putIRegT(rD, binop(Iop_Sar32,
14828                         binop(Iop_Shl32, getIRegT(rM), mkU8(24)),
14829                         mkU8(24)),
14830                   condT);
14831      DIP("sxtb r%u, r%u\n", rD, rM);
14832      goto decode_success;
14833   }
14834
14835   case 0x2CA: {
14836      /* ---------------- UXTH Rd, Rm ---------------- */
14837      /* Rd = 16Uto32(Rm) */
14838      UInt rM = INSN0(5,3);
14839      UInt rD = INSN0(2,0);
14840      putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFFFF)),
14841                   condT);
14842      DIP("uxth r%u, r%u\n", rD, rM);
14843      goto decode_success;
14844   }
14845
14846   case 0x2C8: {
14847      /* ---------------- SXTH Rd, Rm ---------------- */
14848      /* Rd = 16Sto32(Rm) */
14849      UInt rM = INSN0(5,3);
14850      UInt rD = INSN0(2,0);
14851      putIRegT(rD, binop(Iop_Sar32,
14852                         binop(Iop_Shl32, getIRegT(rM), mkU8(16)),
14853                         mkU8(16)),
14854                   condT);
14855      DIP("sxth r%u, r%u\n", rD, rM);
14856      goto decode_success;
14857   }
14858
14859   case 0x102:   // LSLS
14860   case 0x103:   // LSRS
14861   case 0x104:   // ASRS
14862   case 0x107: { // RORS
14863      /* ---------------- LSLS Rs, Rd ---------------- */
14864      /* ---------------- LSRS Rs, Rd ---------------- */
14865      /* ---------------- ASRS Rs, Rd ---------------- */
14866      /* ---------------- RORS Rs, Rd ---------------- */
14867      /* Rd = Rd `op` Rs, and set flags */
14868      UInt   rS   = INSN0(5,3);
14869      UInt   rD   = INSN0(2,0);
14870      IRTemp oldV = newTemp(Ity_I32);
14871      IRTemp rDt  = newTemp(Ity_I32);
14872      IRTemp rSt  = newTemp(Ity_I32);
14873      IRTemp res  = newTemp(Ity_I32);
14874      IRTemp resC = newTemp(Ity_I32);
14875      HChar* wot  = "???";
14876      assign(rSt, getIRegT(rS));
14877      assign(rDt, getIRegT(rD));
14878      assign(oldV, mk_armg_calculate_flag_v());
14879      /* Does not appear to be the standard 'how' encoding. */
14880      switch (INSN0(15,6)) {
14881         case 0x102:
14882            compute_result_and_C_after_LSL_by_reg(
14883               dis_buf, &res, &resC, rDt, rSt, rD, rS
14884            );
14885            wot = "lsl";
14886            break;
14887         case 0x103:
14888            compute_result_and_C_after_LSR_by_reg(
14889               dis_buf, &res, &resC, rDt, rSt, rD, rS
14890            );
14891            wot = "lsr";
14892            break;
14893         case 0x104:
14894            compute_result_and_C_after_ASR_by_reg(
14895               dis_buf, &res, &resC, rDt, rSt, rD, rS
14896            );
14897            wot = "asr";
14898            break;
14899         case 0x107:
14900            compute_result_and_C_after_ROR_by_reg(
14901               dis_buf, &res, &resC, rDt, rSt, rD, rS
14902            );
14903            wot = "ror";
14904            break;
14905         default:
14906            /*NOTREACHED*/vassert(0);
14907      }
14908      // not safe to read guest state after this point
14909      putIRegT(rD, mkexpr(res), condT);
14910      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
14911                         cond_AND_notInIT_T );
14912      DIP("%ss r%u, r%u\n", wot, rS, rD);
14913      goto decode_success;
14914   }
14915
14916   case 0x2E8:   // REV
14917   case 0x2E9: { // REV16
14918      /* ---------------- REV   Rd, Rm ---------------- */
14919      /* ---------------- REV16 Rd, Rm ---------------- */
14920      UInt rM = INSN0(5,3);
14921      UInt rD = INSN0(2,0);
14922      Bool isREV = INSN0(15,6) == 0x2E8;
14923      IRTemp arg = newTemp(Ity_I32);
14924      assign(arg, getIRegT(rM));
14925      IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
14926      putIRegT(rD, mkexpr(res), condT);
14927      DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM);
14928      goto decode_success;
14929   }
14930
14931   default:
14932      break; /* examine the next shortest prefix */
14933
14934   }
14935
14936
14937   /* ================ 16-bit 15:7 cases ================ */
14938
14939   switch (INSN0(15,7)) {
14940
14941   case BITS9(1,0,1,1,0,0,0,0,0): {
14942      /* ------------ ADD SP, #imm7 * 4 ------------ */
14943      UInt uimm7 = INSN0(6,0);
14944      putIRegT(13, binop(Iop_Add32, getIRegT(13), mkU32(uimm7 * 4)),
14945                   condT);
14946      DIP("add sp, #%u\n", uimm7 * 4);
14947      goto decode_success;
14948   }
14949
14950   case BITS9(1,0,1,1,0,0,0,0,1): {
14951      /* ------------ SUB SP, #imm7 * 4 ------------ */
14952      UInt uimm7 = INSN0(6,0);
14953      putIRegT(13, binop(Iop_Sub32, getIRegT(13), mkU32(uimm7 * 4)),
14954                   condT);
14955      DIP("sub sp, #%u\n", uimm7 * 4);
14956      goto decode_success;
14957   }
14958
14959   case BITS9(0,1,0,0,0,1,1,1,0): {
14960      /* ---------------- BX rM ---------------- */
14961      /* Branch to reg, and optionally switch modes.  Reg contains a
14962         suitably encoded address therefore (w CPSR.T at the bottom).
14963         Have to special-case r15, as usual. */
14964      UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
14965      if (BITS3(0,0,0) == INSN0(2,0)) {
14966         IRTemp dst = newTemp(Ity_I32);
14967         gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
14968         mk_skip_over_T16_if_cond_is_false(condT);
14969         condT = IRTemp_INVALID;
14970         // now uncond
14971         if (rM <= 14) {
14972            assign( dst, getIRegT(rM) );
14973         } else {
14974            vassert(rM == 15);
14975            assign( dst, mkU32(guest_R15_curr_instr_notENC + 4) );
14976         }
14977         irsb->next     = mkexpr(dst);
14978         irsb->jumpkind = rM == 14 ? Ijk_Ret : Ijk_Boring;
14979         dres.whatNext  = Dis_StopHere;
14980         DIP("bx r%u (possibly switch to ARM mode)\n", rM);
14981         goto decode_success;
14982      }
14983      break;
14984   }
14985
14986   /* ---------------- BLX rM ---------------- */
14987   /* Branch and link to interworking address in rM. */
14988   case BITS9(0,1,0,0,0,1,1,1,1): {
14989      if (BITS3(0,0,0) == INSN0(2,0)) {
14990         UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
14991         IRTemp dst = newTemp(Ity_I32);
14992         if (rM <= 14) {
14993            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
14994            mk_skip_over_T16_if_cond_is_false(condT);
14995            condT = IRTemp_INVALID;
14996            // now uncond
14997            /* We're returning to Thumb code, hence "| 1" */
14998            assign( dst, getIRegT(rM) );
14999            putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ),
15000                          IRTemp_INVALID );
15001            irsb->next     = mkexpr(dst);
15002            irsb->jumpkind = Ijk_Call;
15003            dres.whatNext  = Dis_StopHere;
15004            DIP("blx r%u (possibly switch to ARM mode)\n", rM);
15005            goto decode_success;
15006         }
15007         /* else unpredictable, fall through */
15008      }
15009      break;
15010   }
15011
15012   default:
15013      break; /* examine the next shortest prefix */
15014
15015   }
15016
15017
15018   /* ================ 16-bit 15:8 cases ================ */
15019
15020   switch (INSN0(15,8)) {
15021
15022   case BITS8(1,1,0,1,1,1,1,1): {
15023      /* ---------------- SVC ---------------- */
15024      UInt imm8 = INSN0(7,0);
15025      if (imm8 == 0) {
15026         /* A syscall.  We can't do this conditionally, hence: */
15027         mk_skip_over_T16_if_cond_is_false( condT );
15028         // FIXME: what if we have to back up and restart this insn?
15029         // then ITSTATE will be wrong (we'll have it as "used")
15030         // when it isn't.  Correct is to save ITSTATE in a
15031         // stash pseudo-reg, and back up from that if we have to
15032         // restart.
15033         // uncond after here
15034         irsb->next     = mkU32( (guest_R15_curr_instr_notENC + 2) | 1 );
15035         irsb->jumpkind = Ijk_Sys_syscall;
15036         dres.whatNext  = Dis_StopHere;
15037         DIP("svc #0x%08x\n", imm8);
15038         goto decode_success;
15039      }
15040      /* else fall through */
15041      break;
15042   }
15043
15044   case BITS8(0,1,0,0,0,1,0,0): {
15045      /* ---------------- ADD(HI) Rd, Rm ---------------- */
15046      UInt h1 = INSN0(7,7);
15047      UInt h2 = INSN0(6,6);
15048      UInt rM = (h2 << 3) | INSN0(5,3);
15049      UInt rD = (h1 << 3) | INSN0(2,0);
15050      //if (h1 == 0 && h2 == 0) { // Original T1 was more restrictive
15051      if (rD == 15 && rM == 15) {
15052         // then it's invalid
15053      } else {
15054         IRTemp res = newTemp(Ity_I32);
15055         assign( res, binop(Iop_Add32, getIRegT(rD), getIRegT(rM) ));
15056         if (rD != 15) {
15057            putIRegT( rD, mkexpr(res), condT );
15058         } else {
15059            /* Only allowed outside or last-in IT block; SIGILL if not so. */
15060            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
15061            /* jump over insn if not selected */
15062            mk_skip_over_T16_if_cond_is_false(condT);
15063            condT = IRTemp_INVALID;
15064            // now uncond
15065            /* non-interworking branch */
15066            irsb->next = binop(Iop_Or32, mkexpr(res), mkU32(1));
15067            irsb->jumpkind = Ijk_Boring;
15068            dres.whatNext = Dis_StopHere;
15069         }
15070         DIP("add(hi) r%u, r%u\n", rD, rM);
15071         goto decode_success;
15072      }
15073      break;
15074   }
15075
15076   case BITS8(0,1,0,0,0,1,0,1): {
15077      /* ---------------- CMP(HI) Rd, Rm ---------------- */
15078      UInt h1 = INSN0(7,7);
15079      UInt h2 = INSN0(6,6);
15080      UInt rM = (h2 << 3) | INSN0(5,3);
15081      UInt rN = (h1 << 3) | INSN0(2,0);
15082      if (h1 != 0 || h2 != 0) {
15083         IRTemp argL  = newTemp(Ity_I32);
15084         IRTemp argR  = newTemp(Ity_I32);
15085         assign( argL, getIRegT(rN) );
15086         assign( argR, getIRegT(rM) );
15087         /* Update flags regardless of whether in an IT block or not. */
15088         setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
15089         DIP("cmphi r%u, r%u\n", rN, rM);
15090         goto decode_success;
15091      }
15092      break;
15093   }
15094
15095   case BITS8(0,1,0,0,0,1,1,0): {
15096      /* ---------------- MOV(HI) Rd, Rm ---------------- */
15097      UInt h1 = INSN0(7,7);
15098      UInt h2 = INSN0(6,6);
15099      UInt rM = (h2 << 3) | INSN0(5,3);
15100      UInt rD = (h1 << 3) | INSN0(2,0);
15101      /* The old ARM ARM seems to disallow the case where both Rd and
15102         Rm are "low" registers, but newer versions allow it. */
15103      if (1 /*h1 != 0 || h2 != 0*/) {
15104         IRTemp val = newTemp(Ity_I32);
15105         assign( val, getIRegT(rM) );
15106         if (rD != 15) {
15107            putIRegT( rD, mkexpr(val), condT );
15108         } else {
15109            /* Only allowed outside or last-in IT block; SIGILL if not so. */
15110            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
15111            /* jump over insn if not selected */
15112            mk_skip_over_T16_if_cond_is_false(condT);
15113            condT = IRTemp_INVALID;
15114            // now uncond
15115            /* non-interworking branch */
15116            irsb->next = binop(Iop_Or32, mkexpr(val), mkU32(1));
15117            irsb->jumpkind = rM == 14 ? Ijk_Ret : Ijk_Boring;
15118            dres.whatNext = Dis_StopHere;
15119         }
15120         DIP("mov r%u, r%u\n", rD, rM);
15121         goto decode_success;
15122      }
15123      break;
15124   }
15125
15126   case BITS8(1,0,1,1,1,1,1,1): {
15127      /* ---------------- IT (if-then) ---------------- */
15128      UInt firstcond = INSN0(7,4);
15129      UInt mask = INSN0(3,0);
15130      UInt newITSTATE = 0;
15131      /* This is the ITSTATE represented as described in
15132         libvex_guest_arm.h.  It is not the ARM ARM representation. */
15133      UChar c1 = '.';
15134      UChar c2 = '.';
15135      UChar c3 = '.';
15136      Bool valid = compute_ITSTATE( &newITSTATE, &c1, &c2, &c3,
15137                                    firstcond, mask );
15138      if (valid && firstcond != 0xF/*NV*/) {
15139         /* Not allowed in an IT block; SIGILL if so. */
15140         gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
15141
15142         IRTemp t = newTemp(Ity_I32);
15143         assign(t, mkU32(newITSTATE));
15144         put_ITSTATE(t);
15145
15146         DIP("it%c%c%c %s\n", c1, c2, c3, nCC(firstcond));
15147         goto decode_success;
15148      }
15149      break;
15150   }
15151
15152   case BITS8(1,0,1,1,0,0,0,1):
15153   case BITS8(1,0,1,1,0,0,1,1):
15154   case BITS8(1,0,1,1,1,0,0,1):
15155   case BITS8(1,0,1,1,1,0,1,1): {
15156      /* ---------------- CB{N}Z ---------------- */
15157      UInt rN    = INSN0(2,0);
15158      UInt bOP   = INSN0(11,11);
15159      UInt imm32 = (INSN0(9,9) << 6) | (INSN0(7,3) << 1);
15160      gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
15161      /* It's a conditional branch forward. */
15162      IRTemp kond = newTemp(Ity_I1);
15163      assign( kond, binop(bOP ? Iop_CmpNE32 : Iop_CmpEQ32,
15164                          getIRegT(rN), mkU32(0)) );
15165
15166      vassert(0 == (guest_R15_curr_instr_notENC & 1));
15167      /* Looks like the nearest insn we can branch to is the one after
15168         next.  That makes sense, as there's no point in being able to
15169         encode a conditional branch to the next instruction. */
15170      UInt dst = (guest_R15_curr_instr_notENC + 4 + imm32) | 1;
15171      stmt(IRStmt_Exit( mkexpr(kond),
15172                        Ijk_Boring,
15173                        IRConst_U32(toUInt(dst)) ));
15174      DIP("cb%s r%u, 0x%x\n", bOP ? "nz" : "z", rN, dst - 1);
15175      goto decode_success;
15176   }
15177
15178   default:
15179      break; /* examine the next shortest prefix */
15180
15181   }
15182
15183
15184   /* ================ 16-bit 15:9 cases ================ */
15185
15186   switch (INSN0(15,9)) {
15187
15188   case BITS7(1,0,1,1,0,1,0): {
15189      /* ---------------- PUSH ---------------- */
15190      /* This is a bit like STMxx, but way simpler. Complications we
15191         don't have to deal with:
15192         * SP being one of the transferred registers
15193         * direction (increment vs decrement)
15194         * before-vs-after-ness
15195      */
15196      Int  i, nRegs;
15197      UInt bitR    = INSN0(8,8);
15198      UInt regList = INSN0(7,0);
15199      if (bitR) regList |= (1 << 14);
15200
15201      if (regList != 0) {
15202         /* Since we can't generate a guaranteed non-trapping IR
15203            sequence, (1) jump over the insn if it is gated false, and
15204            (2) back out the ITSTATE update. */
15205         mk_skip_over_T16_if_cond_is_false(condT);
15206         condT = IRTemp_INVALID;
15207         put_ITSTATE(old_itstate);
15208         // now uncond
15209
15210         nRegs = 0;
15211         for (i = 0; i < 16; i++) {
15212            if ((regList & (1 << i)) != 0)
15213               nRegs++;
15214         }
15215         vassert(nRegs >= 1 && nRegs <= 8);
15216
15217         /* Move SP down first of all, so we're "covered".  And don't
15218            mess with its alignment. */
15219         IRTemp newSP = newTemp(Ity_I32);
15220         assign(newSP, binop(Iop_Sub32, getIRegT(13), mkU32(4 * nRegs)));
15221         putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
15222
15223         /* Generate a transfer base address as a forced-aligned
15224            version of the final SP value. */
15225         IRTemp base = newTemp(Ity_I32);
15226         assign(base, binop(Iop_And32, mkexpr(newSP), mkU32(~3)));
15227
15228         /* Now the transfers */
15229         nRegs = 0;
15230         for (i = 0; i < 16; i++) {
15231            if ((regList & (1 << i)) != 0) {
15232               storeLE( binop(Iop_Add32, mkexpr(base), mkU32(4 * nRegs)),
15233                        getIRegT(i) );
15234               nRegs++;
15235            }
15236         }
15237
15238         /* Reinstate the ITSTATE update. */
15239         put_ITSTATE(new_itstate);
15240
15241         DIP("push {%s0x%04x}\n", bitR ? "lr," : "", regList & 0xFF);
15242         goto decode_success;
15243      }
15244      break;
15245   }
15246
15247   case BITS7(1,0,1,1,1,1,0): {
15248      /* ---------------- POP ---------------- */
15249      Int  i, nRegs;
15250      UInt bitR    = INSN0(8,8);
15251      UInt regList = INSN0(7,0);
15252
15253      if (regList != 0 || bitR) {
15254         /* Since we can't generate a guaranteed non-trapping IR
15255            sequence, (1) jump over the insn if it is gated false, and
15256            (2) back out the ITSTATE update. */
15257         mk_skip_over_T16_if_cond_is_false(condT);
15258         condT = IRTemp_INVALID;
15259         put_ITSTATE(old_itstate);
15260         // now uncond
15261
15262         nRegs = 0;
15263         for (i = 0; i < 8; i++) {
15264            if ((regList & (1 << i)) != 0)
15265               nRegs++;
15266         }
15267         vassert(nRegs >= 0 && nRegs <= 7);
15268         vassert(bitR == 0 || bitR == 1);
15269
15270         IRTemp oldSP = newTemp(Ity_I32);
15271         assign(oldSP, getIRegT(13));
15272
15273         /* Generate a transfer base address as a forced-aligned
15274            version of the original SP value. */
15275         IRTemp base = newTemp(Ity_I32);
15276         assign(base, binop(Iop_And32, mkexpr(oldSP), mkU32(~3)));
15277
15278         /* Compute a new value for SP, but don't install it yet, so
15279            that we're "covered" until all the transfers are done.
15280            And don't mess with its alignment. */
15281         IRTemp newSP = newTemp(Ity_I32);
15282         assign(newSP, binop(Iop_Add32, mkexpr(oldSP),
15283                                        mkU32(4 * (nRegs + bitR))));
15284
15285         /* Now the transfers, not including PC */
15286         nRegs = 0;
15287         for (i = 0; i < 8; i++) {
15288            if ((regList & (1 << i)) != 0) {
15289               putIRegT(i, loadLE( Ity_I32,
15290                                   binop(Iop_Add32, mkexpr(base),
15291                                                    mkU32(4 * nRegs))),
15292                           IRTemp_INVALID );
15293               nRegs++;
15294            }
15295         }
15296
15297         IRTemp newPC = IRTemp_INVALID;
15298         if (bitR) {
15299            newPC = newTemp(Ity_I32);
15300            assign( newPC, loadLE( Ity_I32,
15301                                   binop(Iop_Add32, mkexpr(base),
15302                                                    mkU32(4 * nRegs))));
15303         }
15304
15305         /* Now we can safely install the new SP value */
15306         putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
15307
15308         /* Reinstate the ITSTATE update. */
15309         put_ITSTATE(new_itstate);
15310
15311         /* now, do we also have to do a branch?  If so, it turns out
15312            that the new PC value is encoded exactly as we need it to
15313            be -- with CPSR.T in the bottom bit.  So we can simply use
15314            it as is, no need to mess with it.  Note, therefore, this
15315            is an interworking return. */
15316         if (bitR) {
15317            irsb->next     = mkexpr(newPC);
15318            irsb->jumpkind = Ijk_Ret;
15319            dres.whatNext  = Dis_StopHere;
15320         }
15321
15322         DIP("pop {%s0x%04x}\n", bitR ? "pc," : "", regList & 0xFF);
15323         goto decode_success;
15324      }
15325      break;
15326   }
15327
15328   case BITS7(0,0,0,1,1,1,0):   /* ADDS */
15329   case BITS7(0,0,0,1,1,1,1): { /* SUBS */
15330      /* ---------------- ADDS Rd, Rn, #uimm3 ---------------- */
15331      /* ---------------- SUBS Rd, Rn, #uimm3 ---------------- */
15332      UInt   uimm3 = INSN0(8,6);
15333      UInt   rN    = INSN0(5,3);
15334      UInt   rD    = INSN0(2,0);
15335      UInt   isSub = INSN0(9,9);
15336      IRTemp argL  = newTemp(Ity_I32);
15337      IRTemp argR  = newTemp(Ity_I32);
15338      assign( argL, getIRegT(rN) );
15339      assign( argR, mkU32(uimm3) );
15340      putIRegT(rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
15341                         mkexpr(argL), mkexpr(argR)),
15342                   condT);
15343      setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
15344                      argL, argR, cond_AND_notInIT_T );
15345      DIP("%s r%u, r%u, #%u\n", isSub ? "subs" : "adds", rD, rN, uimm3);
15346      goto decode_success;
15347   }
15348
15349   case BITS7(0,0,0,1,1,0,0):   /* ADDS */
15350   case BITS7(0,0,0,1,1,0,1): { /* SUBS */
15351      /* ---------------- ADDS Rd, Rn, Rm ---------------- */
15352      /* ---------------- SUBS Rd, Rn, Rm ---------------- */
15353      UInt   rM    = INSN0(8,6);
15354      UInt   rN    = INSN0(5,3);
15355      UInt   rD    = INSN0(2,0);
15356      UInt   isSub = INSN0(9,9);
15357      IRTemp argL  = newTemp(Ity_I32);
15358      IRTemp argR  = newTemp(Ity_I32);
15359      assign( argL, getIRegT(rN) );
15360      assign( argR, getIRegT(rM) );
15361      putIRegT( rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
15362                          mkexpr(argL), mkexpr(argR)),
15363                    condT );
15364      setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
15365                      argL, argR, cond_AND_notInIT_T );
15366      DIP("%s r%u, r%u, r%u\n", isSub ? "subs" : "adds", rD, rN, rM);
15367      goto decode_success;
15368   }
15369
15370   case BITS7(0,1,0,1,0,0,0):   /* STR */
15371   case BITS7(0,1,0,1,1,0,0): { /* LDR */
15372      /* ------------- LDR Rd, [Rn, Rm] ------------- */
15373      /* ------------- STR Rd, [Rn, Rm] ------------- */
15374      /* LDR/STR Rd, [Rn + Rm] */
15375      UInt    rD   = INSN0(2,0);
15376      UInt    rN   = INSN0(5,3);
15377      UInt    rM   = INSN0(8,6);
15378      UInt    isLD = INSN0(11,11);
15379
15380      mk_skip_over_T16_if_cond_is_false(condT);
15381      condT = IRTemp_INVALID;
15382      // now uncond
15383
15384      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
15385      put_ITSTATE(old_itstate); // backout
15386      if (isLD) {
15387         putIRegT(rD, loadLE(Ity_I32, ea), IRTemp_INVALID);
15388      } else {
15389         storeLE(ea, getIRegT(rD));
15390      }
15391      put_ITSTATE(new_itstate); // restore
15392
15393      DIP("%s r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
15394      goto decode_success;
15395   }
15396
15397   case BITS7(0,1,0,1,0,0,1):
15398   case BITS7(0,1,0,1,1,0,1): {
15399      /* ------------- LDRH Rd, [Rn, Rm] ------------- */
15400      /* ------------- STRH Rd, [Rn, Rm] ------------- */
15401      /* LDRH/STRH Rd, [Rn + Rm] */
15402      UInt    rD   = INSN0(2,0);
15403      UInt    rN   = INSN0(5,3);
15404      UInt    rM   = INSN0(8,6);
15405      UInt    isLD = INSN0(11,11);
15406
15407      mk_skip_over_T16_if_cond_is_false(condT);
15408      condT = IRTemp_INVALID;
15409      // now uncond
15410
15411      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
15412      put_ITSTATE(old_itstate); // backout
15413      if (isLD) {
15414         putIRegT(rD, unop(Iop_16Uto32, loadLE(Ity_I16, ea)),
15415                      IRTemp_INVALID);
15416      } else {
15417         storeLE( ea, unop(Iop_32to16, getIRegT(rD)) );
15418      }
15419      put_ITSTATE(new_itstate); // restore
15420
15421      DIP("%sh r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
15422      goto decode_success;
15423   }
15424
15425   case BITS7(0,1,0,1,1,1,1): {
15426      /* ------------- LDRSH Rd, [Rn, Rm] ------------- */
15427      /* LDRSH Rd, [Rn + Rm] */
15428      UInt    rD = INSN0(2,0);
15429      UInt    rN = INSN0(5,3);
15430      UInt    rM = INSN0(8,6);
15431
15432      mk_skip_over_T16_if_cond_is_false(condT);
15433      condT = IRTemp_INVALID;
15434      // now uncond
15435
15436      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
15437      put_ITSTATE(old_itstate); // backout
15438      putIRegT(rD, unop(Iop_16Sto32, loadLE(Ity_I16, ea)),
15439                   IRTemp_INVALID);
15440      put_ITSTATE(new_itstate); // restore
15441
15442      DIP("ldrsh r%u, [r%u, r%u]\n", rD, rN, rM);
15443      goto decode_success;
15444   }
15445
15446   case BITS7(0,1,0,1,0,1,1): {
15447      /* ------------- LDRSB Rd, [Rn, Rm] ------------- */
15448      /* LDRSB Rd, [Rn + Rm] */
15449      UInt    rD = INSN0(2,0);
15450      UInt    rN = INSN0(5,3);
15451      UInt    rM = INSN0(8,6);
15452
15453      mk_skip_over_T16_if_cond_is_false(condT);
15454      condT = IRTemp_INVALID;
15455      // now uncond
15456
15457      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
15458      put_ITSTATE(old_itstate); // backout
15459      putIRegT(rD, unop(Iop_8Sto32, loadLE(Ity_I8, ea)),
15460                   IRTemp_INVALID);
15461      put_ITSTATE(new_itstate); // restore
15462
15463      DIP("ldrsb r%u, [r%u, r%u]\n", rD, rN, rM);
15464      goto decode_success;
15465   }
15466
15467   case BITS7(0,1,0,1,0,1,0):
15468   case BITS7(0,1,0,1,1,1,0): {
15469      /* ------------- LDRB Rd, [Rn, Rm] ------------- */
15470      /* ------------- STRB Rd, [Rn, Rm] ------------- */
15471      /* LDRB/STRB Rd, [Rn + Rm] */
15472      UInt    rD   = INSN0(2,0);
15473      UInt    rN   = INSN0(5,3);
15474      UInt    rM   = INSN0(8,6);
15475      UInt    isLD = INSN0(11,11);
15476
15477      mk_skip_over_T16_if_cond_is_false(condT);
15478      condT = IRTemp_INVALID;
15479      // now uncond
15480
15481      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
15482      put_ITSTATE(old_itstate); // backout
15483      if (isLD) {
15484         putIRegT(rD, unop(Iop_8Uto32, loadLE(Ity_I8, ea)),
15485                  IRTemp_INVALID);
15486      } else {
15487         storeLE( ea, unop(Iop_32to8, getIRegT(rD)) );
15488      }
15489      put_ITSTATE(new_itstate); // restore
15490
15491      DIP("%sb r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
15492      goto decode_success;
15493   }
15494
15495   default:
15496      break; /* examine the next shortest prefix */
15497
15498   }
15499
15500
15501   /* ================ 16-bit 15:11 cases ================ */
15502
15503   switch (INSN0(15,11)) {
15504
15505   case BITS5(0,0,1,1,0):
15506   case BITS5(0,0,1,1,1): {
15507      /* ---------------- ADDS Rn, #uimm8 ---------------- */
15508      /* ---------------- SUBS Rn, #uimm8 ---------------- */
15509      UInt   isSub = INSN0(11,11);
15510      UInt   rN    = INSN0(10,8);
15511      UInt   uimm8 = INSN0(7,0);
15512      IRTemp argL  = newTemp(Ity_I32);
15513      IRTemp argR  = newTemp(Ity_I32);
15514      assign( argL, getIRegT(rN) );
15515      assign( argR, mkU32(uimm8) );
15516      putIRegT( rN, binop(isSub ? Iop_Sub32 : Iop_Add32,
15517                          mkexpr(argL), mkexpr(argR)), condT );
15518      setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
15519                      argL, argR, cond_AND_notInIT_T );
15520      DIP("%s r%u, #%u\n", isSub ? "subs" : "adds", rN, uimm8);
15521      goto decode_success;
15522   }
15523
15524   case BITS5(1,0,1,0,0): {
15525      /* ---------------- ADD rD, PC, #imm8 * 4 ---------------- */
15526      /* a.k.a. ADR */
15527      /* rD = align4(PC) + imm8 * 4 */
15528      UInt rD   = INSN0(10,8);
15529      UInt imm8 = INSN0(7,0);
15530      putIRegT(rD, binop(Iop_Add32,
15531                         binop(Iop_And32, getIRegT(15), mkU32(~3U)),
15532                         mkU32(imm8 * 4)),
15533                   condT);
15534      DIP("add r%u, pc, #%u\n", rD, imm8 * 4);
15535      goto decode_success;
15536   }
15537
15538   case BITS5(1,0,1,0,1): {
15539      /* ---------------- ADD rD, SP, #imm8 * 4 ---------------- */
15540      UInt rD   = INSN0(10,8);
15541      UInt imm8 = INSN0(7,0);
15542      putIRegT(rD, binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4)),
15543                   condT);
15544      DIP("add r%u, r13, #%u\n", rD, imm8 * 4);
15545      goto decode_success;
15546   }
15547
15548   case BITS5(0,0,1,0,1): {
15549      /* ---------------- CMP Rn, #uimm8 ---------------- */
15550      UInt   rN    = INSN0(10,8);
15551      UInt   uimm8 = INSN0(7,0);
15552      IRTemp argL  = newTemp(Ity_I32);
15553      IRTemp argR  = newTemp(Ity_I32);
15554      assign( argL, getIRegT(rN) );
15555      assign( argR, mkU32(uimm8) );
15556      /* Update flags regardless of whether in an IT block or not. */
15557      setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
15558      DIP("cmp r%u, #%u\n", rN, uimm8);
15559      goto decode_success;
15560   }
15561
15562   case BITS5(0,0,1,0,0): {
15563      /* -------------- (T1) MOVS Rn, #uimm8 -------------- */
15564      UInt   rD    = INSN0(10,8);
15565      UInt   uimm8 = INSN0(7,0);
15566      IRTemp oldV  = newTemp(Ity_I32);
15567      IRTemp oldC  = newTemp(Ity_I32);
15568      IRTemp res   = newTemp(Ity_I32);
15569      assign( oldV, mk_armg_calculate_flag_v() );
15570      assign( oldC, mk_armg_calculate_flag_c() );
15571      assign( res, mkU32(uimm8) );
15572      putIRegT(rD, mkexpr(res), condT);
15573      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
15574                         cond_AND_notInIT_T );
15575      DIP("movs r%u, #%u\n", rD, uimm8);
15576      goto decode_success;
15577   }
15578
15579   case BITS5(0,1,0,0,1): {
15580      /* ------------- LDR Rd, [PC, #imm8 * 4] ------------- */
15581      /* LDR Rd, [align4(PC) + imm8 * 4] */
15582      UInt   rD   = INSN0(10,8);
15583      UInt   imm8 = INSN0(7,0);
15584      IRTemp ea   = newTemp(Ity_I32);
15585
15586      mk_skip_over_T16_if_cond_is_false(condT);
15587      condT = IRTemp_INVALID;
15588      // now uncond
15589
15590      assign(ea, binop(Iop_Add32,
15591                       binop(Iop_And32, getIRegT(15), mkU32(~3U)),
15592                       mkU32(imm8 * 4)));
15593      put_ITSTATE(old_itstate); // backout
15594      putIRegT(rD, loadLE(Ity_I32, mkexpr(ea)),
15595                   IRTemp_INVALID);
15596      put_ITSTATE(new_itstate); // restore
15597
15598      DIP("ldr r%u, [pc, #%u]\n", rD, imm8 * 4);
15599      goto decode_success;
15600   }
15601
15602   case BITS5(0,1,1,0,0):   /* STR */
15603   case BITS5(0,1,1,0,1): { /* LDR */
15604      /* ------------- LDR Rd, [Rn, #imm5 * 4] ------------- */
15605      /* ------------- STR Rd, [Rn, #imm5 * 4] ------------- */
15606      /* LDR/STR Rd, [Rn + imm5 * 4] */
15607      UInt    rD   = INSN0(2,0);
15608      UInt    rN   = INSN0(5,3);
15609      UInt    imm5 = INSN0(10,6);
15610      UInt    isLD = INSN0(11,11);
15611
15612      mk_skip_over_T16_if_cond_is_false(condT);
15613      condT = IRTemp_INVALID;
15614      // now uncond
15615
15616      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 4));
15617      put_ITSTATE(old_itstate); // backout
15618      if (isLD) {
15619         putIRegT(rD, loadLE(Ity_I32, ea), IRTemp_INVALID);
15620      } else {
15621         storeLE( ea, getIRegT(rD) );
15622      }
15623      put_ITSTATE(new_itstate); // restore
15624
15625      DIP("%s r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 4);
15626      goto decode_success;
15627   }
15628
15629   case BITS5(1,0,0,0,0):   /* STRH */
15630   case BITS5(1,0,0,0,1): { /* LDRH */
15631      /* ------------- LDRH Rd, [Rn, #imm5 * 2] ------------- */
15632      /* ------------- STRH Rd, [Rn, #imm5 * 2] ------------- */
15633      /* LDRH/STRH Rd, [Rn + imm5 * 2] */
15634      UInt    rD   = INSN0(2,0);
15635      UInt    rN   = INSN0(5,3);
15636      UInt    imm5 = INSN0(10,6);
15637      UInt    isLD = INSN0(11,11);
15638
15639      mk_skip_over_T16_if_cond_is_false(condT);
15640      condT = IRTemp_INVALID;
15641      // now uncond
15642
15643      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 2));
15644      put_ITSTATE(old_itstate); // backout
15645      if (isLD) {
15646         putIRegT(rD, unop(Iop_16Uto32, loadLE(Ity_I16, ea)),
15647                  IRTemp_INVALID);
15648      } else {
15649         storeLE( ea, unop(Iop_32to16, getIRegT(rD)) );
15650      }
15651      put_ITSTATE(new_itstate); // restore
15652
15653      DIP("%sh r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 2);
15654      goto decode_success;
15655   }
15656
15657   case BITS5(0,1,1,1,0):   /* STRB */
15658   case BITS5(0,1,1,1,1): { /* LDRB */
15659      /* ------------- LDRB Rd, [Rn, #imm5] ------------- */
15660      /* ------------- STRB Rd, [Rn, #imm5] ------------- */
15661      /* LDRB/STRB Rd, [Rn + imm5] */
15662      UInt    rD   = INSN0(2,0);
15663      UInt    rN   = INSN0(5,3);
15664      UInt    imm5 = INSN0(10,6);
15665      UInt    isLD = INSN0(11,11);
15666
15667      mk_skip_over_T16_if_cond_is_false(condT);
15668      condT = IRTemp_INVALID;
15669      // now uncond
15670
15671      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5));
15672      put_ITSTATE(old_itstate); // backout
15673      if (isLD) {
15674         putIRegT(rD, unop(Iop_8Uto32, loadLE(Ity_I8, ea)),
15675                  IRTemp_INVALID);
15676      } else {
15677         storeLE( ea, unop(Iop_32to8, getIRegT(rD)) );
15678      }
15679      put_ITSTATE(new_itstate); // restore
15680
15681      DIP("%sb r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5);
15682      goto decode_success;
15683   }
15684
15685   case BITS5(1,0,0,1,0):   /* STR */
15686   case BITS5(1,0,0,1,1): { /* LDR */
15687      /* ------------- LDR Rd, [SP, #imm8 * 4] ------------- */
15688      /* ------------- STR Rd, [SP, #imm8 * 4] ------------- */
15689      /* LDR/STR Rd, [SP + imm8 * 4] */
15690      UInt rD    = INSN0(10,8);
15691      UInt imm8  = INSN0(7,0);
15692      UInt isLD  = INSN0(11,11);
15693
15694      mk_skip_over_T16_if_cond_is_false(condT);
15695      condT = IRTemp_INVALID;
15696      // now uncond
15697
15698      IRExpr* ea = binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4));
15699      put_ITSTATE(old_itstate); // backout
15700      if (isLD) {
15701         putIRegT(rD, loadLE(Ity_I32, ea), IRTemp_INVALID);
15702      } else {
15703         storeLE(ea, getIRegT(rD));
15704      }
15705      put_ITSTATE(new_itstate); // restore
15706
15707      DIP("%s r%u, [sp, #%u]\n", isLD ? "ldr" : "str", rD, imm8 * 4);
15708      goto decode_success;
15709   }
15710
15711   case BITS5(1,1,0,0,1): {
15712      /* ------------- LDMIA Rn!, {reglist} ------------- */
15713      Int i, nRegs = 0;
15714      UInt rN   = INSN0(10,8);
15715      UInt list = INSN0(7,0);
15716      /* Empty lists aren't allowed. */
15717      if (list != 0) {
15718         mk_skip_over_T16_if_cond_is_false(condT);
15719         condT = IRTemp_INVALID;
15720         put_ITSTATE(old_itstate);
15721         // now uncond
15722
15723         IRTemp oldRn = newTemp(Ity_I32);
15724         IRTemp base  = newTemp(Ity_I32);
15725         assign(oldRn, getIRegT(rN));
15726         assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
15727         for (i = 0; i < 8; i++) {
15728            if (0 == (list & (1 << i)))
15729               continue;
15730            nRegs++;
15731            putIRegT(
15732               i, loadLE(Ity_I32,
15733                         binop(Iop_Add32, mkexpr(base),
15734                                          mkU32(nRegs * 4 - 4))),
15735               IRTemp_INVALID
15736            );
15737         }
15738         /* Only do the writeback for rN if it isn't in the list of
15739            registers to be transferred. */
15740         if (0 == (list & (1 << rN))) {
15741            putIRegT(rN,
15742                     binop(Iop_Add32, mkexpr(oldRn),
15743                                      mkU32(nRegs * 4)),
15744                     IRTemp_INVALID
15745            );
15746         }
15747
15748         /* Reinstate the ITSTATE update. */
15749         put_ITSTATE(new_itstate);
15750
15751         DIP("ldmia r%u!, {0x%04x}\n", rN, list);
15752         goto decode_success;
15753      }
15754      break;
15755   }
15756
15757   case BITS5(1,1,0,0,0): {
15758      /* ------------- STMIA Rn!, {reglist} ------------- */
15759      Int i, nRegs = 0;
15760      UInt rN   = INSN0(10,8);
15761      UInt list = INSN0(7,0);
15762      /* Empty lists aren't allowed.  Also, if rN is in the list then
15763         it must be the lowest numbered register in the list. */
15764      Bool valid = list != 0;
15765      if (valid && 0 != (list & (1 << rN))) {
15766         for (i = 0; i < rN; i++) {
15767            if (0 != (list & (1 << i)))
15768               valid = False;
15769         }
15770      }
15771      if (valid) {
15772         mk_skip_over_T16_if_cond_is_false(condT);
15773         condT = IRTemp_INVALID;
15774         put_ITSTATE(old_itstate);
15775         // now uncond
15776
15777         IRTemp oldRn = newTemp(Ity_I32);
15778         IRTemp base = newTemp(Ity_I32);
15779         assign(oldRn, getIRegT(rN));
15780         assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
15781         for (i = 0; i < 8; i++) {
15782            if (0 == (list & (1 << i)))
15783               continue;
15784            nRegs++;
15785            storeLE( binop(Iop_Add32, mkexpr(base), mkU32(nRegs * 4 - 4)),
15786                     getIRegT(i) );
15787         }
15788         /* Always do the writeback. */
15789         putIRegT(rN,
15790                  binop(Iop_Add32, mkexpr(oldRn),
15791                                   mkU32(nRegs * 4)),
15792                  IRTemp_INVALID);
15793
15794         /* Reinstate the ITSTATE update. */
15795         put_ITSTATE(new_itstate);
15796
15797         DIP("stmia r%u!, {0x%04x}\n", rN, list);
15798         goto decode_success;
15799      }
15800      break;
15801   }
15802
15803   case BITS5(0,0,0,0,0):   /* LSLS */
15804   case BITS5(0,0,0,0,1):   /* LSRS */
15805   case BITS5(0,0,0,1,0): { /* ASRS */
15806      /* ---------------- LSLS Rd, Rm, #imm5 ---------------- */
15807      /* ---------------- LSRS Rd, Rm, #imm5 ---------------- */
15808      /* ---------------- ASRS Rd, Rm, #imm5 ---------------- */
15809      UInt   rD   = INSN0(2,0);
15810      UInt   rM   = INSN0(5,3);
15811      UInt   imm5 = INSN0(10,6);
15812      IRTemp res  = newTemp(Ity_I32);
15813      IRTemp resC = newTemp(Ity_I32);
15814      IRTemp rMt  = newTemp(Ity_I32);
15815      IRTemp oldV = newTemp(Ity_I32);
15816      HChar* wot  = "???";
15817      assign(rMt, getIRegT(rM));
15818      assign(oldV, mk_armg_calculate_flag_v());
15819      /* Looks like INSN0(12,11) are the standard 'how' encoding.
15820         Could compactify if the ROR case later appears. */
15821      switch (INSN0(15,11)) {
15822         case BITS5(0,0,0,0,0):
15823            compute_result_and_C_after_LSL_by_imm5(
15824               dis_buf, &res, &resC, rMt, imm5, rM
15825            );
15826            wot = "lsl";
15827            break;
15828         case BITS5(0,0,0,0,1):
15829            compute_result_and_C_after_LSR_by_imm5(
15830               dis_buf, &res, &resC, rMt, imm5, rM
15831            );
15832            wot = "lsr";
15833            break;
15834         case BITS5(0,0,0,1,0):
15835            compute_result_and_C_after_ASR_by_imm5(
15836               dis_buf, &res, &resC, rMt, imm5, rM
15837            );
15838            wot = "asr";
15839            break;
15840         default:
15841            /*NOTREACHED*/vassert(0);
15842      }
15843      // not safe to read guest state after this point
15844      putIRegT(rD, mkexpr(res), condT);
15845      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
15846                         cond_AND_notInIT_T );
15847      /* ignore buf and roll our own output */
15848      DIP("%ss r%u, r%u, #%u\n", wot, rD, rM, imm5);
15849      goto decode_success;
15850   }
15851
15852   case BITS5(1,1,1,0,0): {
15853      /* ---------------- B #simm11 ---------------- */
15854      Int  simm11 = INSN0(10,0);
15855           simm11 = (simm11 << 21) >> 20;
15856      UInt dst    = simm11 + guest_R15_curr_instr_notENC + 4;
15857      /* Only allowed outside or last-in IT block; SIGILL if not so. */
15858      gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
15859      // and skip this insn if not selected; being cleverer is too
15860      // difficult
15861      mk_skip_over_T16_if_cond_is_false(condT);
15862      condT = IRTemp_INVALID;
15863      // now uncond
15864      irsb->next     = mkU32( dst | 1 /*CPSR.T*/ );
15865      irsb->jumpkind = Ijk_Boring;
15866      dres.whatNext  = Dis_StopHere;
15867      DIP("b 0x%x\n", dst);
15868      goto decode_success;
15869   }
15870
15871   default:
15872      break; /* examine the next shortest prefix */
15873
15874   }
15875
15876
15877   /* ================ 16-bit 15:12 cases ================ */
15878
15879   switch (INSN0(15,12)) {
15880
15881   case BITS4(1,1,0,1): {
15882      /* ---------------- Bcond #simm8 ---------------- */
15883      UInt cond  = INSN0(11,8);
15884      Int  simm8 = INSN0(7,0);
15885           simm8 = (simm8 << 24) >> 23;
15886      UInt dst   = simm8 + guest_R15_curr_instr_notENC + 4;
15887      if (cond != ARMCondAL && cond != ARMCondNV) {
15888         /* Not allowed in an IT block; SIGILL if so. */
15889         gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
15890
15891         IRTemp kondT = newTemp(Ity_I32);
15892         assign( kondT, mk_armg_calculate_condition(cond) );
15893         stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
15894                            Ijk_Boring,
15895                            IRConst_U32(dst | 1/*CPSR.T*/) ));
15896         irsb->next = mkU32( (guest_R15_curr_instr_notENC + 2)
15897                             | 1 /*CPSR.T*/ );
15898         irsb->jumpkind = Ijk_Boring;
15899         dres.whatNext  = Dis_StopHere;
15900         DIP("b%s 0x%x\n", nCC(cond), dst);
15901         goto decode_success;
15902      }
15903      break;
15904   }
15905
15906   default:
15907      break; /* hmm, nothing matched */
15908
15909   }
15910
15911   /* ================ 16-bit misc cases ================ */
15912
15913   /* ------ NOP ------ */
15914   if (INSN0(15,0) == 0xBF00) {
15915      DIP("nop");
15916      goto decode_success;
15917   }
15918
15919   /* ----------------------------------------------------------- */
15920   /* --                                                       -- */
15921   /* -- Thumb 32-bit integer instructions                     -- */
15922   /* --                                                       -- */
15923   /* ----------------------------------------------------------- */
15924
15925#  define INSN1(_bMax,_bMin)  SLICE_UInt(((UInt)insn1), (_bMax), (_bMin))
15926
15927   /* second 16 bits of the instruction, if any */
15928   UShort insn1 = getUShortLittleEndianly( guest_instr+2 );
15929
15930   anOp   = Iop_INVALID; /* paranoia */
15931   anOpNm = NULL;        /* paranoia */
15932
15933   /* Change result defaults to suit 32-bit insns. */
15934   vassert(dres.whatNext   == Dis_Continue);
15935   vassert(dres.len        == 2);
15936   vassert(dres.continueAt == 0);
15937   dres.len = 4;
15938
15939   /* ------------------- (T1) SMMUL{R} ------------------ */
15940   if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
15941       && INSN0(6,4) == BITS3(1,0,1)
15942       && INSN1(15,12) == BITS4(1,1,1,1)
15943       && INSN1(7,5) == BITS3(0,0,0)) {
15944      UInt bitR = INSN1(4,4);
15945      UInt rD = INSN1(11,8);
15946      UInt rM = INSN1(3,0);
15947      UInt rN = INSN0(3,0);
15948      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
15949        IRExpr* res = newTemp(Ity_I32);
15950        assign(res, unop(Iop_64HIto32,
15951                binop(Iop_Add64,
15952                      binop(Iop_MullS32, getIRegT(rN), getIRegT(rM)),
15953                    mkU64(bitR ? 0x80000000ULL : 0ULL))));
15954         putIRegT(rD, mkexpr(res), condT);
15955         DIP("smmul%s r%u, r%u, r%u\n",
15956             bitR ? "r" : "", rD, rN, rM);
15957         goto decode_success;
15958      }
15959   }
15960
15961
15962   /* ---------------- BL/BLX simm26 ---------------- */
15963   if (BITS5(1,1,1,1,0) == INSN0(15,11) && BITS2(1,1) == INSN1(15,14)) {
15964      UInt isBL = INSN1(12,12);
15965      UInt bS   = INSN0(10,10);
15966      UInt bJ1  = INSN1(13,13);
15967      UInt bJ2  = INSN1(11,11);
15968      UInt bI1  = 1 ^ (bJ1 ^ bS);
15969      UInt bI2  = 1 ^ (bJ2 ^ bS);
15970      Int simm25
15971         =   (bS          << (1 + 1 + 10 + 11 + 1))
15972           | (bI1         << (1 + 10 + 11 + 1))
15973           | (bI2         << (10 + 11 + 1))
15974           | (INSN0(9,0)  << (11 + 1))
15975           | (INSN1(10,0) << 1);
15976      simm25 = (simm25 << 7) >> 7;
15977
15978      vassert(0 == (guest_R15_curr_instr_notENC & 1));
15979      UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
15980
15981      /* One further validity case to check: in the case of BLX
15982         (not-BL), that insn1[0] must be zero. */
15983      Bool valid = True;
15984      if (isBL == 0 && INSN1(0,0) == 1) valid = False;
15985      if (valid) {
15986         /* Only allowed outside or last-in IT block; SIGILL if not so. */
15987         gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
15988         // and skip this insn if not selected; being cleverer is too
15989         // difficult
15990         mk_skip_over_T32_if_cond_is_false(condT);
15991         condT = IRTemp_INVALID;
15992         // now uncond
15993
15994         /* We're returning to Thumb code, hence "| 1" */
15995         putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 4) | 1 ),
15996                   IRTemp_INVALID);
15997         if (isBL) {
15998            /* BL: unconditional T -> T call */
15999            /* we're calling Thumb code, hence "| 1" */
16000            irsb->next = mkU32( dst | 1 );
16001            DIP("bl 0x%x (stay in Thumb mode)\n", dst);
16002         } else {
16003            /* BLX: unconditional T -> A call */
16004            /* we're calling ARM code, hence "& 3" to align to a
16005               valid ARM insn address */
16006            irsb->next = mkU32( dst & ~3 );
16007            DIP("blx 0x%x (switch to ARM mode)\n", dst & ~3);
16008         }
16009         irsb->jumpkind = Ijk_Call;
16010         dres.whatNext = Dis_StopHere;
16011         goto decode_success;
16012      }
16013   }
16014
16015   /* ---------------- {LD,ST}M{IA,DB} ---------------- */
16016   if (0x3a2 == INSN0(15,6) // {LD,ST}MIA
16017       || 0x3a4 == INSN0(15,6)) { // {LD,ST}MDB
16018      UInt bW      = INSN0(5,5); /* writeback Rn ? */
16019      UInt bL      = INSN0(4,4);
16020      UInt rN      = INSN0(3,0);
16021      UInt bP      = INSN1(15,15); /* reglist entry for r15 */
16022      UInt bM      = INSN1(14,14); /* reglist entry for r14 */
16023      UInt rLmost  = INSN1(12,0);  /* reglist entry for r0 .. 12 */
16024      UInt rL13    = INSN1(13,13); /* must be zero */
16025      UInt regList = 0;
16026      Bool valid   = True;
16027
16028      UInt bINC    = 1;
16029      UInt bBEFORE = 0;
16030      if (INSN0(15,6) == 0x3a4) {
16031         bINC    = 0;
16032         bBEFORE = 1;
16033      }
16034
16035      /* detect statically invalid cases, and construct the final
16036         reglist */
16037      if (rL13 == 1)
16038         valid = False;
16039
16040      if (bL == 1) {
16041         regList = (bP << 15) | (bM << 14) | rLmost;
16042         if (rN == 15)                       valid = False;
16043         if (popcount32(regList) < 2)        valid = False;
16044         if (bP == 1 && bM == 1)             valid = False;
16045         if (bW == 1 && (regList & (1<<rN))) valid = False;
16046      } else {
16047         regList = (bM << 14) | rLmost;
16048         if (bP == 1)                        valid = False;
16049         if (rN == 15)                       valid = False;
16050         if (popcount32(regList) < 2)        valid = False;
16051         if (bW == 1 && (regList & (1<<rN))) valid = False;
16052         if (regList & (1<<rN)) {
16053            UInt i;
16054            /* if Rn is in the list, then it must be the
16055               lowest numbered entry */
16056            for (i = 0; i < rN; i++) {
16057               if (regList & (1<<i))
16058                  valid = False;
16059            }
16060         }
16061      }
16062
16063      if (valid) {
16064         if (bL == 1 && bP == 1) {
16065            // We'll be writing the PC.  Hence:
16066            /* Only allowed outside or last-in IT block; SIGILL if not so. */
16067            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
16068         }
16069
16070         /* Go uncond: */
16071         mk_skip_over_T32_if_cond_is_false(condT);
16072         condT = IRTemp_INVALID;
16073         // now uncond
16074
16075         /* Generate the IR.  This might generate a write to R15, */
16076         mk_ldm_stm(False/*!arm*/, rN, bINC, bBEFORE, bW, bL, regList);
16077
16078         if (bL == 1 && (regList & (1<<15))) {
16079            // If we wrote to R15, we have an interworking return to
16080            // deal with.
16081            irsb->next     = llGetIReg(15);
16082            irsb->jumpkind = Ijk_Ret;
16083            dres.whatNext  = Dis_StopHere;
16084         }
16085
16086         DIP("%sm%c%c r%u%s, {0x%04x}\n",
16087              bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
16088              rN, bW ? "!" : "", regList);
16089
16090         goto decode_success;
16091      }
16092   }
16093
16094   /* -------------- (T3) ADD{S}.W Rd, Rn, #constT -------------- */
16095   if (INSN0(15,11) == BITS5(1,1,1,1,0)
16096       && INSN0(9,5) == BITS5(0,1,0,0,0)
16097       && INSN1(15,15) == 0) {
16098      UInt bS = INSN0(4,4);
16099      UInt rN = INSN0(3,0);
16100      UInt rD = INSN1(11,8);
16101      Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
16102      /* but allow "add.w reg, sp, #constT" */
16103      if (!valid && rN == 13 && rD != 15)
16104         valid = True;
16105      if (valid) {
16106         IRTemp argL  = newTemp(Ity_I32);
16107         IRTemp argR  = newTemp(Ity_I32);
16108         IRTemp res   = newTemp(Ity_I32);
16109         UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
16110         assign(argL, getIRegT(rN));
16111         assign(argR, mkU32(imm32));
16112         assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
16113         putIRegT(rD, mkexpr(res), condT);
16114         if (bS == 1)
16115            setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
16116         DIP("add%s.w r%u, r%u, #%u\n",
16117             bS == 1 ? "s" : "", rD, rN, imm32);
16118         goto decode_success;
16119      }
16120   }
16121
16122   /* ---------------- (T4) ADDW Rd, Rn, #uimm12 -------------- */
16123   if (INSN0(15,11) == BITS5(1,1,1,1,0)
16124       && INSN0(9,4) == BITS6(1,0,0,0,0,0)
16125       && INSN1(15,15) == 0) {
16126      UInt rN = INSN0(3,0);
16127      UInt rD = INSN1(11,8);
16128      Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
16129      /* but allow "addw sp, sp, #uimm12" */
16130      if (!valid && rD == 13 && rN == 13)
16131         valid = True;
16132      if (valid) {
16133         IRTemp argL = newTemp(Ity_I32);
16134         IRTemp argR = newTemp(Ity_I32);
16135         IRTemp res  = newTemp(Ity_I32);
16136         UInt imm12  = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
16137         assign(argL, getIRegT(rN));
16138         assign(argR, mkU32(imm12));
16139         assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
16140         putIRegT(rD, mkexpr(res), condT);
16141         DIP("addw r%u, r%u, #%u\n", rD, rN, imm12);
16142         goto decode_success;
16143      }
16144   }
16145
16146   /* ---------------- (T2) CMP.W Rn, #constT ---------------- */
16147   /* ---------------- (T2) CMN.W Rn, #constT ---------------- */
16148   if (INSN0(15,11) == BITS5(1,1,1,1,0)
16149       && (   INSN0(9,4) == BITS6(0,1,1,0,1,1)  // CMP
16150           || INSN0(9,4) == BITS6(0,1,0,0,0,1)) // CMN
16151       && INSN1(15,15) == 0
16152       && INSN1(11,8) == BITS4(1,1,1,1)) {
16153      UInt rN = INSN0(3,0);
16154      if (rN != 15) {
16155         IRTemp argL  = newTemp(Ity_I32);
16156         IRTemp argR  = newTemp(Ity_I32);
16157         Bool   isCMN = INSN0(9,4) == BITS6(0,1,0,0,0,1);
16158         UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
16159         assign(argL, getIRegT(rN));
16160         assign(argR, mkU32(imm32));
16161         setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
16162                         argL, argR, condT );
16163         DIP("%s.w r%u, #%u\n", isCMN ? "cmn" : "cmp", rN, imm32);
16164         goto decode_success;
16165      }
16166   }
16167
16168   /* -------------- (T1) TST.W Rn, #constT -------------- */
16169   /* -------------- (T1) TEQ.W Rn, #constT -------------- */
16170   if (INSN0(15,11) == BITS5(1,1,1,1,0)
16171       && (   INSN0(9,4) == BITS6(0,0,0,0,0,1)  // TST
16172           || INSN0(9,4) == BITS6(0,0,1,0,0,1)) // TEQ
16173       && INSN1(15,15) == 0
16174       && INSN1(11,8) == BITS4(1,1,1,1)) {
16175      UInt rN = INSN0(3,0);
16176      if (!isBadRegT(rN)) { // yes, really, it's inconsistent with CMP.W
16177         Bool  isTST  = INSN0(9,4) == BITS6(0,0,0,0,0,1);
16178         IRTemp argL  = newTemp(Ity_I32);
16179         IRTemp argR  = newTemp(Ity_I32);
16180         IRTemp res   = newTemp(Ity_I32);
16181         IRTemp oldV  = newTemp(Ity_I32);
16182         IRTemp oldC  = newTemp(Ity_I32);
16183         Bool   updC  = False;
16184         UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
16185         assign(argL, getIRegT(rN));
16186         assign(argR, mkU32(imm32));
16187         assign(res,  binop(isTST ? Iop_And32 : Iop_Xor32,
16188                            mkexpr(argL), mkexpr(argR)));
16189         assign( oldV, mk_armg_calculate_flag_v() );
16190         assign( oldC, updC
16191                       ? mkU32((imm32 >> 31) & 1)
16192                       : mk_armg_calculate_flag_c() );
16193         setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
16194         DIP("%s.w r%u, #%u\n", isTST ? "tst" : "teq", rN, imm32);
16195         goto decode_success;
16196      }
16197   }
16198
16199   /* -------------- (T3) SUB{S}.W Rd, Rn, #constT -------------- */
16200   /* -------------- (T3) RSB{S}.W Rd, Rn, #constT -------------- */
16201   if (INSN0(15,11) == BITS5(1,1,1,1,0)
16202       && (INSN0(9,5) == BITS5(0,1,1,0,1) // SUB
16203           || INSN0(9,5) == BITS5(0,1,1,1,0)) // RSB
16204       && INSN1(15,15) == 0) {
16205      Bool isRSB = INSN0(9,5) == BITS5(0,1,1,1,0);
16206      UInt bS    = INSN0(4,4);
16207      UInt rN    = INSN0(3,0);
16208      UInt rD    = INSN1(11,8);
16209      Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
16210      /* but allow "sub{s}.w reg, sp, #constT
16211         this is (T2) of "SUB (SP minus immediate)" */
16212      if (!valid && !isRSB && rN == 13 && rD != 15)
16213         valid = True;
16214      if (valid) {
16215         IRTemp argL  = newTemp(Ity_I32);
16216         IRTemp argR  = newTemp(Ity_I32);
16217         IRTemp res   = newTemp(Ity_I32);
16218         UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
16219         assign(argL, getIRegT(rN));
16220         assign(argR, mkU32(imm32));
16221         assign(res,  isRSB
16222                      ? binop(Iop_Sub32, mkexpr(argR), mkexpr(argL))
16223                      : binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
16224         putIRegT(rD, mkexpr(res), condT);
16225         if (bS == 1) {
16226            if (isRSB)
16227               setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
16228            else
16229               setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
16230         }
16231         DIP("%s%s.w r%u, r%u, #%u\n",
16232             isRSB ? "rsb" : "sub", bS == 1 ? "s" : "", rD, rN, imm32);
16233         goto decode_success;
16234      }
16235   }
16236
16237   /* -------------- (T4) SUBW Rd, Rn, #uimm12 ------------------- */
16238   if (INSN0(15,11) == BITS5(1,1,1,1,0)
16239       && INSN0(9,4) == BITS6(1,0,1,0,1,0)
16240       && INSN1(15,15) == 0) {
16241      UInt rN = INSN0(3,0);
16242      UInt rD = INSN1(11,8);
16243      Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
16244      /* but allow "subw sp, sp, #uimm12" */
16245      if (!valid && rD == 13 && rN == 13)
16246         valid = True;
16247      if (valid) {
16248         IRTemp argL  = newTemp(Ity_I32);
16249         IRTemp argR  = newTemp(Ity_I32);
16250         IRTemp res   = newTemp(Ity_I32);
16251         UInt imm12   = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
16252         assign(argL, getIRegT(rN));
16253         assign(argR, mkU32(imm12));
16254         assign(res,  binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
16255         putIRegT(rD, mkexpr(res), condT);
16256         DIP("subw r%u, r%u, #%u\n", rD, rN, imm12);
16257         goto decode_success;
16258      }
16259   }
16260
16261   /* -------------- (T1) ADC{S}.W Rd, Rn, #constT -------------- */
16262   /* -------------- (T1) SBC{S}.W Rd, Rn, #constT -------------- */
16263   if (INSN0(15,11) == BITS5(1,1,1,1,0)
16264       && (   INSN0(9,5) == BITS5(0,1,0,1,0)  // ADC
16265           || INSN0(9,5) == BITS5(0,1,0,1,1)) // SBC
16266       && INSN1(15,15) == 0) {
16267      /* ADC:  Rd = Rn + constT + oldC */
16268      /* SBC:  Rd = Rn - constT - (oldC ^ 1) */
16269      UInt bS    = INSN0(4,4);
16270      UInt rN    = INSN0(3,0);
16271      UInt rD    = INSN1(11,8);
16272      if (!isBadRegT(rN) && !isBadRegT(rD)) {
16273         IRTemp argL  = newTemp(Ity_I32);
16274         IRTemp argR  = newTemp(Ity_I32);
16275         IRTemp res   = newTemp(Ity_I32);
16276         IRTemp oldC  = newTemp(Ity_I32);
16277         UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
16278         assign(argL, getIRegT(rN));
16279         assign(argR, mkU32(imm32));
16280         assign(oldC, mk_armg_calculate_flag_c() );
16281         HChar* nm  = "???";
16282         switch (INSN0(9,5)) {
16283            case BITS5(0,1,0,1,0): // ADC
16284               nm = "adc";
16285               assign(res,
16286                      binop(Iop_Add32,
16287                            binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
16288                            mkexpr(oldC) ));
16289               putIRegT(rD, mkexpr(res), condT);
16290               if (bS)
16291                  setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
16292                                     argL, argR, oldC, condT );
16293               break;
16294            case BITS5(0,1,0,1,1): // SBC
16295               nm = "sbc";
16296               assign(res,
16297                      binop(Iop_Sub32,
16298                            binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
16299                            binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
16300               putIRegT(rD, mkexpr(res), condT);
16301               if (bS)
16302                  setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
16303                                     argL, argR, oldC, condT );
16304               break;
16305            default:
16306              vassert(0);
16307         }
16308         DIP("%s%s.w r%u, r%u, #%u\n",
16309             nm, bS == 1 ? "s" : "", rD, rN, imm32);
16310         goto decode_success;
16311      }
16312   }
16313
16314   /* -------------- (T1) ORR{S}.W Rd, Rn, #constT -------------- */
16315   /* -------------- (T1) AND{S}.W Rd, Rn, #constT -------------- */
16316   /* -------------- (T1) BIC{S}.W Rd, Rn, #constT -------------- */
16317   /* -------------- (T1) EOR{S}.W Rd, Rn, #constT -------------- */
16318   if (INSN0(15,11) == BITS5(1,1,1,1,0)
16319       && (   INSN0(9,5) == BITS5(0,0,0,1,0)  // ORR
16320           || INSN0(9,5) == BITS5(0,0,0,0,0)  // AND
16321           || INSN0(9,5) == BITS5(0,0,0,0,1)  // BIC
16322           || INSN0(9,5) == BITS5(0,0,1,0,0)  // EOR
16323           || INSN0(9,5) == BITS5(0,0,0,1,1)) // ORN
16324       && INSN1(15,15) == 0) {
16325      UInt bS = INSN0(4,4);
16326      UInt rN = INSN0(3,0);
16327      UInt rD = INSN1(11,8);
16328      if (!isBadRegT(rN) && !isBadRegT(rD)) {
16329         Bool   notArgR = False;
16330         IROp   op      = Iop_INVALID;
16331         HChar* nm      = "???";
16332         switch (INSN0(9,5)) {
16333            case BITS5(0,0,0,1,0): op = Iop_Or32;  nm = "orr"; break;
16334            case BITS5(0,0,0,0,0): op = Iop_And32; nm = "and"; break;
16335            case BITS5(0,0,0,0,1): op = Iop_And32; nm = "bic";
16336                                   notArgR = True; break;
16337            case BITS5(0,0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
16338            case BITS5(0,0,0,1,1): op = Iop_Or32;  nm = "orn";
16339                                   notArgR = True; break;
16340            default: vassert(0);
16341         }
16342         IRTemp argL  = newTemp(Ity_I32);
16343         IRTemp argR  = newTemp(Ity_I32);
16344         IRTemp res   = newTemp(Ity_I32);
16345         Bool   updC  = False;
16346         UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
16347         assign(argL, getIRegT(rN));
16348         assign(argR, mkU32(notArgR ? ~imm32 : imm32));
16349         assign(res,  binop(op, mkexpr(argL), mkexpr(argR)));
16350         putIRegT(rD, mkexpr(res), condT);
16351         if (bS) {
16352            IRTemp oldV = newTemp(Ity_I32);
16353            IRTemp oldC = newTemp(Ity_I32);
16354            assign( oldV, mk_armg_calculate_flag_v() );
16355            assign( oldC, updC
16356                          ? mkU32((imm32 >> 31) & 1)
16357                          : mk_armg_calculate_flag_c() );
16358            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
16359                               condT );
16360         }
16361         DIP("%s%s.w r%u, r%u, #%u\n",
16362             nm, bS == 1 ? "s" : "", rD, rN, imm32);
16363         goto decode_success;
16364      }
16365   }
16366
16367   /* ---------- (T3) ADD{S}.W Rd, Rn, Rm, {shift} ---------- */
16368   /* ---------- (T3) SUB{S}.W Rd, Rn, Rm, {shift} ---------- */
16369   /* ---------- (T3) RSB{S}.W Rd, Rn, Rm, {shift} ---------- */
16370   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
16371       && (   INSN0(8,5) == BITS4(1,0,0,0)  // add subopc
16372           || INSN0(8,5) == BITS4(1,1,0,1)  // sub subopc
16373           || INSN0(8,5) == BITS4(1,1,1,0)) // rsb subopc
16374       && INSN1(15,15) == 0) {
16375      UInt rN   = INSN0(3,0);
16376      UInt rD   = INSN1(11,8);
16377      UInt rM   = INSN1(3,0);
16378      UInt bS   = INSN0(4,4);
16379      UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
16380      UInt how  = INSN1(5,4);
16381
16382      Bool valid = !isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM);
16383      /* but allow "add.w reg, sp, reg   w/ no shift
16384         (T3) "ADD (SP plus register) */
16385      if (!valid && INSN0(8,5) == BITS4(1,0,0,0) // add
16386          && rD != 15 && rN == 13 && imm5 == 0 && how == 0) {
16387         valid = True;
16388      }
16389      /* also allow "sub.w reg, sp, reg   w/ no shift
16390         (T1) "SUB (SP minus register) */
16391      if (!valid && INSN0(8,5) == BITS4(1,1,0,1) // sub
16392          && rD != 15 && rN == 13 && imm5 == 0 && how == 0) {
16393         valid = True;
16394      }
16395      if (valid) {
16396         Bool   swap = False;
16397         IROp   op   = Iop_INVALID;
16398         HChar* nm   = "???";
16399         switch (INSN0(8,5)) {
16400            case BITS4(1,0,0,0): op = Iop_Add32; nm = "add"; break;
16401            case BITS4(1,1,0,1): op = Iop_Sub32; nm = "sub"; break;
16402            case BITS4(1,1,1,0): op = Iop_Sub32; nm = "rsb";
16403                                 swap = True; break;
16404            default: vassert(0);
16405         }
16406
16407         IRTemp argL = newTemp(Ity_I32);
16408         assign(argL, getIRegT(rN));
16409
16410         IRTemp rMt = newTemp(Ity_I32);
16411         assign(rMt, getIRegT(rM));
16412
16413         IRTemp argR = newTemp(Ity_I32);
16414         compute_result_and_C_after_shift_by_imm5(
16415            dis_buf, &argR, NULL, rMt, how, imm5, rM
16416         );
16417
16418         IRTemp res = newTemp(Ity_I32);
16419         assign(res, swap
16420                     ? binop(op, mkexpr(argR), mkexpr(argL))
16421                     : binop(op, mkexpr(argL), mkexpr(argR)));
16422
16423         putIRegT(rD, mkexpr(res), condT);
16424         if (bS) {
16425            switch (op) {
16426               case Iop_Add32:
16427                  setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
16428                  break;
16429               case Iop_Sub32:
16430                  if (swap)
16431                     setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
16432                  else
16433                     setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
16434                  break;
16435               default:
16436                  vassert(0);
16437            }
16438         }
16439
16440         DIP("%s%s.w r%u, r%u, %s\n",
16441             nm, bS ? "s" : "", rD, rN, dis_buf);
16442         goto decode_success;
16443      }
16444   }
16445
16446   /* ---------- (T3) ADC{S}.W Rd, Rn, Rm, {shift} ---------- */
16447   /* ---------- (T2) SBC{S}.W Rd, Rn, Rm, {shift} ---------- */
16448   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
16449       && (   INSN0(8,5) == BITS4(1,0,1,0)   // adc subopc
16450           || INSN0(8,5) == BITS4(1,0,1,1))  // sbc subopc
16451       && INSN1(15,15) == 0) {
16452      /* ADC:  Rd = Rn + shifter_operand + oldC */
16453      /* SBC:  Rd = Rn - shifter_operand - (oldC ^ 1) */
16454      UInt rN = INSN0(3,0);
16455      UInt rD = INSN1(11,8);
16456      UInt rM = INSN1(3,0);
16457      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
16458         UInt bS   = INSN0(4,4);
16459         UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
16460         UInt how  = INSN1(5,4);
16461
16462         IRTemp argL = newTemp(Ity_I32);
16463         assign(argL, getIRegT(rN));
16464
16465         IRTemp rMt = newTemp(Ity_I32);
16466         assign(rMt, getIRegT(rM));
16467
16468         IRTemp oldC = newTemp(Ity_I32);
16469         assign(oldC, mk_armg_calculate_flag_c());
16470
16471         IRTemp argR = newTemp(Ity_I32);
16472         compute_result_and_C_after_shift_by_imm5(
16473            dis_buf, &argR, NULL, rMt, how, imm5, rM
16474         );
16475
16476         HChar* nm  = "???";
16477         IRTemp res = newTemp(Ity_I32);
16478         switch (INSN0(8,5)) {
16479            case BITS4(1,0,1,0): // ADC
16480               nm = "adc";
16481               assign(res,
16482                      binop(Iop_Add32,
16483                            binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
16484                            mkexpr(oldC) ));
16485               putIRegT(rD, mkexpr(res), condT);
16486               if (bS)
16487                  setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
16488                                     argL, argR, oldC, condT );
16489               break;
16490            case BITS4(1,0,1,1): // SBC
16491               nm = "sbc";
16492               assign(res,
16493                      binop(Iop_Sub32,
16494                            binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
16495                            binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
16496               putIRegT(rD, mkexpr(res), condT);
16497               if (bS)
16498                  setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
16499                                     argL, argR, oldC, condT );
16500               break;
16501            default:
16502               vassert(0);
16503         }
16504
16505         DIP("%s%s.w r%u, r%u, %s\n",
16506             nm, bS ? "s" : "", rD, rN, dis_buf);
16507         goto decode_success;
16508      }
16509   }
16510
16511   /* ---------- (T3) AND{S}.W Rd, Rn, Rm, {shift} ---------- */
16512   /* ---------- (T3) ORR{S}.W Rd, Rn, Rm, {shift} ---------- */
16513   /* ---------- (T3) EOR{S}.W Rd, Rn, Rm, {shift} ---------- */
16514   /* ---------- (T3) BIC{S}.W Rd, Rn, Rm, {shift} ---------- */
16515   /* ---------- (T1) ORN{S}.W Rd, Rn, Rm, {shift} ---------- */
16516   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
16517       && (   INSN0(8,5) == BITS4(0,0,0,0)  // and subopc
16518           || INSN0(8,5) == BITS4(0,0,1,0)  // orr subopc
16519           || INSN0(8,5) == BITS4(0,1,0,0)  // eor subopc
16520           || INSN0(8,5) == BITS4(0,0,0,1)  // bic subopc
16521           || INSN0(8,5) == BITS4(0,0,1,1)) // orn subopc
16522       && INSN1(15,15) == 0) {
16523      UInt rN = INSN0(3,0);
16524      UInt rD = INSN1(11,8);
16525      UInt rM = INSN1(3,0);
16526      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
16527         Bool notArgR = False;
16528         IROp op      = Iop_INVALID;
16529         HChar* nm  = "???";
16530         switch (INSN0(8,5)) {
16531            case BITS4(0,0,0,0): op = Iop_And32; nm = "and"; break;
16532            case BITS4(0,0,1,0): op = Iop_Or32;  nm = "orr"; break;
16533            case BITS4(0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
16534            case BITS4(0,0,0,1): op = Iop_And32; nm = "bic";
16535                                 notArgR = True; break;
16536            case BITS4(0,0,1,1): op = Iop_Or32; nm = "orn";
16537                                 notArgR = True; break;
16538            default: vassert(0);
16539         }
16540         UInt bS   = INSN0(4,4);
16541         UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
16542         UInt how  = INSN1(5,4);
16543
16544         IRTemp rNt = newTemp(Ity_I32);
16545         assign(rNt, getIRegT(rN));
16546
16547         IRTemp rMt = newTemp(Ity_I32);
16548         assign(rMt, getIRegT(rM));
16549
16550         IRTemp argR = newTemp(Ity_I32);
16551         IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
16552
16553         compute_result_and_C_after_shift_by_imm5(
16554            dis_buf, &argR, bS ? &oldC : NULL, rMt, how, imm5, rM
16555         );
16556
16557         IRTemp res = newTemp(Ity_I32);
16558         if (notArgR) {
16559            vassert(op == Iop_And32 || op == Iop_Or32);
16560            assign(res, binop(op, mkexpr(rNt),
16561                                  unop(Iop_Not32, mkexpr(argR))));
16562         } else {
16563            assign(res, binop(op, mkexpr(rNt), mkexpr(argR)));
16564         }
16565
16566         putIRegT(rD, mkexpr(res), condT);
16567         if (bS) {
16568            IRTemp oldV = newTemp(Ity_I32);
16569            assign( oldV, mk_armg_calculate_flag_v() );
16570            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
16571                               condT );
16572         }
16573
16574         DIP("%s%s.w r%u, r%u, %s\n",
16575             nm, bS ? "s" : "", rD, rN, dis_buf);
16576         goto decode_success;
16577      }
16578   }
16579
16580   /* -------------- (T?) LSL{S}.W Rd, Rn, Rm -------------- */
16581   /* -------------- (T?) LSR{S}.W Rd, Rn, Rm -------------- */
16582   /* -------------- (T?) ASR{S}.W Rd, Rn, Rm -------------- */
16583   /* -------------- (T?) ROR{S}.W Rd, Rn, Rm -------------- */
16584   if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,0,0)
16585       && INSN1(15,12) == BITS4(1,1,1,1)
16586       && INSN1(7,4) == BITS4(0,0,0,0)) {
16587      UInt how = INSN0(6,5); // standard encoding
16588      UInt rN  = INSN0(3,0);
16589      UInt rD  = INSN1(11,8);
16590      UInt rM  = INSN1(3,0);
16591      UInt bS  = INSN0(4,4);
16592      Bool valid = !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rD);
16593      if (valid) {
16594         IRTemp rNt    = newTemp(Ity_I32);
16595         IRTemp rMt    = newTemp(Ity_I32);
16596         IRTemp res    = newTemp(Ity_I32);
16597         IRTemp oldC   = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
16598         IRTemp oldV   = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
16599         HChar* nms[4] = { "lsl", "lsr", "asr", "ror" };
16600         HChar* nm     = nms[how];
16601         assign(rNt, getIRegT(rN));
16602         assign(rMt, getIRegT(rM));
16603         compute_result_and_C_after_shift_by_reg(
16604            dis_buf, &res, bS ? &oldC : NULL,
16605            rNt, how, rMt, rN, rM
16606         );
16607         if (bS)
16608            assign(oldV, mk_armg_calculate_flag_v());
16609         putIRegT(rD, mkexpr(res), condT);
16610         if (bS) {
16611            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
16612                               condT );
16613         }
16614         DIP("%s%s.w r%u, r%u, r%u\n",
16615             nm, bS ? "s" : "", rD, rN, rM);
16616         goto decode_success;
16617      }
16618   }
16619
16620   /* ------------ (T?) MOV{S}.W Rd, Rn, {shift} ------------ */
16621   /* ------------ (T?) MVN{S}.W Rd, Rn, {shift} ------------ */
16622   if ((INSN0(15,0) & 0xFFCF) == 0xEA4F
16623       && INSN1(15,15) == 0) {
16624      UInt rD = INSN1(11,8);
16625      UInt rN = INSN1(3,0);
16626      if (!isBadRegT(rD) && !isBadRegT(rN)) {
16627         UInt bS    = INSN0(4,4);
16628         UInt isMVN = INSN0(5,5);
16629         UInt imm5  = (INSN1(14,12) << 2) | INSN1(7,6);
16630         UInt how   = INSN1(5,4);
16631
16632         IRTemp rNt = newTemp(Ity_I32);
16633         assign(rNt, getIRegT(rN));
16634
16635         IRTemp oldRn = newTemp(Ity_I32);
16636         IRTemp oldC  = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
16637         compute_result_and_C_after_shift_by_imm5(
16638            dis_buf, &oldRn, bS ? &oldC : NULL, rNt, how, imm5, rN
16639         );
16640
16641         IRTemp res = newTemp(Ity_I32);
16642         assign(res, isMVN ? unop(Iop_Not32, mkexpr(oldRn))
16643                           : mkexpr(oldRn));
16644
16645         putIRegT(rD, mkexpr(res), condT);
16646         if (bS) {
16647            IRTemp oldV = newTemp(Ity_I32);
16648            assign( oldV, mk_armg_calculate_flag_v() );
16649            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT);
16650         }
16651         DIP("%s%s.w r%u, %s\n",
16652             isMVN ? "mvn" : "mov", bS ? "s" : "", rD, dis_buf);
16653         goto decode_success;
16654      }
16655   }
16656
16657   /* -------------- (T?) TST.W Rn, Rm, {shift} -------------- */
16658   /* -------------- (T?) TEQ.W Rn, Rm, {shift} -------------- */
16659   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
16660       && (   INSN0(8,4) == BITS5(0,0,0,0,1)  // TST
16661           || INSN0(8,4) == BITS5(0,1,0,0,1)) // TEQ
16662       && INSN1(15,15) == 0
16663       && INSN1(11,8) == BITS4(1,1,1,1)) {
16664      UInt rN = INSN0(3,0);
16665      UInt rM = INSN1(3,0);
16666      if (!isBadRegT(rN) && !isBadRegT(rM)) {
16667         Bool isTST = INSN0(8,4) == BITS5(0,0,0,0,1);
16668
16669         UInt how  = INSN1(5,4);
16670         UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
16671
16672         IRTemp argL = newTemp(Ity_I32);
16673         assign(argL, getIRegT(rN));
16674
16675         IRTemp rMt = newTemp(Ity_I32);
16676         assign(rMt, getIRegT(rM));
16677
16678         IRTemp argR = newTemp(Ity_I32);
16679         IRTemp oldC = newTemp(Ity_I32);
16680         compute_result_and_C_after_shift_by_imm5(
16681            dis_buf, &argR, &oldC, rMt, how, imm5, rM
16682         );
16683
16684         IRTemp oldV = newTemp(Ity_I32);
16685         assign( oldV, mk_armg_calculate_flag_v() );
16686
16687         IRTemp res = newTemp(Ity_I32);
16688         assign(res, binop(isTST ? Iop_And32 : Iop_Xor32,
16689                           mkexpr(argL), mkexpr(argR)));
16690
16691         setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
16692                            condT );
16693         DIP("%s.w r%u, %s\n", isTST ? "tst" : "teq", rN, dis_buf);
16694         goto decode_success;
16695      }
16696   }
16697
16698   /* -------------- (T3) CMP.W Rn, Rm, {shift} -------------- */
16699   /* -------------- (T2) CMN.W Rn, Rm, {shift} -------------- */
16700   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
16701       && (   INSN0(8,4) == BITS5(1,1,0,1,1)  // CMP
16702           || INSN0(8,4) == BITS5(1,0,0,0,1)) // CMN
16703       && INSN1(15,15) == 0
16704       && INSN1(11,8) == BITS4(1,1,1,1)) {
16705      UInt rN = INSN0(3,0);
16706      UInt rM = INSN1(3,0);
16707      if (!isBadRegT(rN) && !isBadRegT(rM)) {
16708         Bool isCMN = INSN0(8,4) == BITS5(1,0,0,0,1);
16709         UInt how   = INSN1(5,4);
16710         UInt imm5  = (INSN1(14,12) << 2) | INSN1(7,6);
16711
16712         IRTemp argL = newTemp(Ity_I32);
16713         assign(argL, getIRegT(rN));
16714
16715         IRTemp rMt = newTemp(Ity_I32);
16716         assign(rMt, getIRegT(rM));
16717
16718         IRTemp argR = newTemp(Ity_I32);
16719         compute_result_and_C_after_shift_by_imm5(
16720            dis_buf, &argR, NULL, rMt, how, imm5, rM
16721         );
16722
16723         setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
16724                         argL, argR, condT );
16725
16726         DIP("%s.w r%u, %s\n", isCMN ? "cmn" : "cmp", rN, dis_buf);
16727         goto decode_success;
16728      }
16729   }
16730
16731   /* -------------- (T2) MOV{S}.W Rd, #constT -------------- */
16732   /* -------------- (T2) MVN{S}.W Rd, #constT -------------- */
16733   if (INSN0(15,11) == BITS5(1,1,1,1,0)
16734       && (   INSN0(9,5) == BITS5(0,0,0,1,0)  // MOV
16735           || INSN0(9,5) == BITS5(0,0,0,1,1)) // MVN
16736       && INSN0(3,0) == BITS4(1,1,1,1)
16737       && INSN1(15,15) == 0) {
16738      UInt rD = INSN1(11,8);
16739      if (!isBadRegT(rD)) {
16740         Bool   updC  = False;
16741         UInt   bS    = INSN0(4,4);
16742         Bool   isMVN = INSN0(5,5) == 1;
16743         UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
16744         IRTemp res   = newTemp(Ity_I32);
16745         assign(res, mkU32(isMVN ? ~imm32 : imm32));
16746         putIRegT(rD, mkexpr(res), condT);
16747         if (bS) {
16748            IRTemp oldV = newTemp(Ity_I32);
16749            IRTemp oldC = newTemp(Ity_I32);
16750            assign( oldV, mk_armg_calculate_flag_v() );
16751            assign( oldC, updC
16752                          ? mkU32((imm32 >> 31) & 1)
16753                          : mk_armg_calculate_flag_c() );
16754            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
16755                               condT );
16756         }
16757         DIP("%s%s.w r%u, #%u\n",
16758             isMVN ? "mvn" : "mov", bS ? "s" : "", rD, imm32);
16759         goto decode_success;
16760      }
16761   }
16762
16763   /* -------------- (T3) MOVW Rd, #imm16 -------------- */
16764   if (INSN0(15,11) == BITS5(1,1,1,1,0)
16765       && INSN0(9,4) == BITS6(1,0,0,1,0,0)
16766       && INSN1(15,15) == 0) {
16767      UInt rD = INSN1(11,8);
16768      if (!isBadRegT(rD)) {
16769         UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
16770                      | (INSN1(14,12) << 8) | INSN1(7,0);
16771         putIRegT(rD, mkU32(imm16), condT);
16772         DIP("movw r%u, #%u\n", rD, imm16);
16773         goto decode_success;
16774      }
16775   }
16776
16777   /* ---------------- MOVT Rd, #imm16 ---------------- */
16778   if (INSN0(15,11) == BITS5(1,1,1,1,0)
16779       && INSN0(9,4) == BITS6(1,0,1,1,0,0)
16780       && INSN1(15,15) == 0) {
16781      UInt rD = INSN1(11,8);
16782      if (!isBadRegT(rD)) {
16783         UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
16784                      | (INSN1(14,12) << 8) | INSN1(7,0);
16785         IRTemp res = newTemp(Ity_I32);
16786         assign(res,
16787                binop(Iop_Or32,
16788                      binop(Iop_And32, getIRegT(rD), mkU32(0xFFFF)),
16789                      mkU32(imm16 << 16)));
16790         putIRegT(rD, mkexpr(res), condT);
16791         DIP("movt r%u, #%u\n", rD, imm16);
16792         goto decode_success;
16793      }
16794   }
16795
   /* ---------------- LD/ST reg+/-#imm8 ---------------- */
   /* Loads and stores of the form:
         op  Rt, [Rn, #-imm8]      or
         op  Rt, [Rn], #+/-imm8    or
         op  Rt, [Rn, #+/-imm8]!
      where op is one of
         ldrb ldrh ldr  ldrsb ldrsh
         strb strh str

      Fields used: bP selects whether the transfer uses the offset
      address (1) or the unmodified base (0), bU selects add (1) or
      subtract (0) of imm8, and bW requests writeback of the offset
      address to Rn.
   */
   if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0) && INSN1(11,11) == 1) {
      /* Defaults describe an unsigned byte load; the opcode switch
         below overrides them as needed. */
      Bool   valid  = True;
      Bool   syned  = False;
      Bool   isST   = False;
      IRType ty     = Ity_I8;
      HChar* nm     = "???";

      /* Classify the operation: name, transfer size, load vs store,
         and whether a load is sign-extended.  Unlisted opcodes are
         not handled by this case. */
      switch (INSN0(8,4)) {
         case BITS5(0,0,0,0,0):   // strb
            nm = "strb"; isST = True; break;
         case BITS5(0,0,0,0,1):   // ldrb
            nm = "ldrb"; break;
         case BITS5(1,0,0,0,1):   // ldrsb
            nm = "ldrsb"; syned = True; break;
         case BITS5(0,0,0,1,0):   // strh
            nm = "strh"; ty = Ity_I16; isST = True; break;
         case BITS5(0,0,0,1,1):   // ldrh
            nm = "ldrh"; ty = Ity_I16; break;
         case BITS5(1,0,0,1,1):   // ldrsh
            nm = "ldrsh"; ty = Ity_I16; syned = True; break;
         case BITS5(0,0,1,0,0):   // str
            nm = "str"; ty = Ity_I32; isST = True; break;
         case BITS5(0,0,1,0,1):
            nm = "ldr"; ty = Ity_I32; break;  // ldr
         default:
            valid = False; break;
      }

      UInt rN      = INSN0(3,0);
      UInt rT      = INSN1(15,12);
      UInt bP      = INSN1(10,10);
      UInt bU      = INSN1(9,9);
      UInt bW      = INSN1(8,8);
      UInt imm8    = INSN1(7,0);
      Bool loadsPC = False;

      /* Reject the field combinations this case does not handle. */
      if (valid) {
         /* P=1 U=1 W=0 is not accepted here.
            NOTE(review): presumably that encoding belongs to a
            different instruction -- confirm against the ARM ARM. */
         if (bP == 1 && bU == 1 && bW == 0)
            valid = False;
         /* Neither offset addressing nor writeback: not accepted. */
         if (bP == 0 && bW == 0)
            valid = False;
         /* PC is not accepted as the base register. */
         if (rN == 15)
            valid = False;
         /* Writeback to a base that is also the transfer register
            is not accepted. */
         if (bW == 1 && rN == rT)
            valid = False;
         if (ty == Ity_I8 || ty == Ity_I16) {
            if (isBadRegT(rT))
               valid = False;
         } else {
            /* ty == Ity_I32 */
            /* Storing r15 is rejected; loading r15 is accepted and
               is treated as a branch further down. */
            if (isST && rT == 15)
               valid = False;
            if (!isST && rT == 15)
               loadsPC = True;
         }
      }

      if (valid) {
         // if it's a branch, it can't happen in the middle of an IT block
         if (loadsPC)
            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
         // go uncond
         mk_skip_over_T32_if_cond_is_false(condT);
         condT = IRTemp_INVALID;
         // now uncond

         /* preAddr is the unmodified base; postAddr is base +/- imm8. */
         IRTemp preAddr = newTemp(Ity_I32);
         assign(preAddr, getIRegT(rN));

         IRTemp postAddr = newTemp(Ity_I32);
         assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
                                mkexpr(preAddr), mkU32(imm8)));

         /* bP == 1: transfer at the offset address; otherwise
            transfer at the original base (post-indexed form). */
         IRTemp transAddr = bP == 1 ? postAddr : preAddr;

         if (isST) {

             /* Store.  If necessary, update the base register before
                the store itself, so that the common idiom of "str rX,
                [sp, #-4]!" (store rX at sp-4, then do new sp = sp-4,
                a.k.a "push rX") doesn't cause Memcheck to complain
                that the access is below the stack pointer.  Also, not
                updating sp before the store confuses Valgrind's
                dynamic stack-extending logic.  So do it before the
                store.  Hence we need to snarf the store data before
                doing the basereg update. */

            /* get hold of the data to be stored */
            IRTemp oldRt = newTemp(Ity_I32);
            assign(oldRt, getIRegT(rT));

            /* Update Rn if necessary. */
            if (bW == 1) {
               vassert(rN != rT); // assured by validity check above
               putIRegT(rN, mkexpr(postAddr), IRTemp_INVALID);
            }

            /* generate the transfer */
            /* Sub-word stores narrow the 32-bit register value to
               the transfer size first. */
            switch (ty) {
               case Ity_I8:
                  storeLE(mkexpr(transAddr),
                                 unop(Iop_32to8, mkexpr(oldRt)));
                  break;
               case Ity_I16:
                  storeLE(mkexpr(transAddr),
                          unop(Iop_32to16, mkexpr(oldRt)));
                  break;
              case Ity_I32:
                  storeLE(mkexpr(transAddr), mkexpr(oldRt));
                  break;
              default:
                 vassert(0);
            }

         } else {

            /* Load. */

            /* generate the transfer */
            IRTemp newRt = newTemp(Ity_I32);
            /* widen stays Iop_INVALID for 32-bit loads, which need
               no extension. */
            IROp   widen = Iop_INVALID;
            switch (ty) {
               case Ity_I8:
                  widen = syned ? Iop_8Sto32 : Iop_8Uto32; break;
               case Ity_I16:
                  widen = syned ? Iop_16Sto32 : Iop_16Uto32; break;
               case Ity_I32:
                  break;
               default:
                  vassert(0);
            }
            if (widen == Iop_INVALID) {
               assign(newRt, loadLE(ty, mkexpr(transAddr)));
            } else {
               assign(newRt, unop(widen, loadLE(ty, mkexpr(transAddr))));
            }
            /* A load into r15 is written via llPutIReg rather than
               putIRegT. */
            if (loadsPC) {
               vassert(rT == 15);
               llPutIReg(rT, mkexpr(newRt));
            } else {
               putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
            }

            if (loadsPC) {
               /* Presumably this is an interworking branch. */
               /* The loaded value becomes the block's next address
                  and disassembly stops at this instruction. */
               irsb->next = mkexpr(newRt);
               irsb->jumpkind = Ijk_Boring;  /* or _Ret ? */
               dres.whatNext  = Dis_StopHere;
            }

            /* Update Rn if necessary. */
            if (bW == 1) {
               vassert(rN != rT); // assured by validity check above
               putIRegT(rN, mkexpr(postAddr), IRTemp_INVALID);
            }
         }

         /* Print in the syntax matching the addressing form: plain
            offset, pre-indexed with writeback, or post-indexed. */
         if (bP == 1 && bW == 0) {
            DIP("%s.w r%u, [r%u, #%c%u]\n",
                nm, rT, rN, bU ? '+' : '-', imm8);
         }
         else if (bP == 1 && bW == 1) {
            DIP("%s.w r%u, [r%u, #%c%u]!\n",
                nm, rT, rN, bU ? '+' : '-', imm8);
         }
         else {
            vassert(bP == 0 && bW == 1);
            DIP("%s.w r%u, [r%u], #%c%u\n",
                nm, rT, rN, bU ? '+' : '-', imm8);
         }

         goto decode_success;
      }
   }
16979
16980   /* ------------- LD/ST reg+(reg<<imm2) ------------- */
16981   /* Loads and stores of the form:
16982         op  Rt, [Rn, Rm, LSL #imm8]
16983      where op is one of
16984         ldrb ldrh ldr  ldrsb ldrsh
16985         strb strh str
16986   */
16987   if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)
16988       && INSN1(11,6) == BITS6(0,0,0,0,0,0)) {
16989      Bool   valid  = True;
16990      Bool   syned  = False;
16991      Bool   isST   = False;
16992      IRType ty     = Ity_I8;
16993      HChar* nm     = "???";
16994
16995      switch (INSN0(8,4)) {
16996         case BITS5(0,0,0,0,0):   // strb
16997            nm = "strb"; isST = True; break;
16998         case BITS5(0,0,0,0,1):   // ldrb
16999            nm = "ldrb"; break;
17000         case BITS5(1,0,0,0,1):   // ldrsb
17001            nm = "ldrsb"; syned = True; break;
17002         case BITS5(0,0,0,1,0):   // strh
17003            nm = "strh"; ty = Ity_I16; isST = True; break;
17004         case BITS5(0,0,0,1,1):   // ldrh
17005            nm = "ldrh"; ty = Ity_I16; break;
17006         case BITS5(1,0,0,1,1):   // ldrsh
17007            nm = "ldrsh"; ty = Ity_I16; syned = True; break;
17008         case BITS5(0,0,1,0,0):   // str
17009            nm = "str"; ty = Ity_I32; isST = True; break;
17010         case BITS5(0,0,1,0,1):
17011            nm = "ldr"; ty = Ity_I32; break;  // ldr
17012         default:
17013            valid = False; break;
17014      }
17015
17016      UInt rN      = INSN0(3,0);
17017      UInt rM      = INSN1(3,0);
17018      UInt rT      = INSN1(15,12);
17019      UInt imm2    = INSN1(5,4);
17020      Bool loadsPC = False;
17021
17022      if (ty == Ity_I8 || ty == Ity_I16) {
17023         /* all 8- and 16-bit load and store cases have the
17024            same exclusion set. */
17025         if (rN == 15 || isBadRegT(rT) || isBadRegT(rM))
17026            valid = False;
17027      } else {
17028         vassert(ty == Ity_I32);
17029         if (rN == 15 || isBadRegT(rM))
17030            valid = False;
17031         if (isST && rT == 15)
17032            valid = False;
17033         /* If it is a load and rT is 15, that's only allowable if we
17034            not in an IT block, or are the last in it.  Need to insert
17035            a dynamic check for that. */
17036         if (!isST && rT == 15)
17037            loadsPC = True;
17038      }
17039
17040      if (valid) {
17041         // if it's a branch, it can't happen in the middle of an IT block
17042         if (loadsPC)
17043            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
17044         // go uncond
17045         mk_skip_over_T32_if_cond_is_false(condT);
17046         condT = IRTemp_INVALID;
17047         // now uncond
17048
17049         IRTemp transAddr = newTemp(Ity_I32);
17050         assign(transAddr,
17051                binop( Iop_Add32,
17052                       getIRegT(rN),
17053                       binop(Iop_Shl32, getIRegT(rM), mkU8(imm2)) ));
17054
17055         if (isST) {
17056            IRTemp oldRt = newTemp(Ity_I32);
17057            assign(oldRt, getIRegT(rT));
17058            switch (ty) {
17059               case Ity_I8:
17060                  storeLE(mkexpr(transAddr),
17061                                 unop(Iop_32to8, mkexpr(oldRt)));
17062                  break;
17063               case Ity_I16:
17064                  storeLE(mkexpr(transAddr),
17065                          unop(Iop_32to16, mkexpr(oldRt)));
17066                  break;
17067              case Ity_I32:
17068                  storeLE(mkexpr(transAddr), mkexpr(oldRt));
17069                  break;
17070              default:
17071                 vassert(0);
17072            }
17073         } else {
17074            IRTemp newRt = newTemp(Ity_I32);
17075            IROp   widen = Iop_INVALID;
17076            switch (ty) {
17077               case Ity_I8:
17078                  widen = syned ? Iop_8Sto32 : Iop_8Uto32; break;
17079               case Ity_I16:
17080                  widen = syned ? Iop_16Sto32 : Iop_16Uto32; break;
17081               case Ity_I32:
17082                  break;
17083               default:
17084                  vassert(0);
17085            }
17086            if (widen == Iop_INVALID) {
17087               assign(newRt, loadLE(ty, mkexpr(transAddr)));
17088            } else {
17089               assign(newRt, unop(widen, loadLE(ty, mkexpr(transAddr))));
17090            }
17091
17092            /* If we're loading the PC, putIRegT will assert.  So go
17093               direct via llPutIReg.  In all other cases use putIRegT
17094               as it is safer (although could simply use llPutIReg for
17095               _all_ cases here.) */
17096            if (loadsPC) {
17097               vassert(rT == 15);
17098               llPutIReg(rT, mkexpr(newRt));
17099            } else {
17100               putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
17101            }
17102
17103            if (loadsPC) {
17104               /* Presumably this is an interworking branch. */
17105               irsb->next = mkexpr(newRt);
17106               irsb->jumpkind = Ijk_Boring;  /* or _Ret ? */
17107               dres.whatNext  = Dis_StopHere;
17108            }
17109         }
17110
17111         DIP("%s.w r%u, [r%u, r%u, LSL #%u]\n",
17112             nm, rT, rN, rM, imm2);
17113
17114         goto decode_success;
17115      }
17116   }
17117
17118   /* --------------- LD/ST reg+imm12 --------------- */
17119   /* Loads and stores of the form:
17120         op  Rt, [Rn, +#imm12]
17121      where op is one of
17122         ldrb ldrh ldr  ldrsb ldrsh
17123         strb strh str
17124   */
17125   if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)) {
17126      Bool   valid  = True;
17127      Bool   syned  = False;
17128      Bool   isST   = False;
17129      IRType ty     = Ity_I8;
17130      HChar* nm     = "???";
17131
17132      switch (INSN0(8,4)) {
17133         case BITS5(0,1,0,0,0):   // strb
17134            nm = "strb"; isST = True; break;
17135         case BITS5(0,1,0,0,1):   // ldrb
17136            nm = "ldrb"; break;
17137         case BITS5(1,1,0,0,1):   // ldrsb
17138            nm = "ldrsb"; syned = True; break;
17139         case BITS5(0,1,0,1,0):   // strh
17140            nm = "strh"; ty = Ity_I16; isST = True; break;
17141         case BITS5(0,1,0,1,1):   // ldrh
17142            nm = "ldrh"; ty = Ity_I16; break;
17143         case BITS5(1,1,0,1,1):   // ldrsh
17144            nm = "ldrsh"; ty = Ity_I16; syned = True; break;
17145         case BITS5(0,1,1,0,0):   // str
17146            nm = "str"; ty = Ity_I32; isST = True; break;
17147         case BITS5(0,1,1,0,1):
17148            nm = "ldr"; ty = Ity_I32; break;  // ldr
17149         default:
17150            valid = False; break;
17151      }
17152
17153      UInt rN      = INSN0(3,0);
17154      UInt rT      = INSN1(15,12);
17155      UInt imm12   = INSN1(11,0);
17156      Bool loadsPC = False;
17157
17158      if (ty == Ity_I8 || ty == Ity_I16) {
17159         /* all 8- and 16-bit load and store cases have the
17160            same exclusion set. */
17161         if (rN == 15 || isBadRegT(rT))
17162            valid = False;
17163      } else {
17164         vassert(ty == Ity_I32);
17165         if (isST) {
17166            if (rN == 15 || rT == 15)
17167               valid = False;
17168         } else {
17169            /* For a 32-bit load, rT == 15 is only allowable if we not
17170               in an IT block, or are the last in it.  Need to insert
17171               a dynamic check for that.  Also, in this particular
17172               case, rN == 15 is allowable.  In this case however, the
17173               value obtained for rN is (apparently)
17174               "word-align(address of current insn + 4)". */
17175            if (rT == 15)
17176               loadsPC = True;
17177         }
17178      }
17179
17180      if (valid) {
17181         // if it's a branch, it can't happen in the middle of an IT block
17182         if (loadsPC)
17183            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
17184         // go uncond
17185         mk_skip_over_T32_if_cond_is_false(condT);
17186         condT = IRTemp_INVALID;
17187         // now uncond
17188
17189         IRTemp rNt = newTemp(Ity_I32);
17190         if (rN == 15) {
17191            vassert(ty == Ity_I32 && !isST);
17192            assign(rNt, binop(Iop_And32, getIRegT(rN), mkU32(~3)));
17193         } else {
17194            assign(rNt, getIRegT(rN));
17195         }
17196
17197         IRTemp transAddr = newTemp(Ity_I32);
17198         assign(transAddr,
17199                binop( Iop_Add32, mkexpr(rNt), mkU32(imm12) ));
17200
17201         if (isST) {
17202            IRTemp oldRt = newTemp(Ity_I32);
17203            assign(oldRt, getIRegT(rT));
17204            switch (ty) {
17205               case Ity_I8:
17206                  storeLE(mkexpr(transAddr),
17207                                 unop(Iop_32to8, mkexpr(oldRt)));
17208                  break;
17209               case Ity_I16:
17210                  storeLE(mkexpr(transAddr),
17211                          unop(Iop_32to16, mkexpr(oldRt)));
17212                  break;
17213              case Ity_I32:
17214                  storeLE(mkexpr(transAddr), mkexpr(oldRt));
17215                  break;
17216              default:
17217                 vassert(0);
17218            }
17219         } else {
17220            IRTemp newRt = newTemp(Ity_I32);
17221            IROp   widen = Iop_INVALID;
17222            switch (ty) {
17223               case Ity_I8:
17224                  widen = syned ? Iop_8Sto32 : Iop_8Uto32; break;
17225               case Ity_I16:
17226                  widen = syned ? Iop_16Sto32 : Iop_16Uto32; break;
17227               case Ity_I32:
17228                  break;
17229               default:
17230                  vassert(0);
17231            }
17232            if (widen == Iop_INVALID) {
17233               assign(newRt, loadLE(ty, mkexpr(transAddr)));
17234            } else {
17235               assign(newRt, unop(widen, loadLE(ty, mkexpr(transAddr))));
17236            }
17237            putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
17238
17239            if (loadsPC) {
17240               /* Presumably this is an interworking branch. */
17241               irsb->next = mkexpr(newRt);
17242               irsb->jumpkind = Ijk_Boring;  /* or _Ret ? */
17243               dres.whatNext  = Dis_StopHere;
17244            }
17245         }
17246
17247         DIP("%s.w r%u, [r%u, +#%u]\n", nm, rT, rN, imm12);
17248
17249         goto decode_success;
17250      }
17251   }
17252
17253   /* -------------- LDRD/STRD reg+/-#imm8 -------------- */
17254   /* Doubleword loads and stores of the form:
17255         ldrd/strd  Rt, Rt2, [Rn, #-imm8]      or
17256         ldrd/strd  Rt, Rt2, [Rn], #+/-imm8    or
17257         ldrd/strd  Rt, Rt2, [Rn, #+/-imm8]!
17258   */
17259   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,0) && INSN0(6,6) == 1) {
17260      UInt bP   = INSN0(8,8);
17261      UInt bU   = INSN0(7,7);
17262      UInt bW   = INSN0(5,5);
17263      UInt bL   = INSN0(4,4);  // 1: load  0: store
17264      UInt rN   = INSN0(3,0);
17265      UInt rT   = INSN1(15,12);
17266      UInt rT2  = INSN1(11,8);
17267      UInt imm8 = INSN1(7,0);
17268
17269      Bool valid = True;
17270      if (bP == 0 && bW == 0)                 valid = False;
17271      if (bW == 1 && (rN == rT || rN == rT2)) valid = False;
17272      if (isBadRegT(rT) || isBadRegT(rT2))    valid = False;
17273      if (rN == 15)                           valid = False;
17274      if (bL == 1 && rT == rT2)               valid = False;
17275
17276      if (valid) {
17277         // go uncond
17278         mk_skip_over_T32_if_cond_is_false(condT);
17279         condT = IRTemp_INVALID;
17280         // now uncond
17281
17282         IRTemp preAddr = newTemp(Ity_I32);
17283         assign(preAddr, getIRegT(rN));
17284
17285         IRTemp postAddr = newTemp(Ity_I32);
17286         assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
17287                                mkexpr(preAddr), mkU32(imm8 << 2)));
17288
17289         IRTemp transAddr = bP == 1 ? postAddr : preAddr;
17290
17291         if (bL == 0) {
17292            IRTemp oldRt  = newTemp(Ity_I32);
17293            IRTemp oldRt2 = newTemp(Ity_I32);
17294            assign(oldRt,  getIRegT(rT));
17295            assign(oldRt2, getIRegT(rT2));
17296            storeLE(mkexpr(transAddr),
17297                    mkexpr(oldRt));
17298            storeLE(binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
17299                    mkexpr(oldRt2));
17300         } else {
17301            IRTemp newRt  = newTemp(Ity_I32);
17302            IRTemp newRt2 = newTemp(Ity_I32);
17303            assign(newRt,
17304                   loadLE(Ity_I32,
17305                          mkexpr(transAddr)));
17306            assign(newRt2,
17307                   loadLE(Ity_I32,
17308                          binop(Iop_Add32, mkexpr(transAddr), mkU32(4))));
17309            putIRegT(rT,  mkexpr(newRt), IRTemp_INVALID);
17310            putIRegT(rT2, mkexpr(newRt2), IRTemp_INVALID);
17311         }
17312
17313         if (bW == 1) {
17314            putIRegT(rN, mkexpr(postAddr), IRTemp_INVALID);
17315         }
17316
17317         HChar* nm = bL ? "ldrd" : "strd";
17318
17319         if (bP == 1 && bW == 0) {
17320            DIP("%s.w r%u, r%u, [r%u, #%c%u]\n",
17321                nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
17322         }
17323         else if (bP == 1 && bW == 1) {
17324            DIP("%s.w r%u, r%u, [r%u, #%c%u]!\n",
17325                nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
17326         }
17327         else {
17328            vassert(bP == 0 && bW == 1);
17329            DIP("%s.w r%u, r%u, [r%u], #%c%u\n",
17330                nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
17331         }
17332
17333         goto decode_success;
17334      }
17335   }
17336
17337   /* -------------- (T3) Bcond.W label -------------- */
17338   /* This variant carries its own condition, so can't be part of an
17339      IT block ... */
17340   if (INSN0(15,11) == BITS5(1,1,1,1,0)
17341       && INSN1(15,14) == BITS2(1,0)
17342       && INSN1(12,12) == 0) {
17343      UInt cond = INSN0(9,6);
17344      if (cond != ARMCondAL && cond != ARMCondNV) {
17345         Int simm21
17346            =   (INSN0(10,10) << (1 + 1 + 6 + 11 + 1))
17347              | (INSN1(11,11) << (1 + 6 + 11 + 1))
17348              | (INSN1(13,13) << (6 + 11 + 1))
17349              | (INSN0(5,0)   << (11 + 1))
17350              | (INSN1(10,0)  << 1);
17351         simm21 = (simm21 << 11) >> 11;
17352
17353         vassert(0 == (guest_R15_curr_instr_notENC & 1));
17354         UInt dst = simm21 + guest_R15_curr_instr_notENC + 4;
17355
17356         /* Not allowed in an IT block; SIGILL if so. */
17357         gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
17358
17359         IRTemp kondT = newTemp(Ity_I32);
17360         assign( kondT, mk_armg_calculate_condition(cond) );
17361         stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
17362                            Ijk_Boring,
17363                            IRConst_U32(dst | 1/*CPSR.T*/) ));
17364         irsb->next = mkU32( (guest_R15_curr_instr_notENC + 4)
17365                             | 1 /*CPSR.T*/ );
17366         irsb->jumpkind = Ijk_Boring;
17367         dres.whatNext  = Dis_StopHere;
17368         DIP("b%s.w 0x%x\n", nCC(cond), dst);
17369         goto decode_success;
17370      }
17371   }
17372
17373   /* ---------------- (T4) B.W label ---------------- */
17374   /* ... whereas this variant doesn't carry its own condition, so it
17375      has to be either unconditional or the conditional by virtue of
17376      being the last in an IT block.  The upside is that there's 4
17377      more bits available for the jump offset, so it has a 16-times
17378      greater branch range than the T3 variant. */
17379   if (INSN0(15,11) == BITS5(1,1,1,1,0)
17380       && INSN1(15,14) == BITS2(1,0)
17381       && INSN1(12,12) == 1) {
17382      if (1) {
17383         UInt bS  = INSN0(10,10);
17384         UInt bJ1 = INSN1(13,13);
17385         UInt bJ2 = INSN1(11,11);
17386         UInt bI1 = 1 ^ (bJ1 ^ bS);
17387         UInt bI2 = 1 ^ (bJ2 ^ bS);
17388         Int simm25
17389            =   (bS          << (1 + 1 + 10 + 11 + 1))
17390              | (bI1         << (1 + 10 + 11 + 1))
17391              | (bI2         << (10 + 11 + 1))
17392              | (INSN0(9,0)  << (11 + 1))
17393              | (INSN1(10,0) << 1);
17394         simm25 = (simm25 << 7) >> 7;
17395
17396         vassert(0 == (guest_R15_curr_instr_notENC & 1));
17397         UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
17398
17399         /* If in an IT block, must be the last insn. */
17400         gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
17401
17402         // go uncond
17403         mk_skip_over_T32_if_cond_is_false(condT);
17404         condT = IRTemp_INVALID;
17405         // now uncond
17406
17407         // branch to dst
17408         irsb->next = mkU32( dst | 1 /*CPSR.T*/ );
17409         irsb->jumpkind = Ijk_Boring;
17410         dres.whatNext  = Dis_StopHere;
17411         DIP("b.w 0x%x\n", dst);
17412         goto decode_success;
17413      }
17414   }
17415
   /* ------------------ TBB, TBH ------------------ */
   /* Table branch: load an unsigned byte (TBB) or halfword (TBH) from
      a table and branch forward by twice that value, relative to the
      value that getIRegT(15) yields. */
   if (INSN0(15,4) == 0xE8D && INSN1(15,5) == 0x780) {
      UInt rN = INSN0(3,0);   /* table base register */
      UInt rM = INSN1(3,0);   /* table index register */
      UInt bH = INSN1(4,4);   /* 1 = TBH (halfword entries), 0 = TBB */
      /* NOTE(review): because of the "bH ||", the rN/rM checks are
         applied only to TBB; TBH is accepted with any registers.
         "ATC" is presumably "awaiting test case" -- confirm. */
      if (bH/*ATC*/ || (rN != 13 && !isBadRegT(rM))) {
         /* Must be last or not-in IT block */
         gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
         /* Go uncond */
         mk_skip_over_T32_if_cond_is_false(condT);
         condT = IRTemp_INVALID;

         /* Address of the table entry: rN + rM for TBB,
            rN + (rM << 1) for TBH. */
         IRExpr* ea
             = binop(Iop_Add32,
                     getIRegT(rN),
                     bH ? binop(Iop_Shl32, getIRegT(rM), mkU8(1))
                        : getIRegT(rM));

         /* Fetch the entry and zero-extend it to 32 bits. */
         IRTemp delta = newTemp(Ity_I32);
         if (bH) {
            assign(delta, unop(Iop_16Uto32, loadLE(Ity_I16, ea)));
         } else {
            assign(delta, unop(Iop_8Uto32, loadLE(Ity_I8, ea)));
         }

         /* Target = (r15 + 2 * delta) | 1; the low bit is set as in
            the other Thumb branch cases (CPSR.T). */
         irsb->next
            = binop(Iop_Or32,
                    binop(Iop_Add32,
                          getIRegT(15),
                          binop(Iop_Shl32, mkexpr(delta), mkU8(1))
                    ),
                    mkU32(1)
              );
         irsb->jumpkind = Ijk_Boring;
         dres.whatNext = Dis_StopHere;
         DIP("tb%c [r%u, r%u%s]\n",
             bH ? 'h' : 'b', rN, rM, bH ? ", LSL #1" : "");
         goto decode_success;
      }
   }
17456
17457   /* ------------------ UBFX ------------------ */
17458   /* ------------------ SBFX ------------------ */
17459   /* There's also ARM versions of same, but it doesn't seem worth the
17460      hassle to common up the handling (it's only a couple of C
17461      statements). */
17462   if ((INSN0(15,4) == 0xF3C // UBFX
17463        || INSN0(15,4) == 0xF34) // SBFX
17464       && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
17465      UInt rN  = INSN0(3,0);
17466      UInt rD  = INSN1(11,8);
17467      UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
17468      UInt wm1 = INSN1(4,0);
17469      UInt msb =  lsb + wm1;
17470      if (!isBadRegT(rD) && !isBadRegT(rN) && msb <= 31) {
17471         Bool   isU  = INSN0(15,4) == 0xF3C;
17472         IRTemp src  = newTemp(Ity_I32);
17473         IRTemp tmp  = newTemp(Ity_I32);
17474         IRTemp res  = newTemp(Ity_I32);
17475         UInt   mask = ((1 << wm1) - 1) + (1 << wm1);
17476         vassert(msb >= 0 && msb <= 31);
17477         vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
17478
17479         assign(src, getIRegT(rN));
17480         assign(tmp, binop(Iop_And32,
17481                           binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
17482                           mkU32(mask)));
17483         assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
17484                           binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
17485                           mkU8(31-wm1)));
17486
17487         putIRegT(rD, mkexpr(res), condT);
17488
17489         DIP("%s r%u, r%u, #%u, #%u\n",
17490             isU ? "ubfx" : "sbfx", rD, rN, lsb, wm1 + 1);
17491         goto decode_success;
17492      }
17493   }
17494
17495   /* ------------------ UXTB ------------------ */
17496   /* ------------------ UXTH ------------------ */
17497   /* ------------------ SXTB ------------------ */
17498   /* ------------------ SXTH ------------------ */
17499   /* ----------------- UXTB16 ----------------- */
17500   /* ----------------- SXTB16 ----------------- */
17501   /* FIXME: this is an exact duplicate of the ARM version.  They
17502      should be commoned up. */
17503   if ((INSN0(15,0) == 0xFA5F     // UXTB
17504        || INSN0(15,0) == 0xFA1F  // UXTH
17505        || INSN0(15,0) == 0xFA4F  // SXTB
17506        || INSN0(15,0) == 0xFA0F  // SXTH
17507        || INSN0(15,0) == 0xFA3F  // UXTB16
17508        || INSN0(15,0) == 0xFA2F) // SXTB16
17509       && INSN1(15,12) == BITS4(1,1,1,1)
17510       && INSN1(7,6) == BITS2(1,0)) {
17511      UInt rD = INSN1(11,8);
17512      UInt rM = INSN1(3,0);
17513      UInt rot = INSN1(5,4);
17514      if (!isBadRegT(rD) && !isBadRegT(rM)) {
17515         HChar* nm = "???";
17516         IRTemp srcT = newTemp(Ity_I32);
17517         IRTemp rotT = newTemp(Ity_I32);
17518         IRTemp dstT = newTemp(Ity_I32);
17519         assign(srcT, getIRegT(rM));
17520         assign(rotT, genROR32(srcT, 8 * rot));
17521         switch (INSN0(15,0)) {
17522            case 0xFA5F: // UXTB
17523               nm = "uxtb";
17524               assign(dstT, unop(Iop_8Uto32,
17525                                 unop(Iop_32to8, mkexpr(rotT))));
17526               break;
17527            case 0xFA1F: // UXTH
17528               nm = "uxth";
17529               assign(dstT, unop(Iop_16Uto32,
17530                                 unop(Iop_32to16, mkexpr(rotT))));
17531               break;
17532            case 0xFA4F: // SXTB
17533               nm = "sxtb";
17534               assign(dstT, unop(Iop_8Sto32,
17535                                 unop(Iop_32to8, mkexpr(rotT))));
17536               break;
17537            case 0xFA0F: // SXTH
17538               nm = "sxth";
17539               assign(dstT, unop(Iop_16Sto32,
17540                                 unop(Iop_32to16, mkexpr(rotT))));
17541               break;
17542            case 0xFA3F: // UXTB16
17543               nm = "uxtb16";
17544               assign(dstT, binop(Iop_And32, mkexpr(rotT),
17545                                             mkU32(0x00FF00FF)));
17546               break;
17547            case 0xFA2F: { // SXTB16
17548               nm = "sxtb16";
17549               IRTemp lo32 = newTemp(Ity_I32);
17550               IRTemp hi32 = newTemp(Ity_I32);
17551               assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
17552               assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
17553               assign(
17554                  dstT,
17555                  binop(Iop_Or32,
17556                        binop(Iop_And32,
17557                              unop(Iop_8Sto32,
17558                                   unop(Iop_32to8, mkexpr(lo32))),
17559                              mkU32(0xFFFF)),
17560                        binop(Iop_Shl32,
17561                              unop(Iop_8Sto32,
17562                                   unop(Iop_32to8, mkexpr(hi32))),
17563                              mkU8(16))
17564               ));
17565               break;
17566            }
17567            default:
17568               vassert(0);
17569         }
17570         putIRegT(rD, mkexpr(dstT), condT);
17571         DIP("%s r%u, r%u, ror #%u\n", nm, rD, rM, 8 * rot);
17572         goto decode_success;
17573      }
17574   }
17575
17576   /* -------------- MUL.W Rd, Rn, Rm -------------- */
17577   if (INSN0(15,4) == 0xFB0
17578       && (INSN1(15,0) & 0xF0F0) == 0xF000) {
17579      UInt rN = INSN0(3,0);
17580      UInt rD = INSN1(11,8);
17581      UInt rM = INSN1(3,0);
17582      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
17583         IRTemp res = newTemp(Ity_I32);
17584         assign(res, binop(Iop_Mul32, getIRegT(rN), getIRegT(rM)));
17585         putIRegT(rD, mkexpr(res), condT);
17586         DIP("mul.w r%u, r%u, r%u\n", rD, rN, rM);
17587         goto decode_success;
17588      }
17589   }
17590
17591   /* ------------------ {U,S}MULL ------------------ */
17592   if ((INSN0(15,4) == 0xFB8 || INSN0(15,4) == 0xFBA)
17593       && INSN1(7,4) == BITS4(0,0,0,0)) {
17594      UInt isU  = INSN0(5,5);
17595      UInt rN   = INSN0(3,0);
17596      UInt rDlo = INSN1(15,12);
17597      UInt rDhi = INSN1(11,8);
17598      UInt rM   = INSN1(3,0);
17599      if (!isBadRegT(rDhi) && !isBadRegT(rDlo)
17600          && !isBadRegT(rN) && !isBadRegT(rM) && rDlo != rDhi) {
17601         IRTemp res   = newTemp(Ity_I64);
17602         assign(res, binop(isU ? Iop_MullU32 : Iop_MullS32,
17603                           getIRegT(rN), getIRegT(rM)));
17604         putIRegT( rDhi, unop(Iop_64HIto32, mkexpr(res)), condT );
17605         putIRegT( rDlo, unop(Iop_64to32, mkexpr(res)), condT );
17606         DIP("%cmull r%u, r%u, r%u, r%u\n",
17607             isU ? 'u' : 's', rDlo, rDhi, rN, rM);
17608         goto decode_success;
17609      }
17610   }
17611
17612   /* ------------------ ML{A,S} ------------------ */
17613   if (INSN0(15,4) == 0xFB0
17614       && (   INSN1(7,4) == BITS4(0,0,0,0)    // MLA
17615           || INSN1(7,4) == BITS4(0,0,0,1))) { // MLS
17616      UInt rN = INSN0(3,0);
17617      UInt rA = INSN1(15,12);
17618      UInt rD = INSN1(11,8);
17619      UInt rM = INSN1(3,0);
17620      if (!isBadRegT(rD) && !isBadRegT(rN)
17621          && !isBadRegT(rM) && !isBadRegT(rA)) {
17622         Bool   isMLA = INSN1(7,4) == BITS4(0,0,0,0);
17623         IRTemp res   = newTemp(Ity_I32);
17624         assign(res,
17625                binop(isMLA ? Iop_Add32 : Iop_Sub32,
17626                      getIRegT(rA),
17627                      binop(Iop_Mul32, getIRegT(rN), getIRegT(rM))));
17628         putIRegT(rD, mkexpr(res), condT);
17629         DIP("%s r%u, r%u, r%u, r%u\n",
17630             isMLA ? "mla" : "mls", rD, rN, rM, rA);
17631         goto decode_success;
17632      }
17633   }
17634
17635   /* ------------------ (T3) ADR ------------------ */
17636   if ((INSN0(15,0) == 0xF20F || INSN0(15,0) == 0xF60F)
17637       && INSN1(15,15) == 0) {
17638      /* rD = align4(PC) + imm32 */
17639      UInt rD = INSN1(11,8);
17640      if (!isBadRegT(rD)) {
17641         UInt imm32 = (INSN0(10,10) << 11)
17642                      | (INSN1(14,12) << 8) | INSN1(7,0);
17643         putIRegT(rD, binop(Iop_Add32,
17644                            binop(Iop_And32, getIRegT(15), mkU32(~3U)),
17645                            mkU32(imm32)),
17646                      condT);
17647         DIP("add r%u, pc, #%u\n", rD, imm32);
17648         goto decode_success;
17649      }
17650   }
17651
17652   /* ----------------- (T1) UMLAL ----------------- */
17653   /* ----------------- (T1) SMLAL ----------------- */
17654   if ((INSN0(15,4) == 0xFBE // UMLAL
17655        || INSN0(15,4) == 0xFBC) // SMLAL
17656       && INSN1(7,4) == BITS4(0,0,0,0)) {
17657      UInt rN   = INSN0(3,0);
17658      UInt rDlo = INSN1(15,12);
17659      UInt rDhi = INSN1(11,8);
17660      UInt rM   = INSN1(3,0);
17661      if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
17662          && !isBadRegT(rM) && rDhi != rDlo) {
17663         Bool   isS   = INSN0(15,4) == 0xFBC;
17664         IRTemp argL  = newTemp(Ity_I32);
17665         IRTemp argR  = newTemp(Ity_I32);
17666         IRTemp old   = newTemp(Ity_I64);
17667         IRTemp res   = newTemp(Ity_I64);
17668         IRTemp resHi = newTemp(Ity_I32);
17669         IRTemp resLo = newTemp(Ity_I32);
17670         IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
17671         assign( argL, getIRegT(rM));
17672         assign( argR, getIRegT(rN));
17673         assign( old, binop(Iop_32HLto64, getIRegT(rDhi), getIRegT(rDlo)) );
17674         assign( res, binop(Iop_Add64,
17675                            mkexpr(old),
17676                            binop(mulOp, mkexpr(argL), mkexpr(argR))) );
17677         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
17678         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
17679         putIRegT( rDhi, mkexpr(resHi), condT );
17680         putIRegT( rDlo, mkexpr(resLo), condT );
17681         DIP("%cmlal r%u, r%u, r%u, r%u\n",
17682             isS ? 's' : 'u', rDlo, rDhi, rN, rM);
17683         goto decode_success;
17684      }
17685   }
17686
17687   /* ------------------ (T2) ADR ------------------ */
17688   if ((INSN0(15,0) == 0xF2AF || INSN0(15,0) == 0xF6AF)
17689       && INSN1(15,15) == 0) {
17690      /* rD = align4(PC) - imm32 */
17691      UInt rD = INSN1(11,8);
17692      if (!isBadRegT(rD)) {
17693         UInt imm32 = (INSN0(10,10) << 11)
17694                      | (INSN1(14,12) << 8) | INSN1(7,0);
17695         putIRegT(rD, binop(Iop_Sub32,
17696                            binop(Iop_And32, getIRegT(15), mkU32(~3U)),
17697                            mkU32(imm32)),
17698                      condT);
17699         DIP("sub r%u, pc, #%u\n", rD, imm32);
17700         goto decode_success;
17701      }
17702   }
17703
17704   /* ------------------- (T1) BFI ------------------- */
17705   /* ------------------- (T1) BFC ------------------- */
17706   if (INSN0(15,4) == 0xF36 && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
17707      UInt rD  = INSN1(11,8);
17708      UInt rN  = INSN0(3,0);
17709      UInt msb = INSN1(4,0);
17710      UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
17711      if (isBadRegT(rD) || rN == 13 || msb < lsb) {
17712         /* undecodable; fall through */
17713      } else {
17714         IRTemp src    = newTemp(Ity_I32);
17715         IRTemp olddst = newTemp(Ity_I32);
17716         IRTemp newdst = newTemp(Ity_I32);
17717         UInt   mask = 1 << (msb - lsb);
17718         mask = (mask - 1) + mask;
17719         vassert(mask != 0); // guaranteed by "msb < lsb" check above
17720         mask <<= lsb;
17721
17722         assign(src, rN == 15 ? mkU32(0) : getIRegT(rN));
17723         assign(olddst, getIRegT(rD));
17724         assign(newdst,
17725                binop(Iop_Or32,
17726                   binop(Iop_And32,
17727                         binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
17728                         mkU32(mask)),
17729                   binop(Iop_And32,
17730                         mkexpr(olddst),
17731                         mkU32(~mask)))
17732               );
17733
17734         putIRegT(rD, mkexpr(newdst), condT);
17735
17736         if (rN == 15) {
17737            DIP("bfc r%u, #%u, #%u\n",
17738                rD, lsb, msb-lsb+1);
17739         } else {
17740            DIP("bfi r%u, r%u, #%u, #%u\n",
17741                rD, rN, lsb, msb-lsb+1);
17742         }
17743         goto decode_success;
17744      }
17745   }
17746
17747   /* ------------------- (T1) SXTAH ------------------- */
17748   /* ------------------- (T1) UXTAH ------------------- */
17749   if ((INSN0(15,4) == 0xFA1      // UXTAH
17750        || INSN0(15,4) == 0xFA0)  // SXTAH
17751       && INSN1(15,12) == BITS4(1,1,1,1)
17752       && INSN1(7,6) == BITS2(1,0)) {
17753      Bool isU = INSN0(15,4) == 0xFA1;
17754      UInt rN  = INSN0(3,0);
17755      UInt rD  = INSN1(11,8);
17756      UInt rM  = INSN1(3,0);
17757      UInt rot = INSN1(5,4);
17758      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
17759         IRTemp srcL = newTemp(Ity_I32);
17760         IRTemp srcR = newTemp(Ity_I32);
17761         IRTemp res  = newTemp(Ity_I32);
17762         assign(srcR, getIRegT(rM));
17763         assign(srcL, getIRegT(rN));
17764         assign(res,  binop(Iop_Add32,
17765                            mkexpr(srcL),
17766                            unop(isU ? Iop_16Uto32 : Iop_16Sto32,
17767                                 unop(Iop_32to16,
17768                                      genROR32(srcR, 8 * rot)))));
17769         putIRegT(rD, mkexpr(res), condT);
17770         DIP("%cxtah r%u, r%u, r%u, ror #%u\n",
17771             isU ? 'u' : 's', rD, rN, rM, rot);
17772         goto decode_success;
17773      }
17774   }
17775
17776   /* ------------------- (T1) SXTAB ------------------- */
17777   /* ------------------- (T1) UXTAB ------------------- */
17778   if ((INSN0(15,4) == 0xFA5      // UXTAB
17779        || INSN0(15,4) == 0xFA4)  // SXTAB
17780       && INSN1(15,12) == BITS4(1,1,1,1)
17781       && INSN1(7,6) == BITS2(1,0)) {
17782      Bool isU = INSN0(15,4) == 0xFA5;
17783      UInt rN  = INSN0(3,0);
17784      UInt rD  = INSN1(11,8);
17785      UInt rM  = INSN1(3,0);
17786      UInt rot = INSN1(5,4);
17787      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
17788         IRTemp srcL = newTemp(Ity_I32);
17789         IRTemp srcR = newTemp(Ity_I32);
17790         IRTemp res  = newTemp(Ity_I32);
17791         assign(srcR, getIRegT(rM));
17792         assign(srcL, getIRegT(rN));
17793         assign(res,  binop(Iop_Add32,
17794                            mkexpr(srcL),
17795                            unop(isU ? Iop_8Uto32 : Iop_8Sto32,
17796                                 unop(Iop_32to8,
17797                                      genROR32(srcR, 8 * rot)))));
17798         putIRegT(rD, mkexpr(res), condT);
17799         DIP("%cxtab r%u, r%u, r%u, ror #%u\n",
17800             isU ? 'u' : 's', rD, rN, rM, rot);
17801         goto decode_success;
17802      }
17803   }
17804
17805   /* ------------------- (T1) CLZ ------------------- */
17806   if (INSN0(15,4) == 0xFAB
17807       && INSN1(15,12) == BITS4(1,1,1,1)
17808       && INSN1(7,4) == BITS4(1,0,0,0)) {
17809      UInt rM1 = INSN0(3,0);
17810      UInt rD  = INSN1(11,8);
17811      UInt rM2 = INSN1(3,0);
17812      if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
17813         IRTemp arg = newTemp(Ity_I32);
17814         IRTemp res = newTemp(Ity_I32);
17815         assign(arg, getIRegT(rM1));
17816         assign(res, IRExpr_Mux0X(
17817                        unop(Iop_1Uto8,binop(Iop_CmpEQ32,
17818                                             mkexpr(arg),
17819                                             mkU32(0))),
17820                        unop(Iop_Clz32, mkexpr(arg)),
17821                        mkU32(32)
17822         ));
17823         putIRegT(rD, mkexpr(res), condT);
17824         DIP("clz r%u, r%u\n", rD, rM1);
17825         goto decode_success;
17826      }
17827   }
17828
17829   /* ------------------- (T1) RBIT ------------------- */
17830   if (INSN0(15,4) == 0xFA9
17831       && INSN1(15,12) == BITS4(1,1,1,1)
17832       && INSN1(7,4) == BITS4(1,0,1,0)) {
17833      UInt rM1 = INSN0(3,0);
17834      UInt rD  = INSN1(11,8);
17835      UInt rM2 = INSN1(3,0);
17836      if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
17837         IRTemp arg = newTemp(Ity_I32);
17838         assign(arg, getIRegT(rM1));
17839         IRTemp res = gen_BITREV(arg);
17840         putIRegT(rD, mkexpr(res), condT);
17841         DIP("rbit r%u, r%u\n", rD, rM1);
17842         goto decode_success;
17843      }
17844   }
17845
17846   /* ------------------- (T2) REV   ------------------- */
17847   /* ------------------- (T2) REV16 ------------------- */
17848   if (INSN0(15,4) == 0xFA9
17849       && INSN1(15,12) == BITS4(1,1,1,1)
17850       && (   INSN1(7,4) == BITS4(1,0,0,0)     // REV
17851           || INSN1(7,4) == BITS4(1,0,0,1))) { // REV16
17852      UInt rM1   = INSN0(3,0);
17853      UInt rD    = INSN1(11,8);
17854      UInt rM2   = INSN1(3,0);
17855      Bool isREV = INSN1(7,4) == BITS4(1,0,0,0);
17856      if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
17857         IRTemp arg = newTemp(Ity_I32);
17858         assign(arg, getIRegT(rM1));
17859         IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
17860         putIRegT(rD, mkexpr(res), condT);
17861         DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM1);
17862         goto decode_success;
17863      }
17864   }
17865
17866   /* -------------- (T1) MSR apsr, reg -------------- */
17867   if (INSN0(15,4) == 0xF38
17868       && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(9,0) == 0x000) {
17869      UInt rN          = INSN0(3,0);
17870      UInt write_ge    = INSN1(10,10);
17871      UInt write_nzcvq = INSN1(11,11);
17872      if (!isBadRegT(rN) && (write_nzcvq || write_ge)) {
17873         IRTemp rNt = newTemp(Ity_I32);
17874         assign(rNt, getIRegT(rN));
17875         desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
17876         DIP("msr cpsr_%s%s, r%u\n",
17877             write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
17878         goto decode_success;
17879      }
17880   }
17881
17882   /* -------------- (T1) MRS reg, apsr -------------- */
17883   if (INSN0(15,0) == 0xF3EF
17884       && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(7,0) == 0x00) {
17885      UInt rD = INSN1(11,8);
17886      if (!isBadRegT(rD)) {
17887         IRTemp apsr = synthesise_APSR();
17888         putIRegT( rD, mkexpr(apsr), condT );
17889         DIP("mrs r%u, cpsr\n", rD);
17890         goto decode_success;
17891      }
17892   }
17893
   /* ----------------- (T1) LDREX ----------------- */
   /* Load-exclusive of a 32-bit value from [rN + imm8 * 4] into rT,
      expressed as an IRStmt_LLSC with no store data. */
   if (INSN0(15,4) == 0xE85 && INSN1(11,8) == BITS4(1,1,1,1)) {
      UInt rN   = INSN0(3,0);    /* base register */
      UInt rT   = INSN1(15,12);  /* destination register */
      UInt imm8 = INSN1(7,0);    /* offset, in units of 4 bytes */
      if (!isBadRegT(rT) && rN != 15) {
         IRTemp res;
         // go uncond
         mk_skip_over_T32_if_cond_is_false( condT );
         // now uncond
         res = newTemp(Ity_I32);
         stmt( IRStmt_LLSC(Iend_LE,
                           res,
                           binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
                           NULL/*this is a load*/ ));
         /* The write is unguarded (IRTemp_INVALID) since we have
            already gone unconditional above. */
         putIRegT(rT, mkexpr(res), IRTemp_INVALID);
         DIP("ldrex r%u, [r%u, #+%u]\n", rT, rN, imm8 * 4);
         goto decode_success;
      }
   }
17914
   /* --------------- (T1) LDREX{B,H} --------------- */
   /* Load-exclusive of a byte (INSN1[11:0] == 0xF4F) or halfword
      (0xF5F) from [rN], zero-extended into rT. */
   if (INSN0(15,4) == 0xE8D
       && (INSN1(11,0) == 0xF4F || INSN1(11,0) == 0xF5F)) {
      UInt rN  = INSN0(3,0);    /* base register */
      UInt rT  = INSN1(15,12);  /* destination register */
      Bool isH = INSN1(11,0) == 0xF5F;
      if (!isBadRegT(rT) && rN != 15) {
         IRTemp res;
         // go uncond
         mk_skip_over_T32_if_cond_is_false( condT );
         // now uncond
         /* The type of res determines the width of the load. */
         res = newTemp(isH ? Ity_I16 : Ity_I8);
         stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
                           NULL/*this is a load*/ ));
         /* Zero-extend to 32 bits; the write is unguarded since we
            have already gone unconditional above. */
         putIRegT(rT, unop(isH ? Iop_16Uto32 : Iop_8Uto32, mkexpr(res)),
                      IRTemp_INVALID);
         DIP("ldrex%c r%u, [r%u]\n", isH ? 'h' : 'b', rT, rN);
         goto decode_success;
      }
   }
17935
   /* --------------- (T1) LDREXD --------------- */
   /* Load-exclusive of a 64-bit value from [rN]; the low 32 bits go
      to rT and the high 32 bits to rT2. */
   if (INSN0(15,4) == 0xE8D && INSN1(7,0) == 0x7F) {
      UInt rN  = INSN0(3,0);    /* base register */
      UInt rT  = INSN1(15,12);  /* receives the low word */
      UInt rT2 = INSN1(11,8);   /* receives the high word */
      if (!isBadRegT(rT) && !isBadRegT(rT2) && rT != rT2 && rN != 15) {
         IRTemp res;
         // go uncond
         mk_skip_over_T32_if_cond_is_false( condT );
         // now uncond
         res = newTemp(Ity_I64);
         // FIXME: assumes little-endian guest
         stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
                           NULL/*this is a load*/ ));
         // FIXME: assumes little-endian guest
         putIRegT(rT,  unop(Iop_64to32,   mkexpr(res)), IRTemp_INVALID);
         putIRegT(rT2, unop(Iop_64HIto32, mkexpr(res)), IRTemp_INVALID);
         DIP("ldrexd r%u, r%u, [r%u]\n", rT, rT2, rN);
         goto decode_success;
      }
   }
17957
   /* ----------------- (T1) STREX ----------------- */
   /* Store-exclusive of rT to [rN + imm8 * 4]; rD receives 0 if the
      store succeeded and 1 if it failed. */
   if (INSN0(15,4) == 0xE84) {
      UInt rN   = INSN0(3,0);    /* base register */
      UInt rT   = INSN1(15,12);  /* register holding the data to store */
      UInt rD   = INSN1(11,8);   /* status result register */
      UInt imm8 = INSN1(7,0);    /* offset, in units of 4 bytes */
      /* The status register must differ from both the base and the
         data register. */
      if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
          && rD != rN && rD != rT) {
         IRTemp resSC1, resSC32;
         // go uncond
         mk_skip_over_T32_if_cond_is_false( condT );
         // now uncond
         /* Ok, now we're unconditional.  Do the store. */
         resSC1 = newTemp(Ity_I1);
         stmt( IRStmt_LLSC(Iend_LE,
                           resSC1,
                           binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
                           getIRegT(rT)) );
         /* Set rD to 1 on failure, 0 on success.  Currently we have
            resSC1 == 0 on failure, 1 on success. */
         resSC32 = newTemp(Ity_I32);
         assign(resSC32,
                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
         putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
         DIP("strex r%u, r%u, [r%u, #+%u]\n", rD, rT, rN, imm8 * 4);
         goto decode_success;
      }
   }
17986
   /* --------------- (T1) STREX{B,H} --------------- */
   /* Store-exclusive of the low byte (INSN1[11:4] == 0xF4) or low
      halfword (0xF5) of rT to [rN]; rD receives 0 if the store
      succeeded and 1 if it failed. */
   if (INSN0(15,4) == 0xE8C
       && (INSN1(11,4) == 0xF4 || INSN1(11,4) == 0xF5)) {
      UInt rN  = INSN0(3,0);    /* base register */
      UInt rT  = INSN1(15,12);  /* register holding the data to store */
      UInt rD  = INSN1(3,0);    /* status result register */
      Bool isH = INSN1(11,4) == 0xF5;
      /* The status register must differ from both the base and the
         data register. */
      if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
          && rD != rN && rD != rT) {
         IRTemp resSC1, resSC32;
         // go uncond
         mk_skip_over_T32_if_cond_is_false( condT );
         // now uncond
         /* Ok, now we're unconditional.  Do the store. */
         resSC1 = newTemp(Ity_I1);
         /* The data is narrowed to the store width first. */
         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN),
                           unop(isH ? Iop_32to16 : Iop_32to8,
                                getIRegT(rT))) );
         /* Set rD to 1 on failure, 0 on success.  Currently we have
            resSC1 == 0 on failure, 1 on success. */
         resSC32 = newTemp(Ity_I32);
         assign(resSC32,
                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
         putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
         DIP("strex%c r%u, r%u, [r%u]\n", isH ? 'h' : 'b', rD, rT, rN);
         goto decode_success;
      }
   }
18015
18016   /* ---------------- (T1) STREXD ---------------- */
18017   if (INSN0(15,4) == 0xE8C && INSN1(7,4) == BITS4(0,1,1,1)) {
18018      UInt rN  = INSN0(3,0);
18019      UInt rT  = INSN1(15,12);
18020      UInt rT2 = INSN1(11,8);
18021      UInt rD  = INSN1(3,0);
18022      if (!isBadRegT(rD) && !isBadRegT(rT) && !isBadRegT(rT2)
18023          && rN != 15 && rD != rN && rD != rT && rD != rT) {
18024         IRTemp resSC1, resSC32, data;
18025         // go uncond
18026         mk_skip_over_T32_if_cond_is_false( condT );
18027         // now uncond
18028         /* Ok, now we're unconditional.  Do the store. */
18029         resSC1 = newTemp(Ity_I1);
18030         data = newTemp(Ity_I64);
18031         // FIXME: assumes little-endian guest
18032         assign(data, binop(Iop_32HLto64, getIRegT(rT2), getIRegT(rT)));
18033         // FIXME: assumes little-endian guest
18034         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN), mkexpr(data)));
18035         /* Set rD to 1 on failure, 0 on success.  Currently we have
18036            resSC1 == 0 on failure, 1 on success. */
18037         resSC32 = newTemp(Ity_I32);
18038         assign(resSC32,
18039                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
18040         putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
18041         DIP("strexd r%u, r%u, r%u, [r%u]\n", rD, rT, rT2, rN);
18042         goto decode_success;
18043      }
18044   }
18045   /* -------------- v7 barrier insns -------------- */
18046   if (INSN0(15,0) == 0xF3BF && (INSN1(15,0) & 0xFF00) == 0x8F00) {
18047      /* FIXME: should this be unconditional? */
18048      /* XXX this isn't really right, is it?  The generated IR does
18049         them unconditionally.  I guess it doesn't matter since it
18050         doesn't do any harm to do them even when the guarding
18051         condition is false -- it's just a performance loss. */
18052      switch (INSN1(7,0)) {
18053         case 0x4F: /* DSB sy */
18054         case 0x4E: /* DSB st */
18055         case 0x4B: /* DSB ish */
18056         case 0x4A: /* DSB ishst */
18057         case 0x47: /* DSB nsh */
18058         case 0x46: /* DSB nshst */
18059         case 0x43: /* DSB osh */
18060         case 0x42: /* DSB oshst */
18061            stmt( IRStmt_MBE(Imbe_Fence) );
18062            DIP("DSB\n");
18063            goto decode_success;
18064         case 0x5F: /* DMB sy */
18065         case 0x5E: /* DMB st */
18066         case 0x5B: /* DMB ish */
18067         case 0x5A: /* DMB ishst */
18068         case 0x57: /* DMB nsh */
18069         case 0x56: /* DMB nshst */
18070         case 0x53: /* DMB osh */
18071         case 0x52: /* DMB oshst */
18072            stmt( IRStmt_MBE(Imbe_Fence) );
18073            DIP("DMB\n");
18074            goto decode_success;
18075         case 0x6F: /* ISB */
18076            stmt( IRStmt_MBE(Imbe_Fence) );
18077            DIP("ISB\n");
18078            goto decode_success;
18079         default:
18080            break;
18081      }
18082   }
18083
18084   /* ---------------------- PLD{,W} ---------------------- */
18085   if ((INSN0(15,4) & 0xFFD) == 0xF89 && INSN1(15,12) == 0xF) {
18086      /* FIXME: should this be unconditional? */
18087      /* PLD/PLDW immediate, encoding T1 */
18088      UInt rN    = INSN0(3,0);
18089      UInt bW    = INSN0(5,5);
18090      UInt imm12 = INSN1(11,0);
18091      DIP("pld%s [r%u, #%u]\n", bW ? "w" : "",  rN, imm12);
18092      goto decode_success;
18093   }
18094
18095   if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,8) == 0xFC) {
18096      /* FIXME: should this be unconditional? */
18097      /* PLD/PLDW immediate, encoding T2 */
18098      UInt rN    = INSN0(3,0);
18099      UInt bW    = INSN0(5,5);
18100      UInt imm8  = INSN1(7,0);
18101      DIP("pld%s [r%u, #-%u]\n", bW ? "w" : "",  rN, imm8);
18102      goto decode_success;
18103   }
18104
18105   if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,6) == 0x3C0) {
18106      /* FIXME: should this be unconditional? */
18107      /* PLD/PLDW register, encoding T1 */
18108      UInt rN   = INSN0(3,0);
18109      UInt rM   = INSN1(3,0);
18110      UInt bW   = INSN0(5,5);
18111      UInt imm2 = INSN1(5,4);
18112      if (!isBadRegT(rM)) {
18113         DIP("pld%s [r%u, r%u, lsl %d]\n", bW ? "w" : "", rN, rM, imm2);
18114         goto decode_success;
18115      }
18116      /* fall through */
18117   }
18118
18119   /* -------------- read CP15 TPIDRURO register ------------- */
18120   /* mrc     p15, 0,  r0, c13, c0, 3  up to
18121      mrc     p15, 0, r14, c13, c0, 3
18122   */
18123   /* I don't know whether this is really v7-only.  But anyway, we
18124      have to support it since arm-linux uses TPIDRURO as a thread
18125      state register. */
18126   if ((INSN0(15,0) == 0xEE1D) && (INSN1(11,0) == 0x0F70)) {
18127      /* FIXME: should this be unconditional? */
18128      UInt rD = INSN1(15,12);
18129      if (!isBadRegT(rD)) {
18130         putIRegT(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32), IRTemp_INVALID);
18131         DIP("mrc p15,0, r%u, c13, c0, 3\n", rD);
18132         goto decode_success;
18133      }
18134      /* fall through */
18135   }
18136
18137   /* ------------------- CLREX ------------------ */
18138   if (INSN0(15,0) == 0xF3BF && INSN1(15,0) == 0x8F2F) {
18139      /* AFAICS, this simply cancels a (all?) reservations made by a
18140         (any?) preceding LDREX(es).  Arrange to hand it through to
18141         the back end. */
18142      mk_skip_over_T32_if_cond_is_false( condT );
18143      stmt( IRStmt_MBE(Imbe_CancelReservation) );
18144      DIP("clrex\n");
18145      goto decode_success;
18146   }
18147
18148   /* ------------------- NOP ------------------ */
18149   if (INSN0(15,0) == 0xF3AF && INSN1(15,0) == 0x8000) {
18150      DIP("nop\n");
18151      goto decode_success;
18152   }
18153
18154   /* ----------------------------------------------------------- */
18155   /* -- VFP (CP 10, CP 11) instructions (in Thumb mode)       -- */
18156   /* ----------------------------------------------------------- */
18157
18158   if (INSN0(15,12) == BITS4(1,1,1,0)) {
18159      UInt insn28 = (INSN0(11,0) << 16) | INSN1(15,0);
18160      Bool ok_vfp = decode_CP10_CP11_instruction (
18161                       &dres, insn28, condT, ARMCondAL/*bogus*/,
18162                       True/*isT*/
18163                    );
18164      if (ok_vfp)
18165         goto decode_success;
18166   }
18167
18168   /* ----------------------------------------------------------- */
18169   /* -- NEON instructions (in Thumb mode)                     -- */
18170   /* ----------------------------------------------------------- */
18171
18172   if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
18173      UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
18174      Bool ok_neon = decode_NEON_instruction(
18175                        &dres, insn32, condT, True/*isT*/
18176                     );
18177      if (ok_neon)
18178         goto decode_success;
18179   }
18180
18181   /* ----------------------------------------------------------- */
18182   /* -- v6 media instructions (in Thumb mode)                 -- */
18183   /* ----------------------------------------------------------- */
18184
18185   { UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
18186     Bool ok_v6m = decode_V6MEDIA_instruction(
18187                      &dres, insn32, condT, ARMCondAL/*bogus*/,
18188                      True/*isT*/
18189                   );
18190     if (ok_v6m)
18191        goto decode_success;
18192   }
18193
18194   /* ----------------------------------------------------------- */
18195   /* -- Undecodable                                           -- */
18196   /* ----------------------------------------------------------- */
18197
18198   goto decode_failure;
18199   /*NOTREACHED*/
18200
18201  decode_failure:
18202   /* All decode failures end up here. */
18203   vex_printf("disInstr(thumb): unhandled instruction: "
18204              "0x%04x 0x%04x\n", (UInt)insn0, (UInt)insn1);
18205
18206   /* Back up ITSTATE to the initial value for this instruction.
18207      If we don't do that, any subsequent restart of the instruction
18208      will restart with the wrong value. */
18209   put_ITSTATE(old_itstate);
   /* Tell the dispatcher that this insn cannot be decoded, and so has
      not been executed, and (is currently) the next to be executed.
      R15 should be up-to-date since it is made so at the start of each
      insn, but nevertheless be paranoid and update it again right
      now. */
18215   vassert(0 == (guest_R15_curr_instr_notENC & 1));
18216   llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) );
18217   irsb->next     = mkU32(guest_R15_curr_instr_notENC | 1 /* CPSR.T */);
18218   irsb->jumpkind = Ijk_NoDecode;
18219   dres.whatNext  = Dis_StopHere;
18220   dres.len       = 0;
18221   return dres;
18222
18223  decode_success:
18224   /* All decode successes end up here. */
18225   DIP("\n");
18226
18227   vassert(dres.len == 2 || dres.len == 4 || dres.len == 20);
18228
18229#if 0
18230   // XXX is this necessary on Thumb?
18231   /* Now then.  Do we have an implicit jump to r15 to deal with? */
18232   if (r15written) {
18233      /* If we get jump to deal with, we assume that there's been no
18234         other competing branch stuff previously generated for this
18235         insn.  That's reasonable, in the sense that the ARM insn set
18236         appears to declare as "Unpredictable" any instruction which
18237         generates more than one possible new value for r15.  Hence
18238         just assert.  The decoders themselves should check against
18239         all such instructions which are thusly Unpredictable, and
18240         decline to decode them.  Hence we should never get here if we
18241         have competing new values for r15, and hence it is safe to
18242         assert here. */
18243      vassert(dres.whatNext == Dis_Continue);
18244      vassert(irsb->next == NULL);
18245      vassert(irsb->jumpkind == Ijk_Boring);
18246      /* If r15 is unconditionally written, terminate the block by
18247         jumping to it.  If it's conditionally written, still
18248         terminate the block (a shame, but we can't do side exits to
18249         arbitrary destinations), but first jump to the next
18250         instruction if the condition doesn't hold. */
18251      /* We can't use getIRegT(15) to get the destination, since that
18252         will produce r15+4, which isn't what we want.  Must use
18253         llGetIReg(15) instead. */
18254      if (r15guard == IRTemp_INVALID) {
18255         /* unconditional */
18256      } else {
18257         /* conditional */
18258         stmt( IRStmt_Exit(
18259                  unop(Iop_32to1,
18260                       binop(Iop_Xor32,
18261                             mkexpr(r15guard), mkU32(1))),
18262                  r15kind,
18263                  IRConst_U32(guest_R15_curr_instr_notENC + 4)
18264         ));
18265      }
18266      irsb->next     = llGetIReg(15);
18267      irsb->jumpkind = r15kind;
18268      dres.whatNext  = Dis_StopHere;
18269   }
18270#endif
18271
18272   return dres;
18273
18274#  undef INSN0
18275#  undef INSN1
18276}
18277
18278#undef DIP
18279#undef DIS
18280
18281
18282/* Helper table for figuring out how many insns an IT insn
18283   conditionalises.
18284
18285   An ITxyz instruction of the format "1011 1111 firstcond mask"
18286   conditionalises some number of instructions, as indicated by the
18287   following table.  A value of zero indicates the instruction is
18288   invalid in some way.
18289
18290   mask = 0 means this isn't an IT instruction
18291   fc = 15 (NV) means unpredictable
18292
18293   The line fc = 14 (AL) is different from the others; there are
18294   additional constraints in this case.
18295
18296          mask(0 ..                   15)
18297        +--------------------------------
18298   fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18299   ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18300        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18301        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18302        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18303        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18304        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18305        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18306        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18307        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18308        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18309        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18310        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18311        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18312        | 0 4 3 0 2 0 0 0 1 0 0 0 0 0 0 0
18313   15)  | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
18314
18315   To be conservative with the analysis, let's rule out the mask = 0
18316   case, since that isn't an IT insn at all.  But for all the other
18317   cases where the table contains zero, that means unpredictable, so
18318   let's say 4 to be conservative.  Hence we have a safe value for any
18319   IT (mask,fc) pair that the CPU would actually identify as an IT
18320   instruction.  The final table is
18321
18322          mask(0 ..                   15)
18323        +--------------------------------
18324   fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18325   ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18326        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18327        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18328        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18329        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18330        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18331        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18332        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18333        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18334        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18335        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18336        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18337        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
18338        | 0 4 3 4 2 4 4 4 1 4 4 4 4 4 4 4
18339   15)  | 0 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
18340*/
18341static const UChar it_length_table[256]
18342   = { 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
18343       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
18344       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
18345       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
18346       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
18347       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
18348       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
18349       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
18350       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
18351       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
18352       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
18353       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
18354       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
18355       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
18356       0, 4, 3, 4, 2, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4,
18357       0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
18358     };
18359
18360
18361/*------------------------------------------------------------*/
18362/*--- Top-level fn                                         ---*/
18363/*------------------------------------------------------------*/
18364
18365/* Disassemble a single instruction into IR.  The instruction
18366   is located in host memory at &guest_code[delta]. */
18367
18368DisResult disInstr_ARM ( IRSB*        irsb_IN,
18369                         Bool         put_IP,
18370                         Bool         (*resteerOkFn) ( void*, Addr64 ),
18371                         Bool         resteerCisOk,
18372                         void*        callback_opaque,
18373                         UChar*       guest_code_IN,
18374                         Long         delta_ENCODED,
18375                         Addr64       guest_IP_ENCODED,
18376                         VexArch      guest_arch,
18377                         VexArchInfo* archinfo,
18378                         VexAbiInfo*  abiinfo,
18379                         Bool         host_bigendian_IN )
18380{
18381   DisResult dres;
18382   Bool isThumb = (Bool)(guest_IP_ENCODED & 1);
18383
18384   /* Set globals (see top of this file) */
18385   vassert(guest_arch == VexArchARM);
18386
18387   irsb              = irsb_IN;
18388   host_is_bigendian = host_bigendian_IN;
18389   __curr_is_Thumb   = isThumb;
18390
18391   if (isThumb) {
18392      guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED - 1;
18393   } else {
18394      guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED;
18395   }
18396
18397   if (isThumb) {
18398      dres = disInstr_THUMB_WRK ( put_IP, resteerOkFn,
18399                                  resteerCisOk, callback_opaque,
18400                                  &guest_code_IN[delta_ENCODED - 1],
18401                                  archinfo, abiinfo );
18402   } else {
18403      dres = disInstr_ARM_WRK ( put_IP, resteerOkFn,
18404                                resteerCisOk, callback_opaque,
18405                                &guest_code_IN[delta_ENCODED],
18406                                archinfo, abiinfo );
18407   }
18408
18409   return dres;
18410}
18411
18412/* Test program for the conversion of IRCmpF64Result values to VFP
18413   nzcv values.  See handling of FCMPD et al above. */
18414/*
18415UInt foo ( UInt x )
18416{
18417   UInt ix    = ((x >> 5) & 3) | (x & 1);
18418   UInt termL = (((((ix ^ 1) << 30) - 1) >> 29) + 1);
18419   UInt termR = (ix & (ix >> 1) & 1);
18420   return termL  -  termR;
18421}
18422
18423void try ( char* s, UInt ir, UInt req )
18424{
18425   UInt act = foo(ir);
18426   printf("%s 0x%02x -> req %d%d%d%d act %d%d%d%d (0x%x)\n",
18427          s, ir, (req >> 3) & 1, (req >> 2) & 1,
18428                 (req >> 1) & 1, (req >> 0) & 1,
18429                 (act >> 3) & 1, (act >> 2) & 1,
18430                 (act >> 1) & 1, (act >> 0) & 1, act);
18431
18432}
18433
18434int main ( void )
18435{
18436   printf("\n");
18437   try("UN", 0x45, 0b0011);
18438   try("LT", 0x01, 0b1000);
18439   try("GT", 0x00, 0b0010);
18440   try("EQ", 0x40, 0b0110);
18441   printf("\n");
18442   return 0;
18443}
18444*/
18445
18446/*--------------------------------------------------------------------*/
18447/*--- end                                         guest_arm_toIR.c ---*/
18448/*--------------------------------------------------------------------*/
18449