guest_arm_toIR.c revision 2ca80a6a6fc069acdb73186e8e578dbf8f46af80
1
2/*--------------------------------------------------------------------*/
3/*--- begin                                       guest_arm_toIR.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2013 OpenWorks LLP
11      info@open-works.net
12
13   NEON support is
14   Copyright (C) 2010-2013 Samsung Electronics
15   contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16              and Kirill Batuzov <batuzovk@ispras.ru>
17
18   This program is free software; you can redistribute it and/or
19   modify it under the terms of the GNU General Public License as
20   published by the Free Software Foundation; either version 2 of the
21   License, or (at your option) any later version.
22
23   This program is distributed in the hope that it will be useful, but
24   WITHOUT ANY WARRANTY; without even the implied warranty of
25   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
26   General Public License for more details.
27
28   You should have received a copy of the GNU General Public License
29   along with this program; if not, write to the Free Software
30   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31   02110-1301, USA.
32
33   The GNU General Public License is contained in the file COPYING.
34*/
35
36/* XXXX thumb to check:
37   that all cases where putIRegT writes r15, we generate a jump.
38
39   All uses of newTemp assign to an IRTemp and not a UInt
40
41   For all thumb loads and stores, including VFP ones, new-ITSTATE is
42   backed out before the memory op, and restored afterwards.  This
43   needs to happen even after we go uncond.  (and for sure it doesn't
44   happen for VFP loads/stores right now).
45
46   VFP on thumb: check that we exclude all r13/r15 cases that we
47   should.
48
49   XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
50   taking into account the number of insns guarded by an IT.
51
52   remove the nasty hack, in the spechelper, of looking for Or32(...,
53   0xE0) as the first arg to armg_calculate_condition, and instead
54   use Slice44 as specified in comments in the spechelper.
55
56   add specialisations for armg_calculate_flag_c and _v, as they
57   are moderately often needed in Thumb code.
58
59   Correctness: ITSTATE handling in Thumb SVCs is wrong.
60
61   Correctness (obscure): in m_transtab, when invalidating code
62   address ranges, invalidate up to 18 bytes after the end of the
63   range.  This is because the ITSTATE optimisation at the top of
64   _THUMB_WRK below analyses up to 18 bytes before the start of any
65   given instruction, and so might depend on the invalidated area.
66*/
67
68/* Limitations, etc
69
70   - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
71     These instructions are non-restartable in the case where the
72     transfer(s) fault.
73
74   - SWP: the restart jump back is Ijk_Boring; it should be
75     Ijk_NoRedir but that's expensive.  See comments on casLE() in
76     guest_x86_toIR.c.
77*/
78
79/* "Special" instructions.
80
81   This instruction decoder can decode four special instructions
82   which mean nothing natively (are no-ops as far as regs/mem are
83   concerned) but have meaning for supporting Valgrind.  A special
84   instruction is flagged by a 16-byte preamble:
85
86      E1A0C1EC E1A0C6EC E1A0CEEC E1A0C9EC
87      (mov r12, r12, ROR #3;   mov r12, r12, ROR #13;
88       mov r12, r12, ROR #29;  mov r12, r12, ROR #19)
89
90   Following that, one of the following 4 is allowed
91   (standard interpretation in parentheses):
92
93      E18AA00A (orr r10,r10,r10)   R3 = client_request ( R4 )
94      E18BB00B (orr r11,r11,r11)   R3 = guest_NRADDR
95      E18CC00C (orr r12,r12,r12)   branch-and-link-to-noredir R4
96      E1899009 (orr r9,r9,r9)      IR injection
97
98   Any other bytes following the 16-byte preamble are illegal and
99   constitute a failure in instruction decoding.  This all assumes
100   that the preamble will never occur except in specific code
101   fragments designed for Valgrind to catch.
102*/
103
104/* Translates ARM(v5) code to IR. */
105
106#include "libvex_basictypes.h"
107#include "libvex_ir.h"
108#include "libvex.h"
109#include "libvex_guest_arm.h"
110
111#include "main_util.h"
112#include "main_globals.h"
113#include "guest_generic_bb_to_IR.h"
114#include "guest_arm_defs.h"
115
116
117/*------------------------------------------------------------*/
118/*--- Globals                                              ---*/
119/*------------------------------------------------------------*/
120
121/* These are set at the start of the translation of an instruction, so
122   that we don't have to pass them around endlessly.  CONST means does
123   not change during translation of the instruction.
124*/
125
126/* CONST: is the host bigendian?  This has to do with float vs double
127   register accesses on VFP, but it's complex and not properly thought
128   out. */
129static Bool host_is_bigendian;
130
131/* CONST: The guest address for the instruction currently being
132   translated.  This is the real, "decoded" address (not subject
133   to the CPSR.T kludge). */
134static Addr32 guest_R15_curr_instr_notENC;
135
136/* CONST, FOR ASSERTIONS ONLY.  Indicates whether currently processed
137   insn is Thumb (True) or ARM (False). */
138static Bool __curr_is_Thumb;
139
140/* MOD: The IRSB* into which we're generating code. */
141static IRSB* irsb;
142
143/* These are to do with handling writes to r15.  They are initially
144   set at the start of disInstr_ARM_WRK to indicate no update,
145   possibly updated during the routine, and examined again at the end.
146   If they have been set to indicate a r15 update then a jump is
147   generated.  Note, "explicit" jumps (b, bx, etc) are generated
148   directly, not using this mechanism -- this is intended to handle
149   the implicit-style jumps resulting from (eg) assigning to r15 as
150   the result of insns we wouldn't normally consider branchy. */
151
152/* MOD.  Initially False; set to True iff abovementioned handling is
153   required. */
154static Bool r15written;
155
156/* MOD.  Initially IRTemp_INVALID.  If the r15 branch to be generated
157   is conditional, this holds the gating IRTemp :: Ity_I32.  If the
158   branch to be generated is unconditional, this remains
159   IRTemp_INVALID. */
160static IRTemp r15guard; /* :: Ity_I32, 0 or 1 */
161
162/* MOD.  Initially Ijk_Boring.  If an r15 branch is to be generated,
163   this holds the jump kind. */
164static IRTemp r15kind;
165
166
167/*------------------------------------------------------------*/
168/*--- Debugging output                                     ---*/
169/*------------------------------------------------------------*/
170
/* Front-end tracing: call vex_printf with the given format and
   arguments, but only when VEX_TRACE_FE is set in vex_traceflags.
   Wrapped in do { } while (0) so that the macro behaves as a single
   statement and cannot capture an 'else' that follows it. */
#define DIP(format, args...)                 \
   do {                                      \
      if (vex_traceflags & VEX_TRACE_FE)     \
         vex_printf(format, ## args);        \
   } while (0)

/* As DIP, but formats into 'buf' using vex_sprintf. */
#define DIS(buf, format, args...)            \
   do {                                      \
      if (vex_traceflags & VEX_TRACE_FE)     \
         vex_sprintf(buf, format, ## args);  \
   } while (0)

/* Assert that the instruction being translated is a Thumb one. */
#define ASSERT_IS_THUMB \
   do { vassert(__curr_is_Thumb); } while (0)

/* Assert that the instruction being translated is an ARM one. */
#define ASSERT_IS_ARM \
   do { vassert(! __curr_is_Thumb); } while (0)
184
185
186/*------------------------------------------------------------*/
187/*--- Helper bits and pieces for deconstructing the        ---*/
188/*--- arm insn stream.                                     ---*/
189/*------------------------------------------------------------*/
190
/* Assemble a 32-bit value from the four bytes at p, taking p[0] as
   the least significant byte and p[3] as the most significant.  The
   result does not depend on the endianness of the host. */
static inline UInt getUIntLittleEndianly ( UChar* p )
{
   UInt b0 = p[0];
   UInt b1 = p[1];
   UInt b2 = p[2];
   UInt b3 = p[3];
   return (b3 << 24) | (b2 << 16) | (b1 << 8) | b0;
}
202
/* Assemble a 16-bit value from the two bytes at p, taking p[0] as
   the low byte and p[1] as the high byte.  The result does not depend
   on the endianness of the host. */
static inline UShort getUShortLittleEndianly ( UChar* p )
{
   UInt lo = p[0];
   UInt hi = p[1];
   return (UShort)((hi << 8) | lo);
}
212
/* Rotate the 32-bit value x right by sh bit positions.  sh must be in
   the range 0 .. 31 (asserted).  A rotate by zero is handled
   separately, since the general expression would then shift left by
   32, which is not a valid shift amount for a 32-bit value. */
static UInt ROR32 ( UInt x, UInt sh ) {
   /* sh is unsigned, so only the upper bound needs checking. */
   vassert(sh < 32);
   if (sh == 0)
      return x;
   return (x >> sh) | (x << (32 - sh));
}
220
/* Return the number of 1 bits in x (0 .. 32). */
static Int popcount32 ( UInt x )
{
   Int nSet = 0;
   /* Each iteration clears the lowest set bit of x. */
   while (x != 0) {
      x &= x - 1;
      nSet++;
   }
   return nSet;
}
230
/* Return x with bit number ix replaced by the least significant bit
   of b; all other bits of x are unchanged.  The mask is built from an
   unsigned 1 so that ix == 31 does not shift into the sign bit of a
   signed int. */
static UInt setbit32 ( UInt x, Int ix, UInt b )
{
   UInt mask = 1U << ix;
   x &= ~mask;
   x |= ((b << ix) & mask);
   return x;
}
238
/* BITSn(b_{n-1}, ..., b_0) packs n values into one integer, with the
   first argument landing in the most significant position and the
   last argument in bit 0.  BITS4 is the basic building block. */
#define BITS4(_b3,_b2,_b1,_b0) \
   ((_b0) | ((_b1) << 1) | ((_b2) << 2) | ((_b3) << 3))

#define BITS3(_b2,_b1,_b0) \
   ((_b0) | ((_b1) << 1) | ((_b2) << 2))

#define BITS2(_b1,_b0) \
   ((_b0) | ((_b1) << 1))

#define BITS5(_b4,_b3,_b2,_b1,_b0) \
   (((_b4) << 4) | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \
   ((BITS2((_b5),(_b4)) << 4) | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   ((BITS3((_b6),(_b5),(_b4)) << 4) | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (BITS4((_b3),(_b2),(_b1),(_b0)) \
    | (BITS4((_b7),(_b6),(_b5),(_b4)) << 4))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)) \
    | ((_b8) << 8))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)) \
    | ((_b8) << 8) | ((_b9) << 9))
266
/* Extract bits _bMax down to _bMin (both inclusive) of _uint, as a
   UInt with the field right-justified.  The mask is computed in 64
   bits so that a full 32-bit slice (31:0) is handled. */
#define SLICE_UInt(_uint,_bMax,_bMin) \
   ( (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL) \
     & (((UInt)(_uint)) >> (_bMin)) )
271
272
273/*------------------------------------------------------------*/
274/*--- Helper bits and pieces for creating IR fragments.    ---*/
275/*------------------------------------------------------------*/
276
277static IRExpr* mkU64 ( ULong i )
278{
279   return IRExpr_Const(IRConst_U64(i));
280}
281
282static IRExpr* mkU32 ( UInt i )
283{
284   return IRExpr_Const(IRConst_U32(i));
285}
286
287static IRExpr* mkU8 ( UInt i )
288{
289   vassert(i < 256);
290   return IRExpr_Const(IRConst_U8( (UChar)i ));
291}
292
293static IRExpr* mkexpr ( IRTemp tmp )
294{
295   return IRExpr_RdTmp(tmp);
296}
297
298static IRExpr* unop ( IROp op, IRExpr* a )
299{
300   return IRExpr_Unop(op, a);
301}
302
303static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
304{
305   return IRExpr_Binop(op, a1, a2);
306}
307
308static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
309{
310   return IRExpr_Triop(op, a1, a2, a3);
311}
312
313static IRExpr* loadLE ( IRType ty, IRExpr* addr )
314{
315   return IRExpr_Load(Iend_LE, ty, addr);
316}
317
318/* Add a statement to the list held by "irbb". */
319static void stmt ( IRStmt* st )
320{
321   addStmtToIRSB( irsb, st );
322}
323
324static void assign ( IRTemp dst, IRExpr* e )
325{
326   stmt( IRStmt_WrTmp(dst, e) );
327}
328
329static void storeLE ( IRExpr* addr, IRExpr* data )
330{
331   stmt( IRStmt_Store(Iend_LE, addr, data) );
332}
333
334static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
335{
336   if (guardT == IRTemp_INVALID) {
337      /* unconditional */
338      storeLE(addr, data);
339   } else {
340      stmt( IRStmt_StoreG(Iend_LE, addr, data,
341                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
342   }
343}
344
345static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
346                            IRExpr* addr, IRExpr* alt,
347                            IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
348{
349   if (guardT == IRTemp_INVALID) {
350      /* unconditional */
351      IRExpr* loaded = NULL;
352      switch (cvt) {
353         case ILGop_Ident32:
354            loaded = loadLE(Ity_I32, addr); break;
355         case ILGop_8Uto32:
356            loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
357         case ILGop_8Sto32:
358            loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
359         case ILGop_16Uto32:
360            loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
361         case ILGop_16Sto32:
362            loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
363         default:
364            vassert(0);
365      }
366      vassert(loaded != NULL);
367      assign(dst, loaded);
368   } else {
369      /* Generate a guarded load into 'dst', but apply 'cvt' to the
370         loaded data before putting the data in 'dst'.  If the load
371         does not take place, 'alt' is placed directly in 'dst'. */
372      stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
373                         binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
374   }
375}
376
377/* Generate a new temporary of the given type. */
378static IRTemp newTemp ( IRType ty )
379{
380   vassert(isPlausibleIRType(ty));
381   return newIRTemp( irsb->tyenv, ty );
382}
383
384/* Produces a value in 0 .. 3, which is encoded as per the type
385   IRRoundingMode. */
386static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
387{
388   return mkU32(Irrm_NEAREST);
389}
390
391/* Generate an expression for SRC rotated right by ROT. */
392static IRExpr* genROR32( IRTemp src, Int rot )
393{
394   vassert(rot >= 0 && rot < 32);
395   if (rot == 0)
396      return mkexpr(src);
397   return
398      binop(Iop_Or32,
399            binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
400            binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
401}
402
403static IRExpr* mkU128 ( ULong i )
404{
405   return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
406}
407
408/* Generate a 4-aligned version of the given expression if
409   the given condition is true.  Else return it unchanged. */
410static IRExpr* align4if ( IRExpr* e, Bool b )
411{
412   if (b)
413      return binop(Iop_And32, e, mkU32(~3));
414   else
415      return e;
416}
417
418
419/*------------------------------------------------------------*/
420/*--- Helpers for accessing guest registers.               ---*/
421/*------------------------------------------------------------*/
422
/* Byte offsets of fields within VexGuestARMState.  ARMGST_OFFB_OF is
   a local shorthand: it expands to offsetof(VexGuestARMState, field),
   so every OFFB_ name is still an integer constant expression. */
#define ARMGST_OFFB_OF(_field)  offsetof(VexGuestARMState,_field)

/* Integer registers; guest_R15T is the slot used for r15. */
#define OFFB_R0       ARMGST_OFFB_OF(guest_R0)
#define OFFB_R1       ARMGST_OFFB_OF(guest_R1)
#define OFFB_R2       ARMGST_OFFB_OF(guest_R2)
#define OFFB_R3       ARMGST_OFFB_OF(guest_R3)
#define OFFB_R4       ARMGST_OFFB_OF(guest_R4)
#define OFFB_R5       ARMGST_OFFB_OF(guest_R5)
#define OFFB_R6       ARMGST_OFFB_OF(guest_R6)
#define OFFB_R7       ARMGST_OFFB_OF(guest_R7)
#define OFFB_R8       ARMGST_OFFB_OF(guest_R8)
#define OFFB_R9       ARMGST_OFFB_OF(guest_R9)
#define OFFB_R10      ARMGST_OFFB_OF(guest_R10)
#define OFFB_R11      ARMGST_OFFB_OF(guest_R11)
#define OFFB_R12      ARMGST_OFFB_OF(guest_R12)
#define OFFB_R13      ARMGST_OFFB_OF(guest_R13)
#define OFFB_R14      ARMGST_OFFB_OF(guest_R14)
#define OFFB_R15T     ARMGST_OFFB_OF(guest_R15T)

/* Condition-code thunk fields and guest_NRADDR. */
#define OFFB_CC_OP    ARMGST_OFFB_OF(guest_CC_OP)
#define OFFB_CC_DEP1  ARMGST_OFFB_OF(guest_CC_DEP1)
#define OFFB_CC_DEP2  ARMGST_OFFB_OF(guest_CC_DEP2)
#define OFFB_CC_NDEP  ARMGST_OFFB_OF(guest_CC_NDEP)
#define OFFB_NRADDR   ARMGST_OFFB_OF(guest_NRADDR)

/* D registers. */
#define OFFB_D0       ARMGST_OFFB_OF(guest_D0)
#define OFFB_D1       ARMGST_OFFB_OF(guest_D1)
#define OFFB_D2       ARMGST_OFFB_OF(guest_D2)
#define OFFB_D3       ARMGST_OFFB_OF(guest_D3)
#define OFFB_D4       ARMGST_OFFB_OF(guest_D4)
#define OFFB_D5       ARMGST_OFFB_OF(guest_D5)
#define OFFB_D6       ARMGST_OFFB_OF(guest_D6)
#define OFFB_D7       ARMGST_OFFB_OF(guest_D7)
#define OFFB_D8       ARMGST_OFFB_OF(guest_D8)
#define OFFB_D9       ARMGST_OFFB_OF(guest_D9)
#define OFFB_D10      ARMGST_OFFB_OF(guest_D10)
#define OFFB_D11      ARMGST_OFFB_OF(guest_D11)
#define OFFB_D12      ARMGST_OFFB_OF(guest_D12)
#define OFFB_D13      ARMGST_OFFB_OF(guest_D13)
#define OFFB_D14      ARMGST_OFFB_OF(guest_D14)
#define OFFB_D15      ARMGST_OFFB_OF(guest_D15)
#define OFFB_D16      ARMGST_OFFB_OF(guest_D16)
#define OFFB_D17      ARMGST_OFFB_OF(guest_D17)
#define OFFB_D18      ARMGST_OFFB_OF(guest_D18)
#define OFFB_D19      ARMGST_OFFB_OF(guest_D19)
#define OFFB_D20      ARMGST_OFFB_OF(guest_D20)
#define OFFB_D21      ARMGST_OFFB_OF(guest_D21)
#define OFFB_D22      ARMGST_OFFB_OF(guest_D22)
#define OFFB_D23      ARMGST_OFFB_OF(guest_D23)
#define OFFB_D24      ARMGST_OFFB_OF(guest_D24)
#define OFFB_D25      ARMGST_OFFB_OF(guest_D25)
#define OFFB_D26      ARMGST_OFFB_OF(guest_D26)
#define OFFB_D27      ARMGST_OFFB_OF(guest_D27)
#define OFFB_D28      ARMGST_OFFB_OF(guest_D28)
#define OFFB_D29      ARMGST_OFFB_OF(guest_D29)
#define OFFB_D30      ARMGST_OFFB_OF(guest_D30)
#define OFFB_D31      ARMGST_OFFB_OF(guest_D31)

/* Miscellaneous 32-bit state. */
#define OFFB_FPSCR    ARMGST_OFFB_OF(guest_FPSCR)
#define OFFB_TPIDRURO ARMGST_OFFB_OF(guest_TPIDRURO)
#define OFFB_ITSTATE  ARMGST_OFFB_OF(guest_ITSTATE)
#define OFFB_QFLAG32  ARMGST_OFFB_OF(guest_QFLAG32)
#define OFFB_GEFLAG0  ARMGST_OFFB_OF(guest_GEFLAG0)
#define OFFB_GEFLAG1  ARMGST_OFFB_OF(guest_GEFLAG1)
#define OFFB_GEFLAG2  ARMGST_OFFB_OF(guest_GEFLAG2)
#define OFFB_GEFLAG3  ARMGST_OFFB_OF(guest_GEFLAG3)

#define OFFB_TISTART  ARMGST_OFFB_OF(guest_TISTART)
#define OFFB_TILEN    ARMGST_OFFB_OF(guest_TILEN)
490
491
492/* ---------------- Integer registers ---------------- */
493
494static Int integerGuestRegOffset ( UInt iregNo )
495{
496   /* Do we care about endianness here?  We do if sub-parts of integer
497      registers are accessed, but I don't think that ever happens on
498      ARM. */
499   switch (iregNo) {
500      case 0:  return OFFB_R0;
501      case 1:  return OFFB_R1;
502      case 2:  return OFFB_R2;
503      case 3:  return OFFB_R3;
504      case 4:  return OFFB_R4;
505      case 5:  return OFFB_R5;
506      case 6:  return OFFB_R6;
507      case 7:  return OFFB_R7;
508      case 8:  return OFFB_R8;
509      case 9:  return OFFB_R9;
510      case 10: return OFFB_R10;
511      case 11: return OFFB_R11;
512      case 12: return OFFB_R12;
513      case 13: return OFFB_R13;
514      case 14: return OFFB_R14;
515      case 15: return OFFB_R15T;
516      default: vassert(0);
517   }
518}
519
520/* Plain ("low level") read from a reg; no +8 offset magic for r15. */
521static IRExpr* llGetIReg ( UInt iregNo )
522{
523   vassert(iregNo < 16);
524   return IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
525}
526
527/* Architected read from a reg in ARM mode.  This automagically adds 8
528   to all reads of r15. */
529static IRExpr* getIRegA ( UInt iregNo )
530{
531   IRExpr* e;
532   ASSERT_IS_ARM;
533   vassert(iregNo < 16);
534   if (iregNo == 15) {
535      /* If asked for r15, don't read the guest state value, as that
536         may not be up to date in the case where loop unrolling has
537         happened, because the first insn's write to the block is
538         omitted; hence in the 2nd and subsequent unrollings we don't
539         have a correct value in guest r15.  Instead produce the
540         constant that we know would be produced at this point. */
541      vassert(0 == (guest_R15_curr_instr_notENC & 3));
542      e = mkU32(guest_R15_curr_instr_notENC + 8);
543   } else {
544      e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
545   }
546   return e;
547}
548
549/* Architected read from a reg in Thumb mode.  This automagically adds
550   4 to all reads of r15. */
551static IRExpr* getIRegT ( UInt iregNo )
552{
553   IRExpr* e;
554   ASSERT_IS_THUMB;
555   vassert(iregNo < 16);
556   if (iregNo == 15) {
557      /* Ditto comment in getIReg. */
558      vassert(0 == (guest_R15_curr_instr_notENC & 1));
559      e = mkU32(guest_R15_curr_instr_notENC + 4);
560   } else {
561      e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
562   }
563   return e;
564}
565
566/* Plain ("low level") write to a reg; no jump or alignment magic for
567   r15. */
568static void llPutIReg ( UInt iregNo, IRExpr* e )
569{
570   vassert(iregNo < 16);
571   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
572   stmt( IRStmt_Put(integerGuestRegOffset(iregNo), e) );
573}
574
575/* Architected write to an integer register in ARM mode.  If it is to
576   r15, record info so at the end of this insn's translation, a branch
577   to it can be made.  Also handles conditional writes to the
578   register: if guardT == IRTemp_INVALID then the write is
579   unconditional.  If writing r15, also 4-align it. */
580static void putIRegA ( UInt       iregNo,
581                       IRExpr*    e,
582                       IRTemp     guardT /* :: Ity_I32, 0 or 1 */,
583                       IRJumpKind jk /* if a jump is generated */ )
584{
585   /* if writing r15, force e to be 4-aligned. */
586   // INTERWORKING FIXME.  this needs to be relaxed so that
587   // puts caused by LDMxx which load r15 interwork right.
588   // but is no aligned too relaxed?
589   //if (iregNo == 15)
590   //   e = binop(Iop_And32, e, mkU32(~3));
591   ASSERT_IS_ARM;
592   /* So, generate either an unconditional or a conditional write to
593      the reg. */
594   if (guardT == IRTemp_INVALID) {
595      /* unconditional write */
596      llPutIReg( iregNo, e );
597   } else {
598      llPutIReg( iregNo,
599                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
600                             e, llGetIReg(iregNo) ));
601   }
602   if (iregNo == 15) {
603      // assert against competing r15 updates.  Shouldn't
604      // happen; should be ruled out by the instr matching
605      // logic.
606      vassert(r15written == False);
607      vassert(r15guard   == IRTemp_INVALID);
608      vassert(r15kind    == Ijk_Boring);
609      r15written = True;
610      r15guard   = guardT;
611      r15kind    = jk;
612   }
613}
614
615
616/* Architected write to an integer register in Thumb mode.  Writes to
617   r15 are not allowed.  Handles conditional writes to the register:
618   if guardT == IRTemp_INVALID then the write is unconditional. */
619static void putIRegT ( UInt       iregNo,
620                       IRExpr*    e,
621                       IRTemp     guardT /* :: Ity_I32, 0 or 1 */ )
622{
623   /* So, generate either an unconditional or a conditional write to
624      the reg. */
625   ASSERT_IS_THUMB;
626   vassert(iregNo >= 0 && iregNo <= 14);
627   if (guardT == IRTemp_INVALID) {
628      /* unconditional write */
629      llPutIReg( iregNo, e );
630   } else {
631      llPutIReg( iregNo,
632                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
633                             e, llGetIReg(iregNo) ));
634   }
635}
636
637
638/* Thumb16 and Thumb32 only.
639   Returns true if reg is 13 or 15.  Implements the BadReg
640   predicate in the ARM ARM. */
641static Bool isBadRegT ( UInt r )
642{
643   vassert(r <= 15);
644   ASSERT_IS_THUMB;
645   return r == 13 || r == 15;
646}
647
648
649/* ---------------- Double registers ---------------- */
650
651static Int doubleGuestRegOffset ( UInt dregNo )
652{
653   /* Do we care about endianness here?  Probably do if we ever get
654      into the situation of dealing with the single-precision VFP
655      registers. */
656   switch (dregNo) {
657      case 0:  return OFFB_D0;
658      case 1:  return OFFB_D1;
659      case 2:  return OFFB_D2;
660      case 3:  return OFFB_D3;
661      case 4:  return OFFB_D4;
662      case 5:  return OFFB_D5;
663      case 6:  return OFFB_D6;
664      case 7:  return OFFB_D7;
665      case 8:  return OFFB_D8;
666      case 9:  return OFFB_D9;
667      case 10: return OFFB_D10;
668      case 11: return OFFB_D11;
669      case 12: return OFFB_D12;
670      case 13: return OFFB_D13;
671      case 14: return OFFB_D14;
672      case 15: return OFFB_D15;
673      case 16: return OFFB_D16;
674      case 17: return OFFB_D17;
675      case 18: return OFFB_D18;
676      case 19: return OFFB_D19;
677      case 20: return OFFB_D20;
678      case 21: return OFFB_D21;
679      case 22: return OFFB_D22;
680      case 23: return OFFB_D23;
681      case 24: return OFFB_D24;
682      case 25: return OFFB_D25;
683      case 26: return OFFB_D26;
684      case 27: return OFFB_D27;
685      case 28: return OFFB_D28;
686      case 29: return OFFB_D29;
687      case 30: return OFFB_D30;
688      case 31: return OFFB_D31;
689      default: vassert(0);
690   }
691}
692
693/* Plain ("low level") read from a VFP Dreg. */
694static IRExpr* llGetDReg ( UInt dregNo )
695{
696   vassert(dregNo < 32);
697   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_F64 );
698}
699
700/* Architected read from a VFP Dreg. */
701static IRExpr* getDReg ( UInt dregNo ) {
702   return llGetDReg( dregNo );
703}
704
705/* Plain ("low level") write to a VFP Dreg. */
706static void llPutDReg ( UInt dregNo, IRExpr* e )
707{
708   vassert(dregNo < 32);
709   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
710   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
711}
712
713/* Architected write to a VFP Dreg.  Handles conditional writes to the
714   register: if guardT == IRTemp_INVALID then the write is
715   unconditional. */
716static void putDReg ( UInt    dregNo,
717                      IRExpr* e,
718                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
719{
720   /* So, generate either an unconditional or a conditional write to
721      the reg. */
722   if (guardT == IRTemp_INVALID) {
723      /* unconditional write */
724      llPutDReg( dregNo, e );
725   } else {
726      llPutDReg( dregNo,
727                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
728                             e, llGetDReg(dregNo) ));
729   }
730}
731
732/* And now exactly the same stuff all over again, but this time
733   taking/returning I64 rather than F64, to support 64-bit Neon
734   ops. */
735
736/* Plain ("low level") read from a Neon Integer Dreg. */
737static IRExpr* llGetDRegI64 ( UInt dregNo )
738{
739   vassert(dregNo < 32);
740   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_I64 );
741}
742
743/* Architected read from a Neon Integer Dreg. */
744static IRExpr* getDRegI64 ( UInt dregNo ) {
745   return llGetDRegI64( dregNo );
746}
747
748/* Plain ("low level") write to a Neon Integer Dreg. */
749static void llPutDRegI64 ( UInt dregNo, IRExpr* e )
750{
751   vassert(dregNo < 32);
752   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
753   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
754}
755
756/* Architected write to a Neon Integer Dreg.  Handles conditional
757   writes to the register: if guardT == IRTemp_INVALID then the write
758   is unconditional. */
759static void putDRegI64 ( UInt    dregNo,
760                         IRExpr* e,
761                         IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
762{
763   /* So, generate either an unconditional or a conditional write to
764      the reg. */
765   if (guardT == IRTemp_INVALID) {
766      /* unconditional write */
767      llPutDRegI64( dregNo, e );
768   } else {
769      llPutDRegI64( dregNo,
770                    IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
771                                e, llGetDRegI64(dregNo) ));
772   }
773}
774
775/* ---------------- Quad registers ---------------- */
776
777static Int quadGuestRegOffset ( UInt qregNo )
778{
779   /* Do we care about endianness here?  Probably do if we ever get
780      into the situation of dealing with the 64 bit Neon registers. */
781   switch (qregNo) {
782      case 0:  return OFFB_D0;
783      case 1:  return OFFB_D2;
784      case 2:  return OFFB_D4;
785      case 3:  return OFFB_D6;
786      case 4:  return OFFB_D8;
787      case 5:  return OFFB_D10;
788      case 6:  return OFFB_D12;
789      case 7:  return OFFB_D14;
790      case 8:  return OFFB_D16;
791      case 9:  return OFFB_D18;
792      case 10: return OFFB_D20;
793      case 11: return OFFB_D22;
794      case 12: return OFFB_D24;
795      case 13: return OFFB_D26;
796      case 14: return OFFB_D28;
797      case 15: return OFFB_D30;
798      default: vassert(0);
799   }
800}
801
802/* Plain ("low level") read from a Neon Qreg. */
803static IRExpr* llGetQReg ( UInt qregNo )
804{
805   vassert(qregNo < 16);
806   return IRExpr_Get( quadGuestRegOffset(qregNo), Ity_V128 );
807}
808
809/* Architected read from a Neon Qreg. */
810static IRExpr* getQReg ( UInt qregNo ) {
811   return llGetQReg( qregNo );
812}
813
814/* Plain ("low level") write to a Neon Qreg. */
815static void llPutQReg ( UInt qregNo, IRExpr* e )
816{
817   vassert(qregNo < 16);
818   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
819   stmt( IRStmt_Put(quadGuestRegOffset(qregNo), e) );
820}
821
822/* Architected write to a Neon Qreg.  Handles conditional writes to the
823   register: if guardT == IRTemp_INVALID then the write is
824   unconditional. */
825static void putQReg ( UInt    qregNo,
826                      IRExpr* e,
827                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
828{
829   /* So, generate either an unconditional or a conditional write to
830      the reg. */
831   if (guardT == IRTemp_INVALID) {
832      /* unconditional write */
833      llPutQReg( qregNo, e );
834   } else {
835      llPutQReg( qregNo,
836                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
837                             e, llGetQReg(qregNo) ));
838   }
839}
840
841
842/* ---------------- Float registers ---------------- */
843
844static Int floatGuestRegOffset ( UInt fregNo )
845{
846   /* Start with the offset of the containing double, and then correct
847      for endianness.  Actually this is completely bogus and needs
848      careful thought. */
849   Int off;
850   vassert(fregNo < 32);
851   off = doubleGuestRegOffset(fregNo >> 1);
852   if (host_is_bigendian) {
853      vassert(0);
854   } else {
855      if (fregNo & 1)
856         off += 4;
857   }
858   return off;
859}
860
861/* Plain ("low level") read from a VFP Freg. */
862static IRExpr* llGetFReg ( UInt fregNo )
863{
864   vassert(fregNo < 32);
865   return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
866}
867
868/* Architected read from a VFP Freg. */
869static IRExpr* getFReg ( UInt fregNo ) {
870   return llGetFReg( fregNo );
871}
872
873/* Plain ("low level") write to a VFP Freg. */
874static void llPutFReg ( UInt fregNo, IRExpr* e )
875{
876   vassert(fregNo < 32);
877   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
878   stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
879}
880
881/* Architected write to a VFP Freg.  Handles conditional writes to the
882   register: if guardT == IRTemp_INVALID then the write is
883   unconditional. */
884static void putFReg ( UInt    fregNo,
885                      IRExpr* e,
886                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
887{
888   /* So, generate either an unconditional or a conditional write to
889      the reg. */
890   if (guardT == IRTemp_INVALID) {
891      /* unconditional write */
892      llPutFReg( fregNo, e );
893   } else {
894      llPutFReg( fregNo,
895                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
896                             e, llGetFReg(fregNo) ));
897   }
898}
899
900
901/* ---------------- Misc registers ---------------- */
902
903static void putMiscReg32 ( UInt    gsoffset,
904                           IRExpr* e, /* :: Ity_I32 */
905                           IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
906{
907   switch (gsoffset) {
908      case OFFB_FPSCR:   break;
909      case OFFB_QFLAG32: break;
910      case OFFB_GEFLAG0: break;
911      case OFFB_GEFLAG1: break;
912      case OFFB_GEFLAG2: break;
913      case OFFB_GEFLAG3: break;
914      default: vassert(0); /* awaiting more cases */
915   }
916   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
917
918   if (guardT == IRTemp_INVALID) {
919      /* unconditional write */
920      stmt(IRStmt_Put(gsoffset, e));
921   } else {
922      stmt(IRStmt_Put(
923         gsoffset,
924         IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
925                     e, IRExpr_Get(gsoffset, Ity_I32) )
926      ));
927   }
928}
929
930static IRTemp get_ITSTATE ( void )
931{
932   ASSERT_IS_THUMB;
933   IRTemp t = newTemp(Ity_I32);
934   assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
935   return t;
936}
937
938static void put_ITSTATE ( IRTemp t )
939{
940   ASSERT_IS_THUMB;
941   stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
942}
943
944static IRTemp get_QFLAG32 ( void )
945{
946   IRTemp t = newTemp(Ity_I32);
947   assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
948   return t;
949}
950
951static void put_QFLAG32 ( IRTemp t, IRTemp condT )
952{
953   putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
954}
955
956/* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
957   Status Register) to indicate that overflow or saturation occurred.
958   Nb: t must be zero to denote no saturation, and any nonzero
959   value to indicate saturation. */
960static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
961{
962   IRTemp old = get_QFLAG32();
963   IRTemp nyu = newTemp(Ity_I32);
964   assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
965   put_QFLAG32(nyu, condT);
966}
967
968/* Generate code to set APSR.GE[flagNo]. Each fn call sets 1 bit.
969   flagNo: which flag bit to set [3...0]
970   lowbits_to_ignore:  0 = look at all 32 bits
971                       8 = look at top 24 bits only
972                      16 = look at top 16 bits only
973                      31 = look at the top bit only
974   e: input value to be evaluated.
975   The new value is taken from 'e' with the lowest 'lowbits_to_ignore'
976   masked out.  If the resulting value is zero then the GE flag is
977   set to 0; any other value sets the flag to 1. */
978static void put_GEFLAG32 ( Int flagNo,            /* 0, 1, 2 or 3 */
979                           Int lowbits_to_ignore, /* 0, 8, 16 or 31   */
980                           IRExpr* e,             /* Ity_I32 */
981                           IRTemp condT )
982{
983   vassert( flagNo >= 0 && flagNo <= 3 );
984   vassert( lowbits_to_ignore == 0  ||
985            lowbits_to_ignore == 8  ||
986            lowbits_to_ignore == 16 ||
987            lowbits_to_ignore == 31 );
988   IRTemp masked = newTemp(Ity_I32);
989   assign(masked, binop(Iop_Shr32, e, mkU8(lowbits_to_ignore)));
990
991   switch (flagNo) {
992      case 0: putMiscReg32(OFFB_GEFLAG0, mkexpr(masked), condT); break;
993      case 1: putMiscReg32(OFFB_GEFLAG1, mkexpr(masked), condT); break;
994      case 2: putMiscReg32(OFFB_GEFLAG2, mkexpr(masked), condT); break;
995      case 3: putMiscReg32(OFFB_GEFLAG3, mkexpr(masked), condT); break;
996      default: vassert(0);
997   }
998}
999
1000/* Return the (32-bit, zero-or-nonzero representation scheme) of
1001   the specified GE flag. */
1002static IRExpr* get_GEFLAG32( Int flagNo /* 0, 1, 2, 3 */ )
1003{
1004   switch (flagNo) {
1005      case 0: return IRExpr_Get( OFFB_GEFLAG0, Ity_I32 );
1006      case 1: return IRExpr_Get( OFFB_GEFLAG1, Ity_I32 );
1007      case 2: return IRExpr_Get( OFFB_GEFLAG2, Ity_I32 );
1008      case 3: return IRExpr_Get( OFFB_GEFLAG3, Ity_I32 );
1009      default: vassert(0);
1010   }
1011}
1012
1013/* Set all 4 GE flags from the given 32-bit value as follows: GE 3 and
1014   2 are set from bit 31 of the value, and GE 1 and 0 are set from bit
1015   15 of the value.  All other bits are ignored. */
1016static void set_GE_32_10_from_bits_31_15 ( IRTemp t32, IRTemp condT )
1017{
1018   IRTemp ge10 = newTemp(Ity_I32);
1019   IRTemp ge32 = newTemp(Ity_I32);
1020   assign(ge10, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
1021   assign(ge32, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
1022   put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
1023   put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
1024   put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
1025   put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
1026}
1027
1028
1029/* Set all 4 GE flags from the given 32-bit value as follows: GE 3
1030   from bit 31, GE 2 from bit 23, GE 1 from bit 15, and GE0 from
1031   bit 7.  All other bits are ignored. */
1032static void set_GE_3_2_1_0_from_bits_31_23_15_7 ( IRTemp t32, IRTemp condT )
1033{
1034   IRTemp ge0 = newTemp(Ity_I32);
1035   IRTemp ge1 = newTemp(Ity_I32);
1036   IRTemp ge2 = newTemp(Ity_I32);
1037   IRTemp ge3 = newTemp(Ity_I32);
1038   assign(ge0, binop(Iop_And32, mkexpr(t32), mkU32(0x00000080)));
1039   assign(ge1, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
1040   assign(ge2, binop(Iop_And32, mkexpr(t32), mkU32(0x00800000)));
1041   assign(ge3, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
1042   put_GEFLAG32( 0, 0, mkexpr(ge0), condT );
1043   put_GEFLAG32( 1, 0, mkexpr(ge1), condT );
1044   put_GEFLAG32( 2, 0, mkexpr(ge2), condT );
1045   put_GEFLAG32( 3, 0, mkexpr(ge3), condT );
1046}
1047
1048
1049/* ---------------- FPSCR stuff ---------------- */
1050
1051/* Generate IR to get hold of the rounding mode bits in FPSCR, and
1052   convert them to IR format.  Bind the final result to the
1053   returned temp. */
1054static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1055{
1056   /* The ARMvfp encoding for rounding mode bits is:
1057         00  to nearest
1058         01  to +infinity
1059         10  to -infinity
1060         11  to zero
1061      We need to convert that to the IR encoding:
1062         00  to nearest (the default)
1063         10  to +infinity
1064         01  to -infinity
1065         11  to zero
1066      Which can be done by swapping bits 0 and 1.
1067      The rmode bits are at 23:22 in FPSCR.
1068   */
1069   IRTemp armEncd = newTemp(Ity_I32);
1070   IRTemp swapped = newTemp(Ity_I32);
1071   /* Fish FPSCR[23:22] out, and slide to bottom.  Doesn't matter that
1072      we don't zero out bits 24 and above, since the assignment to
1073      'swapped' will mask them out anyway. */
1074   assign(armEncd,
1075          binop(Iop_Shr32, IRExpr_Get(OFFB_FPSCR, Ity_I32), mkU8(22)));
1076   /* Now swap them. */
1077   assign(swapped,
1078          binop(Iop_Or32,
1079                binop(Iop_And32,
1080                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1081                      mkU32(2)),
1082                binop(Iop_And32,
1083                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1084                      mkU32(1))
1085         ));
1086   return swapped;
1087}
1088
1089
1090/*------------------------------------------------------------*/
1091/*--- Helpers for flag handling and conditional insns      ---*/
1092/*------------------------------------------------------------*/
1093
1094static const HChar* name_ARMCondcode ( ARMCondcode cond )
1095{
1096   switch (cond) {
1097      case ARMCondEQ:  return "{eq}";
1098      case ARMCondNE:  return "{ne}";
1099      case ARMCondHS:  return "{hs}";  // or 'cs'
1100      case ARMCondLO:  return "{lo}";  // or 'cc'
1101      case ARMCondMI:  return "{mi}";
1102      case ARMCondPL:  return "{pl}";
1103      case ARMCondVS:  return "{vs}";
1104      case ARMCondVC:  return "{vc}";
1105      case ARMCondHI:  return "{hi}";
1106      case ARMCondLS:  return "{ls}";
1107      case ARMCondGE:  return "{ge}";
1108      case ARMCondLT:  return "{lt}";
1109      case ARMCondGT:  return "{gt}";
1110      case ARMCondLE:  return "{le}";
1111      case ARMCondAL:  return ""; // {al}: is the default
1112      case ARMCondNV:  return "{nv}";
1113      default: vpanic("name_ARMCondcode");
1114   }
1115}
1116/* and a handy shorthand for it */
1117static const HChar* nCC ( ARMCondcode cond ) {
1118   return name_ARMCondcode(cond);
1119}
1120
1121
1122/* Build IR to calculate some particular condition from stored
1123   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1124   Ity_I32, suitable for narrowing.  Although the return type is
1125   Ity_I32, the returned value is either 0 or 1.  'cond' must be
1126   :: Ity_I32 and must denote the condition to compute in
1127   bits 7:4, and be zero everywhere else.
1128*/
1129static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
1130{
1131   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
1132   /* And 'cond' had better produce a value in which only bits 7:4 are
1133      nonzero.  However, obviously we can't assert for that. */
1134
1135   /* So what we're constructing for the first argument is
1136      "(cond << 4) | stored-operation".
1137      However, as per comments above, 'cond' must be supplied
1138      pre-shifted to this function.
1139
1140      This pairing scheme requires that the ARM_CC_OP_ values all fit
1141      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
1142      8 bits of the first argument. */
1143   IRExpr** args
1144      = mkIRExprVec_4(
1145           binop(Iop_Or32, IRExpr_Get(OFFB_CC_OP, Ity_I32), cond),
1146           IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1147           IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1148           IRExpr_Get(OFFB_CC_NDEP, Ity_I32)
1149        );
1150   IRExpr* call
1151      = mkIRExprCCall(
1152           Ity_I32,
1153           0/*regparm*/,
1154           "armg_calculate_condition", &armg_calculate_condition,
1155           args
1156        );
1157
1158   /* Exclude the requested condition, OP and NDEP from definedness
1159      checking.  We're only interested in DEP1 and DEP2. */
1160   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1161   return call;
1162}
1163
1164
1165/* Build IR to calculate some particular condition from stored
1166   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1167   Ity_I32, suitable for narrowing.  Although the return type is
1168   Ity_I32, the returned value is either 0 or 1.
1169*/
1170static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
1171{
1172  /* First arg is "(cond << 4) | condition".  This requires that the
1173     ARM_CC_OP_ values all fit in 4 bits.  Hence we are passing a
1174     (COND, OP) pair in the lowest 8 bits of the first argument. */
1175   vassert(cond >= 0 && cond <= 15);
1176   return mk_armg_calculate_condition_dyn( mkU32(cond << 4) );
1177}
1178
1179
1180/* Build IR to calculate just the carry flag from stored
1181   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
1182   Ity_I32. */
1183static IRExpr* mk_armg_calculate_flag_c ( void )
1184{
1185   IRExpr** args
1186      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1187                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1188                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1189                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1190   IRExpr* call
1191      = mkIRExprCCall(
1192           Ity_I32,
1193           0/*regparm*/,
1194           "armg_calculate_flag_c", &armg_calculate_flag_c,
1195           args
1196        );
1197   /* Exclude OP and NDEP from definedness checking.  We're only
1198      interested in DEP1 and DEP2. */
1199   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1200   return call;
1201}
1202
1203
1204/* Build IR to calculate just the overflow flag from stored
1205   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
1206   Ity_I32. */
1207static IRExpr* mk_armg_calculate_flag_v ( void )
1208{
1209   IRExpr** args
1210      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1211                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1212                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1213                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1214   IRExpr* call
1215      = mkIRExprCCall(
1216           Ity_I32,
1217           0/*regparm*/,
1218           "armg_calculate_flag_v", &armg_calculate_flag_v,
1219           args
1220        );
1221   /* Exclude OP and NDEP from definedness checking.  We're only
1222      interested in DEP1 and DEP2. */
1223   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1224   return call;
1225}
1226
1227
1228/* Build IR to calculate N Z C V in bits 31:28 of the
1229   returned word. */
1230static IRExpr* mk_armg_calculate_flags_nzcv ( void )
1231{
1232   IRExpr** args
1233      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1234                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1235                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1236                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1237   IRExpr* call
1238      = mkIRExprCCall(
1239           Ity_I32,
1240           0/*regparm*/,
1241           "armg_calculate_flags_nzcv", &armg_calculate_flags_nzcv,
1242           args
1243        );
1244   /* Exclude OP and NDEP from definedness checking.  We're only
1245      interested in DEP1 and DEP2. */
1246   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1247   return call;
1248}
1249
1250static IRExpr* mk_armg_calculate_flag_qc ( IRExpr* resL, IRExpr* resR, Bool Q )
1251{
1252   IRExpr** args1;
1253   IRExpr** args2;
1254   IRExpr *call1, *call2, *res;
1255
1256   if (Q) {
1257      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(0)),
1258                              binop(Iop_GetElem32x4, resL, mkU8(1)),
1259                              binop(Iop_GetElem32x4, resR, mkU8(0)),
1260                              binop(Iop_GetElem32x4, resR, mkU8(1)) );
1261      args2 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(2)),
1262                              binop(Iop_GetElem32x4, resL, mkU8(3)),
1263                              binop(Iop_GetElem32x4, resR, mkU8(2)),
1264                              binop(Iop_GetElem32x4, resR, mkU8(3)) );
1265   } else {
1266      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x2, resL, mkU8(0)),
1267                              binop(Iop_GetElem32x2, resL, mkU8(1)),
1268                              binop(Iop_GetElem32x2, resR, mkU8(0)),
1269                              binop(Iop_GetElem32x2, resR, mkU8(1)) );
1270   }
1271
1272   call1 = mkIRExprCCall(
1273             Ity_I32,
1274             0/*regparm*/,
1275             "armg_calculate_flag_qc", &armg_calculate_flag_qc,
1276             args1
1277          );
1278   if (Q) {
1279      call2 = mkIRExprCCall(
1280                Ity_I32,
1281                0/*regparm*/,
1282                "armg_calculate_flag_qc", &armg_calculate_flag_qc,
1283                args2
1284             );
1285   }
1286   if (Q) {
1287      res = binop(Iop_Or32, call1, call2);
1288   } else {
1289      res = call1;
1290   }
1291   return res;
1292}
1293
1294// FIXME: this is named wrongly .. looks like a sticky set of
1295// QC, not a write to it.
1296static void setFlag_QC ( IRExpr* resL, IRExpr* resR, Bool Q,
1297                         IRTemp condT )
1298{
1299   putMiscReg32 (OFFB_FPSCR,
1300                 binop(Iop_Or32,
1301                       IRExpr_Get(OFFB_FPSCR, Ity_I32),
1302                       binop(Iop_Shl32,
1303                             mk_armg_calculate_flag_qc(resL, resR, Q),
1304                             mkU8(27))),
1305                 condT);
1306}
1307
1308/* Build IR to conditionally set the flags thunk.  As with putIReg, if
1309   guard is IRTemp_INVALID then it's unconditional, else it holds a
1310   condition :: Ity_I32. */
1311static
1312void setFlags_D1_D2_ND ( UInt cc_op, IRTemp t_dep1,
1313                         IRTemp t_dep2, IRTemp t_ndep,
1314                         IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1315{
1316   vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I32));
1317   vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I32));
1318   vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I32));
1319   vassert(cc_op >= ARMG_CC_OP_COPY && cc_op < ARMG_CC_OP_NUMBER);
1320   if (guardT == IRTemp_INVALID) {
1321      /* unconditional */
1322      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(cc_op) ));
1323      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1324      stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1325      stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1326   } else {
1327      /* conditional */
1328      IRTemp c1 = newTemp(Ity_I1);
1329      assign( c1, binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)) );
1330      stmt( IRStmt_Put(
1331               OFFB_CC_OP,
1332               IRExpr_ITE( mkexpr(c1),
1333                           mkU32(cc_op),
1334                           IRExpr_Get(OFFB_CC_OP, Ity_I32) ) ));
1335      stmt( IRStmt_Put(
1336               OFFB_CC_DEP1,
1337               IRExpr_ITE( mkexpr(c1),
1338                           mkexpr(t_dep1),
1339                           IRExpr_Get(OFFB_CC_DEP1, Ity_I32) ) ));
1340      stmt( IRStmt_Put(
1341               OFFB_CC_DEP2,
1342               IRExpr_ITE( mkexpr(c1),
1343                           mkexpr(t_dep2),
1344                           IRExpr_Get(OFFB_CC_DEP2, Ity_I32) ) ));
1345      stmt( IRStmt_Put(
1346               OFFB_CC_NDEP,
1347               IRExpr_ITE( mkexpr(c1),
1348                           mkexpr(t_ndep),
1349                           IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ) ));
1350   }
1351}
1352
1353
1354/* Minor variant of the above that sets NDEP to zero (if it
1355   sets it at all) */
1356static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
1357                             IRTemp t_dep2,
1358                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1359{
1360   IRTemp z32 = newTemp(Ity_I32);
1361   assign( z32, mkU32(0) );
1362   setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
1363}
1364
1365
1366/* Minor variant of the above that sets DEP2 to zero (if it
1367   sets it at all) */
1368static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
1369                             IRTemp t_ndep,
1370                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1371{
1372   IRTemp z32 = newTemp(Ity_I32);
1373   assign( z32, mkU32(0) );
1374   setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
1375}
1376
1377
1378/* Minor variant of the above that sets DEP2 and NDEP to zero (if it
1379   sets them at all) */
1380static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
1381                          IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1382{
1383   IRTemp z32 = newTemp(Ity_I32);
1384   assign( z32, mkU32(0) );
1385   setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
1386}
1387
1388
1389/* ARM only */
1390/* Generate a side-exit to the next instruction, if the given guard
1391   expression :: Ity_I32 is 0 (note!  the side exit is taken if the
1392   condition is false!)  This is used to skip over conditional
1393   instructions which we can't generate straight-line code for, either
1394   because they are too complex or (more likely) they potentially
1395   generate exceptions.
1396*/
1397static void mk_skip_over_A32_if_cond_is_false (
1398               IRTemp guardT /* :: Ity_I32, 0 or 1 */
1399            )
1400{
1401   ASSERT_IS_ARM;
1402   vassert(guardT != IRTemp_INVALID);
1403   vassert(0 == (guest_R15_curr_instr_notENC & 3));
1404   stmt( IRStmt_Exit(
1405            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1406            Ijk_Boring,
1407            IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)),
1408            OFFB_R15T
1409       ));
1410}
1411
1412/* Thumb16 only */
1413/* ditto, but jump over a 16-bit thumb insn */
1414static void mk_skip_over_T16_if_cond_is_false (
1415               IRTemp guardT /* :: Ity_I32, 0 or 1 */
1416            )
1417{
1418   ASSERT_IS_THUMB;
1419   vassert(guardT != IRTemp_INVALID);
1420   vassert(0 == (guest_R15_curr_instr_notENC & 1));
1421   stmt( IRStmt_Exit(
1422            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1423            Ijk_Boring,
1424            IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)),
1425            OFFB_R15T
1426       ));
1427}
1428
1429
1430/* Thumb32 only */
1431/* ditto, but jump over a 32-bit thumb insn */
1432static void mk_skip_over_T32_if_cond_is_false (
1433               IRTemp guardT /* :: Ity_I32, 0 or 1 */
1434            )
1435{
1436   ASSERT_IS_THUMB;
1437   vassert(guardT != IRTemp_INVALID);
1438   vassert(0 == (guest_R15_curr_instr_notENC & 1));
1439   stmt( IRStmt_Exit(
1440            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1441            Ijk_Boring,
1442            IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)),
1443            OFFB_R15T
1444       ));
1445}
1446
1447
1448/* Thumb16 and Thumb32 only
1449   Generate a SIGILL followed by a restart of the current instruction
1450   if the given temp is nonzero. */
1451static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ )
1452{
1453   ASSERT_IS_THUMB;
1454   vassert(t != IRTemp_INVALID);
1455   vassert(0 == (guest_R15_curr_instr_notENC & 1));
1456   stmt(
1457      IRStmt_Exit(
1458         binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
1459         Ijk_NoDecode,
1460         IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1)),
1461         OFFB_R15T
1462      )
1463   );
1464}
1465
1466
1467/* Inspect the old_itstate, and generate a SIGILL if it indicates that
1468   we are currently in an IT block and are not the last in the block.
1469   This also rolls back guest_ITSTATE to its old value before the exit
1470   and restores it to its new value afterwards.  This is so that if
1471   the exit is taken, we have an up to date version of ITSTATE
1472   available.  Without doing that, we have no hope of making precise
1473   exceptions work. */
1474static void gen_SIGILL_T_if_in_but_NLI_ITBlock (
1475               IRTemp old_itstate /* :: Ity_I32 */,
1476               IRTemp new_itstate /* :: Ity_I32 */
1477            )
1478{
1479   ASSERT_IS_THUMB;
1480   put_ITSTATE(old_itstate); // backout
1481   IRTemp guards_for_next3 = newTemp(Ity_I32);
1482   assign(guards_for_next3,
1483          binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
1484   gen_SIGILL_T_if_nonzero(guards_for_next3);
1485   put_ITSTATE(new_itstate); //restore
1486}
1487
1488
1489/* Simpler version of the above, which generates a SIGILL if
1490   we're anywhere within an IT block. */
1491static void gen_SIGILL_T_if_in_ITBlock (
1492               IRTemp old_itstate /* :: Ity_I32 */,
1493               IRTemp new_itstate /* :: Ity_I32 */
1494            )
1495{
1496   put_ITSTATE(old_itstate); // backout
1497   gen_SIGILL_T_if_nonzero(old_itstate);
1498   put_ITSTATE(new_itstate); //restore
1499}
1500
1501
1502/* Generate an APSR value, from the NZCV thunk, and
1503   from QFLAG32 and GEFLAG0 .. GEFLAG3. */
1504static IRTemp synthesise_APSR ( void )
1505{
1506   IRTemp res1 = newTemp(Ity_I32);
1507   // Get NZCV
1508   assign( res1, mk_armg_calculate_flags_nzcv() );
1509   // OR in the Q value
1510   IRTemp res2 = newTemp(Ity_I32);
1511   assign(
1512      res2,
1513      binop(Iop_Or32,
1514            mkexpr(res1),
1515            binop(Iop_Shl32,
1516                  unop(Iop_1Uto32,
1517                       binop(Iop_CmpNE32,
1518                             mkexpr(get_QFLAG32()),
1519                             mkU32(0))),
1520                  mkU8(ARMG_CC_SHIFT_Q)))
1521   );
1522   // OR in GE0 .. GE3
1523   IRExpr* ge0
1524      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(0), mkU32(0)));
1525   IRExpr* ge1
1526      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(1), mkU32(0)));
1527   IRExpr* ge2
1528      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(2), mkU32(0)));
1529   IRExpr* ge3
1530      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(3), mkU32(0)));
1531   IRTemp res3 = newTemp(Ity_I32);
1532   assign(res3,
1533          binop(Iop_Or32,
1534                mkexpr(res2),
1535                binop(Iop_Or32,
1536                      binop(Iop_Or32,
1537                            binop(Iop_Shl32, ge0, mkU8(16)),
1538                            binop(Iop_Shl32, ge1, mkU8(17))),
1539                      binop(Iop_Or32,
1540                            binop(Iop_Shl32, ge2, mkU8(18)),
1541                            binop(Iop_Shl32, ge3, mkU8(19))) )));
1542   return res3;
1543}
1544
1545
1546/* and the inverse transformation: given an APSR value,
1547   set the NZCV thunk, the Q flag, and the GE flags. */
1548static void desynthesise_APSR ( Bool write_nzcvq, Bool write_ge,
1549                                IRTemp apsrT, IRTemp condT )
1550{
1551   vassert(write_nzcvq || write_ge);
1552   if (write_nzcvq) {
1553      // Do NZCV
1554      IRTemp immT = newTemp(Ity_I32);
1555      assign(immT, binop(Iop_And32, mkexpr(apsrT), mkU32(0xF0000000)) );
1556      setFlags_D1(ARMG_CC_OP_COPY, immT, condT);
1557      // Do Q
1558      IRTemp qnewT = newTemp(Ity_I32);
1559      assign(qnewT, binop(Iop_And32, mkexpr(apsrT), mkU32(ARMG_CC_MASK_Q)));
1560      put_QFLAG32(qnewT, condT);
1561   }
1562   if (write_ge) {
1563      // Do GE3..0
1564      put_GEFLAG32(0, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<16)),
1565                   condT);
1566      put_GEFLAG32(1, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<17)),
1567                   condT);
1568      put_GEFLAG32(2, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<18)),
1569                   condT);
1570      put_GEFLAG32(3, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<19)),
1571                   condT);
1572   }
1573}
1574
1575
1576/*------------------------------------------------------------*/
1577/*--- Helpers for saturation                               ---*/
1578/*------------------------------------------------------------*/
1579
/* FIXME: absolutely the only diff. between (a) armUnsignedSatQ and
   (b) armSignedSatQ is that in (a) the floor is set to 0, whereas in
   (b) the floor is computed from the value of imm5.  These two
   functions should be commoned up. */
1584
1585/* UnsignedSatQ(): 'clamp' each value so it lies between 0 <= x <= (2^N)-1
1586   Optionally return flag resQ saying whether saturation occurred.
1587   See definition in manual, section A2.2.1, page 41
1588   (bits(N), boolean) UnsignedSatQ( integer i, integer N )
1589   {
1590     if ( i > (2^N)-1 ) { result = (2^N)-1; saturated = TRUE; }
1591     elsif ( i < 0 )    { result = 0; saturated = TRUE; }
1592     else               { result = i; saturated = FALSE; }
1593     return ( result<N-1:0>, saturated );
1594   }
1595*/
1596static void armUnsignedSatQ( IRTemp* res,  /* OUT - Ity_I32 */
1597                             IRTemp* resQ, /* OUT - Ity_I32  */
1598                             IRTemp regT,  /* value to clamp - Ity_I32 */
1599                             UInt imm5 )   /* saturation ceiling */
1600{
1601   UInt ceil  = (1 << imm5) - 1;    // (2^imm5)-1
1602   UInt floor = 0;
1603
1604   IRTemp nd0 = newTemp(Ity_I32);
1605   IRTemp nd1 = newTemp(Ity_I32);
1606   IRTemp nd2 = newTemp(Ity_I1);
1607   IRTemp nd3 = newTemp(Ity_I32);
1608   IRTemp nd4 = newTemp(Ity_I32);
1609   IRTemp nd5 = newTemp(Ity_I1);
1610   IRTemp nd6 = newTemp(Ity_I32);
1611
1612   assign( nd0, mkexpr(regT) );
1613   assign( nd1, mkU32(ceil) );
1614   assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
1615   assign( nd3, IRExpr_ITE(mkexpr(nd2), mkexpr(nd1), mkexpr(nd0)) );
1616   assign( nd4, mkU32(floor) );
1617   assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
1618   assign( nd6, IRExpr_ITE(mkexpr(nd5), mkexpr(nd4), mkexpr(nd3)) );
1619   assign( *res, mkexpr(nd6) );
1620
1621   /* if saturation occurred, then resQ is set to some nonzero value
1622      if sat did not occur, resQ is guaranteed to be zero. */
1623   if (resQ) {
1624      assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
1625   }
1626}
1627
1628
1629/* SignedSatQ(): 'clamp' each value so it lies between  -2^N <= x <= (2^N) - 1
1630   Optionally return flag resQ saying whether saturation occurred.
1631   - see definition in manual, section A2.2.1, page 41
1632   (bits(N), boolean ) SignedSatQ( integer i, integer N )
1633   {
1634     if ( i > 2^(N-1) - 1 )    { result = 2^(N-1) - 1; saturated = TRUE; }
1635     elsif ( i < -(2^(N-1)) )  { result = -(2^(N-1));  saturated = FALSE; }
1636     else                      { result = i;           saturated = FALSE; }
1637     return ( result[N-1:0], saturated );
1638   }
1639*/
1640static void armSignedSatQ( IRTemp regT,    /* value to clamp - Ity_I32 */
1641                           UInt imm5,      /* saturation ceiling */
1642                           IRTemp* res,    /* OUT - Ity_I32 */
1643                           IRTemp* resQ )  /* OUT - Ity_I32  */
1644{
1645   Int ceil  =  (1 << (imm5-1)) - 1;  //  (2^(imm5-1))-1
1646   Int floor = -(1 << (imm5-1));      // -(2^(imm5-1))
1647
1648   IRTemp nd0 = newTemp(Ity_I32);
1649   IRTemp nd1 = newTemp(Ity_I32);
1650   IRTemp nd2 = newTemp(Ity_I1);
1651   IRTemp nd3 = newTemp(Ity_I32);
1652   IRTemp nd4 = newTemp(Ity_I32);
1653   IRTemp nd5 = newTemp(Ity_I1);
1654   IRTemp nd6 = newTemp(Ity_I32);
1655
1656   assign( nd0, mkexpr(regT) );
1657   assign( nd1, mkU32(ceil) );
1658   assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
1659   assign( nd3, IRExpr_ITE( mkexpr(nd2), mkexpr(nd1), mkexpr(nd0) ) );
1660   assign( nd4, mkU32(floor) );
1661   assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
1662   assign( nd6, IRExpr_ITE( mkexpr(nd5), mkexpr(nd4), mkexpr(nd3) ) );
1663   assign( *res, mkexpr(nd6) );
1664
1665   /* if saturation occurred, then resQ is set to some nonzero value
1666      if sat did not occur, resQ is guaranteed to be zero. */
1667   if (resQ) {
1668     assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
1669   }
1670}
1671
1672
1673/* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
1674   overflow occurred for 32-bit addition.  Needs both args and the
1675   result.  HD p27. */
1676static
1677IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
1678                                      IRTemp argL, IRTemp argR )
1679{
1680   IRTemp res = newTemp(Ity_I32);
1681   assign(res, resE);
1682   return
1683      binop( Iop_Shr32,
1684             binop( Iop_And32,
1685                    binop( Iop_Xor32, mkexpr(res), mkexpr(argL) ),
1686                    binop( Iop_Xor32, mkexpr(res), mkexpr(argR) )),
1687             mkU8(31) );
1688}
1689
1690/* Similarly .. also from HD p27 .. */
1691static
1692IRExpr* signed_overflow_after_Sub32 ( IRExpr* resE,
1693                                      IRTemp argL, IRTemp argR )
1694{
1695   IRTemp res = newTemp(Ity_I32);
1696   assign(res, resE);
1697   return
1698      binop( Iop_Shr32,
1699             binop( Iop_And32,
1700                    binop( Iop_Xor32, mkexpr(argL), mkexpr(argR) ),
1701                    binop( Iop_Xor32, mkexpr(res),  mkexpr(argL) )),
1702             mkU8(31) );
1703}
1704
1705
1706/*------------------------------------------------------------*/
1707/*--- Larger helpers                                       ---*/
1708/*------------------------------------------------------------*/
1709
1710/* Compute both the result and new C flag value for a LSL by an imm5
1711   or by a register operand.  May generate reads of the old C value
1712   (hence only safe to use before any writes to guest state happen).
1713   Are factored out so can be used by both ARM and Thumb.
1714
   Note that in compute_result_and_C_after_{LSL,LSR,ASR}_by_{imm5,reg},
1716   "res" (the result)  is a.k.a. "shop", shifter operand
1717   "newC" (the new C)  is a.k.a. "shco", shifter carry out
1718
1719   The calling convention for res and newC is a bit funny.  They could
1720   be passed by value, but instead are passed by ref.
1721
1722   The C (shco) value computed must be zero in bits 31:1, as the IR
1723   optimisations for flag handling (guest_arm_spechelper) rely on
1724   that, and the slow-path handlers (armg_calculate_flags_nzcv) assert
1725   for it.  Same applies to all these functions that compute shco
1726   after a shift or rotate, not just this one.
1727*/
1728
1729static void compute_result_and_C_after_LSL_by_imm5 (
1730               /*OUT*/HChar* buf,
1731               IRTemp* res,
1732               IRTemp* newC,
1733               IRTemp rMt, UInt shift_amt, /* operands */
1734               UInt rM      /* only for debug printing */
1735            )
1736{
1737   if (shift_amt == 0) {
1738      if (newC) {
1739         assign( *newC, mk_armg_calculate_flag_c() );
1740      }
1741      assign( *res, mkexpr(rMt) );
1742      DIS(buf, "r%u", rM);
1743   } else {
1744      vassert(shift_amt >= 1 && shift_amt <= 31);
1745      if (newC) {
1746         assign( *newC,
1747                 binop(Iop_And32,
1748                       binop(Iop_Shr32, mkexpr(rMt),
1749                                        mkU8(32 - shift_amt)),
1750                       mkU32(1)));
1751      }
1752      assign( *res,
1753              binop(Iop_Shl32, mkexpr(rMt), mkU8(shift_amt)) );
1754      DIS(buf, "r%u, LSL #%u", rM, shift_amt);
1755   }
1756}
1757
1758
1759static void compute_result_and_C_after_LSL_by_reg (
1760               /*OUT*/HChar* buf,
1761               IRTemp* res,
1762               IRTemp* newC,
1763               IRTemp rMt, IRTemp rSt,  /* operands */
1764               UInt rM,    UInt rS      /* only for debug printing */
1765            )
1766{
1767   // shift left in range 0 .. 255
1768   // amt  = rS & 255
1769   // res  = amt < 32 ?  Rm << amt  : 0
1770   // newC = amt == 0     ? oldC  :
1771   //        amt in 1..32 ?  Rm[32-amt]  : 0
1772   IRTemp amtT = newTemp(Ity_I32);
1773   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1774   if (newC) {
1775      /* mux0X(amt == 0,
1776               mux0X(amt < 32,
1777                     0,
1778                     Rm[(32-amt) & 31]),
1779               oldC)
1780      */
1781      /* About the best you can do is pray that iropt is able
1782         to nuke most or all of the following junk. */
1783      IRTemp oldC = newTemp(Ity_I32);
1784      assign(oldC, mk_armg_calculate_flag_c() );
1785      assign(
1786         *newC,
1787         IRExpr_ITE(
1788            binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
1789            mkexpr(oldC),
1790            IRExpr_ITE(
1791               binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
1792               binop(Iop_And32,
1793                     binop(Iop_Shr32,
1794                           mkexpr(rMt),
1795                           unop(Iop_32to8,
1796                                binop(Iop_And32,
1797                                      binop(Iop_Sub32,
1798                                            mkU32(32),
1799                                            mkexpr(amtT)),
1800                                      mkU32(31)
1801                                )
1802                           )
1803                     ),
1804                     mkU32(1)
1805                     ),
1806               mkU32(0)
1807            )
1808         )
1809      );
1810   }
1811   // (Rm << (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
1812   // Lhs of the & limits the shift to 31 bits, so as to
1813   // give known IR semantics.  Rhs of the & is all 1s for
1814   // Rs <= 31 and all 0s for Rs >= 32.
1815   assign(
1816      *res,
1817      binop(
1818         Iop_And32,
1819         binop(Iop_Shl32,
1820               mkexpr(rMt),
1821               unop(Iop_32to8,
1822                    binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
1823         binop(Iop_Sar32,
1824               binop(Iop_Sub32,
1825                     mkexpr(amtT),
1826                     mkU32(32)),
1827               mkU8(31))));
1828    DIS(buf, "r%u, LSL r%u", rM, rS);
1829}
1830
1831
1832static void compute_result_and_C_after_LSR_by_imm5 (
1833               /*OUT*/HChar* buf,
1834               IRTemp* res,
1835               IRTemp* newC,
1836               IRTemp rMt, UInt shift_amt, /* operands */
1837               UInt rM      /* only for debug printing */
1838            )
1839{
1840   if (shift_amt == 0) {
1841      // conceptually a 32-bit shift, however:
1842      // res  = 0
1843      // newC = Rm[31]
1844      if (newC) {
1845         assign( *newC,
1846                 binop(Iop_And32,
1847                       binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
1848                       mkU32(1)));
1849      }
1850      assign( *res, mkU32(0) );
1851      DIS(buf, "r%u, LSR #0(a.k.a. 32)", rM);
1852   } else {
1853      // shift in range 1..31
1854      // res  = Rm >>u shift_amt
1855      // newC = Rm[shift_amt - 1]
1856      vassert(shift_amt >= 1 && shift_amt <= 31);
1857      if (newC) {
1858         assign( *newC,
1859                 binop(Iop_And32,
1860                       binop(Iop_Shr32, mkexpr(rMt),
1861                                        mkU8(shift_amt - 1)),
1862                       mkU32(1)));
1863      }
1864      assign( *res,
1865              binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)) );
1866      DIS(buf, "r%u, LSR #%u", rM, shift_amt);
1867   }
1868}
1869
1870
1871static void compute_result_and_C_after_LSR_by_reg (
1872               /*OUT*/HChar* buf,
1873               IRTemp* res,
1874               IRTemp* newC,
1875               IRTemp rMt, IRTemp rSt,  /* operands */
1876               UInt rM,    UInt rS      /* only for debug printing */
1877            )
1878{
1879   // shift right in range 0 .. 255
1880   // amt = rS & 255
1881   // res  = amt < 32 ?  Rm >>u amt  : 0
1882   // newC = amt == 0     ? oldC  :
1883   //        amt in 1..32 ?  Rm[amt-1]  : 0
1884   IRTemp amtT = newTemp(Ity_I32);
1885   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1886   if (newC) {
1887      /* mux0X(amt == 0,
1888               mux0X(amt < 32,
1889                     0,
1890                     Rm[(amt-1) & 31]),
1891               oldC)
1892      */
1893      IRTemp oldC = newTemp(Ity_I32);
1894      assign(oldC, mk_armg_calculate_flag_c() );
1895      assign(
1896         *newC,
1897         IRExpr_ITE(
1898            binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
1899            mkexpr(oldC),
1900            IRExpr_ITE(
1901               binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
1902               binop(Iop_And32,
1903                     binop(Iop_Shr32,
1904                           mkexpr(rMt),
1905                           unop(Iop_32to8,
1906                                binop(Iop_And32,
1907                                      binop(Iop_Sub32,
1908                                            mkexpr(amtT),
1909                                            mkU32(1)),
1910                                      mkU32(31)
1911                                )
1912                           )
1913                     ),
1914                     mkU32(1)
1915                     ),
1916               mkU32(0)
1917            )
1918         )
1919      );
1920   }
1921   // (Rm >>u (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
1922   // Lhs of the & limits the shift to 31 bits, so as to
1923   // give known IR semantics.  Rhs of the & is all 1s for
1924   // Rs <= 31 and all 0s for Rs >= 32.
1925   assign(
1926      *res,
1927      binop(
1928         Iop_And32,
1929         binop(Iop_Shr32,
1930               mkexpr(rMt),
1931               unop(Iop_32to8,
1932                    binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
1933         binop(Iop_Sar32,
1934               binop(Iop_Sub32,
1935                     mkexpr(amtT),
1936                     mkU32(32)),
1937               mkU8(31))));
1938    DIS(buf, "r%u, LSR r%u", rM, rS);
1939}
1940
1941
1942static void compute_result_and_C_after_ASR_by_imm5 (
1943               /*OUT*/HChar* buf,
1944               IRTemp* res,
1945               IRTemp* newC,
1946               IRTemp rMt, UInt shift_amt, /* operands */
1947               UInt rM      /* only for debug printing */
1948            )
1949{
1950   if (shift_amt == 0) {
1951      // conceptually a 32-bit shift, however:
1952      // res  = Rm >>s 31
1953      // newC = Rm[31]
1954      if (newC) {
1955         assign( *newC,
1956                 binop(Iop_And32,
1957                       binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
1958                       mkU32(1)));
1959      }
1960      assign( *res, binop(Iop_Sar32, mkexpr(rMt), mkU8(31)) );
1961      DIS(buf, "r%u, ASR #0(a.k.a. 32)", rM);
1962   } else {
1963      // shift in range 1..31
1964      // res = Rm >>s shift_amt
1965      // newC = Rm[shift_amt - 1]
1966      vassert(shift_amt >= 1 && shift_amt <= 31);
1967      if (newC) {
1968         assign( *newC,
1969                 binop(Iop_And32,
1970                       binop(Iop_Shr32, mkexpr(rMt),
1971                                        mkU8(shift_amt - 1)),
1972                       mkU32(1)));
1973      }
1974      assign( *res,
1975              binop(Iop_Sar32, mkexpr(rMt), mkU8(shift_amt)) );
1976      DIS(buf, "r%u, ASR #%u", rM, shift_amt);
1977   }
1978}
1979
1980
1981static void compute_result_and_C_after_ASR_by_reg (
1982               /*OUT*/HChar* buf,
1983               IRTemp* res,
1984               IRTemp* newC,
1985               IRTemp rMt, IRTemp rSt,  /* operands */
1986               UInt rM,    UInt rS      /* only for debug printing */
1987            )
1988{
1989   // arithmetic shift right in range 0 .. 255
1990   // amt = rS & 255
1991   // res  = amt < 32 ?  Rm >>s amt  : Rm >>s 31
1992   // newC = amt == 0     ? oldC  :
1993   //        amt in 1..32 ?  Rm[amt-1]  : Rm[31]
1994   IRTemp amtT = newTemp(Ity_I32);
1995   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1996   if (newC) {
1997      /* mux0X(amt == 0,
1998               mux0X(amt < 32,
1999                     Rm[31],
2000                     Rm[(amt-1) & 31])
2001               oldC)
2002      */
2003      IRTemp oldC = newTemp(Ity_I32);
2004      assign(oldC, mk_armg_calculate_flag_c() );
2005      assign(
2006         *newC,
2007         IRExpr_ITE(
2008            binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
2009            mkexpr(oldC),
2010            IRExpr_ITE(
2011               binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
2012               binop(Iop_And32,
2013                     binop(Iop_Shr32,
2014                           mkexpr(rMt),
2015                           unop(Iop_32to8,
2016                                binop(Iop_And32,
2017                                      binop(Iop_Sub32,
2018                                            mkexpr(amtT),
2019                                            mkU32(1)),
2020                                      mkU32(31)
2021                                )
2022                           )
2023                     ),
2024                     mkU32(1)
2025                     ),
2026               binop(Iop_And32,
2027                     binop(Iop_Shr32,
2028                           mkexpr(rMt),
2029                           mkU8(31)
2030                     ),
2031                     mkU32(1)
2032               )
2033            )
2034         )
2035      );
2036   }
2037   // (Rm >>s (amt <u 32 ? amt : 31))
2038   assign(
2039      *res,
2040      binop(
2041         Iop_Sar32,
2042         mkexpr(rMt),
2043         unop(
2044            Iop_32to8,
2045            IRExpr_ITE(
2046               binop(Iop_CmpLT32U, mkexpr(amtT), mkU32(32)),
2047               mkexpr(amtT),
2048               mkU32(31)))));
2049    DIS(buf, "r%u, ASR r%u", rM, rS);
2050}
2051
2052
2053static void compute_result_and_C_after_ROR_by_reg (
2054               /*OUT*/HChar* buf,
2055               IRTemp* res,
2056               IRTemp* newC,
2057               IRTemp rMt, IRTemp rSt,  /* operands */
2058               UInt rM,    UInt rS      /* only for debug printing */
2059            )
2060{
2061   // rotate right in range 0 .. 255
2062   // amt = rS & 255
2063   // shop =  Rm `ror` (amt & 31)
2064   // shco =  amt == 0 ? oldC : Rm[(amt-1) & 31]
2065   IRTemp amtT = newTemp(Ity_I32);
2066   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
2067   IRTemp amt5T = newTemp(Ity_I32);
2068   assign( amt5T, binop(Iop_And32, mkexpr(rSt), mkU32(31)) );
2069   IRTemp oldC = newTemp(Ity_I32);
2070   assign(oldC, mk_armg_calculate_flag_c() );
2071   if (newC) {
2072      assign(
2073         *newC,
2074         IRExpr_ITE(
2075            binop(Iop_CmpNE32, mkexpr(amtT), mkU32(0)),
2076            binop(Iop_And32,
2077                  binop(Iop_Shr32,
2078                        mkexpr(rMt),
2079                        unop(Iop_32to8,
2080                             binop(Iop_And32,
2081                                   binop(Iop_Sub32,
2082                                         mkexpr(amtT),
2083                                         mkU32(1)
2084                                   ),
2085                                   mkU32(31)
2086                             )
2087                        )
2088                  ),
2089                  mkU32(1)
2090            ),
2091            mkexpr(oldC)
2092         )
2093      );
2094   }
2095   assign(
2096      *res,
2097      IRExpr_ITE(
2098         binop(Iop_CmpNE32, mkexpr(amt5T), mkU32(0)),
2099         binop(Iop_Or32,
2100               binop(Iop_Shr32,
2101                     mkexpr(rMt),
2102                     unop(Iop_32to8, mkexpr(amt5T))
2103               ),
2104               binop(Iop_Shl32,
2105                     mkexpr(rMt),
2106                     unop(Iop_32to8,
2107                          binop(Iop_Sub32, mkU32(32), mkexpr(amt5T))
2108                     )
2109               )
2110               ),
2111         mkexpr(rMt)
2112      )
2113   );
2114   DIS(buf, "r%u, ROR r#%u", rM, rS);
2115}
2116
2117
2118/* Generate an expression corresponding to the immediate-shift case of
2119   a shifter operand.  This is used both for ARM and Thumb2.
2120
2121   Bind it to a temporary, and return that via *res.  If newC is
2122   non-NULL, also compute a value for the shifter's carry out (in the
2123   LSB of a word), bind it to a temporary, and return that via *shco.
2124
2125   Generates GETs from the guest state and is therefore not safe to
2126   use once we start doing PUTs to it, for any given instruction.
2127
2128   'how' is encoded thusly:
2129      00b LSL,  01b LSR,  10b ASR,  11b ROR
2130   Most but not all ARM and Thumb integer insns use this encoding.
2131   Be careful to ensure the right value is passed here.
2132*/
2133static void compute_result_and_C_after_shift_by_imm5 (
2134               /*OUT*/HChar* buf,
2135               /*OUT*/IRTemp* res,
2136               /*OUT*/IRTemp* newC,
2137               IRTemp  rMt,       /* reg to shift */
2138               UInt    how,       /* what kind of shift */
2139               UInt    shift_amt, /* shift amount (0..31) */
2140               UInt    rM         /* only for debug printing */
2141            )
2142{
2143   vassert(shift_amt < 32);
2144   vassert(how < 4);
2145
2146   switch (how) {
2147
2148      case 0:
2149         compute_result_and_C_after_LSL_by_imm5(
2150            buf, res, newC, rMt, shift_amt, rM
2151         );
2152         break;
2153
2154      case 1:
2155         compute_result_and_C_after_LSR_by_imm5(
2156            buf, res, newC, rMt, shift_amt, rM
2157         );
2158         break;
2159
2160      case 2:
2161         compute_result_and_C_after_ASR_by_imm5(
2162            buf, res, newC, rMt, shift_amt, rM
2163         );
2164         break;
2165
2166      case 3:
2167         if (shift_amt == 0) {
2168            IRTemp oldcT = newTemp(Ity_I32);
2169            // rotate right 1 bit through carry (?)
2170            // RRX -- described at ARM ARM A5-17
2171            // res  = (oldC << 31) | (Rm >>u 1)
2172            // newC = Rm[0]
2173            if (newC) {
2174               assign( *newC,
2175                       binop(Iop_And32, mkexpr(rMt), mkU32(1)));
2176            }
2177            assign( oldcT, mk_armg_calculate_flag_c() );
2178            assign( *res,
2179                    binop(Iop_Or32,
2180                          binop(Iop_Shl32, mkexpr(oldcT), mkU8(31)),
2181                          binop(Iop_Shr32, mkexpr(rMt), mkU8(1))) );
2182            DIS(buf, "r%u, RRX", rM);
2183         } else {
2184            // rotate right in range 1..31
2185            // res  = Rm `ror` shift_amt
2186            // newC = Rm[shift_amt - 1]
2187            vassert(shift_amt >= 1 && shift_amt <= 31);
2188            if (newC) {
2189               assign( *newC,
2190                       binop(Iop_And32,
2191                             binop(Iop_Shr32, mkexpr(rMt),
2192                                              mkU8(shift_amt - 1)),
2193                             mkU32(1)));
2194            }
2195            assign( *res,
2196                    binop(Iop_Or32,
2197                          binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)),
2198                          binop(Iop_Shl32, mkexpr(rMt),
2199                                           mkU8(32-shift_amt))));
2200            DIS(buf, "r%u, ROR #%u", rM, shift_amt);
2201         }
2202         break;
2203
2204      default:
2205         /*NOTREACHED*/
2206         vassert(0);
2207   }
2208}
2209
2210
2211/* Generate an expression corresponding to the register-shift case of
2212   a shifter operand.  This is used both for ARM and Thumb2.
2213
2214   Bind it to a temporary, and return that via *res.  If newC is
2215   non-NULL, also compute a value for the shifter's carry out (in the
2216   LSB of a word), bind it to a temporary, and return that via *shco.
2217
2218   Generates GETs from the guest state and is therefore not safe to
2219   use once we start doing PUTs to it, for any given instruction.
2220
2221   'how' is encoded thusly:
2222      00b LSL,  01b LSR,  10b ASR,  11b ROR
2223   Most but not all ARM and Thumb integer insns use this encoding.
2224   Be careful to ensure the right value is passed here.
2225*/
2226static void compute_result_and_C_after_shift_by_reg (
2227               /*OUT*/HChar*  buf,
2228               /*OUT*/IRTemp* res,
2229               /*OUT*/IRTemp* newC,
2230               IRTemp  rMt,       /* reg to shift */
2231               UInt    how,       /* what kind of shift */
2232               IRTemp  rSt,       /* shift amount */
2233               UInt    rM,        /* only for debug printing */
2234               UInt    rS         /* only for debug printing */
2235            )
2236{
2237   vassert(how < 4);
2238   switch (how) {
2239      case 0: { /* LSL */
2240         compute_result_and_C_after_LSL_by_reg(
2241            buf, res, newC, rMt, rSt, rM, rS
2242         );
2243         break;
2244      }
2245      case 1: { /* LSR */
2246         compute_result_and_C_after_LSR_by_reg(
2247            buf, res, newC, rMt, rSt, rM, rS
2248         );
2249         break;
2250      }
2251      case 2: { /* ASR */
2252         compute_result_and_C_after_ASR_by_reg(
2253            buf, res, newC, rMt, rSt, rM, rS
2254         );
2255         break;
2256      }
2257      case 3: { /* ROR */
2258         compute_result_and_C_after_ROR_by_reg(
2259             buf, res, newC, rMt, rSt, rM, rS
2260         );
2261         break;
2262      }
2263      default:
2264         /*NOTREACHED*/
2265         vassert(0);
2266   }
2267}
2268
2269
2270/* Generate an expression corresponding to a shifter_operand, bind it
2271   to a temporary, and return that via *shop.  If shco is non-NULL,
2272   also compute a value for the shifter's carry out (in the LSB of a
2273   word), bind it to a temporary, and return that via *shco.
2274
2275   If for some reason we can't come up with a shifter operand (missing
2276   case?  not really a shifter operand?) return False.
2277
2278   Generates GETs from the guest state and is therefore not safe to
2279   use once we start doing PUTs to it, for any given instruction.
2280
2281   For ARM insns only; not for Thumb.
2282*/
2283static Bool mk_shifter_operand ( UInt insn_25, UInt insn_11_0,
2284                                 /*OUT*/IRTemp* shop,
2285                                 /*OUT*/IRTemp* shco,
2286                                 /*OUT*/HChar* buf )
2287{
2288   UInt insn_4 = (insn_11_0 >> 4) & 1;
2289   UInt insn_7 = (insn_11_0 >> 7) & 1;
2290   vassert(insn_25 <= 0x1);
2291   vassert(insn_11_0 <= 0xFFF);
2292
2293   vassert(shop && *shop == IRTemp_INVALID);
2294   *shop = newTemp(Ity_I32);
2295
2296   if (shco) {
2297      vassert(*shco == IRTemp_INVALID);
2298      *shco = newTemp(Ity_I32);
2299   }
2300
2301   /* 32-bit immediate */
2302
2303   if (insn_25 == 1) {
2304      /* immediate: (7:0) rotated right by 2 * (11:8) */
2305      UInt imm = (insn_11_0 >> 0) & 0xFF;
2306      UInt rot = 2 * ((insn_11_0 >> 8) & 0xF);
2307      vassert(rot <= 30);
2308      imm = ROR32(imm, rot);
2309      if (shco) {
2310         if (rot == 0) {
2311            assign( *shco, mk_armg_calculate_flag_c() );
2312         } else {
2313            assign( *shco, mkU32( (imm >> 31) & 1 ) );
2314         }
2315      }
2316      DIS(buf, "#0x%x", imm);
2317      assign( *shop, mkU32(imm) );
2318      return True;
2319   }
2320
2321   /* Shift/rotate by immediate */
2322
2323   if (insn_25 == 0 && insn_4 == 0) {
2324      /* Rm (3:0) shifted (6:5) by immediate (11:7) */
2325      UInt shift_amt = (insn_11_0 >> 7) & 0x1F;
2326      UInt rM        = (insn_11_0 >> 0) & 0xF;
2327      UInt how       = (insn_11_0 >> 5) & 3;
2328      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
2329      IRTemp rMt = newTemp(Ity_I32);
2330      assign(rMt, getIRegA(rM));
2331
2332      vassert(shift_amt <= 31);
2333
2334      compute_result_and_C_after_shift_by_imm5(
2335         buf, shop, shco, rMt, how, shift_amt, rM
2336      );
2337      return True;
2338   }
2339
2340   /* Shift/rotate by register */
2341   if (insn_25 == 0 && insn_4 == 1) {
2342      /* Rm (3:0) shifted (6:5) by Rs (11:8) */
2343      UInt rM  = (insn_11_0 >> 0) & 0xF;
2344      UInt rS  = (insn_11_0 >> 8) & 0xF;
2345      UInt how = (insn_11_0 >> 5) & 3;
2346      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
2347      IRTemp rMt = newTemp(Ity_I32);
2348      IRTemp rSt = newTemp(Ity_I32);
2349
2350      if (insn_7 == 1)
2351         return False; /* not really a shifter operand */
2352
2353      assign(rMt, getIRegA(rM));
2354      assign(rSt, getIRegA(rS));
2355
2356      compute_result_and_C_after_shift_by_reg(
2357         buf, shop, shco, rMt, how, rSt, rM, rS
2358      );
2359      return True;
2360   }
2361
2362   vex_printf("mk_shifter_operand(0x%x,0x%x)\n", insn_25, insn_11_0 );
2363   return False;
2364}
2365
2366
2367/* ARM only */
2368static
2369IRExpr* mk_EA_reg_plusminus_imm12 ( UInt rN, UInt bU, UInt imm12,
2370                                    /*OUT*/HChar* buf )
2371{
2372   vassert(rN < 16);
2373   vassert(bU < 2);
2374   vassert(imm12 < 0x1000);
2375   HChar opChar = bU == 1 ? '+' : '-';
2376   DIS(buf, "[r%u, #%c%u]", rN, opChar, imm12);
2377   return
2378      binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2379             getIRegA(rN),
2380             mkU32(imm12) );
2381}
2382
2383
2384/* ARM only.
2385   NB: This is "DecodeImmShift" in newer versions of the the ARM ARM.
2386*/
2387static
2388IRExpr* mk_EA_reg_plusminus_shifted_reg ( UInt rN, UInt bU, UInt rM,
2389                                          UInt sh2, UInt imm5,
2390                                          /*OUT*/HChar* buf )
2391{
2392   vassert(rN < 16);
2393   vassert(bU < 2);
2394   vassert(rM < 16);
2395   vassert(sh2 < 4);
2396   vassert(imm5 < 32);
2397   HChar   opChar = bU == 1 ? '+' : '-';
2398   IRExpr* index  = NULL;
2399   switch (sh2) {
2400      case 0: /* LSL */
2401         /* imm5 can be in the range 0 .. 31 inclusive. */
2402         index = binop(Iop_Shl32, getIRegA(rM), mkU8(imm5));
2403         DIS(buf, "[r%u, %c r%u LSL #%u]", rN, opChar, rM, imm5);
2404         break;
2405      case 1: /* LSR */
2406         if (imm5 == 0) {
2407            index = mkU32(0);
2408            vassert(0); // ATC
2409         } else {
2410            index = binop(Iop_Shr32, getIRegA(rM), mkU8(imm5));
2411         }
2412         DIS(buf, "[r%u, %cr%u, LSR #%u]",
2413                  rN, opChar, rM, imm5 == 0 ? 32 : imm5);
2414         break;
2415      case 2: /* ASR */
2416         /* Doesn't this just mean that the behaviour with imm5 == 0
2417            is the same as if it had been 31 ? */
2418         if (imm5 == 0) {
2419            index = binop(Iop_Sar32, getIRegA(rM), mkU8(31));
2420            vassert(0); // ATC
2421         } else {
2422            index = binop(Iop_Sar32, getIRegA(rM), mkU8(imm5));
2423         }
2424         DIS(buf, "[r%u, %cr%u, ASR #%u]",
2425                  rN, opChar, rM, imm5 == 0 ? 32 : imm5);
2426         break;
2427      case 3: /* ROR or RRX */
2428         if (imm5 == 0) {
2429            IRTemp rmT    = newTemp(Ity_I32);
2430            IRTemp cflagT = newTemp(Ity_I32);
2431            assign(rmT, getIRegA(rM));
2432            assign(cflagT, mk_armg_calculate_flag_c());
2433            index = binop(Iop_Or32,
2434                          binop(Iop_Shl32, mkexpr(cflagT), mkU8(31)),
2435                          binop(Iop_Shr32, mkexpr(rmT), mkU8(1)));
2436            DIS(buf, "[r%u, %cr%u, RRX]", rN, opChar, rM);
2437         } else {
2438            IRTemp rmT = newTemp(Ity_I32);
2439            assign(rmT, getIRegA(rM));
2440            vassert(imm5 >= 1 && imm5 <= 31);
2441            index = binop(Iop_Or32,
2442                          binop(Iop_Shl32, mkexpr(rmT), mkU8(32-imm5)),
2443                          binop(Iop_Shr32, mkexpr(rmT), mkU8(imm5)));
2444            DIS(buf, "[r%u, %cr%u, ROR #%u]", rN, opChar, rM, imm5);
2445         }
2446         break;
2447      default:
2448         vassert(0);
2449   }
2450   vassert(index);
2451   return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
2452                getIRegA(rN), index);
2453}
2454
2455
2456/* ARM only */
2457static
2458IRExpr* mk_EA_reg_plusminus_imm8 ( UInt rN, UInt bU, UInt imm8,
2459                                   /*OUT*/HChar* buf )
2460{
2461   vassert(rN < 16);
2462   vassert(bU < 2);
2463   vassert(imm8 < 0x100);
2464   HChar opChar = bU == 1 ? '+' : '-';
2465   DIS(buf, "[r%u, #%c%u]", rN, opChar, imm8);
2466   return
2467      binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2468             getIRegA(rN),
2469             mkU32(imm8) );
2470}
2471
2472
2473/* ARM only */
2474static
2475IRExpr* mk_EA_reg_plusminus_reg ( UInt rN, UInt bU, UInt rM,
2476                                  /*OUT*/HChar* buf )
2477{
2478   vassert(rN < 16);
2479   vassert(bU < 2);
2480   vassert(rM < 16);
2481   HChar   opChar = bU == 1 ? '+' : '-';
2482   IRExpr* index  = getIRegA(rM);
2483   DIS(buf, "[r%u, %c r%u]", rN, opChar, rM);
2484   return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
2485                getIRegA(rN), index);
2486}
2487
2488
2489/* irRes :: Ity_I32 holds a floating point comparison result encoded
2490   as an IRCmpF64Result.  Generate code to convert it to an
2491   ARM-encoded (N,Z,C,V) group in the lowest 4 bits of an I32 value.
2492   Assign a new temp to hold that value, and return the temp. */
2493static
2494IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes )
2495{
2496   IRTemp ix       = newTemp(Ity_I32);
2497   IRTemp termL    = newTemp(Ity_I32);
2498   IRTemp termR    = newTemp(Ity_I32);
2499   IRTemp nzcv     = newTemp(Ity_I32);
2500
2501   /* This is where the fun starts.  We have to convert 'irRes' from
2502      an IR-convention return result (IRCmpF64Result) to an
2503      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
2504      4 bits of 'nzcv'. */
2505   /* Map compare result from IR to ARM(nzcv) */
2506   /*
2507      FP cmp result | IR   | ARM(nzcv)
2508      --------------------------------
2509      UN              0x45   0011
2510      LT              0x01   1000
2511      GT              0x00   0010
2512      EQ              0x40   0110
2513   */
2514   /* Now since you're probably wondering WTF ..
2515
2516      ix fishes the useful bits out of the IR value, bits 6 and 0, and
2517      places them side by side, giving a number which is 0, 1, 2 or 3.
2518
2519      termL is a sequence cooked up by GNU superopt.  It converts ix
2520         into an almost correct value NZCV value (incredibly), except
2521         for the case of UN, where it produces 0100 instead of the
2522         required 0011.
2523
2524      termR is therefore a correction term, also computed from ix.  It
2525         is 1 in the UN case and 0 for LT, GT and UN.  Hence, to get
2526         the final correct value, we subtract termR from termL.
2527
2528      Don't take my word for it.  There's a test program at the bottom
2529      of this file, to try this out with.
2530   */
2531   assign(
2532      ix,
2533      binop(Iop_Or32,
2534            binop(Iop_And32,
2535                  binop(Iop_Shr32, mkexpr(irRes), mkU8(5)),
2536                  mkU32(3)),
2537            binop(Iop_And32, mkexpr(irRes), mkU32(1))));
2538
2539   assign(
2540      termL,
2541      binop(Iop_Add32,
2542            binop(Iop_Shr32,
2543                  binop(Iop_Sub32,
2544                        binop(Iop_Shl32,
2545                              binop(Iop_Xor32, mkexpr(ix), mkU32(1)),
2546                              mkU8(30)),
2547                        mkU32(1)),
2548                  mkU8(29)),
2549            mkU32(1)));
2550
2551   assign(
2552      termR,
2553      binop(Iop_And32,
2554            binop(Iop_And32,
2555                  mkexpr(ix),
2556                  binop(Iop_Shr32, mkexpr(ix), mkU8(1))),
2557            mkU32(1)));
2558
2559   assign(nzcv, binop(Iop_Sub32, mkexpr(termL), mkexpr(termR)));
2560   return nzcv;
2561}
2562
2563
2564/* Thumb32 only.  This is "ThumbExpandImm" in the ARM ARM.  If
2565   updatesC is non-NULL, a boolean is written to it indicating whether
2566   or not the C flag is updated, as per ARM ARM "ThumbExpandImm_C".
2567*/
2568static UInt thumbExpandImm ( Bool* updatesC,
2569                             UInt imm1, UInt imm3, UInt imm8 )
2570{
2571   vassert(imm1 < (1<<1));
2572   vassert(imm3 < (1<<3));
2573   vassert(imm8 < (1<<8));
2574   UInt i_imm3_a = (imm1 << 4) | (imm3 << 1) | ((imm8 >> 7) & 1);
2575   UInt abcdefgh = imm8;
2576   UInt lbcdefgh = imm8 | 0x80;
2577   if (updatesC) {
2578      *updatesC = i_imm3_a >= 8;
2579   }
2580   switch (i_imm3_a) {
2581      case 0: case 1:
2582         return abcdefgh;
2583      case 2: case 3:
2584         return (abcdefgh << 16) | abcdefgh;
2585      case 4: case 5:
2586         return (abcdefgh << 24) | (abcdefgh << 8);
2587      case 6: case 7:
2588         return (abcdefgh << 24) | (abcdefgh << 16)
2589                | (abcdefgh << 8) | abcdefgh;
2590      case 8 ... 31:
2591         return lbcdefgh << (32 - i_imm3_a);
2592      default:
2593         break;
2594   }
2595   /*NOTREACHED*/vassert(0);
2596}
2597
2598
2599/* Version of thumbExpandImm where we simply feed it the
2600   instruction halfwords (the lowest addressed one is I0). */
2601static UInt thumbExpandImm_from_I0_I1 ( Bool* updatesC,
2602                                        UShort i0s, UShort i1s )
2603{
2604   UInt i0    = (UInt)i0s;
2605   UInt i1    = (UInt)i1s;
2606   UInt imm1  = SLICE_UInt(i0,10,10);
2607   UInt imm3  = SLICE_UInt(i1,14,12);
2608   UInt imm8  = SLICE_UInt(i1,7,0);
2609   return thumbExpandImm(updatesC, imm1, imm3, imm8);
2610}
2611
2612
2613/* Thumb16 only.  Given the firstcond and mask fields from an IT
2614   instruction, compute the 32-bit ITSTATE value implied, as described
2615   in libvex_guest_arm.h.  This is not the ARM ARM representation.
2616   Also produce the t/e chars for the 2nd, 3rd, 4th insns, for
2617   disassembly printing.  Returns False if firstcond or mask
2618   denote something invalid.
2619
2620   The number and conditions for the instructions to be
2621   conditionalised depend on firstcond and mask:
2622
2623   mask      cond 1    cond 2      cond 3      cond 4
2624
2625   1000      fc[3:0]
2626   x100      fc[3:0]   fc[3:1]:x
2627   xy10      fc[3:0]   fc[3:1]:x   fc[3:1]:y
2628   xyz1      fc[3:0]   fc[3:1]:x   fc[3:1]:y   fc[3:1]:z
2629
2630   The condition fields are assembled in *itstate backwards (cond 4 at
2631   the top, cond 1 at the bottom).  Conditions are << 4'd and then
2632   ^0xE'd, and those fields that correspond to instructions in the IT
2633   block are tagged with a 1 bit.
2634*/
2635static Bool compute_ITSTATE ( /*OUT*/UInt*  itstate,
2636                              /*OUT*/HChar* ch1,
2637                              /*OUT*/HChar* ch2,
2638                              /*OUT*/HChar* ch3,
2639                              UInt firstcond, UInt mask )
2640{
2641   vassert(firstcond <= 0xF);
2642   vassert(mask <= 0xF);
2643   *itstate = 0;
2644   *ch1 = *ch2 = *ch3 = '.';
2645   if (mask == 0)
2646      return False; /* the logic below actually ensures this anyway,
2647                       but clearer to make it explicit. */
2648   if (firstcond == 0xF)
2649      return False; /* NV is not allowed */
2650   if (firstcond == 0xE && popcount32(mask) != 1)
2651      return False; /* if firstcond is AL then all the rest must be too */
2652
2653   UInt m3 = (mask >> 3) & 1;
2654   UInt m2 = (mask >> 2) & 1;
2655   UInt m1 = (mask >> 1) & 1;
2656   UInt m0 = (mask >> 0) & 1;
2657
2658   UInt fc = (firstcond << 4) | 1/*in-IT-block*/;
2659   UInt ni = (0xE/*AL*/ << 4) | 0/*not-in-IT-block*/;
2660
2661   if (m3 == 1 && (m2|m1|m0) == 0) {
2662      *itstate = (ni << 24) | (ni << 16) | (ni << 8) | fc;
2663      *itstate ^= 0xE0E0E0E0;
2664      return True;
2665   }
2666
2667   if (m2 == 1 && (m1|m0) == 0) {
2668      *itstate = (ni << 24) | (ni << 16) | (setbit32(fc, 4, m3) << 8) | fc;
2669      *itstate ^= 0xE0E0E0E0;
2670      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2671      return True;
2672   }
2673
2674   if (m1 == 1 && m0 == 0) {
2675      *itstate = (ni << 24)
2676                 | (setbit32(fc, 4, m2) << 16)
2677                 | (setbit32(fc, 4, m3) << 8) | fc;
2678      *itstate ^= 0xE0E0E0E0;
2679      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2680      *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
2681      return True;
2682   }
2683
2684   if (m0 == 1) {
2685      *itstate = (setbit32(fc, 4, m1) << 24)
2686                 | (setbit32(fc, 4, m2) << 16)
2687                 | (setbit32(fc, 4, m3) << 8) | fc;
2688      *itstate ^= 0xE0E0E0E0;
2689      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2690      *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
2691      *ch3 = m1 == (firstcond & 1) ? 't' : 'e';
2692      return True;
2693   }
2694
2695   return False;
2696}
2697
2698
2699/* Generate IR to do 32-bit bit reversal, a la Hacker's Delight
2700   Chapter 7 Section 1. */
2701static IRTemp gen_BITREV ( IRTemp x0 )
2702{
2703   IRTemp x1 = newTemp(Ity_I32);
2704   IRTemp x2 = newTemp(Ity_I32);
2705   IRTemp x3 = newTemp(Ity_I32);
2706   IRTemp x4 = newTemp(Ity_I32);
2707   IRTemp x5 = newTemp(Ity_I32);
2708   UInt   c1 = 0x55555555;
2709   UInt   c2 = 0x33333333;
2710   UInt   c3 = 0x0F0F0F0F;
2711   UInt   c4 = 0x00FF00FF;
2712   UInt   c5 = 0x0000FFFF;
2713   assign(x1,
2714          binop(Iop_Or32,
2715                binop(Iop_Shl32,
2716                      binop(Iop_And32, mkexpr(x0), mkU32(c1)),
2717                      mkU8(1)),
2718                binop(Iop_Shr32,
2719                      binop(Iop_And32, mkexpr(x0), mkU32(~c1)),
2720                      mkU8(1))
2721   ));
2722   assign(x2,
2723          binop(Iop_Or32,
2724                binop(Iop_Shl32,
2725                      binop(Iop_And32, mkexpr(x1), mkU32(c2)),
2726                      mkU8(2)),
2727                binop(Iop_Shr32,
2728                      binop(Iop_And32, mkexpr(x1), mkU32(~c2)),
2729                      mkU8(2))
2730   ));
2731   assign(x3,
2732          binop(Iop_Or32,
2733                binop(Iop_Shl32,
2734                      binop(Iop_And32, mkexpr(x2), mkU32(c3)),
2735                      mkU8(4)),
2736                binop(Iop_Shr32,
2737                      binop(Iop_And32, mkexpr(x2), mkU32(~c3)),
2738                      mkU8(4))
2739   ));
2740   assign(x4,
2741          binop(Iop_Or32,
2742                binop(Iop_Shl32,
2743                      binop(Iop_And32, mkexpr(x3), mkU32(c4)),
2744                      mkU8(8)),
2745                binop(Iop_Shr32,
2746                      binop(Iop_And32, mkexpr(x3), mkU32(~c4)),
2747                      mkU8(8))
2748   ));
2749   assign(x5,
2750          binop(Iop_Or32,
2751                binop(Iop_Shl32,
2752                      binop(Iop_And32, mkexpr(x4), mkU32(c5)),
2753                      mkU8(16)),
2754                binop(Iop_Shr32,
2755                      binop(Iop_And32, mkexpr(x4), mkU32(~c5)),
2756                      mkU8(16))
2757   ));
2758   return x5;
2759}
2760
2761
2762/* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
2763   0:1:2:3 (aka byte-swap). */
2764static IRTemp gen_REV ( IRTemp arg )
2765{
2766   IRTemp res = newTemp(Ity_I32);
2767   assign(res,
2768          binop(Iop_Or32,
2769                binop(Iop_Shl32, mkexpr(arg), mkU8(24)),
2770          binop(Iop_Or32,
2771                binop(Iop_And32, binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
2772                                 mkU32(0x00FF0000)),
2773          binop(Iop_Or32,
2774                binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
2775                                       mkU32(0x0000FF00)),
2776                binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(24)),
2777                                       mkU32(0x000000FF) )
2778   ))));
2779   return res;
2780}
2781
2782
2783/* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
2784   2:3:0:1 (swap within lo and hi halves). */
2785static IRTemp gen_REV16 ( IRTemp arg )
2786{
2787   IRTemp res = newTemp(Ity_I32);
2788   assign(res,
2789          binop(Iop_Or32,
2790                binop(Iop_And32,
2791                      binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
2792                      mkU32(0xFF00FF00)),
2793                binop(Iop_And32,
2794                      binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
2795                      mkU32(0x00FF00FF))));
2796   return res;
2797}
2798
2799
2800/*------------------------------------------------------------*/
2801/*--- Advanced SIMD (NEON) instructions                    ---*/
2802/*------------------------------------------------------------*/
2803
2804/*------------------------------------------------------------*/
2805/*--- NEON data processing                                 ---*/
2806/*------------------------------------------------------------*/
2807
2808/* For all NEON DP ops, we use the normal scheme to handle conditional
2809   writes to registers -- pass in condT and hand that on to the
2810   put*Reg functions.  In ARM mode condT is always IRTemp_INVALID
2811   since NEON is unconditional for ARM.  In Thumb mode condT is
2812   derived from the ITSTATE shift register in the normal way. */
2813
2814static
2815UInt get_neon_d_regno(UInt theInstr)
2816{
2817   UInt x = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
2818   if (theInstr & 0x40) {
2819      if (x & 1) {
2820         x = x + 0x100;
2821      } else {
2822         x = x >> 1;
2823      }
2824   }
2825   return x;
2826}
2827
2828static
2829UInt get_neon_n_regno(UInt theInstr)
2830{
2831   UInt x = ((theInstr >> 3) & 0x10) | ((theInstr >> 16) & 0xF);
2832   if (theInstr & 0x40) {
2833      if (x & 1) {
2834         x = x + 0x100;
2835      } else {
2836         x = x >> 1;
2837      }
2838   }
2839   return x;
2840}
2841
2842static
2843UInt get_neon_m_regno(UInt theInstr)
2844{
2845   UInt x = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
2846   if (theInstr & 0x40) {
2847      if (x & 1) {
2848         x = x + 0x100;
2849      } else {
2850         x = x >> 1;
2851      }
2852   }
2853   return x;
2854}
2855
2856static
2857Bool dis_neon_vext ( UInt theInstr, IRTemp condT )
2858{
2859   UInt dreg = get_neon_d_regno(theInstr);
2860   UInt mreg = get_neon_m_regno(theInstr);
2861   UInt nreg = get_neon_n_regno(theInstr);
2862   UInt imm4 = (theInstr >> 8) & 0xf;
2863   UInt Q = (theInstr >> 6) & 1;
2864   HChar reg_t = Q ? 'q' : 'd';
2865
2866   if (Q) {
2867      putQReg(dreg, triop(Iop_ExtractV128, getQReg(nreg),
2868               getQReg(mreg), mkU8(imm4)), condT);
2869   } else {
2870      putDRegI64(dreg, triop(Iop_Extract64, getDRegI64(nreg),
2871                 getDRegI64(mreg), mkU8(imm4)), condT);
2872   }
2873   DIP("vext.8 %c%d, %c%d, %c%d, #%d\n", reg_t, dreg, reg_t, nreg,
2874                                         reg_t, mreg, imm4);
2875   return True;
2876}
2877
2878/* Generate specific vector FP binary ops, possibly with a fake
2879   rounding mode as required by the primop. */
2880static
2881IRExpr* binop_w_fake_RM ( IROp op, IRExpr* argL, IRExpr* argR )
2882{
2883   switch (op) {
2884      case Iop_Add32Fx4:
2885      case Iop_Sub32Fx4:
2886      case Iop_Mul32Fx4:
2887         return triop(op, get_FAKE_roundingmode(), argL, argR );
2888      case Iop_Add32x4: case Iop_Add16x8:
2889      case Iop_Sub32x4: case Iop_Sub16x8:
2890      case Iop_Mul32x4: case Iop_Mul16x8:
2891      case Iop_Mul32x2: case Iop_Mul16x4:
2892      case Iop_Add32Fx2:
2893      case Iop_Sub32Fx2:
2894      case Iop_Mul32Fx2:
2895      case Iop_PwAdd32Fx2:
2896         return binop(op, argL, argR);
2897      default:
2898        ppIROp(op);
2899        vassert(0);
2900   }
2901}
2902
2903/* VTBL, VTBX */
2904static
2905Bool dis_neon_vtb ( UInt theInstr, IRTemp condT )
2906{
2907   UInt op = (theInstr >> 6) & 1;
2908   UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
2909   UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
2910   UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
2911   UInt len = (theInstr >> 8) & 3;
2912   Int i;
2913   IROp cmp;
2914   ULong imm;
2915   IRTemp arg_l;
2916   IRTemp old_mask, new_mask, cur_mask;
2917   IRTemp old_res, new_res;
2918   IRTemp old_arg, new_arg;
2919
2920   if (dreg >= 0x100 || mreg >= 0x100 || nreg >= 0x100)
2921      return False;
2922   if (nreg + len > 31)
2923      return False;
2924
2925   cmp = Iop_CmpGT8Ux8;
2926
2927   old_mask = newTemp(Ity_I64);
2928   old_res = newTemp(Ity_I64);
2929   old_arg = newTemp(Ity_I64);
2930   assign(old_mask, mkU64(0));
2931   assign(old_res, mkU64(0));
2932   assign(old_arg, getDRegI64(mreg));
2933   imm = 8;
2934   imm = (imm <<  8) | imm;
2935   imm = (imm << 16) | imm;
2936   imm = (imm << 32) | imm;
2937
2938   for (i = 0; i <= len; i++) {
2939      arg_l = newTemp(Ity_I64);
2940      new_mask = newTemp(Ity_I64);
2941      cur_mask = newTemp(Ity_I64);
2942      new_res = newTemp(Ity_I64);
2943      new_arg = newTemp(Ity_I64);
2944      assign(arg_l, getDRegI64(nreg+i));
2945      assign(new_arg, binop(Iop_Sub8x8, mkexpr(old_arg), mkU64(imm)));
2946      assign(cur_mask, binop(cmp, mkU64(imm), mkexpr(old_arg)));
2947      assign(new_mask, binop(Iop_Or64, mkexpr(old_mask), mkexpr(cur_mask)));
2948      assign(new_res, binop(Iop_Or64,
2949                            mkexpr(old_res),
2950                            binop(Iop_And64,
2951                                  binop(Iop_Perm8x8,
2952                                        mkexpr(arg_l),
2953                                        binop(Iop_And64,
2954                                              mkexpr(old_arg),
2955                                              mkexpr(cur_mask))),
2956                                  mkexpr(cur_mask))));
2957
2958      old_arg = new_arg;
2959      old_mask = new_mask;
2960      old_res = new_res;
2961   }
2962   if (op) {
2963      new_res = newTemp(Ity_I64);
2964      assign(new_res, binop(Iop_Or64,
2965                            binop(Iop_And64,
2966                                  getDRegI64(dreg),
2967                                  unop(Iop_Not64, mkexpr(old_mask))),
2968                            mkexpr(old_res)));
2969      old_res = new_res;
2970   }
2971
2972   putDRegI64(dreg, mkexpr(old_res), condT);
2973   DIP("vtb%c.8 d%u, {", op ? 'x' : 'l', dreg);
2974   if (len > 0) {
2975      DIP("d%u-d%u", nreg, nreg + len);
2976   } else {
2977      DIP("d%u", nreg);
2978   }
2979   DIP("}, d%u\n", mreg);
2980   return True;
2981}
2982
2983/* VDUP (scalar)  */
2984static
2985Bool dis_neon_vdup ( UInt theInstr, IRTemp condT )
2986{
2987   UInt Q = (theInstr >> 6) & 1;
2988   UInt dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
2989   UInt mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
2990   UInt imm4 = (theInstr >> 16) & 0xF;
2991   UInt index;
2992   UInt size;
2993   IRTemp arg_m;
2994   IRTemp res;
2995   IROp op, op2;
2996
2997   if ((imm4 == 0) || (imm4 == 8))
2998      return False;
2999   if ((Q == 1) && ((dreg & 1) == 1))
3000      return False;
3001   if (Q)
3002      dreg >>= 1;
3003   arg_m = newTemp(Ity_I64);
3004   assign(arg_m, getDRegI64(mreg));
3005   if (Q)
3006      res = newTemp(Ity_V128);
3007   else
3008      res = newTemp(Ity_I64);
3009   if ((imm4 & 1) == 1) {
3010      op = Q ? Iop_Dup8x16 : Iop_Dup8x8;
3011      op2 = Iop_GetElem8x8;
3012      index = imm4 >> 1;
3013      size = 8;
3014   } else if ((imm4 & 3) == 2) {
3015      op = Q ? Iop_Dup16x8 : Iop_Dup16x4;
3016      op2 = Iop_GetElem16x4;
3017      index = imm4 >> 2;
3018      size = 16;
3019   } else if ((imm4 & 7) == 4) {
3020      op = Q ? Iop_Dup32x4 : Iop_Dup32x2;
3021      op2 = Iop_GetElem32x2;
3022      index = imm4 >> 3;
3023      size = 32;
3024   } else {
3025      return False; // can this ever happen?
3026   }
3027   assign(res, unop(op, binop(op2, mkexpr(arg_m), mkU8(index))));
3028   if (Q) {
3029      putQReg(dreg, mkexpr(res), condT);
3030   } else {
3031      putDRegI64(dreg, mkexpr(res), condT);
3032   }
3033   DIP("vdup.%d %c%d, d%d[%d]\n", size, Q ? 'q' : 'd', dreg, mreg, index);
3034   return True;
3035}
3036
3037/* A7.4.1 Three registers of the same length */
3038static
3039Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT )
3040{
3041   UInt Q = (theInstr >> 6) & 1;
3042   UInt dreg = get_neon_d_regno(theInstr);
3043   UInt nreg = get_neon_n_regno(theInstr);
3044   UInt mreg = get_neon_m_regno(theInstr);
3045   UInt A = (theInstr >> 8) & 0xF;
3046   UInt B = (theInstr >> 4) & 1;
3047   UInt C = (theInstr >> 20) & 0x3;
3048   UInt U = (theInstr >> 24) & 1;
3049   UInt size = C;
3050
3051   IRTemp arg_n;
3052   IRTemp arg_m;
3053   IRTemp res;
3054
3055   if (Q) {
3056      arg_n = newTemp(Ity_V128);
3057      arg_m = newTemp(Ity_V128);
3058      res = newTemp(Ity_V128);
3059      assign(arg_n, getQReg(nreg));
3060      assign(arg_m, getQReg(mreg));
3061   } else {
3062      arg_n = newTemp(Ity_I64);
3063      arg_m = newTemp(Ity_I64);
3064      res = newTemp(Ity_I64);
3065      assign(arg_n, getDRegI64(nreg));
3066      assign(arg_m, getDRegI64(mreg));
3067   }
3068
3069   switch(A) {
3070      case 0:
3071         if (B == 0) {
3072            /* VHADD */
3073            ULong imm = 0;
3074            IRExpr *imm_val;
3075            IROp addOp;
3076            IROp andOp;
3077            IROp shOp;
3078            HChar regType = Q ? 'q' : 'd';
3079
3080            if (size == 3)
3081               return False;
3082            switch(size) {
3083               case 0: imm = 0x101010101010101LL; break;
3084               case 1: imm = 0x1000100010001LL; break;
3085               case 2: imm = 0x100000001LL; break;
3086               default: vassert(0);
3087            }
3088            if (Q) {
3089               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
3090               andOp = Iop_AndV128;
3091            } else {
3092               imm_val = mkU64(imm);
3093               andOp = Iop_And64;
3094            }
3095            if (U) {
3096               switch(size) {
3097                  case 0:
3098                     addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
3099                     shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3100                     break;
3101                  case 1:
3102                     addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
3103                     shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3104                     break;
3105                  case 2:
3106                     addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
3107                     shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3108                     break;
3109                  default:
3110                     vassert(0);
3111               }
3112            } else {
3113               switch(size) {
3114                  case 0:
3115                     addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
3116                     shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
3117                     break;
3118                  case 1:
3119                     addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
3120                     shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
3121                     break;
3122                  case 2:
3123                     addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
3124                     shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
3125                     break;
3126                  default:
3127                     vassert(0);
3128               }
3129            }
3130            assign(res,
3131                   binop(addOp,
3132                         binop(addOp,
3133                               binop(shOp, mkexpr(arg_m), mkU8(1)),
3134                               binop(shOp, mkexpr(arg_n), mkU8(1))),
3135                         binop(shOp,
3136                               binop(addOp,
3137                                     binop(andOp, mkexpr(arg_m), imm_val),
3138                                     binop(andOp, mkexpr(arg_n), imm_val)),
3139                               mkU8(1))));
3140            DIP("vhadd.%c%d %c%d, %c%d, %c%d\n",
3141                U ? 'u' : 's', 8 << size, regType,
3142                dreg, regType, nreg, regType, mreg);
3143         } else {
3144            /* VQADD */
3145            IROp op, op2;
3146            IRTemp tmp;
3147            HChar reg_t = Q ? 'q' : 'd';
3148            if (Q) {
3149               switch (size) {
3150                  case 0:
3151                     op = U ? Iop_QAdd8Ux16 : Iop_QAdd8Sx16;
3152                     op2 = Iop_Add8x16;
3153                     break;
3154                  case 1:
3155                     op = U ? Iop_QAdd16Ux8 : Iop_QAdd16Sx8;
3156                     op2 = Iop_Add16x8;
3157                     break;
3158                  case 2:
3159                     op = U ? Iop_QAdd32Ux4 : Iop_QAdd32Sx4;
3160                     op2 = Iop_Add32x4;
3161                     break;
3162                  case 3:
3163                     op = U ? Iop_QAdd64Ux2 : Iop_QAdd64Sx2;
3164                     op2 = Iop_Add64x2;
3165                     break;
3166                  default:
3167                     vassert(0);
3168               }
3169            } else {
3170               switch (size) {
3171                  case 0:
3172                     op = U ? Iop_QAdd8Ux8 : Iop_QAdd8Sx8;
3173                     op2 = Iop_Add8x8;
3174                     break;
3175                  case 1:
3176                     op = U ? Iop_QAdd16Ux4 : Iop_QAdd16Sx4;
3177                     op2 = Iop_Add16x4;
3178                     break;
3179                  case 2:
3180                     op = U ? Iop_QAdd32Ux2 : Iop_QAdd32Sx2;
3181                     op2 = Iop_Add32x2;
3182                     break;
3183                  case 3:
3184                     op = U ? Iop_QAdd64Ux1 : Iop_QAdd64Sx1;
3185                     op2 = Iop_Add64;
3186                     break;
3187                  default:
3188                     vassert(0);
3189               }
3190            }
3191            if (Q) {
3192               tmp = newTemp(Ity_V128);
3193            } else {
3194               tmp = newTemp(Ity_I64);
3195            }
3196            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3197            assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
3198            setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
3199            DIP("vqadd.%c%d %c%d, %c%d, %c%d\n",
3200                U ? 'u' : 's',
3201                8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
3202         }
3203         break;
3204      case 1:
3205         if (B == 0) {
3206            /* VRHADD */
3207            /* VRHADD C, A, B ::=
3208                 C = (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) */
3209            IROp shift_op, add_op;
3210            IRTemp cc;
3211            ULong one = 1;
3212            HChar reg_t = Q ? 'q' : 'd';
3213            switch (size) {
3214               case 0: one = (one <<  8) | one; /* fall through */
3215               case 1: one = (one << 16) | one; /* fall through */
3216               case 2: one = (one << 32) | one; break;
3217               case 3: return False;
3218               default: vassert(0);
3219            }
3220            if (Q) {
3221               switch (size) {
3222                  case 0:
3223                     shift_op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
3224                     add_op = Iop_Add8x16;
3225                     break;
3226                  case 1:
3227                     shift_op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
3228                     add_op = Iop_Add16x8;
3229                     break;
3230                  case 2:
3231                     shift_op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
3232                     add_op = Iop_Add32x4;
3233                     break;
3234                  case 3:
3235                     return False;
3236                  default:
3237                     vassert(0);
3238               }
3239            } else {
3240               switch (size) {
3241                  case 0:
3242                     shift_op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
3243                     add_op = Iop_Add8x8;
3244                     break;
3245                  case 1:
3246                     shift_op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
3247                     add_op = Iop_Add16x4;
3248                     break;
3249                  case 2:
3250                     shift_op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
3251                     add_op = Iop_Add32x2;
3252                     break;
3253                  case 3:
3254                     return False;
3255                  default:
3256                     vassert(0);
3257               }
3258            }
3259            if (Q) {
3260               cc = newTemp(Ity_V128);
3261               assign(cc, binop(shift_op,
3262                                binop(add_op,
3263                                      binop(add_op,
3264                                            binop(Iop_AndV128,
3265                                                  mkexpr(arg_n),
3266                                                  binop(Iop_64HLtoV128,
3267                                                        mkU64(one),
3268                                                        mkU64(one))),
3269                                            binop(Iop_AndV128,
3270                                                  mkexpr(arg_m),
3271                                                  binop(Iop_64HLtoV128,
3272                                                        mkU64(one),
3273                                                        mkU64(one)))),
3274                                      binop(Iop_64HLtoV128,
3275                                            mkU64(one),
3276                                            mkU64(one))),
3277                                mkU8(1)));
3278               assign(res, binop(add_op,
3279                                 binop(add_op,
3280                                       binop(shift_op,
3281                                             mkexpr(arg_n),
3282                                             mkU8(1)),
3283                                       binop(shift_op,
3284                                             mkexpr(arg_m),
3285                                             mkU8(1))),
3286                                 mkexpr(cc)));
3287            } else {
3288               cc = newTemp(Ity_I64);
3289               assign(cc, binop(shift_op,
3290                                binop(add_op,
3291                                      binop(add_op,
3292                                            binop(Iop_And64,
3293                                                  mkexpr(arg_n),
3294                                                  mkU64(one)),
3295                                            binop(Iop_And64,
3296                                                  mkexpr(arg_m),
3297                                                  mkU64(one))),
3298                                      mkU64(one)),
3299                                mkU8(1)));
3300               assign(res, binop(add_op,
3301                                 binop(add_op,
3302                                       binop(shift_op,
3303                                             mkexpr(arg_n),
3304                                             mkU8(1)),
3305                                       binop(shift_op,
3306                                             mkexpr(arg_m),
3307                                             mkU8(1))),
3308                                 mkexpr(cc)));
3309            }
3310            DIP("vrhadd.%c%d %c%d, %c%d, %c%d\n",
3311                U ? 'u' : 's',
3312                8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
3313         } else {
3314            if (U == 0)  {
3315               switch(C) {
3316                  case 0: {
3317                     /* VAND  */
3318                     HChar reg_t = Q ? 'q' : 'd';
3319                     if (Q) {
3320                        assign(res, binop(Iop_AndV128, mkexpr(arg_n),
3321                                                       mkexpr(arg_m)));
3322                     } else {
3323                        assign(res, binop(Iop_And64, mkexpr(arg_n),
3324                                                     mkexpr(arg_m)));
3325                     }
3326                     DIP("vand %c%d, %c%d, %c%d\n",
3327                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
3328                     break;
3329                  }
3330                  case 1: {
3331                     /* VBIC  */
3332                     HChar reg_t = Q ? 'q' : 'd';
3333                     if (Q) {
3334                        assign(res, binop(Iop_AndV128,mkexpr(arg_n),
3335                               unop(Iop_NotV128, mkexpr(arg_m))));
3336                     } else {
3337                        assign(res, binop(Iop_And64, mkexpr(arg_n),
3338                               unop(Iop_Not64, mkexpr(arg_m))));
3339                     }
3340                     DIP("vbic %c%d, %c%d, %c%d\n",
3341                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
3342                     break;
3343                  }
3344                  case 2:
3345                     if ( nreg != mreg) {
3346                        /* VORR  */
3347                        HChar reg_t = Q ? 'q' : 'd';
3348                        if (Q) {
3349                           assign(res, binop(Iop_OrV128, mkexpr(arg_n),
3350                                                         mkexpr(arg_m)));
3351                        } else {
3352                           assign(res, binop(Iop_Or64, mkexpr(arg_n),
3353                                                       mkexpr(arg_m)));
3354                        }
3355                        DIP("vorr %c%d, %c%d, %c%d\n",
3356                            reg_t, dreg, reg_t, nreg, reg_t, mreg);
3357                     } else {
3358                        /* VMOV  */
3359                        HChar reg_t = Q ? 'q' : 'd';
3360                        assign(res, mkexpr(arg_m));
3361                        DIP("vmov %c%d, %c%d\n", reg_t, dreg, reg_t, mreg);
3362                     }
3363                     break;
3364                  case 3:{
3365                     /* VORN  */
3366                     HChar reg_t = Q ? 'q' : 'd';
3367                     if (Q) {
3368                        assign(res, binop(Iop_OrV128,mkexpr(arg_n),
3369                               unop(Iop_NotV128, mkexpr(arg_m))));
3370                     } else {
3371                        assign(res, binop(Iop_Or64, mkexpr(arg_n),
3372                               unop(Iop_Not64, mkexpr(arg_m))));
3373                     }
3374                     DIP("vorn %c%d, %c%d, %c%d\n",
3375                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
3376                     break;
3377                  }
3378               }
3379            } else {
3380               switch(C) {
3381                  case 0:
3382                     /* VEOR (XOR)  */
3383                     if (Q) {
3384                        assign(res, binop(Iop_XorV128, mkexpr(arg_n),
3385                                                       mkexpr(arg_m)));
3386                     } else {
3387                        assign(res, binop(Iop_Xor64, mkexpr(arg_n),
3388                                                     mkexpr(arg_m)));
3389                     }
3390                     DIP("veor %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
3391                           Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3392                     break;
3393                  case 1:
3394                     /* VBSL  */
3395                     if (Q) {
3396                        IRTemp reg_d = newTemp(Ity_V128);
3397                        assign(reg_d, getQReg(dreg));
3398                        assign(res,
3399                               binop(Iop_OrV128,
3400                                     binop(Iop_AndV128, mkexpr(arg_n),
3401                                                        mkexpr(reg_d)),
3402                                     binop(Iop_AndV128,
3403                                           mkexpr(arg_m),
3404                                           unop(Iop_NotV128,
3405                                                 mkexpr(reg_d)) ) ) );
3406                     } else {
3407                        IRTemp reg_d = newTemp(Ity_I64);
3408                        assign(reg_d, getDRegI64(dreg));
3409                        assign(res,
3410                               binop(Iop_Or64,
3411                                     binop(Iop_And64, mkexpr(arg_n),
3412                                                      mkexpr(reg_d)),
3413                                     binop(Iop_And64,
3414                                           mkexpr(arg_m),
3415                                           unop(Iop_Not64, mkexpr(reg_d)))));
3416                     }
3417                     DIP("vbsl %c%u, %c%u, %c%u\n",
3418                         Q ? 'q' : 'd', dreg,
3419                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3420                     break;
3421                  case 2:
3422                     /* VBIT  */
3423                     if (Q) {
3424                        IRTemp reg_d = newTemp(Ity_V128);
3425                        assign(reg_d, getQReg(dreg));
3426                        assign(res,
3427                               binop(Iop_OrV128,
3428                                     binop(Iop_AndV128, mkexpr(arg_n),
3429                                                        mkexpr(arg_m)),
3430                                     binop(Iop_AndV128,
3431                                           mkexpr(reg_d),
3432                                           unop(Iop_NotV128, mkexpr(arg_m)))));
3433                     } else {
3434                        IRTemp reg_d = newTemp(Ity_I64);
3435                        assign(reg_d, getDRegI64(dreg));
3436                        assign(res,
3437                               binop(Iop_Or64,
3438                                     binop(Iop_And64, mkexpr(arg_n),
3439                                                      mkexpr(arg_m)),
3440                                     binop(Iop_And64,
3441                                           mkexpr(reg_d),
3442                                           unop(Iop_Not64, mkexpr(arg_m)))));
3443                     }
3444                     DIP("vbit %c%u, %c%u, %c%u\n",
3445                         Q ? 'q' : 'd', dreg,
3446                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3447                     break;
3448                  case 3:
3449                     /* VBIF  */
3450                     if (Q) {
3451                        IRTemp reg_d = newTemp(Ity_V128);
3452                        assign(reg_d, getQReg(dreg));
3453                        assign(res,
3454                               binop(Iop_OrV128,
3455                                     binop(Iop_AndV128, mkexpr(reg_d),
3456                                                        mkexpr(arg_m)),
3457                                     binop(Iop_AndV128,
3458                                           mkexpr(arg_n),
3459                                           unop(Iop_NotV128, mkexpr(arg_m)))));
3460                     } else {
3461                        IRTemp reg_d = newTemp(Ity_I64);
3462                        assign(reg_d, getDRegI64(dreg));
3463                        assign(res,
3464                               binop(Iop_Or64,
3465                                     binop(Iop_And64, mkexpr(reg_d),
3466                                                      mkexpr(arg_m)),
3467                                     binop(Iop_And64,
3468                                           mkexpr(arg_n),
3469                                           unop(Iop_Not64, mkexpr(arg_m)))));
3470                     }
3471                     DIP("vbif %c%u, %c%u, %c%u\n",
3472                         Q ? 'q' : 'd', dreg,
3473                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3474                     break;
3475               }
3476            }
3477         }
3478         break;
3479      case 2:
3480         if (B == 0) {
3481            /* VHSUB */
3482            /* (A >> 1) - (B >> 1) - (NOT (A) & B & 1)   */
3483            ULong imm = 0;
3484            IRExpr *imm_val;
3485            IROp subOp;
3486            IROp notOp;
3487            IROp andOp;
3488            IROp shOp;
3489            if (size == 3)
3490               return False;
3491            switch(size) {
3492               case 0: imm = 0x101010101010101LL; break;
3493               case 1: imm = 0x1000100010001LL; break;
3494               case 2: imm = 0x100000001LL; break;
3495               default: vassert(0);
3496            }
3497            if (Q) {
3498               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
3499               andOp = Iop_AndV128;
3500               notOp = Iop_NotV128;
3501            } else {
3502               imm_val = mkU64(imm);
3503               andOp = Iop_And64;
3504               notOp = Iop_Not64;
3505            }
3506            if (U) {
3507               switch(size) {
3508                  case 0:
3509                     subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3510                     shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3511                     break;
3512                  case 1:
3513                     subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3514                     shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3515                     break;
3516                  case 2:
3517                     subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3518                     shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3519                     break;
3520                  default:
3521                     vassert(0);
3522               }
3523            } else {
3524               switch(size) {
3525                  case 0:
3526                     subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3527                     shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
3528                     break;
3529                  case 1:
3530                     subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3531                     shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
3532                     break;
3533                  case 2:
3534                     subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3535                     shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
3536                     break;
3537                  default:
3538                     vassert(0);
3539               }
3540            }
3541            assign(res,
3542                   binop(subOp,
3543                         binop(subOp,
3544                               binop(shOp, mkexpr(arg_n), mkU8(1)),
3545                               binop(shOp, mkexpr(arg_m), mkU8(1))),
3546                         binop(andOp,
3547                               binop(andOp,
3548                                     unop(notOp, mkexpr(arg_n)),
3549                                     mkexpr(arg_m)),
3550                               imm_val)));
3551            DIP("vhsub.%c%u %c%u, %c%u, %c%u\n",
3552                U ? 'u' : 's', 8 << size,
3553                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3554                mreg);
3555         } else {
3556            /* VQSUB */
3557            IROp op, op2;
3558            IRTemp tmp;
3559            if (Q) {
3560               switch (size) {
3561                  case 0:
3562                     op = U ? Iop_QSub8Ux16 : Iop_QSub8Sx16;
3563                     op2 = Iop_Sub8x16;
3564                     break;
3565                  case 1:
3566                     op = U ? Iop_QSub16Ux8 : Iop_QSub16Sx8;
3567                     op2 = Iop_Sub16x8;
3568                     break;
3569                  case 2:
3570                     op = U ? Iop_QSub32Ux4 : Iop_QSub32Sx4;
3571                     op2 = Iop_Sub32x4;
3572                     break;
3573                  case 3:
3574                     op = U ? Iop_QSub64Ux2 : Iop_QSub64Sx2;
3575                     op2 = Iop_Sub64x2;
3576                     break;
3577                  default:
3578                     vassert(0);
3579               }
3580            } else {
3581               switch (size) {
3582                  case 0:
3583                     op = U ? Iop_QSub8Ux8 : Iop_QSub8Sx8;
3584                     op2 = Iop_Sub8x8;
3585                     break;
3586                  case 1:
3587                     op = U ? Iop_QSub16Ux4 : Iop_QSub16Sx4;
3588                     op2 = Iop_Sub16x4;
3589                     break;
3590                  case 2:
3591                     op = U ? Iop_QSub32Ux2 : Iop_QSub32Sx2;
3592                     op2 = Iop_Sub32x2;
3593                     break;
3594                  case 3:
3595                     op = U ? Iop_QSub64Ux1 : Iop_QSub64Sx1;
3596                     op2 = Iop_Sub64;
3597                     break;
3598                  default:
3599                     vassert(0);
3600               }
3601            }
3602            if (Q)
3603               tmp = newTemp(Ity_V128);
3604            else
3605               tmp = newTemp(Ity_I64);
3606            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3607            assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
3608            setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
3609            DIP("vqsub.%c%u %c%u, %c%u, %c%u\n",
3610                U ? 'u' : 's', 8 << size,
3611                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3612                mreg);
3613         }
3614         break;
3615      case 3: {
3616            IROp op;
3617            if (Q) {
3618               switch (size) {
3619                  case 0: op = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; break;
3620                  case 1: op = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; break;
3621                  case 2: op = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; break;
3622                  case 3: return False;
3623                  default: vassert(0);
3624               }
3625            } else {
3626               switch (size) {
3627                  case 0: op = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; break;
3628                  case 1: op = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; break;
3629                  case 2: op = U ? Iop_CmpGT32Ux2: Iop_CmpGT32Sx2; break;
3630                  case 3: return False;
3631                  default: vassert(0);
3632               }
3633            }
3634            if (B == 0) {
3635               /* VCGT  */
3636               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3637               DIP("vcgt.%c%u %c%u, %c%u, %c%u\n",
3638                   U ? 'u' : 's', 8 << size,
3639                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3640                   mreg);
3641            } else {
3642               /* VCGE  */
3643               /* VCGE res, argn, argm
3644                    is equal to
3645                  VCGT tmp, argm, argn
3646                  VNOT res, tmp */
3647               assign(res,
3648                      unop(Q ? Iop_NotV128 : Iop_Not64,
3649                           binop(op, mkexpr(arg_m), mkexpr(arg_n))));
3650               DIP("vcge.%c%u %c%u, %c%u, %c%u\n",
3651                   U ? 'u' : 's', 8 << size,
3652                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3653                   mreg);
3654            }
3655         }
3656         break;
3657      case 4:
3658         if (B == 0) {
3659            /* VSHL */
3660            IROp op, sub_op;
3661            IRTemp tmp;
3662            if (U) {
3663               switch (size) {
3664                  case 0: op = Q ? Iop_Shl8x16 : Iop_Shl8x8; break;
3665                  case 1: op = Q ? Iop_Shl16x8 : Iop_Shl16x4; break;
3666                  case 2: op = Q ? Iop_Shl32x4 : Iop_Shl32x2; break;
3667                  case 3: op = Q ? Iop_Shl64x2 : Iop_Shl64; break;
3668                  default: vassert(0);
3669               }
3670            } else {
3671               tmp = newTemp(Q ? Ity_V128 : Ity_I64);
3672               switch (size) {
3673                  case 0:
3674                     op = Q ? Iop_Sar8x16 : Iop_Sar8x8;
3675                     sub_op = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3676                     break;
3677                  case 1:
3678                     op = Q ? Iop_Sar16x8 : Iop_Sar16x4;
3679                     sub_op = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3680                     break;
3681                  case 2:
3682                     op = Q ? Iop_Sar32x4 : Iop_Sar32x2;
3683                     sub_op = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3684                     break;
3685                  case 3:
3686                     op = Q ? Iop_Sar64x2 : Iop_Sar64;
3687                     sub_op = Q ? Iop_Sub64x2 : Iop_Sub64;
3688                     break;
3689                  default:
3690                     vassert(0);
3691               }
3692            }
3693            if (U) {
3694               if (!Q && (size == 3))
3695                  assign(res, binop(op, mkexpr(arg_m),
3696                                        unop(Iop_64to8, mkexpr(arg_n))));
3697               else
3698                  assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3699            } else {
3700               if (Q)
3701                  assign(tmp, binop(sub_op,
3702                                    binop(Iop_64HLtoV128, mkU64(0), mkU64(0)),
3703                                    mkexpr(arg_n)));
3704               else
3705                  assign(tmp, binop(sub_op, mkU64(0), mkexpr(arg_n)));
3706               if (!Q && (size == 3))
3707                  assign(res, binop(op, mkexpr(arg_m),
3708                                        unop(Iop_64to8, mkexpr(tmp))));
3709               else
3710                  assign(res, binop(op, mkexpr(arg_m), mkexpr(tmp)));
3711            }
3712            DIP("vshl.%c%u %c%u, %c%u, %c%u\n",
3713                U ? 'u' : 's', 8 << size,
3714                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3715                nreg);
3716         } else {
3717            /* VQSHL */
3718            IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt;
3719            IRTemp tmp, shval, mask, old_shval;
3720            UInt i;
3721            ULong esize;
3722            cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
3723            cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3724            if (U) {
3725               switch (size) {
3726                  case 0:
3727                     op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
3728                     op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
3729                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3730                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3731                     break;
3732                  case 1:
3733                     op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
3734                     op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
3735                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3736                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3737                     break;
3738                  case 2:
3739                     op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
3740                     op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
3741                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3742                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3743                     break;
3744                  case 3:
3745                     op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
3746                     op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
3747                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3748                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3749                     break;
3750                  default:
3751                     vassert(0);
3752               }
3753            } else {
3754               switch (size) {
3755                  case 0:
3756                     op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
3757                     op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
3758                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3759                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3760                     break;
3761                  case 1:
3762                     op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
3763                     op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
3764                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3765                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3766                     break;
3767                  case 2:
3768                     op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
3769                     op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
3770                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3771                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3772                     break;
3773                  case 3:
3774                     op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
3775                     op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
3776                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3777                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3778                     break;
3779                  default:
3780                     vassert(0);
3781               }
3782            }
3783            if (Q) {
3784               tmp = newTemp(Ity_V128);
3785               shval = newTemp(Ity_V128);
3786               mask = newTemp(Ity_V128);
3787            } else {
3788               tmp = newTemp(Ity_I64);
3789               shval = newTemp(Ity_I64);
3790               mask = newTemp(Ity_I64);
3791            }
3792            assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3793            /* Only least significant byte from second argument is used.
3794               Copy this byte to the whole vector element. */
3795            assign(shval, binop(op_shrn,
3796                                binop(op_shln,
3797                                       mkexpr(arg_n),
3798                                       mkU8((8 << size) - 8)),
3799                                mkU8((8 << size) - 8)));
3800            for(i = 0; i < size; i++) {
3801               old_shval = shval;
3802               shval = newTemp(Q ? Ity_V128 : Ity_I64);
3803               assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3804                                   mkexpr(old_shval),
3805                                   binop(op_shln,
3806                                         mkexpr(old_shval),
3807                                         mkU8(8 << i))));
3808            }
3809            /* If the shift is greater than or equal to the element size and
3810               the element is non-zero, then the QC flag should be set. */
3811            esize = (8 << size) - 1;
3812            esize = (esize <<  8) | esize;
3813            esize = (esize << 16) | esize;
3814            esize = (esize << 32) | esize;
3815            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3816                             binop(cmp_gt, mkexpr(shval),
3817                                           Q ? mkU128(esize) : mkU64(esize)),
3818                             unop(cmp_neq, mkexpr(arg_m))),
3819                       Q ? mkU128(0) : mkU64(0),
3820                       Q, condT);
3821            /* Otherwise the QC flag should be set if the shift value is
3822               positive and the result, shifted right by the same amount, is
3823               not equal to the left argument. */
3824            assign(mask, binop(cmp_gt, mkexpr(shval),
3825                                       Q ? mkU128(0) : mkU64(0)));
3826            if (!Q && size == 3)
3827               assign(tmp, binop(op_rev, mkexpr(res),
3828                                         unop(Iop_64to8, mkexpr(arg_n))));
3829            else
3830               assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
3831            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3832                             mkexpr(tmp), mkexpr(mask)),
3833                       binop(Q ? Iop_AndV128 : Iop_And64,
3834                             mkexpr(arg_m), mkexpr(mask)),
3835                       Q, condT);
3836            DIP("vqshl.%c%u %c%u, %c%u, %c%u\n",
3837                U ? 'u' : 's', 8 << size,
3838                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3839                nreg);
3840         }
3841         break;
3842      case 5:
3843         if (B == 0) {
3844            /* VRSHL */
3845            IROp op, op_shrn, op_shln, cmp_gt, op_add;
3846            IRTemp shval, old_shval, imm_val, round;
3847            UInt i;
3848            ULong imm;
3849            cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3850            imm = 1L;
3851            switch (size) {
3852               case 0: imm = (imm <<  8) | imm; /* fall through */
3853               case 1: imm = (imm << 16) | imm; /* fall through */
3854               case 2: imm = (imm << 32) | imm; /* fall through */
3855               case 3: break;
3856               default: vassert(0);
3857            }
3858            imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
3859            round = newTemp(Q ? Ity_V128 : Ity_I64);
3860            assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
3861            if (U) {
3862               switch (size) {
3863                  case 0:
3864                     op = Q ? Iop_Shl8x16 : Iop_Shl8x8;
3865                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3866                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3867                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3868                     break;
3869                  case 1:
3870                     op = Q ? Iop_Shl16x8 : Iop_Shl16x4;
3871                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3872                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3873                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3874                     break;
3875                  case 2:
3876                     op = Q ? Iop_Shl32x4 : Iop_Shl32x2;
3877                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3878                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3879                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3880                     break;
3881                  case 3:
3882                     op = Q ? Iop_Shl64x2 : Iop_Shl64;
3883                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
3884                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3885                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3886                     break;
3887                  default:
3888                     vassert(0);
3889               }
3890            } else {
3891               switch (size) {
3892                  case 0:
3893                     op = Q ? Iop_Sal8x16 : Iop_Sal8x8;
3894                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3895                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3896                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3897                     break;
3898                  case 1:
3899                     op = Q ? Iop_Sal16x8 : Iop_Sal16x4;
3900                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3901                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3902                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3903                     break;
3904                  case 2:
3905                     op = Q ? Iop_Sal32x4 : Iop_Sal32x2;
3906                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3907                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3908                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3909                     break;
3910                  case 3:
3911                     op = Q ? Iop_Sal64x2 : Iop_Sal64x1;
3912                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
3913                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3914                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3915                     break;
3916                  default:
3917                     vassert(0);
3918               }
3919            }
3920            if (Q) {
3921               shval = newTemp(Ity_V128);
3922            } else {
3923               shval = newTemp(Ity_I64);
3924            }
3925            /* Only least significant byte from second argument is used.
3926               Copy this byte to the whole vector element. */
3927            assign(shval, binop(op_shrn,
3928                                binop(op_shln,
3929                                       mkexpr(arg_n),
3930                                       mkU8((8 << size) - 8)),
3931                                mkU8((8 << size) - 8)));
3932            for (i = 0; i < size; i++) {
3933               old_shval = shval;
3934               shval = newTemp(Q ? Ity_V128 : Ity_I64);
3935               assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3936                                   mkexpr(old_shval),
3937                                   binop(op_shln,
3938                                         mkexpr(old_shval),
3939                                         mkU8(8 << i))));
3940            }
3941            /* Compute the result */
3942            if (!Q && size == 3 && U) {
3943               assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
3944                                   binop(op,
3945                                         mkexpr(arg_m),
3946                                         unop(Iop_64to8,
3947                                              binop(op_add,
3948                                                    mkexpr(arg_n),
3949                                                    mkexpr(imm_val)))),
3950                                   binop(Q ? Iop_AndV128 : Iop_And64,
3951                                         mkexpr(imm_val),
3952                                         binop(cmp_gt,
3953                                               Q ? mkU128(0) : mkU64(0),
3954                                               mkexpr(arg_n)))));
3955               assign(res, binop(op_add,
3956                                 binop(op,
3957                                       mkexpr(arg_m),
3958                                       unop(Iop_64to8, mkexpr(arg_n))),
3959                                 mkexpr(round)));
3960            } else {
3961               assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
3962                                   binop(op,
3963                                         mkexpr(arg_m),
3964                                         binop(op_add,
3965                                               mkexpr(arg_n),
3966                                               mkexpr(imm_val))),
3967                                   binop(Q ? Iop_AndV128 : Iop_And64,
3968                                         mkexpr(imm_val),
3969                                         binop(cmp_gt,
3970                                               Q ? mkU128(0) : mkU64(0),
3971                                               mkexpr(arg_n)))));
3972               assign(res, binop(op_add,
3973                                 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
3974                                 mkexpr(round)));
3975            }
3976            DIP("vrshl.%c%u %c%u, %c%u, %c%u\n",
3977                U ? 'u' : 's', 8 << size,
3978                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3979                nreg);
3980         } else {
3981            /* VQRSHL */
3982            IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_add;
3983            IRTemp tmp, shval, mask, old_shval, imm_val, round;
3984            UInt i;
3985            ULong esize, imm;
3986            cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
3987            cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3988            imm = 1L;
3989            switch (size) {
3990               case 0: imm = (imm <<  8) | imm; /* fall through */
3991               case 1: imm = (imm << 16) | imm; /* fall through */
3992               case 2: imm = (imm << 32) | imm; /* fall through */
3993               case 3: break;
3994               default: vassert(0);
3995            }
3996            imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
3997            round = newTemp(Q ? Ity_V128 : Ity_I64);
3998            assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
3999            if (U) {
4000               switch (size) {
4001                  case 0:
4002                     op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
4003                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
4004                     op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
4005                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
4006                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
4007                     break;
4008                  case 1:
4009                     op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
4010                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
4011                     op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
4012                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
4013                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
4014                     break;
4015                  case 2:
4016                     op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
4017                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
4018                     op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
4019                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
4020                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
4021                     break;
4022                  case 3:
4023                     op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
4024                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
4025                     op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
4026                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
4027                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
4028                     break;
4029                  default:
4030                     vassert(0);
4031               }
4032            } else {
4033               switch (size) {
4034                  case 0:
4035                     op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
4036                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
4037                     op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
4038                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
4039                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
4040                     break;
4041                  case 1:
4042                     op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
4043                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
4044                     op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
4045                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
4046                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
4047                     break;
4048                  case 2:
4049                     op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
4050                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
4051                     op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
4052                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
4053                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
4054                     break;
4055                  case 3:
4056                     op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
4057                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
4058                     op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
4059                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
4060                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
4061                     break;
4062                  default:
4063                     vassert(0);
4064               }
4065            }
4066            if (Q) {
4067               tmp = newTemp(Ity_V128);
4068               shval = newTemp(Ity_V128);
4069               mask = newTemp(Ity_V128);
4070            } else {
4071               tmp = newTemp(Ity_I64);
4072               shval = newTemp(Ity_I64);
4073               mask = newTemp(Ity_I64);
4074            }
4075            /* Only least significant byte from second argument is used.
4076               Copy this byte to the whole vector element. */
4077            assign(shval, binop(op_shrn,
4078                                binop(op_shln,
4079                                       mkexpr(arg_n),
4080                                       mkU8((8 << size) - 8)),
4081                                mkU8((8 << size) - 8)));
4082            for (i = 0; i < size; i++) {
4083               old_shval = shval;
4084               shval = newTemp(Q ? Ity_V128 : Ity_I64);
4085               assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
4086                                   mkexpr(old_shval),
4087                                   binop(op_shln,
4088                                         mkexpr(old_shval),
4089                                         mkU8(8 << i))));
4090            }
4091            /* Compute the result */
4092            assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
4093                                binop(op,
4094                                      mkexpr(arg_m),
4095                                      binop(op_add,
4096                                            mkexpr(arg_n),
4097                                            mkexpr(imm_val))),
4098                                binop(Q ? Iop_AndV128 : Iop_And64,
4099                                      mkexpr(imm_val),
4100                                      binop(cmp_gt,
4101                                            Q ? mkU128(0) : mkU64(0),
4102                                            mkexpr(arg_n)))));
4103            assign(res, binop(op_add,
4104                              binop(op, mkexpr(arg_m), mkexpr(arg_n)),
4105                              mkexpr(round)));
4106            /* If the shift is greater than or equal to the element size and
4107               the element is non-zero, then the QC flag should be set. */
4108            esize = (8 << size) - 1;
4109            esize = (esize <<  8) | esize;
4110            esize = (esize << 16) | esize;
4111            esize = (esize << 32) | esize;
4112            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4113                             binop(cmp_gt, mkexpr(shval),
4114                                           Q ? mkU128(esize) : mkU64(esize)),
4115                             unop(cmp_neq, mkexpr(arg_m))),
4116                       Q ? mkU128(0) : mkU64(0),
4117                       Q, condT);
4118            /* Otherwise QC flag should be set if shift value is positive and
4119               result being right-shifted by the same value is not equal to left
4120               argument. */
4121            assign(mask, binop(cmp_gt, mkexpr(shval),
4122                               Q ? mkU128(0) : mkU64(0)));
4123            if (!Q && size == 3)
4124               assign(tmp, binop(op_rev, mkexpr(res),
4125                                         unop(Iop_64to8, mkexpr(arg_n))));
4126            else
4127               assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
4128            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4129                             mkexpr(tmp), mkexpr(mask)),
4130                       binop(Q ? Iop_AndV128 : Iop_And64,
4131                             mkexpr(arg_m), mkexpr(mask)),
4132                       Q, condT);
4133            DIP("vqrshl.%c%u %c%u, %c%u, %c%u\n",
4134                U ? 'u' : 's', 8 << size,
4135                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
4136                nreg);
4137         }
4138         break;
4139      case 6:
4140         /* VMAX, VMIN  */
4141         if (B == 0) {
4142            /* VMAX */
4143            IROp op;
4144            if (U == 0) {
4145               switch (size) {
4146                  case 0: op = Q ? Iop_Max8Sx16 : Iop_Max8Sx8; break;
4147                  case 1: op = Q ? Iop_Max16Sx8 : Iop_Max16Sx4; break;
4148                  case 2: op = Q ? Iop_Max32Sx4 : Iop_Max32Sx2; break;
4149                  case 3: return False;
4150                  default: vassert(0);
4151               }
4152            } else {
4153               switch (size) {
4154                  case 0: op = Q ? Iop_Max8Ux16 : Iop_Max8Ux8; break;
4155                  case 1: op = Q ? Iop_Max16Ux8 : Iop_Max16Ux4; break;
4156                  case 2: op = Q ? Iop_Max32Ux4 : Iop_Max32Ux2; break;
4157                  case 3: return False;
4158                  default: vassert(0);
4159               }
4160            }
4161            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4162            DIP("vmax.%c%u %c%u, %c%u, %c%u\n",
4163                U ? 'u' : 's', 8 << size,
4164                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4165                mreg);
4166         } else {
4167            /* VMIN */
4168            IROp op;
4169            if (U == 0) {
4170               switch (size) {
4171                  case 0: op = Q ? Iop_Min8Sx16 : Iop_Min8Sx8; break;
4172                  case 1: op = Q ? Iop_Min16Sx8 : Iop_Min16Sx4; break;
4173                  case 2: op = Q ? Iop_Min32Sx4 : Iop_Min32Sx2; break;
4174                  case 3: return False;
4175                  default: vassert(0);
4176               }
4177            } else {
4178               switch (size) {
4179                  case 0: op = Q ? Iop_Min8Ux16 : Iop_Min8Ux8; break;
4180                  case 1: op = Q ? Iop_Min16Ux8 : Iop_Min16Ux4; break;
4181                  case 2: op = Q ? Iop_Min32Ux4 : Iop_Min32Ux2; break;
4182                  case 3: return False;
4183                  default: vassert(0);
4184               }
4185            }
4186            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4187            DIP("vmin.%c%u %c%u, %c%u, %c%u\n",
4188                U ? 'u' : 's', 8 << size,
4189                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4190                mreg);
4191         }
4192         break;
4193      case 7:
4194         if (B == 0) {
4195            /* VABD */
4196            IROp op_cmp, op_sub;
4197            IRTemp cond;
4198            if ((theInstr >> 23) & 1) {
4199               vpanic("VABDL should not be in dis_neon_data_3same\n");
4200            }
4201            if (Q) {
4202               switch (size) {
4203                  case 0:
4204                     op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
4205                     op_sub = Iop_Sub8x16;
4206                     break;
4207                  case 1:
4208                     op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
4209                     op_sub = Iop_Sub16x8;
4210                     break;
4211                  case 2:
4212                     op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
4213                     op_sub = Iop_Sub32x4;
4214                     break;
4215                  case 3:
4216                     return False;
4217                  default:
4218                     vassert(0);
4219               }
4220            } else {
4221               switch (size) {
4222                  case 0:
4223                     op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4224                     op_sub = Iop_Sub8x8;
4225                     break;
4226                  case 1:
4227                     op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4228                     op_sub = Iop_Sub16x4;
4229                     break;
4230                  case 2:
4231                     op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4232                     op_sub = Iop_Sub32x2;
4233                     break;
4234                  case 3:
4235                     return False;
4236                  default:
4237                     vassert(0);
4238               }
4239            }
4240            if (Q) {
4241               cond = newTemp(Ity_V128);
4242            } else {
4243               cond = newTemp(Ity_I64);
4244            }
4245            assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
4246            assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
4247                              binop(Q ? Iop_AndV128 : Iop_And64,
4248                                    binop(op_sub, mkexpr(arg_n),
4249                                                  mkexpr(arg_m)),
4250                                    mkexpr(cond)),
4251                              binop(Q ? Iop_AndV128 : Iop_And64,
4252                                    binop(op_sub, mkexpr(arg_m),
4253                                                  mkexpr(arg_n)),
4254                                    unop(Q ? Iop_NotV128 : Iop_Not64,
4255                                         mkexpr(cond)))));
4256            DIP("vabd.%c%u %c%u, %c%u, %c%u\n",
4257                U ? 'u' : 's', 8 << size,
4258                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4259                mreg);
4260         } else {
4261            /* VABA */
4262            IROp op_cmp, op_sub, op_add;
4263            IRTemp cond, acc, tmp;
4264            if ((theInstr >> 23) & 1) {
4265               vpanic("VABAL should not be in dis_neon_data_3same");
4266            }
4267            if (Q) {
4268               switch (size) {
4269                  case 0:
4270                     op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
4271                     op_sub = Iop_Sub8x16;
4272                     op_add = Iop_Add8x16;
4273                     break;
4274                  case 1:
4275                     op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
4276                     op_sub = Iop_Sub16x8;
4277                     op_add = Iop_Add16x8;
4278                     break;
4279                  case 2:
4280                     op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
4281                     op_sub = Iop_Sub32x4;
4282                     op_add = Iop_Add32x4;
4283                     break;
4284                  case 3:
4285                     return False;
4286                  default:
4287                     vassert(0);
4288               }
4289            } else {
4290               switch (size) {
4291                  case 0:
4292                     op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4293                     op_sub = Iop_Sub8x8;
4294                     op_add = Iop_Add8x8;
4295                     break;
4296                  case 1:
4297                     op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4298                     op_sub = Iop_Sub16x4;
4299                     op_add = Iop_Add16x4;
4300                     break;
4301                  case 2:
4302                     op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4303                     op_sub = Iop_Sub32x2;
4304                     op_add = Iop_Add32x2;
4305                     break;
4306                  case 3:
4307                     return False;
4308                  default:
4309                     vassert(0);
4310               }
4311            }
4312            if (Q) {
4313               cond = newTemp(Ity_V128);
4314               acc = newTemp(Ity_V128);
4315               tmp = newTemp(Ity_V128);
4316               assign(acc, getQReg(dreg));
4317            } else {
4318               cond = newTemp(Ity_I64);
4319               acc = newTemp(Ity_I64);
4320               tmp = newTemp(Ity_I64);
4321               assign(acc, getDRegI64(dreg));
4322            }
4323            assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
4324            assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
4325                              binop(Q ? Iop_AndV128 : Iop_And64,
4326                                    binop(op_sub, mkexpr(arg_n),
4327                                                  mkexpr(arg_m)),
4328                                    mkexpr(cond)),
4329                              binop(Q ? Iop_AndV128 : Iop_And64,
4330                                    binop(op_sub, mkexpr(arg_m),
4331                                                  mkexpr(arg_n)),
4332                                    unop(Q ? Iop_NotV128 : Iop_Not64,
4333                                         mkexpr(cond)))));
4334            assign(res, binop(op_add, mkexpr(acc), mkexpr(tmp)));
4335            DIP("vaba.%c%u %c%u, %c%u, %c%u\n",
4336                U ? 'u' : 's', 8 << size,
4337                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4338                mreg);
4339         }
4340         break;
4341      case 8:
4342         if (B == 0) {
4343            IROp op;
4344            if (U == 0) {
4345               /* VADD  */
4346               switch (size) {
4347                  case 0: op = Q ? Iop_Add8x16 : Iop_Add8x8; break;
4348                  case 1: op = Q ? Iop_Add16x8 : Iop_Add16x4; break;
4349                  case 2: op = Q ? Iop_Add32x4 : Iop_Add32x2; break;
4350                  case 3: op = Q ? Iop_Add64x2 : Iop_Add64; break;
4351                  default: vassert(0);
4352               }
4353               DIP("vadd.i%u %c%u, %c%u, %c%u\n",
4354                   8 << size, Q ? 'q' : 'd',
4355                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4356            } else {
4357               /* VSUB  */
4358               switch (size) {
4359                  case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
4360                  case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
4361                  case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
4362                  case 3: op = Q ? Iop_Sub64x2 : Iop_Sub64; break;
4363                  default: vassert(0);
4364               }
4365               DIP("vsub.i%u %c%u, %c%u, %c%u\n",
4366                   8 << size, Q ? 'q' : 'd',
4367                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4368            }
4369            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4370         } else {
4371            IROp op;
4372            switch (size) {
4373               case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
4374               case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
4375               case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
4376               case 3: op = Q ? Iop_CmpNEZ64x2 : Iop_CmpwNEZ64; break;
4377               default: vassert(0);
4378            }
4379            if (U == 0) {
4380               /* VTST  */
4381               assign(res, unop(op, binop(Q ? Iop_AndV128 : Iop_And64,
4382                                          mkexpr(arg_n),
4383                                          mkexpr(arg_m))));
4384               DIP("vtst.%u %c%u, %c%u, %c%u\n",
4385                   8 << size, Q ? 'q' : 'd',
4386                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4387            } else {
4388               /* VCEQ  */
4389               assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
4390                                unop(op,
4391                                     binop(Q ? Iop_XorV128 : Iop_Xor64,
4392                                           mkexpr(arg_n),
4393                                           mkexpr(arg_m)))));
4394               DIP("vceq.i%u %c%u, %c%u, %c%u\n",
4395                   8 << size, Q ? 'q' : 'd',
4396                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4397            }
4398         }
4399         break;
4400      case 9:
4401         if (B == 0) {
4402            /* VMLA, VMLS (integer) */
4403            IROp op, op2;
4404            UInt P = (theInstr >> 24) & 1;
4405            if (P) {
4406               switch (size) {
4407                  case 0:
4408                     op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
4409                     op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
4410                     break;
4411                  case 1:
4412                     op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
4413                     op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
4414                     break;
4415                  case 2:
4416                     op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
4417                     op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
4418                     break;
4419                  case 3:
4420                     return False;
4421                  default:
4422                     vassert(0);
4423               }
4424            } else {
4425               switch (size) {
4426                  case 0:
4427                     op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
4428                     op2 = Q ? Iop_Add8x16 : Iop_Add8x8;
4429                     break;
4430                  case 1:
4431                     op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
4432                     op2 = Q ? Iop_Add16x8 : Iop_Add16x4;
4433                     break;
4434                  case 2:
4435                     op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
4436                     op2 = Q ? Iop_Add32x4 : Iop_Add32x2;
4437                     break;
4438                  case 3:
4439                     return False;
4440                  default:
4441                     vassert(0);
4442               }
4443            }
4444            assign(res, binop(op2,
4445                              Q ? getQReg(dreg) : getDRegI64(dreg),
4446                              binop(op, mkexpr(arg_n), mkexpr(arg_m))));
4447            DIP("vml%c.i%u %c%u, %c%u, %c%u\n",
4448                P ? 's' : 'a', 8 << size,
4449                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4450                mreg);
4451         } else {
4452            /* VMUL */
4453            IROp op;
4454            UInt P = (theInstr >> 24) & 1;
4455            if (P) {
4456               switch (size) {
4457                  case 0:
4458                     op = Q ? Iop_PolynomialMul8x16 : Iop_PolynomialMul8x8;
4459                     break;
4460                  case 1: case 2: case 3: return False;
4461                  default: vassert(0);
4462               }
4463            } else {
4464               switch (size) {
4465                  case 0: op = Q ? Iop_Mul8x16 : Iop_Mul8x8; break;
4466                  case 1: op = Q ? Iop_Mul16x8 : Iop_Mul16x4; break;
4467                  case 2: op = Q ? Iop_Mul32x4 : Iop_Mul32x2; break;
4468                  case 3: return False;
4469                  default: vassert(0);
4470               }
4471            }
4472            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4473            DIP("vmul.%c%u %c%u, %c%u, %c%u\n",
4474                P ? 'p' : 'i', 8 << size,
4475                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4476                mreg);
4477         }
4478         break;
4479      case 10: {
4480         /* VPMAX, VPMIN  */
4481         UInt P = (theInstr >> 4) & 1;
4482         IROp op;
4483         if (Q)
4484            return False;
4485         if (P) {
4486            switch (size) {
4487               case 0: op = U ? Iop_PwMin8Ux8  : Iop_PwMin8Sx8; break;
4488               case 1: op = U ? Iop_PwMin16Ux4 : Iop_PwMin16Sx4; break;
4489               case 2: op = U ? Iop_PwMin32Ux2 : Iop_PwMin32Sx2; break;
4490               case 3: return False;
4491               default: vassert(0);
4492            }
4493         } else {
4494            switch (size) {
4495               case 0: op = U ? Iop_PwMax8Ux8  : Iop_PwMax8Sx8; break;
4496               case 1: op = U ? Iop_PwMax16Ux4 : Iop_PwMax16Sx4; break;
4497               case 2: op = U ? Iop_PwMax32Ux2 : Iop_PwMax32Sx2; break;
4498               case 3: return False;
4499               default: vassert(0);
4500            }
4501         }
4502         assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4503         DIP("vp%s.%c%u %c%u, %c%u, %c%u\n",
4504             P ? "min" : "max", U ? 'u' : 's',
4505             8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
4506             Q ? 'q' : 'd', mreg);
4507         break;
4508      }
4509      case 11:
4510         if (B == 0) {
4511            if (U == 0) {
4512               /* VQDMULH  */
4513               IROp op ,op2;
4514               ULong imm;
4515               switch (size) {
4516                  case 0: case 3:
4517                     return False;
4518                  case 1:
4519                     op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
4520                     op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
4521                     imm = 1LL << 15;
4522                     imm = (imm << 16) | imm;
4523                     imm = (imm << 32) | imm;
4524                     break;
4525                  case 2:
4526                     op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
4527                     op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
4528                     imm = 1LL << 31;
4529                     imm = (imm << 32) | imm;
4530                     break;
4531                  default:
4532                     vassert(0);
4533               }
4534               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4535               setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4536                                binop(op2, mkexpr(arg_n),
4537                                           Q ? mkU128(imm) : mkU64(imm)),
4538                                binop(op2, mkexpr(arg_m),
4539                                           Q ? mkU128(imm) : mkU64(imm))),
4540                          Q ? mkU128(0) : mkU64(0),
4541                          Q, condT);
4542               DIP("vqdmulh.s%u %c%u, %c%u, %c%u\n",
4543                   8 << size, Q ? 'q' : 'd',
4544                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4545            } else {
4546               /* VQRDMULH */
4547               IROp op ,op2;
4548               ULong imm;
4549               switch(size) {
4550                  case 0: case 3:
4551                     return False;
4552                  case 1:
4553                     imm = 1LL << 15;
4554                     imm = (imm << 16) | imm;
4555                     imm = (imm << 32) | imm;
4556                     op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
4557                     op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
4558                     break;
4559                  case 2:
4560                     imm = 1LL << 31;
4561                     imm = (imm << 32) | imm;
4562                     op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
4563                     op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
4564                     break;
4565                  default:
4566                     vassert(0);
4567               }
4568               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4569               setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4570                                binop(op2, mkexpr(arg_n),
4571                                           Q ? mkU128(imm) : mkU64(imm)),
4572                                binop(op2, mkexpr(arg_m),
4573                                           Q ? mkU128(imm) : mkU64(imm))),
4574                          Q ? mkU128(0) : mkU64(0),
4575                          Q, condT);
4576               DIP("vqrdmulh.s%u %c%u, %c%u, %c%u\n",
4577                   8 << size, Q ? 'q' : 'd',
4578                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4579            }
4580         } else {
4581            if (U == 0) {
4582               /* VPADD */
4583               IROp op;
4584               if (Q)
4585                  return False;
4586               switch (size) {
4587                  case 0: op = Q ? Iop_PwAdd8x16 : Iop_PwAdd8x8;  break;
4588                  case 1: op = Q ? Iop_PwAdd16x8 : Iop_PwAdd16x4; break;
4589                  case 2: op = Q ? Iop_PwAdd32x4 : Iop_PwAdd32x2; break;
4590                  case 3: return False;
4591                  default: vassert(0);
4592               }
4593               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4594               DIP("vpadd.i%d %c%u, %c%u, %c%u\n",
4595                   8 << size, Q ? 'q' : 'd',
4596                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4597            }
4598         }
4599         break;
4600      /* Starting from here these are FP SIMD cases */
4601      case 13:
4602         if (B == 0) {
4603            IROp op;
4604            if (U == 0) {
4605               if ((C >> 1) == 0) {
4606                  /* VADD  */
4607                  op = Q ? Iop_Add32Fx4 : Iop_Add32Fx2 ;
4608                  DIP("vadd.f32 %c%u, %c%u, %c%u\n",
4609                      Q ? 'q' : 'd', dreg,
4610                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4611               } else {
4612                  /* VSUB  */
4613                  op = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2 ;
4614                  DIP("vsub.f32 %c%u, %c%u, %c%u\n",
4615                      Q ? 'q' : 'd', dreg,
4616                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4617               }
4618            } else {
4619               if ((C >> 1) == 0) {
4620                  /* VPADD */
4621                  if (Q)
4622                     return False;
4623                  op = Iop_PwAdd32Fx2;
4624                  DIP("vpadd.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4625               } else {
4626                  /* VABD  */
4627                  if (Q) {
4628                     assign(res, unop(Iop_Abs32Fx4,
4629                                      triop(Iop_Sub32Fx4,
4630                                            get_FAKE_roundingmode(),
4631                                            mkexpr(arg_n),
4632                                            mkexpr(arg_m))));
4633                  } else {
4634                     assign(res, unop(Iop_Abs32Fx2,
4635                                      binop(Iop_Sub32Fx2,
4636                                            mkexpr(arg_n),
4637                                            mkexpr(arg_m))));
4638                  }
4639                  DIP("vabd.f32 %c%u, %c%u, %c%u\n",
4640                      Q ? 'q' : 'd', dreg,
4641                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4642                  break;
4643               }
4644            }
4645            assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
4646         } else {
4647            if (U == 0) {
4648               /* VMLA, VMLS  */
4649               IROp op, op2;
4650               UInt P = (theInstr >> 21) & 1;
4651               if (P) {
4652                  switch (size & 1) {
4653                     case 0:
4654                        op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
4655                        op2 = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
4656                        break;
4657                     case 1: return False;
4658                     default: vassert(0);
4659                  }
4660               } else {
4661                  switch (size & 1) {
4662                     case 0:
4663                        op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
4664                        op2 = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
4665                        break;
4666                     case 1: return False;
4667                     default: vassert(0);
4668                  }
4669               }
4670               assign(res, binop_w_fake_RM(
4671                              op2,
4672                              Q ? getQReg(dreg) : getDRegI64(dreg),
4673                              binop_w_fake_RM(op, mkexpr(arg_n),
4674                                                  mkexpr(arg_m))));
4675
4676               DIP("vml%c.f32 %c%u, %c%u, %c%u\n",
4677                   P ? 's' : 'a', Q ? 'q' : 'd',
4678                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4679            } else {
4680               /* VMUL  */
4681               IROp op;
4682               if ((C >> 1) != 0)
4683                  return False;
4684               op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2 ;
4685               assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
4686               DIP("vmul.f32 %c%u, %c%u, %c%u\n",
4687                   Q ? 'q' : 'd', dreg,
4688                   Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4689            }
4690         }
4691         break;
4692      case 14:
4693         if (B == 0) {
4694            if (U == 0) {
4695               if ((C >> 1) == 0) {
4696                  /* VCEQ  */
4697                  IROp op;
4698                  if ((theInstr >> 20) & 1)
4699                     return False;
4700                  op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2;
4701                  assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4702                  DIP("vceq.f32 %c%u, %c%u, %c%u\n",
4703                      Q ? 'q' : 'd', dreg,
4704                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4705               } else {
4706                  return False;
4707               }
4708            } else {
4709               if ((C >> 1) == 0) {
4710                  /* VCGE  */
4711                  IROp op;
4712                  if ((theInstr >> 20) & 1)
4713                     return False;
4714                  op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
4715                  assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4716                  DIP("vcge.f32 %c%u, %c%u, %c%u\n",
4717                      Q ? 'q' : 'd', dreg,
4718                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4719               } else {
4720                  /* VCGT  */
4721                  IROp op;
4722                  if ((theInstr >> 20) & 1)
4723                     return False;
4724                  op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
4725                  assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4726                  DIP("vcgt.f32 %c%u, %c%u, %c%u\n",
4727                      Q ? 'q' : 'd', dreg,
4728                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4729               }
4730            }
4731         } else {
4732            if (U == 1) {
4733               /* VACGE, VACGT */
4734               UInt op_bit = (theInstr >> 21) & 1;
4735               IROp op, op2;
4736               op2 = Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2;
4737               if (op_bit) {
4738                  op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
4739                  assign(res, binop(op,
4740                                    unop(op2, mkexpr(arg_n)),
4741                                    unop(op2, mkexpr(arg_m))));
4742               } else {
4743                  op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
4744                  assign(res, binop(op,
4745                                    unop(op2, mkexpr(arg_n)),
4746                                    unop(op2, mkexpr(arg_m))));
4747               }
4748               DIP("vacg%c.f32 %c%u, %c%u, %c%u\n", op_bit ? 't' : 'e',
4749                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
4750                   Q ? 'q' : 'd', mreg);
4751            }
4752         }
4753         break;
4754      case 15:
4755         if (B == 0) {
4756            if (U == 0) {
4757               /* VMAX, VMIN  */
4758               IROp op;
4759               if ((theInstr >> 20) & 1)
4760                  return False;
4761               if ((theInstr >> 21) & 1) {
4762                  op = Q ? Iop_Min32Fx4 : Iop_Min32Fx2;
4763                  DIP("vmin.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4764                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4765               } else {
4766                  op = Q ? Iop_Max32Fx4 : Iop_Max32Fx2;
4767                  DIP("vmax.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4768                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4769               }
4770               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4771            } else {
4772               /* VPMAX, VPMIN   */
4773               IROp op;
4774               if (Q)
4775                  return False;
4776               if ((theInstr >> 20) & 1)
4777                  return False;
4778               if ((theInstr >> 21) & 1) {
4779                  op = Iop_PwMin32Fx2;
4780                  DIP("vpmin.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4781               } else {
4782                  op = Iop_PwMax32Fx2;
4783                  DIP("vpmax.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4784               }
4785               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4786            }
4787         } else {
4788            if (U == 0) {
4789               if ((C >> 1) == 0) {
4790                  /* VRECPS */
4791                  if ((theInstr >> 20) & 1)
4792                     return False;
4793                  assign(res, binop(Q ? Iop_Recps32Fx4 : Iop_Recps32Fx2,
4794                                    mkexpr(arg_n),
4795                                    mkexpr(arg_m)));
4796                  DIP("vrecps.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4797                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4798               } else {
4799                  /* VRSQRTS  */
4800                  if ((theInstr >> 20) & 1)
4801                     return False;
4802                  assign(res, binop(Q ? Iop_Rsqrts32Fx4 : Iop_Rsqrts32Fx2,
4803                                    mkexpr(arg_n),
4804                                    mkexpr(arg_m)));
4805                  DIP("vrsqrts.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4806                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4807               }
4808            }
4809         }
4810         break;
4811   }
4812
4813   if (Q) {
4814      putQReg(dreg, mkexpr(res), condT);
4815   } else {
4816      putDRegI64(dreg, mkexpr(res), condT);
4817   }
4818
4819   return True;
4820}
4821
4822/* A7.4.2 Three registers of different length */
4823static
4824Bool dis_neon_data_3diff ( UInt theInstr, IRTemp condT )
4825{
4826   UInt A = (theInstr >> 8) & 0xf;
4827   UInt B = (theInstr >> 20) & 3;
4828   UInt U = (theInstr >> 24) & 1;
4829   UInt P = (theInstr >> 9) & 1;
4830   UInt mreg = get_neon_m_regno(theInstr);
4831   UInt nreg = get_neon_n_regno(theInstr);
4832   UInt dreg = get_neon_d_regno(theInstr);
4833   UInt size = B;
4834   ULong imm;
4835   IRTemp res, arg_m, arg_n, cond, tmp;
4836   IROp cvt, cvt2, cmp, op, op2, sh, add;
4837   switch (A) {
4838      case 0: case 1: case 2: case 3:
4839         /* VADDL, VADDW, VSUBL, VSUBW */
4840         if (dreg & 1)
4841            return False;
4842         dreg >>= 1;
4843         size = B;
4844         switch (size) {
4845            case 0:
4846               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
4847               op = (A & 2) ? Iop_Sub16x8 : Iop_Add16x8;
4848               break;
4849            case 1:
4850               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
4851               op = (A & 2) ? Iop_Sub32x4 : Iop_Add32x4;
4852               break;
4853            case 2:
4854               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
4855               op = (A & 2) ? Iop_Sub64x2 : Iop_Add64x2;
4856               break;
4857            case 3:
4858               return False;
4859            default:
4860               vassert(0);
4861         }
4862         arg_n = newTemp(Ity_V128);
4863         arg_m = newTemp(Ity_V128);
4864         if (A & 1) {
4865            if (nreg & 1)
4866               return False;
4867            nreg >>= 1;
4868            assign(arg_n, getQReg(nreg));
4869         } else {
4870            assign(arg_n, unop(cvt, getDRegI64(nreg)));
4871         }
4872         assign(arg_m, unop(cvt, getDRegI64(mreg)));
4873         putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
4874                       condT);
4875         DIP("v%s%c.%c%u q%u, %c%u, d%u\n", (A & 2) ? "sub" : "add",
4876             (A & 1) ? 'w' : 'l', U ? 'u' : 's', 8 << size, dreg,
4877             (A & 1) ? 'q' : 'd', nreg, mreg);
4878         return True;
4879      case 4:
4880         /* VADDHN, VRADDHN */
4881         if (mreg & 1)
4882            return False;
4883         mreg >>= 1;
4884         if (nreg & 1)
4885            return False;
4886         nreg >>= 1;
4887         size = B;
4888         switch (size) {
4889            case 0:
4890               op = Iop_Add16x8;
4891               cvt = Iop_NarrowUn16to8x8;
4892               sh = Iop_ShrN16x8;
4893               imm = 1U << 7;
4894               imm = (imm << 16) | imm;
4895               imm = (imm << 32) | imm;
4896               break;
4897            case 1:
4898               op = Iop_Add32x4;
4899               cvt = Iop_NarrowUn32to16x4;
4900               sh = Iop_ShrN32x4;
4901               imm = 1U << 15;
4902               imm = (imm << 32) | imm;
4903               break;
4904            case 2:
4905               op = Iop_Add64x2;
4906               cvt = Iop_NarrowUn64to32x2;
4907               sh = Iop_ShrN64x2;
4908               imm = 1U << 31;
4909               break;
4910            case 3:
4911               return False;
4912            default:
4913               vassert(0);
4914         }
4915         tmp = newTemp(Ity_V128);
4916         res = newTemp(Ity_V128);
4917         assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
4918         if (U) {
4919            /* VRADDHN */
4920            assign(res, binop(op, mkexpr(tmp),
4921                     binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
4922         } else {
4923            assign(res, mkexpr(tmp));
4924         }
4925         putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
4926                    condT);
4927         DIP("v%saddhn.i%u d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
4928             nreg, mreg);
4929         return True;
4930      case 5:
4931         /* VABAL */
4932         if (!((theInstr >> 23) & 1)) {
4933            vpanic("VABA should not be in dis_neon_data_3diff\n");
4934         }
4935         if (dreg & 1)
4936            return False;
4937         dreg >>= 1;
4938         switch (size) {
4939            case 0:
4940               cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4941               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
4942               cvt2 = Iop_Widen8Sto16x8;
4943               op = Iop_Sub16x8;
4944               op2 = Iop_Add16x8;
4945               break;
4946            case 1:
4947               cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4948               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
4949               cvt2 = Iop_Widen16Sto32x4;
4950               op = Iop_Sub32x4;
4951               op2 = Iop_Add32x4;
4952               break;
4953            case 2:
4954               cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4955               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
4956               cvt2 = Iop_Widen32Sto64x2;
4957               op = Iop_Sub64x2;
4958               op2 = Iop_Add64x2;
4959               break;
4960            case 3:
4961               return False;
4962            default:
4963               vassert(0);
4964         }
4965         arg_n = newTemp(Ity_V128);
4966         arg_m = newTemp(Ity_V128);
4967         cond = newTemp(Ity_V128);
4968         res = newTemp(Ity_V128);
4969         assign(arg_n, unop(cvt, getDRegI64(nreg)));
4970         assign(arg_m, unop(cvt, getDRegI64(mreg)));
4971         assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
4972                                            getDRegI64(mreg))));
4973         assign(res, binop(op2,
4974                           binop(Iop_OrV128,
4975                                 binop(Iop_AndV128,
4976                                       binop(op, mkexpr(arg_n), mkexpr(arg_m)),
4977                                       mkexpr(cond)),
4978                                 binop(Iop_AndV128,
4979                                       binop(op, mkexpr(arg_m), mkexpr(arg_n)),
4980                                       unop(Iop_NotV128, mkexpr(cond)))),
4981                           getQReg(dreg)));
4982         putQReg(dreg, mkexpr(res), condT);
4983         DIP("vabal.%c%u q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
4984             nreg, mreg);
4985         return True;
4986      case 6:
4987         /* VSUBHN, VRSUBHN */
4988         if (mreg & 1)
4989            return False;
4990         mreg >>= 1;
4991         if (nreg & 1)
4992            return False;
4993         nreg >>= 1;
4994         size = B;
4995         switch (size) {
4996            case 0:
4997               op = Iop_Sub16x8;
4998               op2 = Iop_Add16x8;
4999               cvt = Iop_NarrowUn16to8x8;
5000               sh = Iop_ShrN16x8;
5001               imm = 1U << 7;
5002               imm = (imm << 16) | imm;
5003               imm = (imm << 32) | imm;
5004               break;
5005            case 1:
5006               op = Iop_Sub32x4;
5007               op2 = Iop_Add32x4;
5008               cvt = Iop_NarrowUn32to16x4;
5009               sh = Iop_ShrN32x4;
5010               imm = 1U << 15;
5011               imm = (imm << 32) | imm;
5012               break;
5013            case 2:
5014               op = Iop_Sub64x2;
5015               op2 = Iop_Add64x2;
5016               cvt = Iop_NarrowUn64to32x2;
5017               sh = Iop_ShrN64x2;
5018               imm = 1U << 31;
5019               break;
5020            case 3:
5021               return False;
5022            default:
5023               vassert(0);
5024         }
5025         tmp = newTemp(Ity_V128);
5026         res = newTemp(Ity_V128);
5027         assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
5028         if (U) {
5029            /* VRSUBHN */
5030            assign(res, binop(op2, mkexpr(tmp),
5031                     binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
5032         } else {
5033            assign(res, mkexpr(tmp));
5034         }
5035         putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
5036                    condT);
5037         DIP("v%ssubhn.i%u d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
5038             nreg, mreg);
5039         return True;
5040      case 7:
5041         /* VABDL */
5042         if (!((theInstr >> 23) & 1)) {
5043            vpanic("VABL should not be in dis_neon_data_3diff\n");
5044         }
5045         if (dreg & 1)
5046            return False;
5047         dreg >>= 1;
5048         switch (size) {
5049            case 0:
5050               cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
5051               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
5052               cvt2 = Iop_Widen8Sto16x8;
5053               op = Iop_Sub16x8;
5054               break;
5055            case 1:
5056               cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
5057               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
5058               cvt2 = Iop_Widen16Sto32x4;
5059               op = Iop_Sub32x4;
5060               break;
5061            case 2:
5062               cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
5063               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
5064               cvt2 = Iop_Widen32Sto64x2;
5065               op = Iop_Sub64x2;
5066               break;
5067            case 3:
5068               return False;
5069            default:
5070               vassert(0);
5071         }
5072         arg_n = newTemp(Ity_V128);
5073         arg_m = newTemp(Ity_V128);
5074         cond = newTemp(Ity_V128);
5075         res = newTemp(Ity_V128);
5076         assign(arg_n, unop(cvt, getDRegI64(nreg)));
5077         assign(arg_m, unop(cvt, getDRegI64(mreg)));
5078         assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
5079                                            getDRegI64(mreg))));
5080         assign(res, binop(Iop_OrV128,
5081                           binop(Iop_AndV128,
5082                                 binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5083                                 mkexpr(cond)),
5084                           binop(Iop_AndV128,
5085                                 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
5086                                 unop(Iop_NotV128, mkexpr(cond)))));
5087         putQReg(dreg, mkexpr(res), condT);
5088         DIP("vabdl.%c%u q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
5089             nreg, mreg);
5090         return True;
5091      case 8:
5092      case 10:
5093         /* VMLAL, VMLSL (integer) */
5094         if (dreg & 1)
5095            return False;
5096         dreg >>= 1;
5097         size = B;
5098         switch (size) {
5099            case 0:
5100               op = U ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
5101               op2 = P ? Iop_Sub16x8 : Iop_Add16x8;
5102               break;
5103            case 1:
5104               op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5105               op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5106               break;
5107            case 2:
5108               op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5109               op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5110               break;
5111            case 3:
5112               return False;
5113            default:
5114               vassert(0);
5115         }
5116         res = newTemp(Ity_V128);
5117         assign(res, binop(op, getDRegI64(nreg),getDRegI64(mreg)));
5118         putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
5119         DIP("vml%cl.%c%u q%u, d%u, d%u\n", P ? 's' : 'a', U ? 'u' : 's',
5120             8 << size, dreg, nreg, mreg);
5121         return True;
5122      case 9:
5123      case 11:
5124         /* VQDMLAL, VQDMLSL */
5125         if (U)
5126            return False;
5127         if (dreg & 1)
5128            return False;
5129         dreg >>= 1;
5130         size = B;
5131         switch (size) {
5132            case 0: case 3:
5133               return False;
5134            case 1:
5135               op = Iop_QDMulLong16Sx4;
5136               cmp = Iop_CmpEQ16x4;
5137               add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
5138               op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5139               imm = 1LL << 15;
5140               imm = (imm << 16) | imm;
5141               imm = (imm << 32) | imm;
5142               break;
5143            case 2:
5144               op = Iop_QDMulLong32Sx2;
5145               cmp = Iop_CmpEQ32x2;
5146               add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
5147               op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5148               imm = 1LL << 31;
5149               imm = (imm << 32) | imm;
5150               break;
5151            default:
5152               vassert(0);
5153         }
5154         res = newTemp(Ity_V128);
5155         tmp = newTemp(Ity_V128);
5156         assign(res, binop(op, getDRegI64(nreg), getDRegI64(mreg)));
5157         assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
5158         setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
5159                    True, condT);
5160         setFlag_QC(binop(Iop_And64,
5161                          binop(cmp, getDRegI64(nreg), mkU64(imm)),
5162                          binop(cmp, getDRegI64(mreg), mkU64(imm))),
5163                    mkU64(0),
5164                    False, condT);
5165         putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
5166         DIP("vqdml%cl.s%u q%u, d%u, d%u\n", P ? 's' : 'a', 8 << size, dreg,
5167             nreg, mreg);
5168         return True;
5169      case 12:
5170      case 14:
5171         /* VMULL (integer or polynomial) */
5172         if (dreg & 1)
5173            return False;
5174         dreg >>= 1;
5175         size = B;
5176         switch (size) {
5177            case 0:
5178               op = (U) ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
5179               if (P)
5180                  op = Iop_PolynomialMull8x8;
5181               break;
5182            case 1:
5183               op = (U) ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5184               break;
5185            case 2:
5186               op = (U) ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5187               break;
5188            default:
5189               vassert(0);
5190         }
5191         putQReg(dreg, binop(op, getDRegI64(nreg),
5192                                 getDRegI64(mreg)), condT);
5193         DIP("vmull.%c%u q%u, d%u, d%u\n", P ? 'p' : (U ? 'u' : 's'),
5194               8 << size, dreg, nreg, mreg);
5195         return True;
5196      case 13:
5197         /* VQDMULL */
5198         if (U)
5199            return False;
5200         if (dreg & 1)
5201            return False;
5202         dreg >>= 1;
5203         size = B;
5204         switch (size) {
5205            case 0:
5206            case 3:
5207               return False;
5208            case 1:
5209               op = Iop_QDMulLong16Sx4;
5210               op2 = Iop_CmpEQ16x4;
5211               imm = 1LL << 15;
5212               imm = (imm << 16) | imm;
5213               imm = (imm << 32) | imm;
5214               break;
5215            case 2:
5216               op = Iop_QDMulLong32Sx2;
5217               op2 = Iop_CmpEQ32x2;
5218               imm = 1LL << 31;
5219               imm = (imm << 32) | imm;
5220               break;
5221            default:
5222               vassert(0);
5223         }
5224         putQReg(dreg, binop(op, getDRegI64(nreg), getDRegI64(mreg)),
5225               condT);
5226         setFlag_QC(binop(Iop_And64,
5227                          binop(op2, getDRegI64(nreg), mkU64(imm)),
5228                          binop(op2, getDRegI64(mreg), mkU64(imm))),
5229                    mkU64(0),
5230                    False, condT);
5231         DIP("vqdmull.s%u q%u, d%u, d%u\n", 8 << size, dreg, nreg, mreg);
5232         return True;
5233      default:
5234         return False;
5235   }
5236   return False;
5237}
5238
5239/* A7.4.3 Two registers and a scalar */
5240static
5241Bool dis_neon_data_2reg_and_scalar ( UInt theInstr, IRTemp condT )
5242{
5243#  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
5244   UInt U = INSN(24,24);
5245   UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
5246   UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
5247   UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
5248   UInt size = INSN(21,20);
5249   UInt index;
5250   UInt Q = INSN(24,24);
5251
5252   if (INSN(27,25) != 1 || INSN(23,23) != 1
5253       || INSN(6,6) != 1 || INSN(4,4) != 0)
5254      return False;
5255
5256   /* VMLA, VMLS (scalar)  */
5257   if ((INSN(11,8) & BITS4(1,0,1,0)) == BITS4(0,0,0,0)) {
5258      IRTemp res, arg_m, arg_n;
5259      IROp dup, get, op, op2, add, sub;
5260      if (Q) {
5261         if ((dreg & 1) || (nreg & 1))
5262            return False;
5263         dreg >>= 1;
5264         nreg >>= 1;
5265         res = newTemp(Ity_V128);
5266         arg_m = newTemp(Ity_V128);
5267         arg_n = newTemp(Ity_V128);
5268         assign(arg_n, getQReg(nreg));
5269         switch(size) {
5270            case 1:
5271               dup = Iop_Dup16x8;
5272               get = Iop_GetElem16x4;
5273               index = mreg >> 3;
5274               mreg &= 7;
5275               break;
5276            case 2:
5277               dup = Iop_Dup32x4;
5278               get = Iop_GetElem32x2;
5279               index = mreg >> 4;
5280               mreg &= 0xf;
5281               break;
5282            case 0:
5283            case 3:
5284               return False;
5285            default:
5286               vassert(0);
5287         }
5288         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5289      } else {
5290         res = newTemp(Ity_I64);
5291         arg_m = newTemp(Ity_I64);
5292         arg_n = newTemp(Ity_I64);
5293         assign(arg_n, getDRegI64(nreg));
5294         switch(size) {
5295            case 1:
5296               dup = Iop_Dup16x4;
5297               get = Iop_GetElem16x4;
5298               index = mreg >> 3;
5299               mreg &= 7;
5300               break;
5301            case 2:
5302               dup = Iop_Dup32x2;
5303               get = Iop_GetElem32x2;
5304               index = mreg >> 4;
5305               mreg &= 0xf;
5306               break;
5307            case 0:
5308            case 3:
5309               return False;
5310            default:
5311               vassert(0);
5312         }
5313         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5314      }
5315      if (INSN(8,8)) {
5316         switch (size) {
5317            case 2:
5318               op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
5319               add = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
5320               sub = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
5321               break;
5322            case 0:
5323            case 1:
5324            case 3:
5325               return False;
5326            default:
5327               vassert(0);
5328         }
5329      } else {
5330         switch (size) {
5331            case 1:
5332               op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
5333               add = Q ? Iop_Add16x8 : Iop_Add16x4;
5334               sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
5335               break;
5336            case 2:
5337               op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
5338               add = Q ? Iop_Add32x4 : Iop_Add32x2;
5339               sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
5340               break;
5341            case 0:
5342            case 3:
5343               return False;
5344            default:
5345               vassert(0);
5346         }
5347      }
5348      op2 = INSN(10,10) ? sub : add;
5349      assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
5350      if (Q)
5351         putQReg(dreg, binop_w_fake_RM(op2, getQReg(dreg), mkexpr(res)),
5352                 condT);
5353      else
5354         putDRegI64(dreg, binop(op2, getDRegI64(dreg), mkexpr(res)),
5355                    condT);
5356      DIP("vml%c.%c%u %c%u, %c%u, d%u[%u]\n", INSN(10,10) ? 's' : 'a',
5357            INSN(8,8) ? 'f' : 'i', 8 << size,
5358            Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, mreg, index);
5359      return True;
5360   }
5361
5362   /* VMLAL, VMLSL (scalar)   */
5363   if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,0)) {
5364      IRTemp res, arg_m, arg_n;
5365      IROp dup, get, op, op2, add, sub;
5366      if (dreg & 1)
5367         return False;
5368      dreg >>= 1;
5369      res = newTemp(Ity_V128);
5370      arg_m = newTemp(Ity_I64);
5371      arg_n = newTemp(Ity_I64);
5372      assign(arg_n, getDRegI64(nreg));
5373      switch(size) {
5374         case 1:
5375            dup = Iop_Dup16x4;
5376            get = Iop_GetElem16x4;
5377            index = mreg >> 3;
5378            mreg &= 7;
5379            break;
5380         case 2:
5381            dup = Iop_Dup32x2;
5382            get = Iop_GetElem32x2;
5383            index = mreg >> 4;
5384            mreg &= 0xf;
5385            break;
5386         case 0:
5387         case 3:
5388            return False;
5389         default:
5390            vassert(0);
5391      }
5392      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5393      switch (size) {
5394         case 1:
5395            op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5396            add = Iop_Add32x4;
5397            sub = Iop_Sub32x4;
5398            break;
5399         case 2:
5400            op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5401            add = Iop_Add64x2;
5402            sub = Iop_Sub64x2;
5403            break;
5404         case 0:
5405         case 3:
5406            return False;
5407         default:
5408            vassert(0);
5409      }
5410      op2 = INSN(10,10) ? sub : add;
5411      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5412      putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
5413      DIP("vml%cl.%c%u q%u, d%u, d%u[%u]\n",
5414          INSN(10,10) ? 's' : 'a', U ? 'u' : 's',
5415          8 << size, dreg, nreg, mreg, index);
5416      return True;
5417   }
5418
5419   /* VQDMLAL, VQDMLSL (scalar)  */
5420   if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,1) && !U) {
5421      IRTemp res, arg_m, arg_n, tmp;
5422      IROp dup, get, op, op2, add, cmp;
5423      UInt P = INSN(10,10);
5424      ULong imm;
5425      if (dreg & 1)
5426         return False;
5427      dreg >>= 1;
5428      res = newTemp(Ity_V128);
5429      arg_m = newTemp(Ity_I64);
5430      arg_n = newTemp(Ity_I64);
5431      assign(arg_n, getDRegI64(nreg));
5432      switch(size) {
5433         case 1:
5434            dup = Iop_Dup16x4;
5435            get = Iop_GetElem16x4;
5436            index = mreg >> 3;
5437            mreg &= 7;
5438            break;
5439         case 2:
5440            dup = Iop_Dup32x2;
5441            get = Iop_GetElem32x2;
5442            index = mreg >> 4;
5443            mreg &= 0xf;
5444            break;
5445         case 0:
5446         case 3:
5447            return False;
5448         default:
5449            vassert(0);
5450      }
5451      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5452      switch (size) {
5453         case 0:
5454         case 3:
5455            return False;
5456         case 1:
5457            op = Iop_QDMulLong16Sx4;
5458            cmp = Iop_CmpEQ16x4;
5459            add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
5460            op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5461            imm = 1LL << 15;
5462            imm = (imm << 16) | imm;
5463            imm = (imm << 32) | imm;
5464            break;
5465         case 2:
5466            op = Iop_QDMulLong32Sx2;
5467            cmp = Iop_CmpEQ32x2;
5468            add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
5469            op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5470            imm = 1LL << 31;
5471            imm = (imm << 32) | imm;
5472            break;
5473         default:
5474            vassert(0);
5475      }
5476      res = newTemp(Ity_V128);
5477      tmp = newTemp(Ity_V128);
5478      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5479      assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
5480      setFlag_QC(binop(Iop_And64,
5481                       binop(cmp, mkexpr(arg_n), mkU64(imm)),
5482                       binop(cmp, mkexpr(arg_m), mkU64(imm))),
5483                 mkU64(0),
5484                 False, condT);
5485      setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
5486                 True, condT);
5487      putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
5488      DIP("vqdml%cl.s%u q%u, d%u, d%u[%u]\n", P ? 's' : 'a', 8 << size,
5489          dreg, nreg, mreg, index);
5490      return True;
5491   }
5492
5493   /* VMUL (by scalar)  */
5494   if ((INSN(11,8) & BITS4(1,1,1,0)) == BITS4(1,0,0,0)) {
5495      IRTemp res, arg_m, arg_n;
5496      IROp dup, get, op;
5497      if (Q) {
5498         if ((dreg & 1) || (nreg & 1))
5499            return False;
5500         dreg >>= 1;
5501         nreg >>= 1;
5502         res = newTemp(Ity_V128);
5503         arg_m = newTemp(Ity_V128);
5504         arg_n = newTemp(Ity_V128);
5505         assign(arg_n, getQReg(nreg));
5506         switch(size) {
5507            case 1:
5508               dup = Iop_Dup16x8;
5509               get = Iop_GetElem16x4;
5510               index = mreg >> 3;
5511               mreg &= 7;
5512               break;
5513            case 2:
5514               dup = Iop_Dup32x4;
5515               get = Iop_GetElem32x2;
5516               index = mreg >> 4;
5517               mreg &= 0xf;
5518               break;
5519            case 0:
5520            case 3:
5521               return False;
5522            default:
5523               vassert(0);
5524         }
5525         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5526      } else {
5527         res = newTemp(Ity_I64);
5528         arg_m = newTemp(Ity_I64);
5529         arg_n = newTemp(Ity_I64);
5530         assign(arg_n, getDRegI64(nreg));
5531         switch(size) {
5532            case 1:
5533               dup = Iop_Dup16x4;
5534               get = Iop_GetElem16x4;
5535               index = mreg >> 3;
5536               mreg &= 7;
5537               break;
5538            case 2:
5539               dup = Iop_Dup32x2;
5540               get = Iop_GetElem32x2;
5541               index = mreg >> 4;
5542               mreg &= 0xf;
5543               break;
5544            case 0:
5545            case 3:
5546               return False;
5547            default:
5548               vassert(0);
5549         }
5550         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5551      }
5552      if (INSN(8,8)) {
5553         switch (size) {
5554            case 2:
5555               op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
5556               break;
5557            case 0:
5558            case 1:
5559            case 3:
5560               return False;
5561            default:
5562               vassert(0);
5563         }
5564      } else {
5565         switch (size) {
5566            case 1:
5567               op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
5568               break;
5569            case 2:
5570               op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
5571               break;
5572            case 0:
5573            case 3:
5574               return False;
5575            default:
5576               vassert(0);
5577         }
5578      }
5579      assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
5580      if (Q)
5581         putQReg(dreg, mkexpr(res), condT);
5582      else
5583         putDRegI64(dreg, mkexpr(res), condT);
5584      DIP("vmul.%c%u %c%u, %c%u, d%u[%u]\n", INSN(8,8) ? 'f' : 'i',
5585          8 << size, Q ? 'q' : 'd', dreg,
5586          Q ? 'q' : 'd', nreg, mreg, index);
5587      return True;
5588   }
5589
5590   /* VMULL (scalar) */
5591   if (INSN(11,8) == BITS4(1,0,1,0)) {
5592      IRTemp res, arg_m, arg_n;
5593      IROp dup, get, op;
5594      if (dreg & 1)
5595         return False;
5596      dreg >>= 1;
5597      res = newTemp(Ity_V128);
5598      arg_m = newTemp(Ity_I64);
5599      arg_n = newTemp(Ity_I64);
5600      assign(arg_n, getDRegI64(nreg));
5601      switch(size) {
5602         case 1:
5603            dup = Iop_Dup16x4;
5604            get = Iop_GetElem16x4;
5605            index = mreg >> 3;
5606            mreg &= 7;
5607            break;
5608         case 2:
5609            dup = Iop_Dup32x2;
5610            get = Iop_GetElem32x2;
5611            index = mreg >> 4;
5612            mreg &= 0xf;
5613            break;
5614         case 0:
5615         case 3:
5616            return False;
5617         default:
5618            vassert(0);
5619      }
5620      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5621      switch (size) {
5622         case 1: op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4; break;
5623         case 2: op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2; break;
5624         case 0: case 3: return False;
5625         default: vassert(0);
5626      }
5627      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5628      putQReg(dreg, mkexpr(res), condT);
5629      DIP("vmull.%c%u q%u, d%u, d%u[%u]\n", U ? 'u' : 's', 8 << size, dreg,
5630          nreg, mreg, index);
5631      return True;
5632   }
5633
5634   /* VQDMULL */
5635   if (INSN(11,8) == BITS4(1,0,1,1) && !U) {
5636      IROp op ,op2, dup, get;
5637      ULong imm;
5638      IRTemp arg_m, arg_n;
5639      if (dreg & 1)
5640         return False;
5641      dreg >>= 1;
5642      arg_m = newTemp(Ity_I64);
5643      arg_n = newTemp(Ity_I64);
5644      assign(arg_n, getDRegI64(nreg));
5645      switch(size) {
5646         case 1:
5647            dup = Iop_Dup16x4;
5648            get = Iop_GetElem16x4;
5649            index = mreg >> 3;
5650            mreg &= 7;
5651            break;
5652         case 2:
5653            dup = Iop_Dup32x2;
5654            get = Iop_GetElem32x2;
5655            index = mreg >> 4;
5656            mreg &= 0xf;
5657            break;
5658         case 0:
5659         case 3:
5660            return False;
5661         default:
5662            vassert(0);
5663      }
5664      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5665      switch (size) {
5666         case 0:
5667         case 3:
5668            return False;
5669         case 1:
5670            op = Iop_QDMulLong16Sx4;
5671            op2 = Iop_CmpEQ16x4;
5672            imm = 1LL << 15;
5673            imm = (imm << 16) | imm;
5674            imm = (imm << 32) | imm;
5675            break;
5676         case 2:
5677            op = Iop_QDMulLong32Sx2;
5678            op2 = Iop_CmpEQ32x2;
5679            imm = 1LL << 31;
5680            imm = (imm << 32) | imm;
5681            break;
5682         default:
5683            vassert(0);
5684      }
5685      putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5686            condT);
5687      setFlag_QC(binop(Iop_And64,
5688                       binop(op2, mkexpr(arg_n), mkU64(imm)),
5689                       binop(op2, mkexpr(arg_m), mkU64(imm))),
5690                 mkU64(0),
5691                 False, condT);
5692      DIP("vqdmull.s%u q%u, d%u, d%u[%u]\n", 8 << size, dreg, nreg, mreg,
5693          index);
5694      return True;
5695   }
5696
5697   /* VQDMULH */
5698   if (INSN(11,8) == BITS4(1,1,0,0)) {
5699      IROp op ,op2, dup, get;
5700      ULong imm;
5701      IRTemp res, arg_m, arg_n;
5702      if (Q) {
5703         if ((dreg & 1) || (nreg & 1))
5704            return False;
5705         dreg >>= 1;
5706         nreg >>= 1;
5707         res = newTemp(Ity_V128);
5708         arg_m = newTemp(Ity_V128);
5709         arg_n = newTemp(Ity_V128);
5710         assign(arg_n, getQReg(nreg));
5711         switch(size) {
5712            case 1:
5713               dup = Iop_Dup16x8;
5714               get = Iop_GetElem16x4;
5715               index = mreg >> 3;
5716               mreg &= 7;
5717               break;
5718            case 2:
5719               dup = Iop_Dup32x4;
5720               get = Iop_GetElem32x2;
5721               index = mreg >> 4;
5722               mreg &= 0xf;
5723               break;
5724            case 0:
5725            case 3:
5726               return False;
5727            default:
5728               vassert(0);
5729         }
5730         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5731      } else {
5732         res = newTemp(Ity_I64);
5733         arg_m = newTemp(Ity_I64);
5734         arg_n = newTemp(Ity_I64);
5735         assign(arg_n, getDRegI64(nreg));
5736         switch(size) {
5737            case 1:
5738               dup = Iop_Dup16x4;
5739               get = Iop_GetElem16x4;
5740               index = mreg >> 3;
5741               mreg &= 7;
5742               break;
5743            case 2:
5744               dup = Iop_Dup32x2;
5745               get = Iop_GetElem32x2;
5746               index = mreg >> 4;
5747               mreg &= 0xf;
5748               break;
5749            case 0:
5750            case 3:
5751               return False;
5752            default:
5753               vassert(0);
5754         }
5755         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5756      }
5757      switch (size) {
5758         case 0:
5759         case 3:
5760            return False;
5761         case 1:
5762            op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
5763            op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
5764            imm = 1LL << 15;
5765            imm = (imm << 16) | imm;
5766            imm = (imm << 32) | imm;
5767            break;
5768         case 2:
5769            op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
5770            op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
5771            imm = 1LL << 31;
5772            imm = (imm << 32) | imm;
5773            break;
5774         default:
5775            vassert(0);
5776      }
5777      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5778      setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
5779                       binop(op2, mkexpr(arg_n),
5780                                  Q ? mkU128(imm) : mkU64(imm)),
5781                       binop(op2, mkexpr(arg_m),
5782                             Q ? mkU128(imm) : mkU64(imm))),
5783                 Q ? mkU128(0) : mkU64(0),
5784                 Q, condT);
5785      if (Q)
5786         putQReg(dreg, mkexpr(res), condT);
5787      else
5788         putDRegI64(dreg, mkexpr(res), condT);
5789      DIP("vqdmulh.s%u %c%u, %c%u, d%u[%u]\n",
5790          8 << size, Q ? 'q' : 'd', dreg,
5791          Q ? 'q' : 'd', nreg, mreg, index);
5792      return True;
5793   }
5794
5795   /* VQRDMULH (scalar) */
5796   if (INSN(11,8) == BITS4(1,1,0,1)) {
5797      IROp op ,op2, dup, get;
5798      ULong imm;
5799      IRTemp res, arg_m, arg_n;
5800      if (Q) {
5801         if ((dreg & 1) || (nreg & 1))
5802            return False;
5803         dreg >>= 1;
5804         nreg >>= 1;
5805         res = newTemp(Ity_V128);
5806         arg_m = newTemp(Ity_V128);
5807         arg_n = newTemp(Ity_V128);
5808         assign(arg_n, getQReg(nreg));
5809         switch(size) {
5810            case 1:
5811               dup = Iop_Dup16x8;
5812               get = Iop_GetElem16x4;
5813               index = mreg >> 3;
5814               mreg &= 7;
5815               break;
5816            case 2:
5817               dup = Iop_Dup32x4;
5818               get = Iop_GetElem32x2;
5819               index = mreg >> 4;
5820               mreg &= 0xf;
5821               break;
5822            case 0:
5823            case 3:
5824               return False;
5825            default:
5826               vassert(0);
5827         }
5828         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5829      } else {
5830         res = newTemp(Ity_I64);
5831         arg_m = newTemp(Ity_I64);
5832         arg_n = newTemp(Ity_I64);
5833         assign(arg_n, getDRegI64(nreg));
5834         switch(size) {
5835            case 1:
5836               dup = Iop_Dup16x4;
5837               get = Iop_GetElem16x4;
5838               index = mreg >> 3;
5839               mreg &= 7;
5840               break;
5841            case 2:
5842               dup = Iop_Dup32x2;
5843               get = Iop_GetElem32x2;
5844               index = mreg >> 4;
5845               mreg &= 0xf;
5846               break;
5847            case 0:
5848            case 3:
5849               return False;
5850            default:
5851               vassert(0);
5852         }
5853         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5854      }
5855      switch (size) {
5856         case 0:
5857         case 3:
5858            return False;
5859         case 1:
5860            op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
5861            op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
5862            imm = 1LL << 15;
5863            imm = (imm << 16) | imm;
5864            imm = (imm << 32) | imm;
5865            break;
5866         case 2:
5867            op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
5868            op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
5869            imm = 1LL << 31;
5870            imm = (imm << 32) | imm;
5871            break;
5872         default:
5873            vassert(0);
5874      }
5875      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5876      setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
5877                       binop(op2, mkexpr(arg_n),
5878                                  Q ? mkU128(imm) : mkU64(imm)),
5879                       binop(op2, mkexpr(arg_m),
5880                                  Q ? mkU128(imm) : mkU64(imm))),
5881                 Q ? mkU128(0) : mkU64(0),
5882                 Q, condT);
5883      if (Q)
5884         putQReg(dreg, mkexpr(res), condT);
5885      else
5886         putDRegI64(dreg, mkexpr(res), condT);
5887      DIP("vqrdmulh.s%u %c%u, %c%u, d%u[%u]\n",
5888          8 << size, Q ? 'q' : 'd', dreg,
5889          Q ? 'q' : 'd', nreg, mreg, index);
5890      return True;
5891   }
5892
5893   return False;
5894#  undef INSN
5895}
5896
5897/* A7.4.4 Two registers and a shift amount */
5898static
5899Bool dis_neon_data_2reg_and_shift ( UInt theInstr, IRTemp condT )
5900{
5901   UInt A = (theInstr >> 8) & 0xf;
5902   UInt B = (theInstr >> 6) & 1;
5903   UInt L = (theInstr >> 7) & 1;
5904   UInt U = (theInstr >> 24) & 1;
5905   UInt Q = B;
5906   UInt imm6 = (theInstr >> 16) & 0x3f;
5907   UInt shift_imm;
5908   UInt size = 4;
5909   UInt tmp;
5910   UInt mreg = get_neon_m_regno(theInstr);
5911   UInt dreg = get_neon_d_regno(theInstr);
5912   ULong imm = 0;
5913   IROp op, cvt, add = Iop_INVALID, cvt2, op_rev;
5914   IRTemp reg_m, res, mask;
5915
5916   if (L == 0 && ((theInstr >> 19) & 7) == 0)
5917      /* It is one reg and immediate */
5918      return False;
5919
5920   tmp = (L << 6) | imm6;
5921   if (tmp & 0x40) {
5922      size = 3;
5923      shift_imm = 64 - imm6;
5924   } else if (tmp & 0x20) {
5925      size = 2;
5926      shift_imm = 64 - imm6;
5927   } else if (tmp & 0x10) {
5928      size = 1;
5929      shift_imm = 32 - imm6;
5930   } else if (tmp & 0x8) {
5931      size = 0;
5932      shift_imm = 16 - imm6;
5933   } else {
5934      return False;
5935   }
5936
5937   switch (A) {
5938      case 3:
5939      case 2:
5940         /* VRSHR, VRSRA */
5941         if (shift_imm > 0) {
5942            IRExpr *imm_val;
5943            imm = 1L;
5944            switch (size) {
5945               case 0:
5946                  imm = (imm << 8) | imm;
5947                  /* fall through */
5948               case 1:
5949                  imm = (imm << 16) | imm;
5950                  /* fall through */
5951               case 2:
5952                  imm = (imm << 32) | imm;
5953                  /* fall through */
5954               case 3:
5955                  break;
5956               default:
5957                  vassert(0);
5958            }
5959            if (Q) {
5960               reg_m = newTemp(Ity_V128);
5961               res = newTemp(Ity_V128);
5962               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
5963               assign(reg_m, getQReg(mreg));
5964               switch (size) {
5965                  case 0:
5966                     add = Iop_Add8x16;
5967                     op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
5968                     break;
5969                  case 1:
5970                     add = Iop_Add16x8;
5971                     op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
5972                     break;
5973                  case 2:
5974                     add = Iop_Add32x4;
5975                     op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
5976                     break;
5977                  case 3:
5978                     add = Iop_Add64x2;
5979                     op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
5980                     break;
5981                  default:
5982                     vassert(0);
5983               }
5984            } else {
5985               reg_m = newTemp(Ity_I64);
5986               res = newTemp(Ity_I64);
5987               imm_val = mkU64(imm);
5988               assign(reg_m, getDRegI64(mreg));
5989               switch (size) {
5990                  case 0:
5991                     add = Iop_Add8x8;
5992                     op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
5993                     break;
5994                  case 1:
5995                     add = Iop_Add16x4;
5996                     op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
5997                     break;
5998                  case 2:
5999                     add = Iop_Add32x2;
6000                     op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
6001                     break;
6002                  case 3:
6003                     add = Iop_Add64;
6004                     op = U ? Iop_Shr64 : Iop_Sar64;
6005                     break;
6006                  default:
6007                     vassert(0);
6008               }
6009            }
6010            assign(res,
6011                   binop(add,
6012                         binop(op,
6013                               mkexpr(reg_m),
6014                               mkU8(shift_imm)),
6015                         binop(Q ? Iop_AndV128 : Iop_And64,
6016                               binop(op,
6017                                     mkexpr(reg_m),
6018                                     mkU8(shift_imm - 1)),
6019                               imm_val)));
6020         } else {
6021            if (Q) {
6022               res = newTemp(Ity_V128);
6023               assign(res, getQReg(mreg));
6024            } else {
6025               res = newTemp(Ity_I64);
6026               assign(res, getDRegI64(mreg));
6027            }
6028         }
6029         if (A == 3) {
6030            if (Q) {
6031               putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
6032                             condT);
6033            } else {
6034               putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
6035                                condT);
6036            }
6037            DIP("vrsra.%c%u %c%u, %c%u, #%u\n",
6038                U ? 'u' : 's', 8 << size,
6039                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6040         } else {
6041            if (Q) {
6042               putQReg(dreg, mkexpr(res), condT);
6043            } else {
6044               putDRegI64(dreg, mkexpr(res), condT);
6045            }
6046            DIP("vrshr.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6047                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6048         }
6049         return True;
6050      case 1:
6051      case 0:
6052         /* VSHR, VSRA */
6053         if (Q) {
6054            reg_m = newTemp(Ity_V128);
6055            assign(reg_m, getQReg(mreg));
6056            res = newTemp(Ity_V128);
6057         } else {
6058            reg_m = newTemp(Ity_I64);
6059            assign(reg_m, getDRegI64(mreg));
6060            res = newTemp(Ity_I64);
6061         }
6062         if (Q) {
6063            switch (size) {
6064               case 0:
6065                  op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
6066                  add = Iop_Add8x16;
6067                  break;
6068               case 1:
6069                  op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
6070                  add = Iop_Add16x8;
6071                  break;
6072               case 2:
6073                  op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
6074                  add = Iop_Add32x4;
6075                  break;
6076               case 3:
6077                  op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
6078                  add = Iop_Add64x2;
6079                  break;
6080               default:
6081                  vassert(0);
6082            }
6083         } else {
6084            switch (size) {
6085               case 0:
6086                  op =  U ? Iop_ShrN8x8 : Iop_SarN8x8;
6087                  add = Iop_Add8x8;
6088                  break;
6089               case 1:
6090                  op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
6091                  add = Iop_Add16x4;
6092                  break;
6093               case 2:
6094                  op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
6095                  add = Iop_Add32x2;
6096                  break;
6097               case 3:
6098                  op = U ? Iop_Shr64 : Iop_Sar64;
6099                  add = Iop_Add64;
6100                  break;
6101               default:
6102                  vassert(0);
6103            }
6104         }
6105         assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6106         if (A == 1) {
6107            if (Q) {
6108               putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
6109                             condT);
6110            } else {
6111               putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
6112                                condT);
6113            }
6114            DIP("vsra.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6115                  Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6116         } else {
6117            if (Q) {
6118               putQReg(dreg, mkexpr(res), condT);
6119            } else {
6120               putDRegI64(dreg, mkexpr(res), condT);
6121            }
6122            DIP("vshr.%c%u %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6123                  Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6124         }
6125         return True;
6126      case 4:
6127         /* VSRI */
6128         if (!U)
6129            return False;
6130         if (Q) {
6131            res = newTemp(Ity_V128);
6132            mask = newTemp(Ity_V128);
6133         } else {
6134            res = newTemp(Ity_I64);
6135            mask = newTemp(Ity_I64);
6136         }
6137         switch (size) {
6138            case 0: op = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; break;
6139            case 1: op = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; break;
6140            case 2: op = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; break;
6141            case 3: op = Q ? Iop_ShrN64x2 : Iop_Shr64; break;
6142            default: vassert(0);
6143         }
6144         if (Q) {
6145            assign(mask, binop(op, binop(Iop_64HLtoV128,
6146                                         mkU64(0xFFFFFFFFFFFFFFFFLL),
6147                                         mkU64(0xFFFFFFFFFFFFFFFFLL)),
6148                               mkU8(shift_imm)));
6149            assign(res, binop(Iop_OrV128,
6150                              binop(Iop_AndV128,
6151                                    getQReg(dreg),
6152                                    unop(Iop_NotV128,
6153                                         mkexpr(mask))),
6154                              binop(op,
6155                                    getQReg(mreg),
6156                                    mkU8(shift_imm))));
6157            putQReg(dreg, mkexpr(res), condT);
6158         } else {
6159            assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
6160                               mkU8(shift_imm)));
6161            assign(res, binop(Iop_Or64,
6162                              binop(Iop_And64,
6163                                    getDRegI64(dreg),
6164                                    unop(Iop_Not64,
6165                                         mkexpr(mask))),
6166                              binop(op,
6167                                    getDRegI64(mreg),
6168                                    mkU8(shift_imm))));
6169            putDRegI64(dreg, mkexpr(res), condT);
6170         }
6171         DIP("vsri.%u %c%u, %c%u, #%u\n",
6172             8 << size, Q ? 'q' : 'd', dreg,
6173             Q ? 'q' : 'd', mreg, shift_imm);
6174         return True;
6175      case 5:
6176         if (U) {
6177            /* VSLI */
6178            shift_imm = 8 * (1 << size) - shift_imm;
6179            if (Q) {
6180               res = newTemp(Ity_V128);
6181               mask = newTemp(Ity_V128);
6182            } else {
6183               res = newTemp(Ity_I64);
6184               mask = newTemp(Ity_I64);
6185            }
6186            switch (size) {
6187               case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
6188               case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
6189               case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
6190               case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
6191               default: vassert(0);
6192            }
6193            if (Q) {
6194               assign(mask, binop(op, binop(Iop_64HLtoV128,
6195                                            mkU64(0xFFFFFFFFFFFFFFFFLL),
6196                                            mkU64(0xFFFFFFFFFFFFFFFFLL)),
6197                                  mkU8(shift_imm)));
6198               assign(res, binop(Iop_OrV128,
6199                                 binop(Iop_AndV128,
6200                                       getQReg(dreg),
6201                                       unop(Iop_NotV128,
6202                                            mkexpr(mask))),
6203                                 binop(op,
6204                                       getQReg(mreg),
6205                                       mkU8(shift_imm))));
6206               putQReg(dreg, mkexpr(res), condT);
6207            } else {
6208               assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
6209                                  mkU8(shift_imm)));
6210               assign(res, binop(Iop_Or64,
6211                                 binop(Iop_And64,
6212                                       getDRegI64(dreg),
6213                                       unop(Iop_Not64,
6214                                            mkexpr(mask))),
6215                                 binop(op,
6216                                       getDRegI64(mreg),
6217                                       mkU8(shift_imm))));
6218               putDRegI64(dreg, mkexpr(res), condT);
6219            }
6220            DIP("vsli.%u %c%u, %c%u, #%u\n",
6221                8 << size, Q ? 'q' : 'd', dreg,
6222                Q ? 'q' : 'd', mreg, shift_imm);
6223            return True;
6224         } else {
6225            /* VSHL #imm */
6226            shift_imm = 8 * (1 << size) - shift_imm;
6227            if (Q) {
6228               res = newTemp(Ity_V128);
6229            } else {
6230               res = newTemp(Ity_I64);
6231            }
6232            switch (size) {
6233               case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
6234               case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
6235               case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
6236               case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
6237               default: vassert(0);
6238            }
6239            assign(res, binop(op, Q ? getQReg(mreg) : getDRegI64(mreg),
6240                     mkU8(shift_imm)));
6241            if (Q) {
6242               putQReg(dreg, mkexpr(res), condT);
6243            } else {
6244               putDRegI64(dreg, mkexpr(res), condT);
6245            }
6246            DIP("vshl.i%u %c%u, %c%u, #%u\n",
6247                8 << size, Q ? 'q' : 'd', dreg,
6248                Q ? 'q' : 'd', mreg, shift_imm);
6249            return True;
6250         }
6251         break;
6252      case 6:
6253      case 7:
6254         /* VQSHL, VQSHLU */
6255         shift_imm = 8 * (1 << size) - shift_imm;
6256         if (U) {
6257            if (A & 1) {
6258               switch (size) {
6259                  case 0:
6260                     op = Q ? Iop_QShlN8x16 : Iop_QShlN8x8;
6261                     op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
6262                     break;
6263                  case 1:
6264                     op = Q ? Iop_QShlN16x8 : Iop_QShlN16x4;
6265                     op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
6266                     break;
6267                  case 2:
6268                     op = Q ? Iop_QShlN32x4 : Iop_QShlN32x2;
6269                     op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
6270                     break;
6271                  case 3:
6272                     op = Q ? Iop_QShlN64x2 : Iop_QShlN64x1;
6273                     op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
6274                     break;
6275                  default:
6276                     vassert(0);
6277               }
6278               DIP("vqshl.u%u %c%u, %c%u, #%u\n",
6279                   8 << size,
6280                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6281            } else {
6282               switch (size) {
6283                  case 0:
6284                     op = Q ? Iop_QShlN8Sx16 : Iop_QShlN8Sx8;
6285                     op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
6286                     break;
6287                  case 1:
6288                     op = Q ? Iop_QShlN16Sx8 : Iop_QShlN16Sx4;
6289                     op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
6290                     break;
6291                  case 2:
6292                     op = Q ? Iop_QShlN32Sx4 : Iop_QShlN32Sx2;
6293                     op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
6294                     break;
6295                  case 3:
6296                     op = Q ? Iop_QShlN64Sx2 : Iop_QShlN64Sx1;
6297                     op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
6298                     break;
6299                  default:
6300                     vassert(0);
6301               }
6302               DIP("vqshlu.s%u %c%u, %c%u, #%u\n",
6303                   8 << size,
6304                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6305            }
6306         } else {
6307            if (!(A & 1))
6308               return False;
6309            switch (size) {
6310               case 0:
6311                  op = Q ? Iop_QSalN8x16 : Iop_QSalN8x8;
6312                  op_rev = Q ? Iop_SarN8x16 : Iop_SarN8x8;
6313                  break;
6314               case 1:
6315                  op = Q ? Iop_QSalN16x8 : Iop_QSalN16x4;
6316                  op_rev = Q ? Iop_SarN16x8 : Iop_SarN16x4;
6317                  break;
6318               case 2:
6319                  op = Q ? Iop_QSalN32x4 : Iop_QSalN32x2;
6320                  op_rev = Q ? Iop_SarN32x4 : Iop_SarN32x2;
6321                  break;
6322               case 3:
6323                  op = Q ? Iop_QSalN64x2 : Iop_QSalN64x1;
6324                  op_rev = Q ? Iop_SarN64x2 : Iop_Sar64;
6325                  break;
6326               default:
6327                  vassert(0);
6328            }
6329            DIP("vqshl.s%u %c%u, %c%u, #%u\n",
6330                8 << size,
6331                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6332         }
6333         if (Q) {
6334            tmp = newTemp(Ity_V128);
6335            res = newTemp(Ity_V128);
6336            reg_m = newTemp(Ity_V128);
6337            assign(reg_m, getQReg(mreg));
6338         } else {
6339            tmp = newTemp(Ity_I64);
6340            res = newTemp(Ity_I64);
6341            reg_m = newTemp(Ity_I64);
6342            assign(reg_m, getDRegI64(mreg));
6343         }
6344         assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6345         assign(tmp, binop(op_rev, mkexpr(res), mkU8(shift_imm)));
6346         setFlag_QC(mkexpr(tmp), mkexpr(reg_m), Q, condT);
6347         if (Q)
6348            putQReg(dreg, mkexpr(res), condT);
6349         else
6350            putDRegI64(dreg, mkexpr(res), condT);
6351         return True;
6352      case 8:
6353         if (!U) {
6354            if (L == 1)
6355               return False;
6356            size++;
6357            dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
6358            mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
6359            if (mreg & 1)
6360               return False;
6361            mreg >>= 1;
6362            if (!B) {
6363               /* VSHRN*/
6364               IROp narOp;
6365               reg_m = newTemp(Ity_V128);
6366               assign(reg_m, getQReg(mreg));
6367               res = newTemp(Ity_I64);
6368               switch (size) {
6369                  case 1:
6370                     op = Iop_ShrN16x8;
6371                     narOp = Iop_NarrowUn16to8x8;
6372                     break;
6373                  case 2:
6374                     op = Iop_ShrN32x4;
6375                     narOp = Iop_NarrowUn32to16x4;
6376                     break;
6377                  case 3:
6378                     op = Iop_ShrN64x2;
6379                     narOp = Iop_NarrowUn64to32x2;
6380                     break;
6381                  default:
6382                     vassert(0);
6383               }
6384               assign(res, unop(narOp,
6385                                binop(op,
6386                                      mkexpr(reg_m),
6387                                      mkU8(shift_imm))));
6388               putDRegI64(dreg, mkexpr(res), condT);
6389               DIP("vshrn.i%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6390                   shift_imm);
6391               return True;
6392            } else {
6393               /* VRSHRN   */
6394               IROp addOp, shOp, narOp;
6395               IRExpr *imm_val;
6396               reg_m = newTemp(Ity_V128);
6397               assign(reg_m, getQReg(mreg));
6398               res = newTemp(Ity_I64);
6399               imm = 1L;
6400               switch (size) {
6401                  case 0: imm = (imm <<  8) | imm; /* fall through */
6402                  case 1: imm = (imm << 16) | imm; /* fall through */
6403                  case 2: imm = (imm << 32) | imm; /* fall through */
6404                  case 3: break;
6405                  default: vassert(0);
6406               }
6407               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
6408               switch (size) {
6409                  case 1:
6410                     addOp = Iop_Add16x8;
6411                     shOp = Iop_ShrN16x8;
6412                     narOp = Iop_NarrowUn16to8x8;
6413                     break;
6414                  case 2:
6415                     addOp = Iop_Add32x4;
6416                     shOp = Iop_ShrN32x4;
6417                     narOp = Iop_NarrowUn32to16x4;
6418                     break;
6419                  case 3:
6420                     addOp = Iop_Add64x2;
6421                     shOp = Iop_ShrN64x2;
6422                     narOp = Iop_NarrowUn64to32x2;
6423                     break;
6424                  default:
6425                     vassert(0);
6426               }
6427               assign(res, unop(narOp,
6428                                binop(addOp,
6429                                      binop(shOp,
6430                                            mkexpr(reg_m),
6431                                            mkU8(shift_imm)),
6432                                      binop(Iop_AndV128,
6433                                            binop(shOp,
6434                                                  mkexpr(reg_m),
6435                                                  mkU8(shift_imm - 1)),
6436                                            imm_val))));
6437               putDRegI64(dreg, mkexpr(res), condT);
6438               if (shift_imm == 0) {
6439                  DIP("vmov%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6440                      shift_imm);
6441               } else {
6442                  DIP("vrshrn.i%u d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6443                      shift_imm);
6444               }
6445               return True;
6446            }
6447         } else {
6448            /* fall through */
6449         }
6450      case 9:
6451         dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
6452         mreg = ((theInstr >>  1) & 0x10) | (theInstr & 0xF);
6453         if (mreg & 1)
6454            return False;
6455         mreg >>= 1;
6456         size++;
6457         if ((theInstr >> 8) & 1) {
6458            switch (size) {
6459               case 1:
6460                  op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
6461                  cvt = U ? Iop_QNarrowUn16Uto8Ux8 : Iop_QNarrowUn16Sto8Sx8;
6462                  cvt2 = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
6463                  break;
6464               case 2:
6465                  op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
6466                  cvt = U ? Iop_QNarrowUn32Uto16Ux4 : Iop_QNarrowUn32Sto16Sx4;
6467                  cvt2 = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
6468                  break;
6469               case 3:
6470                  op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
6471                  cvt = U ? Iop_QNarrowUn64Uto32Ux2 : Iop_QNarrowUn64Sto32Sx2;
6472                  cvt2 = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
6473                  break;
6474               default:
6475                  vassert(0);
6476            }
6477            DIP("vq%sshrn.%c%u d%u, q%u, #%u\n", B ? "r" : "",
6478                U ? 'u' : 's', 8 << size, dreg, mreg, shift_imm);
6479         } else {
6480            vassert(U);
6481            switch (size) {
6482               case 1:
6483                  op = Iop_SarN16x8;
6484                  cvt = Iop_QNarrowUn16Sto8Ux8;
6485                  cvt2 = Iop_Widen8Uto16x8;
6486                  break;
6487               case 2:
6488                  op = Iop_SarN32x4;
6489                  cvt = Iop_QNarrowUn32Sto16Ux4;
6490                  cvt2 = Iop_Widen16Uto32x4;
6491                  break;
6492               case 3:
6493                  op = Iop_SarN64x2;
6494                  cvt = Iop_QNarrowUn64Sto32Ux2;
6495                  cvt2 = Iop_Widen32Uto64x2;
6496                  break;
6497               default:
6498                  vassert(0);
6499            }
6500            DIP("vq%sshrun.s%u d%u, q%u, #%u\n", B ? "r" : "",
6501                8 << size, dreg, mreg, shift_imm);
6502         }
6503         if (B) {
6504            if (shift_imm > 0) {
6505               imm = 1;
6506               switch (size) {
6507                  case 1: imm = (imm << 16) | imm; /* fall through */
6508                  case 2: imm = (imm << 32) | imm; /* fall through */
6509                  case 3: break;
6510                  case 0: default: vassert(0);
6511               }
6512               switch (size) {
6513                  case 1: add = Iop_Add16x8; break;
6514                  case 2: add = Iop_Add32x4; break;
6515                  case 3: add = Iop_Add64x2; break;
6516                  case 0: default: vassert(0);
6517               }
6518            }
6519         }
6520         reg_m = newTemp(Ity_V128);
6521         res = newTemp(Ity_V128);
6522         assign(reg_m, getQReg(mreg));
6523         if (B) {
6524            /* VQRSHRN, VQRSHRUN */
6525            assign(res, binop(add,
6526                              binop(op, mkexpr(reg_m), mkU8(shift_imm)),
6527                              binop(Iop_AndV128,
6528                                    binop(op,
6529                                          mkexpr(reg_m),
6530                                          mkU8(shift_imm - 1)),
6531                                    mkU128(imm))));
6532         } else {
6533            /* VQSHRN, VQSHRUN */
6534            assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6535         }
6536         setFlag_QC(unop(cvt2, unop(cvt, mkexpr(res))), mkexpr(res),
6537                    True, condT);
6538         putDRegI64(dreg, unop(cvt, mkexpr(res)), condT);
6539         return True;
6540      case 10:
6541         /* VSHLL
6542            VMOVL ::= VSHLL #0 */
6543         if (B)
6544            return False;
6545         if (dreg & 1)
6546            return False;
6547         dreg >>= 1;
6548         shift_imm = (8 << size) - shift_imm;
6549         res = newTemp(Ity_V128);
6550         switch (size) {
6551            case 0:
6552               op = Iop_ShlN16x8;
6553               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
6554               break;
6555            case 1:
6556               op = Iop_ShlN32x4;
6557               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
6558               break;
6559            case 2:
6560               op = Iop_ShlN64x2;
6561               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
6562               break;
6563            case 3:
6564               return False;
6565            default:
6566               vassert(0);
6567         }
6568         assign(res, binop(op, unop(cvt, getDRegI64(mreg)), mkU8(shift_imm)));
6569         putQReg(dreg, mkexpr(res), condT);
6570         if (shift_imm == 0) {
6571            DIP("vmovl.%c%u q%u, d%u\n", U ? 'u' : 's', 8 << size,
6572                dreg, mreg);
6573         } else {
6574            DIP("vshll.%c%u q%u, d%u, #%u\n", U ? 'u' : 's', 8 << size,
6575                dreg, mreg, shift_imm);
6576         }
6577         return True;
6578      case 14:
6579      case 15:
6580         /* VCVT floating-point <-> fixed-point */
6581         if ((theInstr >> 8) & 1) {
6582            if (U) {
6583               op = Q ? Iop_F32ToFixed32Ux4_RZ : Iop_F32ToFixed32Ux2_RZ;
6584            } else {
6585               op = Q ? Iop_F32ToFixed32Sx4_RZ : Iop_F32ToFixed32Sx2_RZ;
6586            }
6587            DIP("vcvt.%c32.f32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
6588                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
6589                64 - ((theInstr >> 16) & 0x3f));
6590         } else {
6591            if (U) {
6592               op = Q ? Iop_Fixed32UToF32x4_RN : Iop_Fixed32UToF32x2_RN;
6593            } else {
6594               op = Q ? Iop_Fixed32SToF32x4_RN : Iop_Fixed32SToF32x2_RN;
6595            }
6596            DIP("vcvt.f32.%c32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
6597                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
6598                64 - ((theInstr >> 16) & 0x3f));
6599         }
6600         if (((theInstr >> 21) & 1) == 0)
6601            return False;
6602         if (Q) {
6603            putQReg(dreg, binop(op, getQReg(mreg),
6604                     mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
6605         } else {
6606            putDRegI64(dreg, binop(op, getDRegI64(mreg),
6607                       mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
6608         }
6609         return True;
6610      default:
6611         return False;
6612
6613   }
6614   return False;
6615}
6616
6617/* A7.4.5 Two registers, miscellaneous */
6618static
6619Bool dis_neon_data_2reg_misc ( UInt theInstr, IRTemp condT )
6620{
6621   UInt A = (theInstr >> 16) & 3;
6622   UInt B = (theInstr >> 6) & 0x1f;
6623   UInt Q = (theInstr >> 6) & 1;
6624   UInt U = (theInstr >> 24) & 1;
6625   UInt size = (theInstr >> 18) & 3;
6626   UInt dreg = get_neon_d_regno(theInstr);
6627   UInt mreg = get_neon_m_regno(theInstr);
6628   UInt F = (theInstr >> 10) & 1;
6629   IRTemp arg_d = IRTemp_INVALID;
6630   IRTemp arg_m = IRTemp_INVALID;
6631   IRTemp res = IRTemp_INVALID;
6632   switch (A) {
6633      case 0:
6634         if (Q) {
6635            arg_m = newTemp(Ity_V128);
6636            res = newTemp(Ity_V128);
6637            assign(arg_m, getQReg(mreg));
6638         } else {
6639            arg_m = newTemp(Ity_I64);
6640            res = newTemp(Ity_I64);
6641            assign(arg_m, getDRegI64(mreg));
6642         }
6643         switch (B >> 1) {
6644            case 0: {
6645               /* VREV64 */
6646               IROp op;
6647               switch (size) {
6648                  case 0:
6649                     op = Q ? Iop_Reverse64_8x16 : Iop_Reverse64_8x8;
6650                     break;
6651                  case 1:
6652                     op = Q ? Iop_Reverse64_16x8 : Iop_Reverse64_16x4;
6653                     break;
6654                  case 2:
6655                     op = Q ? Iop_Reverse64_32x4 : Iop_Reverse64_32x2;
6656                     break;
6657                  case 3:
6658                     return False;
6659                  default:
6660                     vassert(0);
6661               }
6662               assign(res, unop(op, mkexpr(arg_m)));
6663               DIP("vrev64.%u %c%u, %c%u\n", 8 << size,
6664                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6665               break;
6666            }
6667            case 1: {
6668               /* VREV32 */
6669               IROp op;
6670               switch (size) {
6671                  case 0:
6672                     op = Q ? Iop_Reverse32_8x16 : Iop_Reverse32_8x8;
6673                     break;
6674                  case 1:
6675                     op = Q ? Iop_Reverse32_16x8 : Iop_Reverse32_16x4;
6676                     break;
6677                  case 2:
6678                  case 3:
6679                     return False;
6680                  default:
6681                     vassert(0);
6682               }
6683               assign(res, unop(op, mkexpr(arg_m)));
6684               DIP("vrev32.%u %c%u, %c%u\n", 8 << size,
6685                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6686               break;
6687            }
6688            case 2: {
6689               /* VREV16 */
6690               IROp op;
6691               switch (size) {
6692                  case 0:
6693                     op = Q ? Iop_Reverse16_8x16 : Iop_Reverse16_8x8;
6694                     break;
6695                  case 1:
6696                  case 2:
6697                  case 3:
6698                     return False;
6699                  default:
6700                     vassert(0);
6701               }
6702               assign(res, unop(op, mkexpr(arg_m)));
6703               DIP("vrev16.%u %c%u, %c%u\n", 8 << size,
6704                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6705               break;
6706            }
6707            case 3:
6708               return False;
6709            case 4:
6710            case 5: {
6711               /* VPADDL */
6712               IROp op;
6713               U = (theInstr >> 7) & 1;
6714               if (Q) {
6715                  switch (size) {
6716                     case 0: op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16; break;
6717                     case 1: op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8; break;
6718                     case 2: op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4; break;
6719                     case 3: return False;
6720                     default: vassert(0);
6721                  }
6722               } else {
6723                  switch (size) {
6724                     case 0: op = U ? Iop_PwAddL8Ux8  : Iop_PwAddL8Sx8;  break;
6725                     case 1: op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4; break;
6726                     case 2: op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2; break;
6727                     case 3: return False;
6728                     default: vassert(0);
6729                  }
6730               }
6731               assign(res, unop(op, mkexpr(arg_m)));
6732               DIP("vpaddl.%c%u %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
6733                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6734               break;
6735            }
6736            case 6:
6737            case 7:
6738               return False;
6739            case 8: {
6740               /* VCLS */
6741               IROp op;
6742               switch (size) {
6743                  case 0: op = Q ? Iop_Cls8Sx16 : Iop_Cls8Sx8; break;
6744                  case 1: op = Q ? Iop_Cls16Sx8 : Iop_Cls16Sx4; break;
6745                  case 2: op = Q ? Iop_Cls32Sx4 : Iop_Cls32Sx2; break;
6746                  case 3: return False;
6747                  default: vassert(0);
6748               }
6749               assign(res, unop(op, mkexpr(arg_m)));
6750               DIP("vcls.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6751                   Q ? 'q' : 'd', mreg);
6752               break;
6753            }
6754            case 9: {
6755               /* VCLZ */
6756               IROp op;
6757               switch (size) {
6758                  case 0: op = Q ? Iop_Clz8Sx16 : Iop_Clz8Sx8; break;
6759                  case 1: op = Q ? Iop_Clz16Sx8 : Iop_Clz16Sx4; break;
6760                  case 2: op = Q ? Iop_Clz32Sx4 : Iop_Clz32Sx2; break;
6761                  case 3: return False;
6762                  default: vassert(0);
6763               }
6764               assign(res, unop(op, mkexpr(arg_m)));
6765               DIP("vclz.i%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6766                   Q ? 'q' : 'd', mreg);
6767               break;
6768            }
6769            case 10:
6770               /* VCNT */
6771               assign(res, unop(Q ? Iop_Cnt8x16 : Iop_Cnt8x8, mkexpr(arg_m)));
6772               DIP("vcnt.8 %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
6773                   mreg);
6774               break;
6775            case 11:
6776               /* VMVN */
6777               if (Q)
6778                  assign(res, unop(Iop_NotV128, mkexpr(arg_m)));
6779               else
6780                  assign(res, unop(Iop_Not64, mkexpr(arg_m)));
6781               DIP("vmvn %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
6782                   mreg);
6783               break;
6784            case 12:
6785            case 13: {
6786               /* VPADAL */
6787               IROp op, add_op;
6788               U = (theInstr >> 7) & 1;
6789               if (Q) {
6790                  switch (size) {
6791                     case 0:
6792                        op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16;
6793                        add_op = Iop_Add16x8;
6794                        break;
6795                     case 1:
6796                        op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8;
6797                        add_op = Iop_Add32x4;
6798                        break;
6799                     case 2:
6800                        op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4;
6801                        add_op = Iop_Add64x2;
6802                        break;
6803                     case 3:
6804                        return False;
6805                     default:
6806                        vassert(0);
6807                  }
6808               } else {
6809                  switch (size) {
6810                     case 0:
6811                        op = U ? Iop_PwAddL8Ux8 : Iop_PwAddL8Sx8;
6812                        add_op = Iop_Add16x4;
6813                        break;
6814                     case 1:
6815                        op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4;
6816                        add_op = Iop_Add32x2;
6817                        break;
6818                     case 2:
6819                        op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2;
6820                        add_op = Iop_Add64;
6821                        break;
6822                     case 3:
6823                        return False;
6824                     default:
6825                        vassert(0);
6826                  }
6827               }
6828               if (Q) {
6829                  arg_d = newTemp(Ity_V128);
6830                  assign(arg_d, getQReg(dreg));
6831               } else {
6832                  arg_d = newTemp(Ity_I64);
6833                  assign(arg_d, getDRegI64(dreg));
6834               }
6835               assign(res, binop(add_op, unop(op, mkexpr(arg_m)),
6836                                         mkexpr(arg_d)));
6837               DIP("vpadal.%c%u %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
6838                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6839               break;
6840            }
6841            case 14: {
6842               /* VQABS */
6843               IROp op_sub, op_qsub, op_cmp;
6844               IRTemp mask, tmp;
6845               IRExpr *zero1, *zero2;
6846               IRExpr *neg, *neg2;
6847               if (Q) {
6848                  zero1 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6849                  zero2 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6850                  mask = newTemp(Ity_V128);
6851                  tmp = newTemp(Ity_V128);
6852               } else {
6853                  zero1 = mkU64(0);
6854                  zero2 = mkU64(0);
6855                  mask = newTemp(Ity_I64);
6856                  tmp = newTemp(Ity_I64);
6857               }
6858               switch (size) {
6859                  case 0:
6860                     op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
6861                     op_qsub = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
6862                     op_cmp = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
6863                     break;
6864                  case 1:
6865                     op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
6866                     op_qsub = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
6867                     op_cmp = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4;
6868                     break;
6869                  case 2:
6870                     op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
6871                     op_qsub = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
6872                     op_cmp = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2;
6873                     break;
6874                  case 3:
6875                     return False;
6876                  default:
6877                     vassert(0);
6878               }
6879               assign(mask, binop(op_cmp, mkexpr(arg_m), zero1));
6880               neg = binop(op_qsub, zero2, mkexpr(arg_m));
6881               neg2 = binop(op_sub, zero2, mkexpr(arg_m));
6882               assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
6883                                 binop(Q ? Iop_AndV128 : Iop_And64,
6884                                       mkexpr(mask),
6885                                       mkexpr(arg_m)),
6886                                 binop(Q ? Iop_AndV128 : Iop_And64,
6887                                       unop(Q ? Iop_NotV128 : Iop_Not64,
6888                                            mkexpr(mask)),
6889                                       neg)));
6890               assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
6891                                 binop(Q ? Iop_AndV128 : Iop_And64,
6892                                       mkexpr(mask),
6893                                       mkexpr(arg_m)),
6894                                 binop(Q ? Iop_AndV128 : Iop_And64,
6895                                       unop(Q ? Iop_NotV128 : Iop_Not64,
6896                                            mkexpr(mask)),
6897                                       neg2)));
6898               setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
6899               DIP("vqabs.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6900                   Q ? 'q' : 'd', mreg);
6901               break;
6902            }
6903            case 15: {
6904               /* VQNEG */
6905               IROp op, op2;
6906               IRExpr *zero;
6907               if (Q) {
6908                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6909               } else {
6910                  zero = mkU64(0);
6911               }
6912               switch (size) {
6913                  case 0:
6914                     op = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
6915                     op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
6916                     break;
6917                  case 1:
6918                     op = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
6919                     op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
6920                     break;
6921                  case 2:
6922                     op = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
6923                     op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
6924                     break;
6925                  case 3:
6926                     return False;
6927                  default:
6928                     vassert(0);
6929               }
6930               assign(res, binop(op, zero, mkexpr(arg_m)));
6931               setFlag_QC(mkexpr(res), binop(op2, zero, mkexpr(arg_m)),
6932                          Q, condT);
6933               DIP("vqneg.s%u %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6934                   Q ? 'q' : 'd', mreg);
6935               break;
6936            }
6937            default:
6938               vassert(0);
6939         }
6940         if (Q) {
6941            putQReg(dreg, mkexpr(res), condT);
6942         } else {
6943            putDRegI64(dreg, mkexpr(res), condT);
6944         }
6945         return True;
6946      case 1:
6947         if (Q) {
6948            arg_m = newTemp(Ity_V128);
6949            res = newTemp(Ity_V128);
6950            assign(arg_m, getQReg(mreg));
6951         } else {
6952            arg_m = newTemp(Ity_I64);
6953            res = newTemp(Ity_I64);
6954            assign(arg_m, getDRegI64(mreg));
6955         }
6956         switch ((B >> 1) & 0x7) {
6957            case 0: {
6958               /* VCGT #0 */
6959               IRExpr *zero;
6960               IROp op;
6961               if (Q) {
6962                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6963               } else {
6964                  zero = mkU64(0);
6965               }
6966               if (F) {
6967                  switch (size) {
6968                     case 0: case 1: case 3: return False;
6969                     case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
6970                     default: vassert(0);
6971                  }
6972               } else {
6973                  switch (size) {
6974                     case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
6975                     case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
6976                     case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
6977                     case 3: return False;
6978                     default: vassert(0);
6979                  }
6980               }
6981               assign(res, binop(op, mkexpr(arg_m), zero));
6982               DIP("vcgt.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
6983                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6984               break;
6985            }
6986            case 1: {
6987               /* VCGE #0 */
6988               IROp op;
6989               IRExpr *zero;
6990               if (Q) {
6991                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6992               } else {
6993                  zero = mkU64(0);
6994               }
6995               if (F) {
6996                  switch (size) {
6997                     case 0: case 1: case 3: return False;
6998                     case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
6999                     default: vassert(0);
7000                  }
7001                  assign(res, binop(op, mkexpr(arg_m), zero));
7002               } else {
7003                  switch (size) {
7004                     case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7005                     case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7006                     case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7007                     case 3: return False;
7008                     default: vassert(0);
7009                  }
7010                  assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7011                                   binop(op, zero, mkexpr(arg_m))));
7012               }
7013               DIP("vcge.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7014                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7015               break;
7016            }
7017            case 2: {
7018               /* VCEQ #0 */
7019               IROp op;
7020               IRExpr *zero;
7021               if (F) {
7022                  if (Q) {
7023                     zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7024                  } else {
7025                     zero = mkU64(0);
7026                  }
7027                  switch (size) {
7028                     case 0: case 1: case 3: return False;
7029                     case 2: op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2; break;
7030                     default: vassert(0);
7031                  }
7032                  assign(res, binop(op, zero, mkexpr(arg_m)));
7033               } else {
7034                  switch (size) {
7035                     case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
7036                     case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
7037                     case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
7038                     case 3: return False;
7039                     default: vassert(0);
7040                  }
7041                  assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7042                                   unop(op, mkexpr(arg_m))));
7043               }
7044               DIP("vceq.%c%u %c%u, %c%u, #0\n", F ? 'f' : 'i', 8 << size,
7045                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7046               break;
7047            }
7048            case 3: {
7049               /* VCLE #0 */
7050               IRExpr *zero;
7051               IROp op;
7052               if (Q) {
7053                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7054               } else {
7055                  zero = mkU64(0);
7056               }
7057               if (F) {
7058                  switch (size) {
7059                     case 0: case 1: case 3: return False;
7060                     case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
7061                     default: vassert(0);
7062                  }
7063                  assign(res, binop(op, zero, mkexpr(arg_m)));
7064               } else {
7065                  switch (size) {
7066                     case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7067                     case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7068                     case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7069                     case 3: return False;
7070                     default: vassert(0);
7071                  }
7072                  assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7073                                   binop(op, mkexpr(arg_m), zero)));
7074               }
7075               DIP("vcle.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7076                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7077               break;
7078            }
7079            case 4: {
7080               /* VCLT #0 */
7081               IROp op;
7082               IRExpr *zero;
7083               if (Q) {
7084                  zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7085               } else {
7086                  zero = mkU64(0);
7087               }
7088               if (F) {
7089                  switch (size) {
7090                     case 0: case 1: case 3: return False;
7091                     case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
7092                     default: vassert(0);
7093                  }
7094                  assign(res, binop(op, zero, mkexpr(arg_m)));
7095               } else {
7096                  switch (size) {
7097                     case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7098                     case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7099                     case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7100                     case 3: return False;
7101                     default: vassert(0);
7102                  }
7103                  assign(res, binop(op, zero, mkexpr(arg_m)));
7104               }
7105               DIP("vclt.%c%u %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7106                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7107               break;
7108            }
7109            case 5:
7110               return False;
7111            case 6: {
7112               /* VABS */
7113               if (!F) {
7114                  IROp op;
7115                  switch(size) {
7116                     case 0: op = Q ? Iop_Abs8x16 : Iop_Abs8x8; break;
7117                     case 1: op = Q ? Iop_Abs16x8 : Iop_Abs16x4; break;
7118                     case 2: op = Q ? Iop_Abs32x4 : Iop_Abs32x2; break;
7119                     case 3: return False;
7120                     default: vassert(0);
7121                  }
7122                  assign(res, unop(op, mkexpr(arg_m)));
7123               } else {
7124                  assign(res, unop(Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2,
7125                                   mkexpr(arg_m)));
7126               }
7127               DIP("vabs.%c%u %c%u, %c%u\n",
7128                   F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
7129                   Q ? 'q' : 'd', mreg);
7130               break;
7131            }
7132            case 7: {
7133               /* VNEG */
7134               IROp op;
7135               IRExpr *zero;
7136               if (F) {
7137                  switch (size) {
7138                     case 0: case 1: case 3: return False;
7139                     case 2: op = Q ? Iop_Neg32Fx4 : Iop_Neg32Fx2; break;
7140                     default: vassert(0);
7141                  }
7142                  assign(res, unop(op, mkexpr(arg_m)));
7143               } else {
7144                  if (Q) {
7145                     zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7146                  } else {
7147                     zero = mkU64(0);
7148                  }
7149                  switch (size) {
7150                     case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
7151                     case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
7152                     case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
7153                     case 3: return False;
7154                     default: vassert(0);
7155                  }
7156                  assign(res, binop(op, zero, mkexpr(arg_m)));
7157               }
7158               DIP("vneg.%c%u %c%u, %c%u\n",
7159                   F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
7160                   Q ? 'q' : 'd', mreg);
7161               break;
7162            }
7163            default:
7164               vassert(0);
7165         }
7166         if (Q) {
7167            putQReg(dreg, mkexpr(res), condT);
7168         } else {
7169            putDRegI64(dreg, mkexpr(res), condT);
7170         }
7171         return True;
7172      case 2:
7173         if ((B >> 1) == 0) {
7174            /* VSWP */
7175            if (Q) {
7176               arg_m = newTemp(Ity_V128);
7177               assign(arg_m, getQReg(mreg));
7178               putQReg(mreg, getQReg(dreg), condT);
7179               putQReg(dreg, mkexpr(arg_m), condT);
7180            } else {
7181               arg_m = newTemp(Ity_I64);
7182               assign(arg_m, getDRegI64(mreg));
7183               putDRegI64(mreg, getDRegI64(dreg), condT);
7184               putDRegI64(dreg, mkexpr(arg_m), condT);
7185            }
7186            DIP("vswp %c%u, %c%u\n",
7187                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7188            return True;
7189         } else if ((B >> 1) == 1) {
7190            /* VTRN */
7191            IROp op_odd = Iop_INVALID, op_even = Iop_INVALID;
7192            IRTemp old_m, old_d, new_d, new_m;
7193            if (Q) {
7194               old_m = newTemp(Ity_V128);
7195               old_d = newTemp(Ity_V128);
7196               new_m = newTemp(Ity_V128);
7197               new_d = newTemp(Ity_V128);
7198               assign(old_m, getQReg(mreg));
7199               assign(old_d, getQReg(dreg));
7200            } else {
7201               old_m = newTemp(Ity_I64);
7202               old_d = newTemp(Ity_I64);
7203               new_m = newTemp(Ity_I64);
7204               new_d = newTemp(Ity_I64);
7205               assign(old_m, getDRegI64(mreg));
7206               assign(old_d, getDRegI64(dreg));
7207            }
7208            if (Q) {
7209               switch (size) {
7210                  case 0:
7211                     op_odd  = Iop_InterleaveOddLanes8x16;
7212                     op_even = Iop_InterleaveEvenLanes8x16;
7213                     break;
7214                  case 1:
7215                     op_odd  = Iop_InterleaveOddLanes16x8;
7216                     op_even = Iop_InterleaveEvenLanes16x8;
7217                     break;
7218                  case 2:
7219                     op_odd  = Iop_InterleaveOddLanes32x4;
7220                     op_even = Iop_InterleaveEvenLanes32x4;
7221                     break;
7222                  case 3:
7223                     return False;
7224                  default:
7225                     vassert(0);
7226               }
7227            } else {
7228               switch (size) {
7229                  case 0:
7230                     op_odd  = Iop_InterleaveOddLanes8x8;
7231                     op_even = Iop_InterleaveEvenLanes8x8;
7232                     break;
7233                  case 1:
7234                     op_odd  = Iop_InterleaveOddLanes16x4;
7235                     op_even = Iop_InterleaveEvenLanes16x4;
7236                     break;
7237                  case 2:
7238                     op_odd  = Iop_InterleaveHI32x2;
7239                     op_even = Iop_InterleaveLO32x2;
7240                     break;
7241                  case 3:
7242                     return False;
7243                  default:
7244                     vassert(0);
7245               }
7246            }
7247            assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
7248            assign(new_m, binop(op_odd, mkexpr(old_m), mkexpr(old_d)));
7249            if (Q) {
7250               putQReg(dreg, mkexpr(new_d), condT);
7251               putQReg(mreg, mkexpr(new_m), condT);
7252            } else {
7253               putDRegI64(dreg, mkexpr(new_d), condT);
7254               putDRegI64(mreg, mkexpr(new_m), condT);
7255            }
7256            DIP("vtrn.%u %c%u, %c%u\n",
7257                8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7258            return True;
7259         } else if ((B >> 1) == 2) {
7260            /* VUZP */
7261            IROp op_even, op_odd;
7262            IRTemp old_m, old_d, new_m, new_d;
7263            if (!Q && size == 2)
7264               return False;
7265            if (Q) {
7266               old_m = newTemp(Ity_V128);
7267               old_d = newTemp(Ity_V128);
7268               new_m = newTemp(Ity_V128);
7269               new_d = newTemp(Ity_V128);
7270               assign(old_m, getQReg(mreg));
7271               assign(old_d, getQReg(dreg));
7272            } else {
7273               old_m = newTemp(Ity_I64);
7274               old_d = newTemp(Ity_I64);
7275               new_m = newTemp(Ity_I64);
7276               new_d = newTemp(Ity_I64);
7277               assign(old_m, getDRegI64(mreg));
7278               assign(old_d, getDRegI64(dreg));
7279            }
7280            switch (size) {
7281               case 0:
7282                  op_odd  = Q ? Iop_CatOddLanes8x16 : Iop_CatOddLanes8x8;
7283                  op_even = Q ? Iop_CatEvenLanes8x16 : Iop_CatEvenLanes8x8;
7284                  break;
7285               case 1:
7286                  op_odd  = Q ? Iop_CatOddLanes16x8 : Iop_CatOddLanes16x4;
7287                  op_even = Q ? Iop_CatEvenLanes16x8 : Iop_CatEvenLanes16x4;
7288                  break;
7289               case 2:
7290                  op_odd  = Iop_CatOddLanes32x4;
7291                  op_even = Iop_CatEvenLanes32x4;
7292                  break;
7293               case 3:
7294                  return False;
7295               default:
7296                  vassert(0);
7297            }
7298            assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
7299            assign(new_m, binop(op_odd,  mkexpr(old_m), mkexpr(old_d)));
7300            if (Q) {
7301               putQReg(dreg, mkexpr(new_d), condT);
7302               putQReg(mreg, mkexpr(new_m), condT);
7303            } else {
7304               putDRegI64(dreg, mkexpr(new_d), condT);
7305               putDRegI64(mreg, mkexpr(new_m), condT);
7306            }
7307            DIP("vuzp.%u %c%u, %c%u\n",
7308                8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7309            return True;
7310         } else if ((B >> 1) == 3) {
7311            /* VZIP */
7312            IROp op_lo, op_hi;
7313            IRTemp old_m, old_d, new_m, new_d;
7314            if (!Q && size == 2)
7315               return False;
7316            if (Q) {
7317               old_m = newTemp(Ity_V128);
7318               old_d = newTemp(Ity_V128);
7319               new_m = newTemp(Ity_V128);
7320               new_d = newTemp(Ity_V128);
7321               assign(old_m, getQReg(mreg));
7322               assign(old_d, getQReg(dreg));
7323            } else {
7324               old_m = newTemp(Ity_I64);
7325               old_d = newTemp(Ity_I64);
7326               new_m = newTemp(Ity_I64);
7327               new_d = newTemp(Ity_I64);
7328               assign(old_m, getDRegI64(mreg));
7329               assign(old_d, getDRegI64(dreg));
7330            }
7331            switch (size) {
7332               case 0:
7333                  op_hi = Q ? Iop_InterleaveHI8x16 : Iop_InterleaveHI8x8;
7334                  op_lo = Q ? Iop_InterleaveLO8x16 : Iop_InterleaveLO8x8;
7335                  break;
7336               case 1:
7337                  op_hi = Q ? Iop_InterleaveHI16x8 : Iop_InterleaveHI16x4;
7338                  op_lo = Q ? Iop_InterleaveLO16x8 : Iop_InterleaveLO16x4;
7339                  break;
7340               case 2:
7341                  op_hi = Iop_InterleaveHI32x4;
7342                  op_lo = Iop_InterleaveLO32x4;
7343                  break;
7344               case 3:
7345                  return False;
7346               default:
7347                  vassert(0);
7348            }
7349            assign(new_d, binop(op_lo, mkexpr(old_m), mkexpr(old_d)));
7350            assign(new_m, binop(op_hi, mkexpr(old_m), mkexpr(old_d)));
7351            if (Q) {
7352               putQReg(dreg, mkexpr(new_d), condT);
7353               putQReg(mreg, mkexpr(new_m), condT);
7354            } else {
7355               putDRegI64(dreg, mkexpr(new_d), condT);
7356               putDRegI64(mreg, mkexpr(new_m), condT);
7357            }
7358            DIP("vzip.%u %c%u, %c%u\n",
7359                8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7360            return True;
7361         } else if (B == 8) {
7362            /* VMOVN */
7363            IROp op;
7364            mreg >>= 1;
7365            switch (size) {
7366               case 0: op = Iop_NarrowUn16to8x8;  break;
7367               case 1: op = Iop_NarrowUn32to16x4; break;
7368               case 2: op = Iop_NarrowUn64to32x2; break;
7369               case 3: return False;
7370               default: vassert(0);
7371            }
7372            putDRegI64(dreg, unop(op, getQReg(mreg)), condT);
7373            DIP("vmovn.i%u d%u, q%u\n", 16 << size, dreg, mreg);
7374            return True;
7375         } else if (B == 9 || (B >> 1) == 5) {
7376            /* VQMOVN, VQMOVUN */
7377            IROp op, op2;
7378            IRTemp tmp;
7379            dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
7380            mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
7381            if (mreg & 1)
7382               return False;
7383            mreg >>= 1;
7384            switch (size) {
7385               case 0: op2 = Iop_NarrowUn16to8x8;  break;
7386               case 1: op2 = Iop_NarrowUn32to16x4; break;
7387               case 2: op2 = Iop_NarrowUn64to32x2; break;
7388               case 3: return False;
7389               default: vassert(0);
7390            }
7391            switch (B & 3) {
7392               case 0:
7393                  vassert(0);
7394               case 1:
7395                  switch (size) {
7396                     case 0: op = Iop_QNarrowUn16Sto8Ux8;  break;
7397                     case 1: op = Iop_QNarrowUn32Sto16Ux4; break;
7398                     case 2: op = Iop_QNarrowUn64Sto32Ux2; break;
7399                     case 3: return False;
7400                     default: vassert(0);
7401                  }
7402                  DIP("vqmovun.s%u d%u, q%u\n", 16 << size, dreg, mreg);
7403                  break;
7404               case 2:
7405                  switch (size) {
7406                     case 0: op = Iop_QNarrowUn16Sto8Sx8;  break;
7407                     case 1: op = Iop_QNarrowUn32Sto16Sx4; break;
7408                     case 2: op = Iop_QNarrowUn64Sto32Sx2; break;
7409                     case 3: return False;
7410                     default: vassert(0);
7411                  }
7412                  DIP("vqmovn.s%u d%u, q%u\n", 16 << size, dreg, mreg);
7413                  break;
7414               case 3:
7415                  switch (size) {
7416                     case 0: op = Iop_QNarrowUn16Uto8Ux8;  break;
7417                     case 1: op = Iop_QNarrowUn32Uto16Ux4; break;
7418                     case 2: op = Iop_QNarrowUn64Uto32Ux2; break;
7419                     case 3: return False;
7420                     default: vassert(0);
7421                  }
7422                  DIP("vqmovn.u%u d%u, q%u\n", 16 << size, dreg, mreg);
7423                  break;
7424               default:
7425                  vassert(0);
7426            }
7427            res = newTemp(Ity_I64);
7428            tmp = newTemp(Ity_I64);
7429            assign(res, unop(op, getQReg(mreg)));
7430            assign(tmp, unop(op2, getQReg(mreg)));
7431            setFlag_QC(mkexpr(res), mkexpr(tmp), False, condT);
7432            putDRegI64(dreg, mkexpr(res), condT);
7433            return True;
7434         } else if (B == 12) {
7435            /* VSHLL (maximum shift) */
7436            IROp op, cvt;
7437            UInt shift_imm;
7438            if (Q)
7439               return False;
7440            if (dreg & 1)
7441               return False;
7442            dreg >>= 1;
7443            shift_imm = 8 << size;
7444            res = newTemp(Ity_V128);
7445            switch (size) {
7446               case 0: op = Iop_ShlN16x8; cvt = Iop_Widen8Uto16x8;  break;
7447               case 1: op = Iop_ShlN32x4; cvt = Iop_Widen16Uto32x4; break;
7448               case 2: op = Iop_ShlN64x2; cvt = Iop_Widen32Uto64x2; break;
7449               case 3: return False;
7450               default: vassert(0);
7451            }
7452            assign(res, binop(op, unop(cvt, getDRegI64(mreg)),
7453                                  mkU8(shift_imm)));
7454            putQReg(dreg, mkexpr(res), condT);
7455            DIP("vshll.i%u q%u, d%u, #%u\n", 8 << size, dreg, mreg, 8 << size);
7456            return True;
7457         } else if ((B >> 3) == 3 && (B & 3) == 0) {
7458            /* VCVT (half<->single) */
7459            /* Half-precision extensions are needed to run this */
7460            vassert(0); // ATC
7461            if (((theInstr >> 18) & 3) != 1)
7462               return False;
7463            if ((theInstr >> 8) & 1) {
7464               if (dreg & 1)
7465                  return False;
7466               dreg >>= 1;
7467               putQReg(dreg, unop(Iop_F16toF32x4, getDRegI64(mreg)),
7468                     condT);
7469               DIP("vcvt.f32.f16 q%u, d%u\n", dreg, mreg);
7470            } else {
7471               if (mreg & 1)
7472                  return False;
7473               mreg >>= 1;
7474               putDRegI64(dreg, unop(Iop_F32toF16x4, getQReg(mreg)),
7475                                condT);
7476               DIP("vcvt.f16.f32 d%u, q%u\n", dreg, mreg);
7477            }
7478            return True;
7479         } else {
7480            return False;
7481         }
7482         vassert(0);
7483         return True;
7484      case 3:
7485         if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,0)) {
7486            /* VRECPE */
7487            IROp op;
7488            F = (theInstr >> 8) & 1;
7489            if (size != 2)
7490               return False;
7491            if (Q) {
7492               op = F ? Iop_Recip32Fx4 : Iop_Recip32x4;
7493               putQReg(dreg, unop(op, getQReg(mreg)), condT);
7494               DIP("vrecpe.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
7495            } else {
7496               op = F ? Iop_Recip32Fx2 : Iop_Recip32x2;
7497               putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7498               DIP("vrecpe.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
7499            }
7500            return True;
7501         } else if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,1)) {
7502            /* VRSQRTE */
7503            IROp op;
7504            F = (B >> 2) & 1;
7505            if (size != 2)
7506               return False;
7507            if (F) {
7508               /* fp */
7509               op = Q ? Iop_Rsqrte32Fx4 : Iop_Rsqrte32Fx2;
7510            } else {
7511               /* unsigned int */
7512               op = Q ? Iop_Rsqrte32x4 : Iop_Rsqrte32x2;
7513            }
7514            if (Q) {
7515               putQReg(dreg, unop(op, getQReg(mreg)), condT);
7516               DIP("vrsqrte.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
7517            } else {
7518               putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7519               DIP("vrsqrte.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
7520            }
7521            return True;
7522         } else if ((B >> 3) == 3) {
7523            /* VCVT (fp<->integer) */
7524            IROp op;
7525            if (size != 2)
7526               return False;
7527            switch ((B >> 1) & 3) {
7528               case 0:
7529                  op = Q ? Iop_I32StoFx4 : Iop_I32StoFx2;
7530                  DIP("vcvt.f32.s32 %c%u, %c%u\n",
7531                      Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7532                  break;
7533               case 1:
7534                  op = Q ? Iop_I32UtoFx4 : Iop_I32UtoFx2;
7535                  DIP("vcvt.f32.u32 %c%u, %c%u\n",
7536                      Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7537                  break;
7538               case 2:
7539                  op = Q ? Iop_FtoI32Sx4_RZ : Iop_FtoI32Sx2_RZ;
7540                  DIP("vcvt.s32.f32 %c%u, %c%u\n",
7541                      Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7542                  break;
7543               case 3:
7544                  op = Q ? Iop_FtoI32Ux4_RZ : Iop_FtoI32Ux2_RZ;
7545                  DIP("vcvt.u32.f32 %c%u, %c%u\n",
7546                      Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7547                  break;
7548               default:
7549                  vassert(0);
7550            }
7551            if (Q) {
7552               putQReg(dreg, unop(op, getQReg(mreg)), condT);
7553            } else {
7554               putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7555            }
7556            return True;
7557         } else {
7558            return False;
7559         }
7560         vassert(0);
7561         return True;
7562      default:
7563         vassert(0);
7564   }
7565   return False;
7566}
7567
7568/* A7.4.6 One register and a modified immediate value */
7569static
7570void ppNeonImm(UInt imm, UInt cmode, UInt op)
7571{
7572   int i;
7573   switch (cmode) {
7574      case 0: case 1: case 8: case 9:
7575         vex_printf("0x%x", imm);
7576         break;
7577      case 2: case 3: case 10: case 11:
7578         vex_printf("0x%x00", imm);
7579         break;
7580      case 4: case 5:
7581         vex_printf("0x%x0000", imm);
7582         break;
7583      case 6: case 7:
7584         vex_printf("0x%x000000", imm);
7585         break;
7586      case 12:
7587         vex_printf("0x%xff", imm);
7588         break;
7589      case 13:
7590         vex_printf("0x%xffff", imm);
7591         break;
7592      case 14:
7593         if (op) {
7594            vex_printf("0x");
7595            for (i = 7; i >= 0; i--)
7596               vex_printf("%s", (imm & (1 << i)) ? "ff" : "00");
7597         } else {
7598            vex_printf("0x%x", imm);
7599         }
7600         break;
7601      case 15:
7602         vex_printf("0x%x", imm);
7603         break;
7604   }
7605}
7606
7607static
7608const char *ppNeonImmType(UInt cmode, UInt op)
7609{
7610   switch (cmode) {
7611      case 0 ... 7:
7612      case 12: case 13:
7613         return "i32";
7614      case 8 ... 11:
7615         return "i16";
7616      case 14:
7617         if (op)
7618            return "i64";
7619         else
7620            return "i8";
7621      case 15:
7622         if (op)
7623            vassert(0);
7624         else
7625            return "f32";
7626      default:
7627         vassert(0);
7628   }
7629}
7630
7631static
7632void DIPimm(UInt imm, UInt cmode, UInt op,
7633            const char *instr, UInt Q, UInt dreg)
7634{
7635   if (vex_traceflags & VEX_TRACE_FE) {
7636      vex_printf("%s.%s %c%u, #", instr,
7637                 ppNeonImmType(cmode, op), Q ? 'q' : 'd', dreg);
7638      ppNeonImm(imm, cmode, op);
7639      vex_printf("\n");
7640   }
7641}
7642
7643static
7644Bool dis_neon_data_1reg_and_imm ( UInt theInstr, IRTemp condT )
7645{
7646   UInt dreg = get_neon_d_regno(theInstr);
7647   ULong imm_raw = ((theInstr >> 17) & 0x80) | ((theInstr >> 12) & 0x70) |
7648                  (theInstr & 0xf);
7649   ULong imm_raw_pp = imm_raw;
7650   UInt cmode = (theInstr >> 8) & 0xf;
7651   UInt op_bit = (theInstr >> 5) & 1;
7652   ULong imm = 0;
7653   UInt Q = (theInstr >> 6) & 1;
7654   int i, j;
7655   UInt tmp;
7656   IRExpr *imm_val;
7657   IRExpr *expr;
7658   IRTemp tmp_var;
7659   switch(cmode) {
7660      case 7: case 6:
7661         imm_raw = imm_raw << 8;
7662         /* fallthrough */
7663      case 5: case 4:
7664         imm_raw = imm_raw << 8;
7665         /* fallthrough */
7666      case 3: case 2:
7667         imm_raw = imm_raw << 8;
7668         /* fallthrough */
7669      case 0: case 1:
7670         imm = (imm_raw << 32) | imm_raw;
7671         break;
7672      case 11: case 10:
7673         imm_raw = imm_raw << 8;
7674         /* fallthrough */
7675      case 9: case 8:
7676         imm_raw = (imm_raw << 16) | imm_raw;
7677         imm = (imm_raw << 32) | imm_raw;
7678         break;
7679      case 13:
7680         imm_raw = (imm_raw << 8) | 0xff;
7681         /* fallthrough */
7682      case 12:
7683         imm_raw = (imm_raw << 8) | 0xff;
7684         imm = (imm_raw << 32) | imm_raw;
7685         break;
7686      case 14:
7687         if (! op_bit) {
7688            for(i = 0; i < 8; i++) {
7689               imm = (imm << 8) | imm_raw;
7690            }
7691         } else {
7692            for(i = 7; i >= 0; i--) {
7693               tmp = 0;
7694               for(j = 0; j < 8; j++) {
7695                  tmp = (tmp << 1) | ((imm_raw >> i) & 1);
7696               }
7697               imm = (imm << 8) | tmp;
7698            }
7699         }
7700         break;
7701      case 15:
7702         imm = (imm_raw & 0x80) << 5;
7703         imm |= ((~imm_raw & 0x40) << 5);
7704         for(i = 1; i <= 4; i++)
7705            imm |= (imm_raw & 0x40) << i;
7706         imm |= (imm_raw & 0x7f);
7707         imm = imm << 19;
7708         imm = (imm << 32) | imm;
7709         break;
7710      default:
7711         return False;
7712   }
7713   if (Q) {
7714      imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
7715   } else {
7716      imm_val = mkU64(imm);
7717   }
7718   if (((op_bit == 0) &&
7719      (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 12) == 12))) ||
7720      ((op_bit == 1) && (cmode == 14))) {
7721      /* VMOV (immediate) */
7722      if (Q) {
7723         putQReg(dreg, imm_val, condT);
7724      } else {
7725         putDRegI64(dreg, imm_val, condT);
7726      }
7727      DIPimm(imm_raw_pp, cmode, op_bit, "vmov", Q, dreg);
7728      return True;
7729   }
7730   if ((op_bit == 1) &&
7731      (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 14) == 12))) {
7732      /* VMVN (immediate) */
7733      if (Q) {
7734         putQReg(dreg, unop(Iop_NotV128, imm_val), condT);
7735      } else {
7736         putDRegI64(dreg, unop(Iop_Not64, imm_val), condT);
7737      }
7738      DIPimm(imm_raw_pp, cmode, op_bit, "vmvn", Q, dreg);
7739      return True;
7740   }
7741   if (Q) {
7742      tmp_var = newTemp(Ity_V128);
7743      assign(tmp_var, getQReg(dreg));
7744   } else {
7745      tmp_var = newTemp(Ity_I64);
7746      assign(tmp_var, getDRegI64(dreg));
7747   }
7748   if ((op_bit == 0) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
7749      /* VORR (immediate) */
7750      if (Q)
7751         expr = binop(Iop_OrV128, mkexpr(tmp_var), imm_val);
7752      else
7753         expr = binop(Iop_Or64, mkexpr(tmp_var), imm_val);
7754      DIPimm(imm_raw_pp, cmode, op_bit, "vorr", Q, dreg);
7755   } else if ((op_bit == 1) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
7756      /* VBIC (immediate) */
7757      if (Q)
7758         expr = binop(Iop_AndV128, mkexpr(tmp_var),
7759                                   unop(Iop_NotV128, imm_val));
7760      else
7761         expr = binop(Iop_And64, mkexpr(tmp_var), unop(Iop_Not64, imm_val));
7762      DIPimm(imm_raw_pp, cmode, op_bit, "vbic", Q, dreg);
7763   } else {
7764      return False;
7765   }
7766   if (Q)
7767      putQReg(dreg, expr, condT);
7768   else
7769      putDRegI64(dreg, expr, condT);
7770   return True;
7771}
7772
7773/* A7.4 Advanced SIMD data-processing instructions */
7774static
7775Bool dis_neon_data_processing ( UInt theInstr, IRTemp condT )
7776{
7777   UInt A = (theInstr >> 19) & 0x1F;
7778   UInt B = (theInstr >>  8) & 0xF;
7779   UInt C = (theInstr >>  4) & 0xF;
7780   UInt U = (theInstr >> 24) & 0x1;
7781
7782   if (! (A & 0x10)) {
7783      return dis_neon_data_3same(theInstr, condT);
7784   }
7785   if (((A & 0x17) == 0x10) && ((C & 0x9) == 0x1)) {
7786      return dis_neon_data_1reg_and_imm(theInstr, condT);
7787   }
7788   if ((C & 1) == 1) {
7789      return dis_neon_data_2reg_and_shift(theInstr, condT);
7790   }
7791   if (((C & 5) == 0) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
7792      return dis_neon_data_3diff(theInstr, condT);
7793   }
7794   if (((C & 5) == 4) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
7795      return dis_neon_data_2reg_and_scalar(theInstr, condT);
7796   }
7797   if ((A & 0x16) == 0x16) {
7798      if ((U == 0) && ((C & 1) == 0)) {
7799         return dis_neon_vext(theInstr, condT);
7800      }
7801      if ((U != 1) || ((C & 1) == 1))
7802         return False;
7803      if ((B & 8) == 0) {
7804         return dis_neon_data_2reg_misc(theInstr, condT);
7805      }
7806      if ((B & 12) == 8) {
7807         return dis_neon_vtb(theInstr, condT);
7808      }
7809      if ((B == 12) && ((C & 9) == 0)) {
7810         return dis_neon_vdup(theInstr, condT);
7811      }
7812   }
7813   return False;
7814}
7815
7816
7817/*------------------------------------------------------------*/
7818/*--- NEON loads and stores                                ---*/
7819/*------------------------------------------------------------*/
7820
7821/* For NEON memory operations, we use the standard scheme to handle
7822   conditionalisation: generate a jump around the instruction if the
7823   condition is false.  That's only necessary in Thumb mode, however,
7824   since in ARM mode NEON instructions are unconditional. */
7825
7826/* A helper function for what follows.  It assumes we already went
7827   uncond as per comments at the top of this section. */
7828static
7829void mk_neon_elem_load_to_one_lane( UInt rD, UInt inc, UInt index,
7830                                    UInt N, UInt size, IRTemp addr )
7831{
7832   UInt i;
7833   switch (size) {
7834      case 0:
7835         putDRegI64(rD, triop(Iop_SetElem8x8, getDRegI64(rD), mkU8(index),
7836                    loadLE(Ity_I8, mkexpr(addr))), IRTemp_INVALID);
7837         break;
7838      case 1:
7839         putDRegI64(rD, triop(Iop_SetElem16x4, getDRegI64(rD), mkU8(index),
7840                    loadLE(Ity_I16, mkexpr(addr))), IRTemp_INVALID);
7841         break;
7842      case 2:
7843         putDRegI64(rD, triop(Iop_SetElem32x2, getDRegI64(rD), mkU8(index),
7844                    loadLE(Ity_I32, mkexpr(addr))), IRTemp_INVALID);
7845         break;
7846      default:
7847         vassert(0);
7848   }
7849   for (i = 1; i <= N; i++) {
7850      switch (size) {
7851         case 0:
7852            putDRegI64(rD + i * inc,
7853                       triop(Iop_SetElem8x8,
7854                             getDRegI64(rD + i * inc),
7855                             mkU8(index),
7856                             loadLE(Ity_I8, binop(Iop_Add32,
7857                                                  mkexpr(addr),
7858                                                  mkU32(i * 1)))),
7859                       IRTemp_INVALID);
7860            break;
7861         case 1:
7862            putDRegI64(rD + i * inc,
7863                       triop(Iop_SetElem16x4,
7864                             getDRegI64(rD + i * inc),
7865                             mkU8(index),
7866                             loadLE(Ity_I16, binop(Iop_Add32,
7867                                                   mkexpr(addr),
7868                                                   mkU32(i * 2)))),
7869                       IRTemp_INVALID);
7870            break;
7871         case 2:
7872            putDRegI64(rD + i * inc,
7873                       triop(Iop_SetElem32x2,
7874                             getDRegI64(rD + i * inc),
7875                             mkU8(index),
7876                             loadLE(Ity_I32, binop(Iop_Add32,
7877                                                   mkexpr(addr),
7878                                                   mkU32(i * 4)))),
7879                       IRTemp_INVALID);
7880            break;
7881         default:
7882            vassert(0);
7883      }
7884   }
7885}
7886
7887/* A(nother) helper function for what follows.  It assumes we already
7888   went uncond as per comments at the top of this section. */
7889static
7890void mk_neon_elem_store_from_one_lane( UInt rD, UInt inc, UInt index,
7891                                       UInt N, UInt size, IRTemp addr )
7892{
7893   UInt i;
7894   switch (size) {
7895      case 0:
7896         storeLE(mkexpr(addr),
7897                 binop(Iop_GetElem8x8, getDRegI64(rD), mkU8(index)));
7898         break;
7899      case 1:
7900         storeLE(mkexpr(addr),
7901                 binop(Iop_GetElem16x4, getDRegI64(rD), mkU8(index)));
7902         break;
7903      case 2:
7904         storeLE(mkexpr(addr),
7905                 binop(Iop_GetElem32x2, getDRegI64(rD), mkU8(index)));
7906         break;
7907      default:
7908         vassert(0);
7909   }
7910   for (i = 1; i <= N; i++) {
7911      switch (size) {
7912         case 0:
7913            storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 1)),
7914                    binop(Iop_GetElem8x8, getDRegI64(rD + i * inc),
7915                                          mkU8(index)));
7916            break;
7917         case 1:
7918            storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 2)),
7919                    binop(Iop_GetElem16x4, getDRegI64(rD + i * inc),
7920                                           mkU8(index)));
7921            break;
7922         case 2:
7923            storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 4)),
7924                    binop(Iop_GetElem32x2, getDRegI64(rD + i * inc),
7925                                           mkU8(index)));
7926            break;
7927         default:
7928            vassert(0);
7929      }
7930   }
7931}
7932
7933/* Generate 2x64 -> 2x64 deinterleave code, for VLD2.  Caller must
7934   make *u0 and *u1 be valid IRTemps before the call. */
7935static void math_DEINTERLEAVE_2 (/*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
7936                                 IRTemp i0, IRTemp i1, Int laneszB)
7937{
7938   /* The following assumes that the guest is little endian, and hence
7939      that the memory-side (interleaved) data is stored
7940      little-endianly. */
7941   vassert(u0 && u1);
7942   /* This is pretty easy, since we have primitives directly to
7943      hand. */
7944   if (laneszB == 4) {
7945      // memLE(128 bits) == A0 B0 A1 B1
7946      // i0 == B0 A0, i1 == B1 A1
7947      // u0 == A1 A0, u1 == B1 B0
7948      assign(*u0, binop(Iop_InterleaveLO32x2, mkexpr(i1), mkexpr(i0)));
7949      assign(*u1, binop(Iop_InterleaveHI32x2, mkexpr(i1), mkexpr(i0)));
7950   } else if (laneszB == 2) {
7951      // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
7952      // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
7953      // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
7954      assign(*u0, binop(Iop_CatEvenLanes16x4, mkexpr(i1), mkexpr(i0)));
7955      assign(*u1, binop(Iop_CatOddLanes16x4,  mkexpr(i1), mkexpr(i0)));
7956   } else if (laneszB == 1) {
7957      // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
7958      // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
7959      // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
7960      assign(*u0, binop(Iop_CatEvenLanes8x8, mkexpr(i1), mkexpr(i0)));
7961      assign(*u1, binop(Iop_CatOddLanes8x8,  mkexpr(i1), mkexpr(i0)));
7962   } else {
7963      // Can never happen, since VLD2 only has valid lane widths of 32,
7964      // 16 or 8 bits.
7965      vpanic("math_DEINTERLEAVE_2");
7966   }
7967}
7968
7969/* Generate 2x64 -> 2x64 interleave code, for VST2.  Caller must make
7970   *u0 and *u1 be valid IRTemps before the call. */
7971static void math_INTERLEAVE_2 (/*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
7972                               IRTemp u0, IRTemp u1, Int laneszB)
7973{
7974   /* The following assumes that the guest is little endian, and hence
7975      that the memory-side (interleaved) data is stored
7976      little-endianly. */
7977   vassert(i0 && *i1);
7978   /* This is pretty easy, since we have primitives directly to
7979      hand. */
7980   if (laneszB == 4) {
7981      // memLE(128 bits) == A0 B0 A1 B1
7982      // i0 == B0 A0, i1 == B1 A1
7983      // u0 == A1 A0, u1 == B1 B0
7984      assign(*i0, binop(Iop_InterleaveLO32x2, mkexpr(u1), mkexpr(u0)));
7985      assign(*i1, binop(Iop_InterleaveHI32x2, mkexpr(u1), mkexpr(u0)));
7986   } else if (laneszB == 2) {
7987      // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
7988      // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
7989      // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
7990      assign(*i0, binop(Iop_InterleaveLO16x4, mkexpr(u1), mkexpr(u0)));
7991      assign(*i1, binop(Iop_InterleaveHI16x4, mkexpr(u1), mkexpr(u0)));
7992   } else if (laneszB == 1) {
7993      // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
7994      // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
7995      // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
7996      assign(*i0, binop(Iop_InterleaveLO8x8, mkexpr(u1), mkexpr(u0)));
7997      assign(*i1, binop(Iop_InterleaveHI8x8, mkexpr(u1), mkexpr(u0)));
7998   } else {
7999      // Can never happen, since VST2 only has valid lane widths of 32,
8000      // 16 or 8 bits.
8001      vpanic("math_INTERLEAVE_2");
8002   }
8003}
8004
8005// Helper function for generating arbitrary slicing 'n' dicing of
8006// 3 8x8 vectors, as needed for VLD3.8 and VST3.8.
8007static IRExpr* math_PERM_8x8x3(const UChar* desc,
8008                               IRTemp s0, IRTemp s1, IRTemp s2)
8009{
8010   // desc is an array of 8 pairs, encoded as 16 bytes,
8011   // that describe how to assemble the result lanes, starting with
8012   // lane 7.  Each pair is: first component (0..2) says which of
8013   // s0/s1/s2 to use.  Second component (0..7) is the lane number
8014   // in the source to use.
8015   UInt si;
8016   for (si = 0; si < 7; si++) {
8017      vassert(desc[2 * si + 0] <= 2);
8018      vassert(desc[2 * si + 1] <= 7);
8019   }
8020   IRTemp h3 = newTemp(Ity_I64);
8021   IRTemp h2 = newTemp(Ity_I64);
8022   IRTemp h1 = newTemp(Ity_I64);
8023   IRTemp h0 = newTemp(Ity_I64);
8024   IRTemp srcs[3] = {s0, s1, s2};
8025#  define SRC_VEC(_lane)   mkexpr(srcs[desc[2 * (7-(_lane)) + 0]])
8026#  define SRC_SHIFT(_lane) mkU8(56-8*(desc[2 * (7-(_lane)) + 1]))
8027   assign(h3, binop(Iop_InterleaveHI8x8,
8028                    binop(Iop_Shl64, SRC_VEC(7), SRC_SHIFT(7)),
8029                    binop(Iop_Shl64, SRC_VEC(6), SRC_SHIFT(6))));
8030   assign(h2, binop(Iop_InterleaveHI8x8,
8031                    binop(Iop_Shl64, SRC_VEC(5), SRC_SHIFT(5)),
8032                    binop(Iop_Shl64, SRC_VEC(4), SRC_SHIFT(4))));
8033   assign(h1, binop(Iop_InterleaveHI8x8,
8034                    binop(Iop_Shl64, SRC_VEC(3), SRC_SHIFT(3)),
8035                    binop(Iop_Shl64, SRC_VEC(2), SRC_SHIFT(2))));
8036   assign(h0, binop(Iop_InterleaveHI8x8,
8037                    binop(Iop_Shl64, SRC_VEC(1), SRC_SHIFT(1)),
8038                    binop(Iop_Shl64, SRC_VEC(0), SRC_SHIFT(0))));
8039#  undef SRC_VEC
8040#  undef SRC_SHIFT
8041   // Now h3..h0 are 64 bit vectors with useful information only
8042   // in the top 16 bits.  We now concatentate those four 16-bit
8043   // groups so as to produce the final result.
8044   IRTemp w1 = newTemp(Ity_I64);
8045   IRTemp w0 = newTemp(Ity_I64);
8046   assign(w1, binop(Iop_InterleaveHI16x4, mkexpr(h3), mkexpr(h2)));
8047   assign(w0, binop(Iop_InterleaveHI16x4, mkexpr(h1), mkexpr(h0)));
8048   return binop(Iop_InterleaveHI32x2, mkexpr(w1), mkexpr(w0));
8049}
8050
8051/* Generate 3x64 -> 3x64 deinterleave code, for VLD3.  Caller must
8052   make *u0, *u1 and *u2 be valid IRTemps before the call. */
8053static void math_DEINTERLEAVE_3 (
8054               /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1, /*OUT*/IRTemp* u2,
8055               IRTemp i0, IRTemp i1, IRTemp i2, Int laneszB
8056            )
8057{
8058#  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
8059#  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
8060#  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8061   /* The following assumes that the guest is little endian, and hence
8062      that the memory-side (interleaved) data is stored
8063      little-endianly. */
8064   vassert(u0 && u1 && u2);
8065   if (laneszB == 4) {
8066      // memLE(192 bits) == A0 B0 C0 A1 B1 C1
8067      // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
8068      // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
8069      assign(*u0, IHI32x2(SHL64(i1,  0), SHL64(i0, 32)));
8070      assign(*u1, IHI32x2(SHL64(i2, 32), SHL64(i0,  0)));
8071      assign(*u2, IHI32x2(SHL64(i2,  0), SHL64(i1, 32)));
8072   } else if (laneszB == 2) {
8073      // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
8074      // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
8075      // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
8076#     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
8077                IHI32x2(                                      \
8078                   IHI16x4(SHL64((_tmp3),48-16*(_la3)),       \
8079                           SHL64((_tmp2),48-16*(_la2))),      \
8080                   IHI16x4(SHL64((_tmp1),48-16*(_la1)),       \
8081                           SHL64((_tmp0),48-16*(_la0))))
8082      assign(*u0, XXX(i2,1, i1,2, i0,3, i0,0));
8083      assign(*u1, XXX(i2,2, i1,3, i1,0, i0,1));
8084      assign(*u2, XXX(i2,3, i2,0, i1,1, i0,2));
8085#     undef XXX
8086   } else if (laneszB == 1) {
8087      // These describe how the result vectors [7..0] are
8088      // assembled from the source vectors.  Each pair is
8089      // (source vector number, lane number).
8090      static const UChar de0[16] = {2,5, 2,2, 1,7, 1,4, 1,1, 0,6, 0,3, 0,0};
8091      static const UChar de1[16] = {2,6, 2,3, 2,0, 1,5, 1,2, 0,7, 0,4, 0,1};
8092      static const UChar de2[16] = {2,7, 2,4, 2,1, 1,6, 1,3, 1,0, 0,5, 0,2};
8093      assign(*u0, math_PERM_8x8x3(de0, i0, i1, i2));
8094      assign(*u1, math_PERM_8x8x3(de1, i0, i1, i2));
8095      assign(*u2, math_PERM_8x8x3(de2, i0, i1, i2));
8096   } else {
8097      // Can never happen, since VLD3 only has valid lane widths of 32,
8098      // 16 or 8 bits.
8099      vpanic("math_DEINTERLEAVE_3");
8100   }
8101#  undef SHL64
8102#  undef IHI16x4
8103#  undef IHI32x2
8104}
8105
8106/* Generate 3x64 -> 3x64 interleave code, for VST3.  Caller must
8107   make *i0, *i1 and *i2 be valid IRTemps before the call. */
8108static void math_INTERLEAVE_3 (
8109               /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1, /*OUT*/IRTemp* i2,
8110               IRTemp u0, IRTemp u1, IRTemp u2, Int laneszB
8111            )
8112{
8113#  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
8114#  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
8115#  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8116   /* The following assumes that the guest is little endian, and hence
8117      that the memory-side (interleaved) data is stored
8118      little-endianly. */
8119   vassert(i0 && i1 && i2);
8120   if (laneszB == 4) {
8121      // memLE(192 bits) == A0 B0 C0 A1 B1 C1
8122      // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
8123      // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
8124      assign(*i0, IHI32x2(SHL64(u1, 32), SHL64(u0, 32)));
8125      assign(*i1, IHI32x2(SHL64(u0,  0), SHL64(u2, 32)));
8126      assign(*i2, IHI32x2(SHL64(u2,  0), SHL64(u1,  0)));
8127   } else if (laneszB == 2) {
8128      // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
8129      // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
8130      // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
8131#     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
8132                IHI32x2(                                      \
8133                   IHI16x4(SHL64((_tmp3),48-16*(_la3)),       \
8134                           SHL64((_tmp2),48-16*(_la2))),      \
8135                   IHI16x4(SHL64((_tmp1),48-16*(_la1)),       \
8136                           SHL64((_tmp0),48-16*(_la0))))
8137      assign(*i0, XXX(u0,1, u2,0, u1,0, u0,0));
8138      assign(*i1, XXX(u1,2, u0,2, u2,1, u1,1));
8139      assign(*i2, XXX(u2,3, u1,3, u0,3, u2,2));
8140#     undef XXX
8141   } else if (laneszB == 1) {
8142      // These describe how the result vectors [7..0] are
8143      // assembled from the source vectors.  Each pair is
8144      // (source vector number, lane number).
8145      static const UChar in0[16] = {1,2, 0,2, 2,1, 1,1, 0,1, 2,0, 1,0, 0,0};
8146      static const UChar in1[16] = {0,5, 2,4, 1,4, 0,4, 2,3, 1,3, 0,3, 2,2};
8147      static const UChar in2[16] = {2,7, 1,7, 0,7, 2,6, 1,6, 0,6, 2,5, 1,5};
8148      assign(*i0, math_PERM_8x8x3(in0, u0, u1, u2));
8149      assign(*i1, math_PERM_8x8x3(in1, u0, u1, u2));
8150      assign(*i2, math_PERM_8x8x3(in2, u0, u1, u2));
8151   } else {
8152      // Can never happen, since VST3 only has valid lane widths of 32,
8153      // 16 or 8 bits.
8154      vpanic("math_INTERLEAVE_3");
8155   }
8156#  undef SHL64
8157#  undef IHI16x4
8158#  undef IHI32x2
8159}
8160
8161/* Generate 4x64 -> 4x64 deinterleave code, for VLD4.  Caller must
8162   make *u0, *u1, *u2 and *u3 be valid IRTemps before the call. */
8163static void math_DEINTERLEAVE_4 (
8164               /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
8165               /*OUT*/IRTemp* u2, /*OUT*/IRTemp* u3,
8166               IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3, Int laneszB
8167            )
8168{
8169#  define IHI32x2(_t1, _t2) \
8170             binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
8171#  define ILO32x2(_t1, _t2) \
8172             binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
8173#  define IHI16x4(_t1, _t2) \
8174             binop(Iop_InterleaveHI16x4, mkexpr(_t1), mkexpr(_t2))
8175#  define ILO16x4(_t1, _t2) \
8176             binop(Iop_InterleaveLO16x4, mkexpr(_t1), mkexpr(_t2))
8177#  define IHI8x8(_t1, _e2) \
8178             binop(Iop_InterleaveHI8x8, mkexpr(_t1), _e2)
8179#  define SHL64(_tmp, _amt) \
8180             binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8181   /* The following assumes that the guest is little endian, and hence
8182      that the memory-side (interleaved) data is stored
8183      little-endianly. */
8184   vassert(u0 && u1 && u2 && u3);
8185   if (laneszB == 4) {
8186      assign(*u0, ILO32x2(i2, i0));
8187      assign(*u1, IHI32x2(i2, i0));
8188      assign(*u2, ILO32x2(i3, i1));
8189      assign(*u3, IHI32x2(i3, i1));
8190   } else if (laneszB == 2) {
8191      IRTemp b1b0a1a0 = newTemp(Ity_I64);
8192      IRTemp b3b2a3a2 = newTemp(Ity_I64);
8193      IRTemp d1d0c1c0 = newTemp(Ity_I64);
8194      IRTemp d3d2c3c2 = newTemp(Ity_I64);
8195      assign(b1b0a1a0, ILO16x4(i1, i0));
8196      assign(b3b2a3a2, ILO16x4(i3, i2));
8197      assign(d1d0c1c0, IHI16x4(i1, i0));
8198      assign(d3d2c3c2, IHI16x4(i3, i2));
8199      // And now do what we did for the 32-bit case.
8200      assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
8201      assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
8202      assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
8203      assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
8204   } else if (laneszB == 1) {
8205      // Deinterleave into 16-bit chunks, then do as the 16-bit case.
8206      IRTemp i0x = newTemp(Ity_I64);
8207      IRTemp i1x = newTemp(Ity_I64);
8208      IRTemp i2x = newTemp(Ity_I64);
8209      IRTemp i3x = newTemp(Ity_I64);
8210      assign(i0x, IHI8x8(i0, SHL64(i0, 32)));
8211      assign(i1x, IHI8x8(i1, SHL64(i1, 32)));
8212      assign(i2x, IHI8x8(i2, SHL64(i2, 32)));
8213      assign(i3x, IHI8x8(i3, SHL64(i3, 32)));
8214      // From here on is like the 16 bit case.
8215      IRTemp b1b0a1a0 = newTemp(Ity_I64);
8216      IRTemp b3b2a3a2 = newTemp(Ity_I64);
8217      IRTemp d1d0c1c0 = newTemp(Ity_I64);
8218      IRTemp d3d2c3c2 = newTemp(Ity_I64);
8219      assign(b1b0a1a0, ILO16x4(i1x, i0x));
8220      assign(b3b2a3a2, ILO16x4(i3x, i2x));
8221      assign(d1d0c1c0, IHI16x4(i1x, i0x));
8222      assign(d3d2c3c2, IHI16x4(i3x, i2x));
8223      // And now do what we did for the 32-bit case.
8224      assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
8225      assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
8226      assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
8227      assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
8228   } else {
8229      // Can never happen, since VLD4 only has valid lane widths of 32,
8230      // 16 or 8 bits.
8231      vpanic("math_DEINTERLEAVE_4");
8232   }
8233#  undef SHL64
8234#  undef IHI8x8
8235#  undef ILO16x4
8236#  undef IHI16x4
8237#  undef ILO32x2
8238#  undef IHI32x2
8239}
8240
8241/* Generate 4x64 -> 4x64 interleave code, for VST4.  Caller must
8242   make *i0, *i1, *i2 and *i3 be valid IRTemps before the call. */
8243static void math_INTERLEAVE_4 (
8244               /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
8245               /*OUT*/IRTemp* i2, /*OUT*/IRTemp* i3,
8246               IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3, Int laneszB
8247            )
8248{
8249#  define IHI32x2(_t1, _t2) \
8250             binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
8251#  define ILO32x2(_t1, _t2) \
8252             binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
8253#  define CEV16x4(_t1, _t2) \
8254             binop(Iop_CatEvenLanes16x4, mkexpr(_t1), mkexpr(_t2))
8255#  define COD16x4(_t1, _t2) \
8256             binop(Iop_CatOddLanes16x4, mkexpr(_t1), mkexpr(_t2))
8257#  define COD8x8(_t1, _e2) \
8258             binop(Iop_CatOddLanes8x8, mkexpr(_t1), _e2)
8259#  define SHL64(_tmp, _amt) \
8260             binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8261   /* The following assumes that the guest is little endian, and hence
8262      that the memory-side (interleaved) data is stored
8263      little-endianly. */
8264   vassert(u0 && u1 && u2 && u3);
8265   if (laneszB == 4) {
8266      assign(*i0, ILO32x2(u1, u0));
8267      assign(*i1, ILO32x2(u3, u2));
8268      assign(*i2, IHI32x2(u1, u0));
8269      assign(*i3, IHI32x2(u3, u2));
8270   } else if (laneszB == 2) {
8271      // First, interleave at the 32-bit lane size.
8272      IRTemp b1b0a1a0 = newTemp(Ity_I64);
8273      IRTemp b3b2a3a2 = newTemp(Ity_I64);
8274      IRTemp d1d0c1c0 = newTemp(Ity_I64);
8275      IRTemp d3d2c3c2 = newTemp(Ity_I64);
8276      assign(b1b0a1a0, ILO32x2(u1, u0));
8277      assign(b3b2a3a2, IHI32x2(u1, u0));
8278      assign(d1d0c1c0, ILO32x2(u3, u2));
8279      assign(d3d2c3c2, IHI32x2(u3, u2));
8280      // And interleave (cat) at the 16 bit size.
8281      assign(*i0, CEV16x4(d1d0c1c0, b1b0a1a0));
8282      assign(*i1, COD16x4(d1d0c1c0, b1b0a1a0));
8283      assign(*i2, CEV16x4(d3d2c3c2, b3b2a3a2));
8284      assign(*i3, COD16x4(d3d2c3c2, b3b2a3a2));
8285   } else if (laneszB == 1) {
8286      // First, interleave at the 32-bit lane size.
8287      IRTemp b1b0a1a0 = newTemp(Ity_I64);
8288      IRTemp b3b2a3a2 = newTemp(Ity_I64);
8289      IRTemp d1d0c1c0 = newTemp(Ity_I64);
8290      IRTemp d3d2c3c2 = newTemp(Ity_I64);
8291      assign(b1b0a1a0, ILO32x2(u1, u0));
8292      assign(b3b2a3a2, IHI32x2(u1, u0));
8293      assign(d1d0c1c0, ILO32x2(u3, u2));
8294      assign(d3d2c3c2, IHI32x2(u3, u2));
8295      // And interleave (cat) at the 16 bit size.
8296      IRTemp i0x = newTemp(Ity_I64);
8297      IRTemp i1x = newTemp(Ity_I64);
8298      IRTemp i2x = newTemp(Ity_I64);
8299      IRTemp i3x = newTemp(Ity_I64);
8300      assign(i0x, CEV16x4(d1d0c1c0, b1b0a1a0));
8301      assign(i1x, COD16x4(d1d0c1c0, b1b0a1a0));
8302      assign(i2x, CEV16x4(d3d2c3c2, b3b2a3a2));
8303      assign(i3x, COD16x4(d3d2c3c2, b3b2a3a2));
8304      // And rearrange within each word, to get the right 8 bit lanes.
8305      assign(*i0, COD8x8(i0x, SHL64(i0x, 8)));
8306      assign(*i1, COD8x8(i1x, SHL64(i1x, 8)));
8307      assign(*i2, COD8x8(i2x, SHL64(i2x, 8)));
8308      assign(*i3, COD8x8(i3x, SHL64(i3x, 8)));
8309   } else {
8310      // Can never happen, since VLD4 only has valid lane widths of 32,
8311      // 16 or 8 bits.
8312      vpanic("math_DEINTERLEAVE_4");
8313   }
8314#  undef SHL64
8315#  undef COD8x8
8316#  undef COD16x4
8317#  undef CEV16x4
8318#  undef ILO32x2
8319#  undef IHI32x2
8320}
8321
8322/* A7.7 Advanced SIMD element or structure load/store instructions */
8323static
8324Bool dis_neon_load_or_store ( UInt theInstr,
8325                              Bool isT, IRTemp condT )
8326{
8327#  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
8328   UInt bA = INSN(23,23);
8329   UInt fB = INSN(11,8);
8330   UInt bL = INSN(21,21);
8331   UInt rD = (INSN(22,22) << 4) | INSN(15,12);
8332   UInt rN = INSN(19,16);
8333   UInt rM = INSN(3,0);
8334   UInt N, size, i, j;
8335   UInt inc;
8336   UInt regs = 1;
8337
8338   if (isT) {
8339      vassert(condT != IRTemp_INVALID);
8340   } else {
8341      vassert(condT == IRTemp_INVALID);
8342   }
8343   /* So now, if condT is not IRTemp_INVALID, we know we're
8344      dealing with Thumb code. */
8345
8346   if (INSN(20,20) != 0)
8347      return False;
8348
8349   IRTemp initialRn = newTemp(Ity_I32);
8350   assign(initialRn, isT ? getIRegT(rN) : getIRegA(rN));
8351
8352   IRTemp initialRm = newTemp(Ity_I32);
8353   assign(initialRm, isT ? getIRegT(rM) : getIRegA(rM));
8354
8355   /* There are 3 cases:
8356      (1) VSTn / VLDn (n-element structure from/to one lane)
8357      (2) VLDn (single element to all lanes)
8358      (3) VSTn / VLDn (multiple n-element structures)
8359   */
8360   if (bA) {
8361      N = fB & 3;
8362      if ((fB >> 2) < 3) {
8363         /* ------------ Case (1) ------------
8364            VSTn / VLDn (n-element structure from/to one lane) */
8365
8366         size = fB >> 2;
8367
8368         switch (size) {
8369            case 0: i = INSN(7,5); inc = 1; break;
8370            case 1: i = INSN(7,6); inc = INSN(5,5) ? 2 : 1; break;
8371            case 2: i = INSN(7,7); inc = INSN(6,6) ? 2 : 1; break;
8372            case 3: return False;
8373            default: vassert(0);
8374         }
8375
8376         IRTemp addr = newTemp(Ity_I32);
8377         assign(addr, mkexpr(initialRn));
8378
8379         // go uncond
8380         if (condT != IRTemp_INVALID)
8381            mk_skip_over_T32_if_cond_is_false(condT);
8382         // now uncond
8383
8384         if (bL)
8385            mk_neon_elem_load_to_one_lane(rD, inc, i, N, size, addr);
8386         else
8387            mk_neon_elem_store_from_one_lane(rD, inc, i, N, size, addr);
8388         DIP("v%s%u.%u {", bL ? "ld" : "st", N + 1, 8 << size);
8389         for (j = 0; j <= N; j++) {
8390            if (j)
8391               DIP(", ");
8392            DIP("d%u[%u]", rD + j * inc, i);
8393         }
8394         DIP("}, [r%u]", rN);
8395         if (rM != 13 && rM != 15) {
8396            DIP(", r%u\n", rM);
8397         } else {
8398            DIP("%s\n", (rM != 15) ? "!" : "");
8399         }
8400      } else {
8401         /* ------------ Case (2) ------------
8402            VLDn (single element to all lanes) */
8403         UInt r;
8404         if (bL == 0)
8405            return False;
8406
8407         inc = INSN(5,5) + 1;
8408         size = INSN(7,6);
8409
8410         /* size == 3 and size == 2 cases differ in alignment constraints */
8411         if (size == 3 && N == 3 && INSN(4,4) == 1)
8412            size = 2;
8413
8414         if (size == 0 && N == 0 && INSN(4,4) == 1)
8415            return False;
8416         if (N == 2 && INSN(4,4) == 1)
8417            return False;
8418         if (size == 3)
8419            return False;
8420
8421         // go uncond
8422         if (condT != IRTemp_INVALID)
8423            mk_skip_over_T32_if_cond_is_false(condT);
8424         // now uncond
8425
8426         IRTemp addr = newTemp(Ity_I32);
8427         assign(addr, mkexpr(initialRn));
8428
8429         if (N == 0 && INSN(5,5))
8430            regs = 2;
8431
8432         for (r = 0; r < regs; r++) {
8433            switch (size) {
8434               case 0:
8435                  putDRegI64(rD + r, unop(Iop_Dup8x8,
8436                                          loadLE(Ity_I8, mkexpr(addr))),
8437                             IRTemp_INVALID);
8438                  break;
8439               case 1:
8440                  putDRegI64(rD + r, unop(Iop_Dup16x4,
8441                                          loadLE(Ity_I16, mkexpr(addr))),
8442                             IRTemp_INVALID);
8443                  break;
8444               case 2:
8445                  putDRegI64(rD + r, unop(Iop_Dup32x2,
8446                                          loadLE(Ity_I32, mkexpr(addr))),
8447                             IRTemp_INVALID);
8448                  break;
8449               default:
8450                  vassert(0);
8451            }
8452            for (i = 1; i <= N; i++) {
8453               switch (size) {
8454                  case 0:
8455                     putDRegI64(rD + r + i * inc,
8456                                unop(Iop_Dup8x8,
8457                                     loadLE(Ity_I8, binop(Iop_Add32,
8458                                                          mkexpr(addr),
8459                                                          mkU32(i * 1)))),
8460                                IRTemp_INVALID);
8461                     break;
8462                  case 1:
8463                     putDRegI64(rD + r + i * inc,
8464                                unop(Iop_Dup16x4,
8465                                     loadLE(Ity_I16, binop(Iop_Add32,
8466                                                           mkexpr(addr),
8467                                                           mkU32(i * 2)))),
8468                                IRTemp_INVALID);
8469                     break;
8470                  case 2:
8471                     putDRegI64(rD + r + i * inc,
8472                                unop(Iop_Dup32x2,
8473                                     loadLE(Ity_I32, binop(Iop_Add32,
8474                                                           mkexpr(addr),
8475                                                           mkU32(i * 4)))),
8476                                IRTemp_INVALID);
8477                     break;
8478                  default:
8479                     vassert(0);
8480               }
8481            }
8482         }
8483         DIP("vld%u.%u {", N + 1, 8 << size);
8484         for (r = 0; r < regs; r++) {
8485            for (i = 0; i <= N; i++) {
8486               if (i || r)
8487                  DIP(", ");
8488               DIP("d%u[]", rD + r + i * inc);
8489            }
8490         }
8491         DIP("}, [r%u]", rN);
8492         if (rM != 13 && rM != 15) {
8493            DIP(", r%u\n", rM);
8494         } else {
8495            DIP("%s\n", (rM != 15) ? "!" : "");
8496         }
8497      }
8498      /* Writeback.  We're uncond here, so no condT-ing. */
8499      if (rM != 15) {
8500         if (rM == 13) {
8501            IRExpr* e = binop(Iop_Add32,
8502                              mkexpr(initialRn),
8503                              mkU32((1 << size) * (N + 1)));
8504            if (isT)
8505               putIRegT(rN, e, IRTemp_INVALID);
8506            else
8507               putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8508         } else {
8509            IRExpr* e = binop(Iop_Add32,
8510                              mkexpr(initialRn),
8511                              mkexpr(initialRm));
8512            if (isT)
8513               putIRegT(rN, e, IRTemp_INVALID);
8514            else
8515               putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8516         }
8517      }
8518      return True;
8519   } else {
8520      /* ------------ Case (3) ------------
8521         VSTn / VLDn (multiple n-element structures) */
8522      inc = (fB & 1) + 1;
8523
8524      if (fB == BITS4(0,0,1,0)       // Dd, Dd+1, Dd+2, Dd+3  inc = 1  regs = 4
8525          || fB == BITS4(0,1,1,0)    // Dd, Dd+1, Dd+2        inc = 1  regs = 3
8526          || fB == BITS4(0,1,1,1)    // Dd                    inc = 2  regs = 1
8527          || fB == BITS4(1,0,1,0)) { // Dd, Dd+1              inc = 1  regs = 2
8528         N = 0; // VLD1/VST1.  'inc' does not appear to have any
8529                // meaning for the VLD1/VST1 cases.  'regs' is the number of
8530                // registers involved.
8531         if (rD + regs > 32) return False;
8532      }
8533      else
8534      if (fB == BITS4(0,0,1,1)       // Dd, Dd+1, Dd+2, Dd+3  inc=2  regs = 2
8535          || fB == BITS4(1,0,0,0)    // Dd, Dd+1              inc=1  regs = 1
8536          || fB == BITS4(1,0,0,1)) { // Dd, Dd+2              inc=2  regs = 1
8537         N = 1; // VLD2/VST2.  'regs' is the number of register-pairs involved
8538         if (regs == 1 && inc == 1 && rD + 1 >= 32) return False;
8539         if (regs == 1 && inc == 2 && rD + 2 >= 32) return False;
8540         if (regs == 2 && inc == 2 && rD + 3 >= 32) return False;
8541      } else if (fB == BITS4(0,1,0,0) || fB == BITS4(0,1,0,1)) {
8542         N = 2; // VLD3/VST3
8543         if (inc == 1 && rD + 2 >= 32) return False;
8544         if (inc == 2 && rD + 4 >= 32) return False;
8545      } else if (fB == BITS4(0,0,0,0) || fB == BITS4(0,0,0,1)) {
8546         N = 3; // VLD4/VST4
8547         if (inc == 1 && rD + 3 >= 32) return False;
8548         if (inc == 2 && rD + 6 >= 32) return False;
8549      } else {
8550         return False;
8551      }
8552
8553      if (N == 1 && fB == BITS4(0,0,1,1)) {
8554         regs = 2;
8555      } else if (N == 0) {
8556         if (fB == BITS4(1,0,1,0)) {
8557            regs = 2;
8558         } else if (fB == BITS4(0,1,1,0)) {
8559            regs = 3;
8560         } else if (fB == BITS4(0,0,1,0)) {
8561            regs = 4;
8562         }
8563      }
8564
8565      size = INSN(7,6);
8566      if (N == 0 && size == 3)
8567         size = 2;
8568      if (size == 3)
8569         return False;
8570
8571      // go uncond
8572      if (condT != IRTemp_INVALID)
8573         mk_skip_over_T32_if_cond_is_false(condT);
8574      // now uncond
8575
8576      IRTemp addr = newTemp(Ity_I32);
8577      assign(addr, mkexpr(initialRn));
8578
8579      if (N == 0 /* No interleaving -- VLD1/VST1 */) {
8580         UInt r;
8581         vassert(regs == 1 || regs == 2 || regs == 3 || regs == 4);
8582         /* inc has no relevance here */
8583         for (r = 0; r < regs; r++) {
8584            if (bL)
8585               putDRegI64(rD+r, loadLE(Ity_I64, mkexpr(addr)), IRTemp_INVALID);
8586            else
8587               storeLE(mkexpr(addr), getDRegI64(rD+r));
8588            IRTemp tmp = newTemp(Ity_I32);
8589            assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(8)));
8590            addr = tmp;
8591         }
8592      }
8593      else
8594      if (N == 1 /* 2-interleaving -- VLD2/VST2 */) {
8595         vassert( (regs == 1 && (inc == 1 || inc == 2))
8596                   || (regs == 2 && inc == 2) );
8597         // Make 'nregs' be the number of registers and 'regstep'
8598         // equal the actual register-step.  The ARM encoding, using 'regs'
8599         // and 'inc', is bizarre.  After this, we have:
8600         // Dd, Dd+1              regs = 1, inc = 1,   nregs = 2, regstep = 1
8601         // Dd, Dd+2              regs = 1, inc = 2,   nregs = 2, regstep = 2
8602         // Dd, Dd+1, Dd+2, Dd+3  regs = 2, inc = 2,   nregs = 4, regstep = 1
8603         UInt nregs   = 2;
8604         UInt regstep = 1;
8605         if (regs == 1 && inc == 1) {
8606            /* nothing */
8607         } else if (regs == 1 && inc == 2) {
8608            regstep = 2;
8609         } else if (regs == 2 && inc == 2) {
8610            nregs = 4;
8611         } else {
8612            vassert(0);
8613         }
8614         // 'a' is address,
8615         // 'di' is interleaved data, 'du' is uninterleaved data
8616         if (nregs == 2) {
8617            IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8618            IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8619            IRTemp  di0 = newTemp(Ity_I64);
8620            IRTemp  di1 = newTemp(Ity_I64);
8621            IRTemp  du0 = newTemp(Ity_I64);
8622            IRTemp  du1 = newTemp(Ity_I64);
8623            if (bL) {
8624               assign(di0, loadLE(Ity_I64, a0));
8625               assign(di1, loadLE(Ity_I64, a1));
8626               math_DEINTERLEAVE_2(&du0, &du1, di0, di1, 1 << size);
8627               putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
8628               putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
8629            } else {
8630               assign(du0, getDRegI64(rD + 0 * regstep));
8631               assign(du1, getDRegI64(rD + 1 * regstep));
8632               math_INTERLEAVE_2(&di0, &di1, du0, du1, 1 << size);
8633               storeLE(a0, mkexpr(di0));
8634               storeLE(a1, mkexpr(di1));
8635            }
8636            IRTemp tmp = newTemp(Ity_I32);
8637            assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(16)));
8638            addr = tmp;
8639         } else {
8640            vassert(nregs == 4);
8641            vassert(regstep == 1);
8642            IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8643            IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8644            IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
8645            IRExpr* a3  = binop(Iop_Add32, mkexpr(addr), mkU32(24));
8646            IRTemp  di0 = newTemp(Ity_I64);
8647            IRTemp  di1 = newTemp(Ity_I64);
8648            IRTemp  di2 = newTemp(Ity_I64);
8649            IRTemp  di3 = newTemp(Ity_I64);
8650            IRTemp  du0 = newTemp(Ity_I64);
8651            IRTemp  du1 = newTemp(Ity_I64);
8652            IRTemp  du2 = newTemp(Ity_I64);
8653            IRTemp  du3 = newTemp(Ity_I64);
8654            if (bL) {
8655               assign(di0, loadLE(Ity_I64, a0));
8656               assign(di1, loadLE(Ity_I64, a1));
8657               assign(di2, loadLE(Ity_I64, a2));
8658               assign(di3, loadLE(Ity_I64, a3));
8659               // Note spooky interleaving: du0, du2, di0, di1 etc
8660               math_DEINTERLEAVE_2(&du0, &du2, di0, di1, 1 << size);
8661               math_DEINTERLEAVE_2(&du1, &du3, di2, di3, 1 << size);
8662               putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
8663               putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
8664               putDRegI64(rD + 2 * regstep, mkexpr(du2), IRTemp_INVALID);
8665               putDRegI64(rD + 3 * regstep, mkexpr(du3), IRTemp_INVALID);
8666            } else {
8667               assign(du0, getDRegI64(rD + 0 * regstep));
8668               assign(du1, getDRegI64(rD + 1 * regstep));
8669               assign(du2, getDRegI64(rD + 2 * regstep));
8670               assign(du3, getDRegI64(rD + 3 * regstep));
8671               // Note spooky interleaving: du0, du2, di0, di1 etc
8672               math_INTERLEAVE_2(&di0, &di1, du0, du2, 1 << size);
8673               math_INTERLEAVE_2(&di2, &di3, du1, du3, 1 << size);
8674               storeLE(a0, mkexpr(di0));
8675               storeLE(a1, mkexpr(di1));
8676               storeLE(a2, mkexpr(di2));
8677               storeLE(a3, mkexpr(di3));
8678            }
8679
8680            IRTemp tmp = newTemp(Ity_I32);
8681            assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
8682            addr = tmp;
8683         }
8684      }
8685      else
8686      if (N == 2 /* 3-interleaving -- VLD3/VST3 */) {
8687         // Dd, Dd+1, Dd+2   regs = 1, inc = 1
8688         // Dd, Dd+2, Dd+4   regs = 1, inc = 2
8689         vassert(regs == 1 && (inc == 1 || inc == 2));
8690         IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8691         IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8692         IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
8693         IRTemp  di0 = newTemp(Ity_I64);
8694         IRTemp  di1 = newTemp(Ity_I64);
8695         IRTemp  di2 = newTemp(Ity_I64);
8696         IRTemp  du0 = newTemp(Ity_I64);
8697         IRTemp  du1 = newTemp(Ity_I64);
8698         IRTemp  du2 = newTemp(Ity_I64);
8699         if (bL) {
8700            assign(di0, loadLE(Ity_I64, a0));
8701            assign(di1, loadLE(Ity_I64, a1));
8702            assign(di2, loadLE(Ity_I64, a2));
8703            math_DEINTERLEAVE_3(&du0, &du1, &du2, di0, di1, di2, 1 << size);
8704            putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
8705            putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
8706            putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
8707         } else {
8708            assign(du0, getDRegI64(rD + 0 * inc));
8709            assign(du1, getDRegI64(rD + 1 * inc));
8710            assign(du2, getDRegI64(rD + 2 * inc));
8711            math_INTERLEAVE_3(&di0, &di1, &di2, du0, du1, du2, 1 << size);
8712            storeLE(a0, mkexpr(di0));
8713            storeLE(a1, mkexpr(di1));
8714            storeLE(a2, mkexpr(di2));
8715         }
8716         IRTemp tmp = newTemp(Ity_I32);
8717         assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(24)));
8718         addr = tmp;
8719      }
8720      else
8721      if (N == 3 /* 4-interleaving -- VLD4/VST4 */) {
8722         // Dd, Dd+1, Dd+2, Dd+3   regs = 1, inc = 1
8723         // Dd, Dd+2, Dd+4, Dd+6   regs = 1, inc = 2
8724         vassert(regs == 1 && (inc == 1 || inc == 2));
8725         IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8726         IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8727         IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
8728         IRExpr* a3  = binop(Iop_Add32, mkexpr(addr), mkU32(24));
8729         IRTemp  di0 = newTemp(Ity_I64);
8730         IRTemp  di1 = newTemp(Ity_I64);
8731         IRTemp  di2 = newTemp(Ity_I64);
8732         IRTemp  di3 = newTemp(Ity_I64);
8733         IRTemp  du0 = newTemp(Ity_I64);
8734         IRTemp  du1 = newTemp(Ity_I64);
8735         IRTemp  du2 = newTemp(Ity_I64);
8736         IRTemp  du3 = newTemp(Ity_I64);
8737         if (bL) {
8738            assign(di0, loadLE(Ity_I64, a0));
8739            assign(di1, loadLE(Ity_I64, a1));
8740            assign(di2, loadLE(Ity_I64, a2));
8741            assign(di3, loadLE(Ity_I64, a3));
8742            math_DEINTERLEAVE_4(&du0, &du1, &du2, &du3,
8743                                di0, di1, di2, di3, 1 << size);
8744            putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
8745            putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
8746            putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
8747            putDRegI64(rD + 3 * inc, mkexpr(du3), IRTemp_INVALID);
8748         } else {
8749            assign(du0, getDRegI64(rD + 0 * inc));
8750            assign(du1, getDRegI64(rD + 1 * inc));
8751            assign(du2, getDRegI64(rD + 2 * inc));
8752            assign(du3, getDRegI64(rD + 3 * inc));
8753            math_INTERLEAVE_4(&di0, &di1, &di2, &di3,
8754                              du0, du1, du2, du3, 1 << size);
8755            storeLE(a0, mkexpr(di0));
8756            storeLE(a1, mkexpr(di1));
8757            storeLE(a2, mkexpr(di2));
8758            storeLE(a3, mkexpr(di3));
8759         }
8760         IRTemp tmp = newTemp(Ity_I32);
8761         assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
8762         addr = tmp;
8763      }
8764      else {
8765         vassert(0);
8766      }
8767
8768      /* Writeback */
8769      if (rM != 15) {
8770         IRExpr* e;
8771         if (rM == 13) {
8772            e = binop(Iop_Add32, mkexpr(initialRn),
8773                                 mkU32(8 * (N + 1) * regs));
8774         } else {
8775            e = binop(Iop_Add32, mkexpr(initialRn),
8776                                 mkexpr(initialRm));
8777         }
8778         if (isT)
8779            putIRegT(rN, e, IRTemp_INVALID);
8780         else
8781            putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8782      }
8783
8784      DIP("v%s%u.%u {", bL ? "ld" : "st", N + 1, 8 << INSN(7,6));
8785      if ((inc == 1 && regs * (N + 1) > 1)
8786          || (inc == 2 && regs > 1 && N > 0)) {
8787         DIP("d%u-d%u", rD, rD + regs * (N + 1) - 1);
8788      } else {
8789         UInt r;
8790         for (r = 0; r < regs; r++) {
8791            for (i = 0; i <= N; i++) {
8792               if (i || r)
8793                  DIP(", ");
8794               DIP("d%u", rD + r + i * inc);
8795            }
8796         }
8797      }
8798      DIP("}, [r%u]", rN);
8799      if (rM != 13 && rM != 15) {
8800         DIP(", r%u\n", rM);
8801      } else {
8802         DIP("%s\n", (rM != 15) ? "!" : "");
8803      }
8804      return True;
8805   }
8806#  undef INSN
8807}
8808
8809
8810/*------------------------------------------------------------*/
8811/*--- NEON, top level control                              ---*/
8812/*------------------------------------------------------------*/
8813
8814/* Both ARM and Thumb */
8815
8816/* Translate a NEON instruction.    If successful, returns
8817   True and *dres may or may not be updated.  If failure, returns
8818   False and doesn't change *dres nor create any IR.
8819
8820   The Thumb and ARM encodings are similar for the 24 bottom bits, but
8821   the top 8 bits are slightly different.  In both cases, the caller
8822   must pass the entire 32 bits.  Callers may pass any instruction;
8823   this ignores non-NEON ones.
8824
8825   Caller must supply an IRTemp 'condT' holding the gating condition,
8826   or IRTemp_INVALID indicating the insn is always executed.  In ARM
8827   code, this must always be IRTemp_INVALID because NEON insns are
8828   unconditional for ARM.
8829
8830   Finally, the caller must indicate whether this occurs in ARM or in
8831   Thumb code.
8832*/
8833static Bool decode_NEON_instruction (
8834               /*MOD*/DisResult* dres,
8835               UInt              insn32,
8836               IRTemp            condT,
8837               Bool              isT
8838            )
8839{
8840#  define INSN(_bMax,_bMin)  SLICE_UInt(insn32, (_bMax), (_bMin))
8841
8842   /* There are two kinds of instruction to deal with: load/store and
8843      data processing.  In each case, in ARM mode we merely identify
8844      the kind, and pass it on to the relevant sub-handler.  In Thumb
8845      mode we identify the kind, swizzle the bits around to make it
8846      have the same encoding as in ARM, and hand it on to the
8847      sub-handler.
8848   */
8849
8850   /* In ARM mode, NEON instructions can't be conditional. */
8851   if (!isT)
8852      vassert(condT == IRTemp_INVALID);
8853
8854   /* Data processing:
8855      Thumb: 111U 1111 AAAA Axxx xxxx BBBB CCCC xxxx
8856      ARM:   1111 001U AAAA Axxx xxxx BBBB CCCC xxxx
8857   */
8858   if (!isT && INSN(31,25) == BITS7(1,1,1,1,0,0,1)) {
8859      // ARM, DP
8860      return dis_neon_data_processing(INSN(31,0), condT);
8861   }
8862   if (isT && INSN(31,29) == BITS3(1,1,1)
8863       && INSN(27,24) == BITS4(1,1,1,1)) {
8864      // Thumb, DP
8865      UInt reformatted = INSN(23,0);
8866      reformatted |= (INSN(28,28) << 24); // U bit
8867      reformatted |= (BITS7(1,1,1,1,0,0,1) << 25);
8868      return dis_neon_data_processing(reformatted, condT);
8869   }
8870
8871   /* Load/store:
8872      Thumb: 1111 1001 AxL0 xxxx xxxx BBBB xxxx xxxx
8873      ARM:   1111 0100 AxL0 xxxx xxxx BBBB xxxx xxxx
8874   */
8875   if (!isT && INSN(31,24) == BITS8(1,1,1,1,0,1,0,0)) {
8876      // ARM, memory
8877      return dis_neon_load_or_store(INSN(31,0), isT, condT);
8878   }
8879   if (isT && INSN(31,24) == BITS8(1,1,1,1,1,0,0,1)) {
8880      UInt reformatted = INSN(23,0);
8881      reformatted |= (BITS8(1,1,1,1,0,1,0,0) << 24);
8882      return dis_neon_load_or_store(reformatted, isT, condT);
8883   }
8884
8885   /* Doesn't match. */
8886   return False;
8887
8888#  undef INSN
8889}
8890
8891
8892/*------------------------------------------------------------*/
8893/*--- V6 MEDIA instructions                                ---*/
8894/*------------------------------------------------------------*/
8895
8896/* Both ARM and Thumb */
8897
8898/* Translate a V6 media instruction.    If successful, returns
8899   True and *dres may or may not be updated.  If failure, returns
8900   False and doesn't change *dres nor create any IR.
8901
8902   The Thumb and ARM encodings are completely different.  In Thumb
8903   mode, the caller must pass the entire 32 bits.  In ARM mode it must
8904   pass the lower 28 bits.  Apart from that, callers may pass any
8905   instruction; this function ignores anything it doesn't recognise.
8906
8907   Caller must supply an IRTemp 'condT' holding the gating condition,
8908   or IRTemp_INVALID indicating the insn is always executed.
8909
   Caller must also supply an ARMCondcode 'conq'.  This is only used
8911   for debug printing, no other purpose.  For ARM, this is simply the
8912   top 4 bits of the original instruction.  For Thumb, the condition
8913   is not (really) known until run time, and so ARMCondAL should be
8914   passed, only so that printing of these instructions does not show
8915   any condition.
8916
8917   Finally, the caller must indicate whether this occurs in ARM or in
8918   Thumb code.
8919*/
8920static Bool decode_V6MEDIA_instruction (
8921               /*MOD*/DisResult* dres,
8922               UInt              insnv6m,
8923               IRTemp            condT,
8924               ARMCondcode       conq,
8925               Bool              isT
8926            )
8927{
8928#  define INSNA(_bMax,_bMin)   SLICE_UInt(insnv6m, (_bMax), (_bMin))
8929#  define INSNT0(_bMax,_bMin)  SLICE_UInt( ((insnv6m >> 16) & 0xFFFF), \
8930                                           (_bMax), (_bMin) )
8931#  define INSNT1(_bMax,_bMin)  SLICE_UInt( ((insnv6m >> 0)  & 0xFFFF), \
8932                                           (_bMax), (_bMin) )
8933   HChar dis_buf[128];
8934   dis_buf[0] = 0;
8935
8936   if (isT) {
8937      vassert(conq == ARMCondAL);
8938   } else {
8939      vassert(INSNA(31,28) == BITS4(0,0,0,0)); // caller's obligation
8940      vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
8941   }
8942
8943   /* ----------- smulbb, smulbt, smultb, smultt ----------- */
8944   {
8945     UInt regD = 99, regM = 99, regN = 99, bitM = 0, bitN = 0;
8946     Bool gate = False;
8947
8948     if (isT) {
8949        if (INSNT0(15,4) == 0xFB1 && INSNT1(15,12) == BITS4(1,1,1,1)
8950            && INSNT1(7,6) == BITS2(0,0)) {
8951           regD = INSNT1(11,8);
8952           regM = INSNT1(3,0);
8953           regN = INSNT0(3,0);
8954           bitM = INSNT1(4,4);
8955           bitN = INSNT1(5,5);
8956           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
8957              gate = True;
8958        }
8959     } else {
8960        if (BITS8(0,0,0,1,0,1,1,0) == INSNA(27,20) &&
8961            BITS4(0,0,0,0)         == INSNA(15,12) &&
8962            BITS4(1,0,0,0)         == (INSNA(7,4) & BITS4(1,0,0,1)) ) {
8963           regD = INSNA(19,16);
8964           regM = INSNA(11,8);
8965           regN = INSNA(3,0);
8966           bitM = INSNA(6,6);
8967           bitN = INSNA(5,5);
8968           if (regD != 15 && regN != 15 && regM != 15)
8969              gate = True;
8970        }
8971     }
8972
8973     if (gate) {
8974        IRTemp srcN = newTemp(Ity_I32);
8975        IRTemp srcM = newTemp(Ity_I32);
8976        IRTemp res  = newTemp(Ity_I32);
8977
8978        assign( srcN, binop(Iop_Sar32,
8979                            binop(Iop_Shl32,
8980                                  isT ? getIRegT(regN) : getIRegA(regN),
8981                                  mkU8(bitN ? 0 : 16)), mkU8(16)) );
8982        assign( srcM, binop(Iop_Sar32,
8983                            binop(Iop_Shl32,
8984                                  isT ? getIRegT(regM) : getIRegA(regM),
8985                                  mkU8(bitM ? 0 : 16)), mkU8(16)) );
8986        assign( res, binop(Iop_Mul32, mkexpr(srcN), mkexpr(srcM)) );
8987
8988        if (isT)
8989           putIRegT( regD, mkexpr(res), condT );
8990        else
8991           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
8992
8993        DIP( "smul%c%c%s r%u, r%u, r%u\n", bitN ? 't' : 'b', bitM ? 't' : 'b',
8994             nCC(conq), regD, regN, regM );
8995        return True;
8996     }
8997     /* fall through */
8998   }
8999
9000   /* ------------ smulwb<y><c> <Rd>,<Rn>,<Rm> ------------- */
9001   /* ------------ smulwt<y><c> <Rd>,<Rn>,<Rm> ------------- */
9002   {
9003     UInt regD = 99, regN = 99, regM = 99, bitM = 0;
9004     Bool gate = False;
9005
9006     if (isT) {
9007        if (INSNT0(15,4) == 0xFB3 && INSNT1(15,12) == BITS4(1,1,1,1)
9008            && INSNT1(7,5) == BITS3(0,0,0)) {
9009          regN = INSNT0(3,0);
9010          regD = INSNT1(11,8);
9011          regM = INSNT1(3,0);
9012          bitM = INSNT1(4,4);
9013          if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9014             gate = True;
9015        }
9016     } else {
9017        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
9018            INSNA(15,12) == BITS4(0,0,0,0)         &&
9019            (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,1,0)) {
9020           regD = INSNA(19,16);
9021           regN = INSNA(3,0);
9022           regM = INSNA(11,8);
9023           bitM = INSNA(6,6);
9024           if (regD != 15 && regN != 15 && regM != 15)
9025              gate = True;
9026        }
9027     }
9028
9029     if (gate) {
9030        IRTemp irt_prod = newTemp(Ity_I64);
9031
9032        assign( irt_prod,
9033                binop(Iop_MullS32,
9034                      isT ? getIRegT(regN) : getIRegA(regN),
9035                      binop(Iop_Sar32,
9036                            binop(Iop_Shl32,
9037                                  isT ? getIRegT(regM) : getIRegA(regM),
9038                                  mkU8(bitM ? 0 : 16)),
9039                            mkU8(16))) );
9040
9041        IRExpr* ire_result = binop(Iop_Or32,
9042                                   binop( Iop_Shl32,
9043                                          unop(Iop_64HIto32, mkexpr(irt_prod)),
9044                                          mkU8(16) ),
9045                                   binop( Iop_Shr32,
9046                                          unop(Iop_64to32, mkexpr(irt_prod)),
9047                                          mkU8(16) ) );
9048
9049        if (isT)
9050           putIRegT( regD, ire_result, condT );
9051        else
9052           putIRegA( regD, ire_result, condT, Ijk_Boring );
9053
9054        DIP("smulw%c%s r%u, r%u, r%u\n",
9055            bitM ? 't' : 'b', nCC(conq),regD,regN,regM);
9056        return True;
9057     }
9058     /* fall through */
9059   }
9060
9061   /* ------------ pkhbt<c> Rd, Rn, Rm {,LSL #imm} ------------- */
9062   /* ------------ pkhtb<c> Rd, Rn, Rm {,ASR #imm} ------------- */
9063   {
9064     UInt regD = 99, regN = 99, regM = 99, imm5 = 99, shift_type = 99;
9065     Bool tbform = False;
9066     Bool gate = False;
9067
9068     if (isT) {
9069        if (INSNT0(15,4) == 0xEAC
9070            && INSNT1(15,15) == 0 && INSNT1(4,4) == 0) {
9071           regN = INSNT0(3,0);
9072           regD = INSNT1(11,8);
9073           regM = INSNT1(3,0);
9074           imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
9075           shift_type = (INSNT1(5,5) << 1) | 0;
9076           tbform = (INSNT1(5,5) == 0) ? False : True;
9077           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9078              gate = True;
9079        }
9080     } else {
9081        if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
9082            INSNA(5,4)   == BITS2(0,1)             &&
9083            (INSNA(6,6)  == 0 || INSNA(6,6) == 1) ) {
9084           regD = INSNA(15,12);
9085           regN = INSNA(19,16);
9086           regM = INSNA(3,0);
9087           imm5 = INSNA(11,7);
9088           shift_type = (INSNA(6,6) << 1) | 0;
9089           tbform = (INSNA(6,6) == 0) ? False : True;
9090           if (regD != 15 && regN != 15 && regM != 15)
9091              gate = True;
9092        }
9093     }
9094
9095     if (gate) {
9096        IRTemp irt_regM       = newTemp(Ity_I32);
9097        IRTemp irt_regM_shift = newTemp(Ity_I32);
9098        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
9099        compute_result_and_C_after_shift_by_imm5(
9100           dis_buf, &irt_regM_shift, NULL, irt_regM, shift_type, imm5, regM );
9101
9102        UInt mask = (tbform == True) ? 0x0000FFFF : 0xFFFF0000;
9103        IRExpr* ire_result
9104          = binop( Iop_Or32,
9105                   binop(Iop_And32, mkexpr(irt_regM_shift), mkU32(mask)),
9106                   binop(Iop_And32, isT ? getIRegT(regN) : getIRegA(regN),
9107                                    unop(Iop_Not32, mkU32(mask))) );
9108
9109        if (isT)
9110           putIRegT( regD, ire_result, condT );
9111        else
9112           putIRegA( regD, ire_result, condT, Ijk_Boring );
9113
9114        DIP( "pkh%s%s r%u, r%u, r%u %s\n", tbform ? "tb" : "bt",
9115             nCC(conq), regD, regN, regM, dis_buf );
9116
9117        return True;
9118     }
9119     /* fall through */
9120   }
9121
9122   /* ---------- usat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
9123   {
9124     UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
9125     Bool gate = False;
9126
9127     if (isT) {
9128        if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,1,0)
9129            && INSNT0(4,4) == 0
9130            && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
9131           regD       = INSNT1(11,8);
9132           regN       = INSNT0(3,0);
9133           shift_type = (INSNT0(5,5) << 1) | 0;
9134           imm5       = (INSNT1(14,12) << 2) | INSNT1(7,6);
9135           sat_imm    = INSNT1(4,0);
9136           if (!isBadRegT(regD) && !isBadRegT(regN))
9137              gate = True;
9138           if (shift_type == BITS2(1,0) && imm5 == 0)
9139              gate = False;
9140        }
9141     } else {
9142        if (INSNA(27,21) == BITS7(0,1,1,0,1,1,1) &&
9143            INSNA(5,4)   == BITS2(0,1)) {
9144           regD       = INSNA(15,12);
9145           regN       = INSNA(3,0);
9146           shift_type = (INSNA(6,6) << 1) | 0;
9147           imm5       = INSNA(11,7);
9148           sat_imm    = INSNA(20,16);
9149           if (regD != 15 && regN != 15)
9150              gate = True;
9151        }
9152     }
9153
9154     if (gate) {
9155        IRTemp irt_regN       = newTemp(Ity_I32);
9156        IRTemp irt_regN_shift = newTemp(Ity_I32);
9157        IRTemp irt_sat_Q      = newTemp(Ity_I32);
9158        IRTemp irt_result     = newTemp(Ity_I32);
9159
9160        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9161        compute_result_and_C_after_shift_by_imm5(
9162                dis_buf, &irt_regN_shift, NULL,
9163                irt_regN, shift_type, imm5, regN );
9164
9165        armUnsignedSatQ( &irt_result, &irt_sat_Q, irt_regN_shift, sat_imm );
9166        or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
9167
9168        if (isT)
9169           putIRegT( regD, mkexpr(irt_result), condT );
9170        else
9171           putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
9172
9173        DIP("usat%s r%u, #0x%04x, %s\n",
9174            nCC(conq), regD, imm5, dis_buf);
9175        return True;
9176     }
9177     /* fall through */
9178   }
9179
9180  /* ----------- ssat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
9181   {
9182     UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
9183     Bool gate = False;
9184
9185     if (isT) {
9186        if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
9187            && INSNT0(4,4) == 0
9188            && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
9189           regD       = INSNT1(11,8);
9190           regN       = INSNT0(3,0);
9191           shift_type = (INSNT0(5,5) << 1) | 0;
9192           imm5       = (INSNT1(14,12) << 2) | INSNT1(7,6);
9193           sat_imm    = INSNT1(4,0) + 1;
9194           if (!isBadRegT(regD) && !isBadRegT(regN))
9195              gate = True;
9196           if (shift_type == BITS2(1,0) && imm5 == 0)
9197              gate = False;
9198        }
9199     } else {
9200        if (INSNA(27,21) == BITS7(0,1,1,0,1,0,1) &&
9201            INSNA(5,4)   == BITS2(0,1)) {
9202           regD       = INSNA(15,12);
9203           regN       = INSNA(3,0);
9204           shift_type = (INSNA(6,6) << 1) | 0;
9205           imm5       = INSNA(11,7);
9206           sat_imm    = INSNA(20,16) + 1;
9207           if (regD != 15 && regN != 15)
9208              gate = True;
9209        }
9210     }
9211
9212     if (gate) {
9213        IRTemp irt_regN       = newTemp(Ity_I32);
9214        IRTemp irt_regN_shift = newTemp(Ity_I32);
9215        IRTemp irt_sat_Q      = newTemp(Ity_I32);
9216        IRTemp irt_result     = newTemp(Ity_I32);
9217
9218        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9219        compute_result_and_C_after_shift_by_imm5(
9220                dis_buf, &irt_regN_shift, NULL,
9221                irt_regN, shift_type, imm5, regN );
9222
9223        armSignedSatQ( irt_regN_shift, sat_imm, &irt_result, &irt_sat_Q );
9224        or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
9225
9226        if (isT)
9227           putIRegT( regD, mkexpr(irt_result), condT );
9228        else
9229           putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
9230
9231        DIP( "ssat%s r%u, #0x%04x, %s\n",
9232             nCC(conq), regD, imm5, dis_buf);
9233        return True;
9234    }
9235    /* fall through */
9236  }
9237
9238   /* ----------- ssat16<c> <Rd>,#<imm>,<Rn> ----------- */
9239   {
9240     UInt regD = 99, regN = 99, sat_imm = 99;
9241     Bool gate = False;
9242
9243     if (isT) {
9244        if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
9245            && INSNT0(5,4) == BITS2(1,0)
9246            && INSNT1(15,12) == BITS4(0,0,0,0)
9247            && INSNT1(7,4) == BITS4(0,0,0,0)) {
9248           regD       = INSNT1(11,8);
9249           regN       = INSNT0(3,0);
9250           sat_imm    = INSNT1(3,0) + 1;
9251           if (!isBadRegT(regD) && !isBadRegT(regN))
9252              gate = True;
9253        }
9254     } else {
9255        if (INSNA(27,20) == BITS8(0,1,1,0,1,0,1,0) &&
9256            INSNA(11,4)   == BITS8(1,1,1,1,0,0,1,1)) {
9257           regD       = INSNA(15,12);
9258           regN       = INSNA(3,0);
9259           sat_imm    = INSNA(19,16) + 1;
9260           if (regD != 15 && regN != 15)
9261              gate = True;
9262        }
9263     }
9264
9265     if (gate) {
9266        IRTemp irt_regN    = newTemp(Ity_I32);
9267        IRTemp irt_regN_lo = newTemp(Ity_I32);
9268        IRTemp irt_regN_hi = newTemp(Ity_I32);
9269        IRTemp irt_Q_lo    = newTemp(Ity_I32);
9270        IRTemp irt_Q_hi    = newTemp(Ity_I32);
9271        IRTemp irt_res_lo  = newTemp(Ity_I32);
9272        IRTemp irt_res_hi  = newTemp(Ity_I32);
9273
9274        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9275        assign( irt_regN_lo,
9276                binop( Iop_Sar32,
9277                       binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
9278                       mkU8(16)) );
9279        assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
9280
9281        armSignedSatQ( irt_regN_lo, sat_imm, &irt_res_lo, &irt_Q_lo );
9282        or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
9283
9284        armSignedSatQ( irt_regN_hi, sat_imm, &irt_res_hi, &irt_Q_hi );
9285        or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
9286
9287        IRExpr* ire_result
9288           = binop(Iop_Or32,
9289                   binop(Iop_And32, mkexpr(irt_res_lo), mkU32(0xFFFF)),
9290                   binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)));
9291        if (isT)
9292           putIRegT( regD, ire_result, condT );
9293        else
9294           putIRegA( regD, ire_result, condT, Ijk_Boring );
9295
9296        DIP( "ssat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
9297        return True;
9298     }
9299     /* fall through */
9300   }
9301
9302   /* -------------- usat16<c> <Rd>,#<imm4>,<Rn> --------------- */
9303   {
9304     UInt regD = 99, regN = 99, sat_imm = 99;
9305     Bool gate = False;
9306
9307     if (isT) {
9308        if (INSNT0(15,4) == 0xF3A && (INSNT1(15,0) & 0xF0F0) == 0x0000) {
9309           regN = INSNT0(3,0);
9310           regD = INSNT1(11,8);
9311           sat_imm = INSNT1(3,0);
9312           if (!isBadRegT(regD) && !isBadRegT(regN))
9313              gate = True;
9314       }
9315     } else {
9316        if (INSNA(27,20) == BITS8(0,1,1,0,1,1,1,0) &&
9317            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9318            INSNA(7,4)   == BITS4(0,0,1,1)) {
9319           regD    = INSNA(15,12);
9320           regN    = INSNA(3,0);
9321           sat_imm = INSNA(19,16);
9322           if (regD != 15 && regN != 15)
9323              gate = True;
9324        }
9325     }
9326
9327     if (gate) {
9328        IRTemp irt_regN    = newTemp(Ity_I32);
9329        IRTemp irt_regN_lo = newTemp(Ity_I32);
9330        IRTemp irt_regN_hi = newTemp(Ity_I32);
9331        IRTemp irt_Q_lo    = newTemp(Ity_I32);
9332        IRTemp irt_Q_hi    = newTemp(Ity_I32);
9333        IRTemp irt_res_lo  = newTemp(Ity_I32);
9334        IRTemp irt_res_hi  = newTemp(Ity_I32);
9335
9336        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9337        assign( irt_regN_lo, binop( Iop_Sar32,
9338                                    binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
9339                                    mkU8(16)) );
9340        assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
9341
9342        armUnsignedSatQ( &irt_res_lo, &irt_Q_lo, irt_regN_lo, sat_imm );
9343        or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
9344
9345        armUnsignedSatQ( &irt_res_hi, &irt_Q_hi, irt_regN_hi, sat_imm );
9346        or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
9347
9348        IRExpr* ire_result = binop( Iop_Or32,
9349                                    binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)),
9350                                    mkexpr(irt_res_lo) );
9351
9352        if (isT)
9353           putIRegT( regD, ire_result, condT );
9354        else
9355           putIRegA( regD, ire_result, condT, Ijk_Boring );
9356
9357        DIP( "usat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
9358        return True;
9359     }
9360     /* fall through */
9361   }
9362
9363   /* -------------- uadd16<c> <Rd>,<Rn>,<Rm> -------------- */
9364   {
9365     UInt regD = 99, regN = 99, regM = 99;
9366     Bool gate = False;
9367
9368     if (isT) {
9369        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9370           regN = INSNT0(3,0);
9371           regD = INSNT1(11,8);
9372           regM = INSNT1(3,0);
9373           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9374              gate = True;
9375        }
9376     } else {
9377        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9378            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9379            INSNA(7,4)   == BITS4(0,0,0,1)) {
9380           regD = INSNA(15,12);
9381           regN = INSNA(19,16);
9382           regM = INSNA(3,0);
9383           if (regD != 15 && regN != 15 && regM != 15)
9384              gate = True;
9385        }
9386     }
9387
9388     if (gate) {
9389        IRTemp rNt  = newTemp(Ity_I32);
9390        IRTemp rMt  = newTemp(Ity_I32);
9391        IRTemp res  = newTemp(Ity_I32);
9392        IRTemp reso = newTemp(Ity_I32);
9393
9394        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9395        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9396
9397        assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
9398        if (isT)
9399           putIRegT( regD, mkexpr(res), condT );
9400        else
9401           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9402
9403        assign(reso, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
9404        set_GE_32_10_from_bits_31_15(reso, condT);
9405
9406        DIP("uadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9407        return True;
9408     }
9409     /* fall through */
9410   }
9411
9412   /* -------------- sadd16<c> <Rd>,<Rn>,<Rm> -------------- */
9413   {
9414     UInt regD = 99, regN = 99, regM = 99;
9415     Bool gate = False;
9416
9417     if (isT) {
9418        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9419           regN = INSNT0(3,0);
9420           regD = INSNT1(11,8);
9421           regM = INSNT1(3,0);
9422           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9423              gate = True;
9424        }
9425     } else {
9426        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9427            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9428            INSNA(7,4)   == BITS4(0,0,0,1)) {
9429           regD = INSNA(15,12);
9430           regN = INSNA(19,16);
9431           regM = INSNA(3,0);
9432           if (regD != 15 && regN != 15 && regM != 15)
9433              gate = True;
9434        }
9435     }
9436
9437     if (gate) {
9438        IRTemp rNt  = newTemp(Ity_I32);
9439        IRTemp rMt  = newTemp(Ity_I32);
9440        IRTemp res  = newTemp(Ity_I32);
9441        IRTemp reso = newTemp(Ity_I32);
9442
9443        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9444        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9445
9446        assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
9447        if (isT)
9448           putIRegT( regD, mkexpr(res), condT );
9449        else
9450           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9451
9452        assign(reso, unop(Iop_Not32,
9453                          binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt))));
9454        set_GE_32_10_from_bits_31_15(reso, condT);
9455
9456        DIP("sadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9457        return True;
9458     }
9459     /* fall through */
9460   }
9461
9462   /* ---------------- usub16<c> <Rd>,<Rn>,<Rm> ---------------- */
9463   {
9464     UInt regD = 99, regN = 99, regM = 99;
9465     Bool gate = False;
9466
9467     if (isT) {
9468        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9469           regN = INSNT0(3,0);
9470           regD = INSNT1(11,8);
9471           regM = INSNT1(3,0);
9472           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9473              gate = True;
9474        }
9475     } else {
9476        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9477            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9478            INSNA(7,4)   == BITS4(0,1,1,1)) {
9479           regD = INSNA(15,12);
9480           regN = INSNA(19,16);
9481           regM = INSNA(3,0);
9482           if (regD != 15 && regN != 15 && regM != 15)
9483             gate = True;
9484        }
9485     }
9486
9487     if (gate) {
9488        IRTemp rNt  = newTemp(Ity_I32);
9489        IRTemp rMt  = newTemp(Ity_I32);
9490        IRTemp res  = newTemp(Ity_I32);
9491        IRTemp reso = newTemp(Ity_I32);
9492
9493        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9494        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9495
9496        assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
9497        if (isT)
9498           putIRegT( regD, mkexpr(res), condT );
9499        else
9500           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9501
9502        assign(reso, unop(Iop_Not32,
9503                          binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt))));
9504        set_GE_32_10_from_bits_31_15(reso, condT);
9505
9506        DIP("usub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9507        return True;
9508     }
9509     /* fall through */
9510   }
9511
9512   /* -------------- ssub16<c> <Rd>,<Rn>,<Rm> -------------- */
9513   {
9514     UInt regD = 99, regN = 99, regM = 99;
9515     Bool gate = False;
9516
9517     if (isT) {
9518        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9519           regN = INSNT0(3,0);
9520           regD = INSNT1(11,8);
9521           regM = INSNT1(3,0);
9522           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9523              gate = True;
9524        }
9525     } else {
9526        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9527            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9528            INSNA(7,4)   == BITS4(0,1,1,1)) {
9529           regD = INSNA(15,12);
9530           regN = INSNA(19,16);
9531           regM = INSNA(3,0);
9532           if (regD != 15 && regN != 15 && regM != 15)
9533              gate = True;
9534        }
9535     }
9536
9537     if (gate) {
9538        IRTemp rNt  = newTemp(Ity_I32);
9539        IRTemp rMt  = newTemp(Ity_I32);
9540        IRTemp res  = newTemp(Ity_I32);
9541        IRTemp reso = newTemp(Ity_I32);
9542
9543        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9544        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9545
9546        assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
9547        if (isT)
9548           putIRegT( regD, mkexpr(res), condT );
9549        else
9550           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9551
9552        assign(reso, unop(Iop_Not32,
9553                          binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt))));
9554        set_GE_32_10_from_bits_31_15(reso, condT);
9555
9556        DIP("ssub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9557        return True;
9558     }
9559     /* fall through */
9560   }
9561
9562   /* ----------------- uadd8<c> <Rd>,<Rn>,<Rm> ---------------- */
9563   {
9564     UInt regD = 99, regN = 99, regM = 99;
9565     Bool gate = False;
9566
9567     if (isT) {
9568        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9569           regN = INSNT0(3,0);
9570           regD = INSNT1(11,8);
9571           regM = INSNT1(3,0);
9572           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9573              gate = True;
9574        }
9575     } else {
9576        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9577            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9578            (INSNA(7,4)  == BITS4(1,0,0,1))) {
9579           regD = INSNA(15,12);
9580           regN = INSNA(19,16);
9581           regM = INSNA(3,0);
9582           if (regD != 15 && regN != 15 && regM != 15)
9583              gate = True;
9584        }
9585     }
9586
9587     if (gate) {
9588        IRTemp rNt  = newTemp(Ity_I32);
9589        IRTemp rMt  = newTemp(Ity_I32);
9590        IRTemp res  = newTemp(Ity_I32);
9591        IRTemp reso = newTemp(Ity_I32);
9592
9593        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9594        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9595
9596        assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
9597        if (isT)
9598           putIRegT( regD, mkexpr(res), condT );
9599        else
9600           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9601
9602        assign(reso, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9603        set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9604
9605        DIP("uadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9606        return True;
9607     }
9608     /* fall through */
9609   }
9610
9611   /* ------------------- sadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
9612   {
9613     UInt regD = 99, regN = 99, regM = 99;
9614     Bool gate = False;
9615
9616     if (isT) {
9617        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9618           regN = INSNT0(3,0);
9619           regD = INSNT1(11,8);
9620           regM = INSNT1(3,0);
9621           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9622              gate = True;
9623        }
9624     } else {
9625        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9626            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9627            (INSNA(7,4)  == BITS4(1,0,0,1))) {
9628           regD = INSNA(15,12);
9629           regN = INSNA(19,16);
9630           regM = INSNA(3,0);
9631           if (regD != 15 && regN != 15 && regM != 15)
9632              gate = True;
9633        }
9634     }
9635
9636     if (gate) {
9637        IRTemp rNt  = newTemp(Ity_I32);
9638        IRTemp rMt  = newTemp(Ity_I32);
9639        IRTemp res  = newTemp(Ity_I32);
9640        IRTemp reso = newTemp(Ity_I32);
9641
9642        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9643        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9644
9645        assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
9646        if (isT)
9647           putIRegT( regD, mkexpr(res), condT );
9648        else
9649           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9650
9651        assign(reso, unop(Iop_Not32,
9652                          binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt))));
9653        set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9654
9655        DIP("sadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9656        return True;
9657     }
9658     /* fall through */
9659   }
9660
9661   /* ------------------- usub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9662   {
9663     UInt regD = 99, regN = 99, regM = 99;
9664     Bool gate = False;
9665
9666     if (isT) {
9667        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9668           regN = INSNT0(3,0);
9669           regD = INSNT1(11,8);
9670           regM = INSNT1(3,0);
9671           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9672              gate = True;
9673        }
9674     } else {
9675        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9676            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9677            (INSNA(7,4)  == BITS4(1,1,1,1))) {
9678           regD = INSNA(15,12);
9679           regN = INSNA(19,16);
9680           regM = INSNA(3,0);
9681           if (regD != 15 && regN != 15 && regM != 15)
9682             gate = True;
9683        }
9684     }
9685
9686     if (gate) {
9687        IRTemp rNt  = newTemp(Ity_I32);
9688        IRTemp rMt  = newTemp(Ity_I32);
9689        IRTemp res  = newTemp(Ity_I32);
9690        IRTemp reso = newTemp(Ity_I32);
9691
9692        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9693        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9694
9695        assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
9696        if (isT)
9697           putIRegT( regD, mkexpr(res), condT );
9698        else
9699           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9700
9701        assign(reso, unop(Iop_Not32,
9702                          binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt))));
9703        set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9704
9705        DIP("usub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9706        return True;
9707     }
9708     /* fall through */
9709   }
9710
9711   /* ------------------- ssub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9712   {
9713     UInt regD = 99, regN = 99, regM = 99;
9714     Bool gate = False;
9715
9716     if (isT) {
9717        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9718           regN = INSNT0(3,0);
9719           regD = INSNT1(11,8);
9720           regM = INSNT1(3,0);
9721           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9722              gate = True;
9723        }
9724     } else {
9725        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9726            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9727            INSNA(7,4)   == BITS4(1,1,1,1)) {
9728           regD = INSNA(15,12);
9729           regN = INSNA(19,16);
9730           regM = INSNA(3,0);
9731           if (regD != 15 && regN != 15 && regM != 15)
9732              gate = True;
9733        }
9734     }
9735
9736     if (gate) {
9737        IRTemp rNt  = newTemp(Ity_I32);
9738        IRTemp rMt  = newTemp(Ity_I32);
9739        IRTemp res  = newTemp(Ity_I32);
9740        IRTemp reso = newTemp(Ity_I32);
9741
9742        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9743        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9744
9745        assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
9746        if (isT)
9747           putIRegT( regD, mkexpr(res), condT );
9748        else
9749           putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9750
9751        assign(reso, unop(Iop_Not32,
9752                          binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt))));
9753        set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9754
9755        DIP("ssub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9756        return True;
9757     }
9758     /* fall through */
9759   }
9760
9761   /* ------------------ qadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
9762   {
9763     UInt regD = 99, regN = 99, regM = 99;
9764     Bool gate = False;
9765
9766     if (isT) {
9767        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9768           regN = INSNT0(3,0);
9769           regD = INSNT1(11,8);
9770           regM = INSNT1(3,0);
9771           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9772              gate = True;
9773        }
9774     } else {
9775        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9776            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9777            INSNA(7,4)   == BITS4(1,0,0,1)) {
9778           regD = INSNA(15,12);
9779           regN = INSNA(19,16);
9780           regM = INSNA(3,0);
9781           if (regD != 15 && regN != 15 && regM != 15)
9782              gate = True;
9783        }
9784     }
9785
9786     if (gate) {
9787        IRTemp rNt   = newTemp(Ity_I32);
9788        IRTemp rMt   = newTemp(Ity_I32);
9789        IRTemp res_q = newTemp(Ity_I32);
9790
9791        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9792        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9793
9794        assign(res_q, binop(Iop_QAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
9795        if (isT)
9796           putIRegT( regD, mkexpr(res_q), condT );
9797        else
9798           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9799
9800        DIP("qadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9801        return True;
9802     }
9803     /* fall through */
9804   }
9805
9806   /* ------------------ qsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
9807   {
9808     UInt regD = 99, regN = 99, regM = 99;
9809     Bool gate = False;
9810
9811     if (isT) {
9812        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9813           regN = INSNT0(3,0);
9814           regD = INSNT1(11,8);
9815           regM = INSNT1(3,0);
9816           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9817              gate = True;
9818        }
9819     } else {
9820        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9821            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9822            INSNA(7,4)   == BITS4(1,1,1,1)) {
9823           regD = INSNA(15,12);
9824           regN = INSNA(19,16);
9825           regM = INSNA(3,0);
9826           if (regD != 15 && regN != 15 && regM != 15)
9827              gate = True;
9828        }
9829     }
9830
9831     if (gate) {
9832        IRTemp rNt   = newTemp(Ity_I32);
9833        IRTemp rMt   = newTemp(Ity_I32);
9834        IRTemp res_q = newTemp(Ity_I32);
9835
9836        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9837        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9838
9839        assign(res_q, binop(Iop_QSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
9840        if (isT)
9841           putIRegT( regD, mkexpr(res_q), condT );
9842        else
9843           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9844
9845        DIP("qsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9846        return True;
9847     }
9848     /* fall through */
9849   }
9850
9851   /* ------------------ uqadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
9852   {
9853     UInt regD = 99, regN = 99, regM = 99;
9854     Bool gate = False;
9855
9856     if (isT) {
9857        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
9858           regN = INSNT0(3,0);
9859           regD = INSNT1(11,8);
9860           regM = INSNT1(3,0);
9861           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9862              gate = True;
9863        }
9864     } else {
9865        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
9866            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9867            (INSNA(7,4)  == BITS4(1,0,0,1))) {
9868           regD = INSNA(15,12);
9869           regN = INSNA(19,16);
9870           regM = INSNA(3,0);
9871           if (regD != 15 && regN != 15 && regM != 15)
9872              gate = True;
9873        }
9874     }
9875
9876     if (gate) {
9877        IRTemp rNt   = newTemp(Ity_I32);
9878        IRTemp rMt   = newTemp(Ity_I32);
9879        IRTemp res_q = newTemp(Ity_I32);
9880
9881        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9882        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9883
9884        assign(res_q, binop(Iop_QAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9885        if (isT)
9886           putIRegT( regD, mkexpr(res_q), condT );
9887        else
9888           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9889
9890        DIP("uqadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9891        return True;
9892     }
9893     /* fall through */
9894   }
9895
9896   /* ------------------ uqsub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9897   {
9898     UInt regD = 99, regN = 99, regM = 99;
9899     Bool gate = False;
9900
9901     if (isT) {
9902        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
9903           regN = INSNT0(3,0);
9904           regD = INSNT1(11,8);
9905           regM = INSNT1(3,0);
9906           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9907              gate = True;
9908        }
9909     } else {
9910        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
9911            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9912            (INSNA(7,4)  == BITS4(1,1,1,1))) {
9913           regD = INSNA(15,12);
9914           regN = INSNA(19,16);
9915           regM = INSNA(3,0);
9916           if (regD != 15 && regN != 15 && regM != 15)
9917             gate = True;
9918        }
9919     }
9920
9921     if (gate) {
9922        IRTemp rNt   = newTemp(Ity_I32);
9923        IRTemp rMt   = newTemp(Ity_I32);
9924        IRTemp res_q = newTemp(Ity_I32);
9925
9926        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9927        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9928
9929        assign(res_q, binop(Iop_QSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
9930        if (isT)
9931           putIRegT( regD, mkexpr(res_q), condT );
9932        else
9933           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9934
9935        DIP("uqsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9936        return True;
9937     }
9938     /* fall through */
9939   }
9940
9941   /* ----------------- uhadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
9942   {
9943     UInt regD = 99, regN = 99, regM = 99;
9944     Bool gate = False;
9945
9946     if (isT) {
9947        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
9948           regN = INSNT0(3,0);
9949           regD = INSNT1(11,8);
9950           regM = INSNT1(3,0);
9951           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9952              gate = True;
9953        }
9954     } else {
9955        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
9956            INSNA(11,8)  == BITS4(1,1,1,1)         &&
9957            INSNA(7,4)   == BITS4(1,0,0,1)) {
9958           regD = INSNA(15,12);
9959           regN = INSNA(19,16);
9960           regM = INSNA(3,0);
9961           if (regD != 15 && regN != 15 && regM != 15)
9962              gate = True;
9963        }
9964     }
9965
9966     if (gate) {
9967        IRTemp rNt   = newTemp(Ity_I32);
9968        IRTemp rMt   = newTemp(Ity_I32);
9969        IRTemp res_q = newTemp(Ity_I32);
9970
9971        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9972        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9973
9974        assign(res_q, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9975        if (isT)
9976           putIRegT( regD, mkexpr(res_q), condT );
9977        else
9978           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9979
9980        DIP("uhadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9981        return True;
9982     }
9983     /* fall through */
9984   }
9985
9986   /* ----------------- uhadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
9987   {
9988     UInt regD = 99, regN = 99, regM = 99;
9989     Bool gate = False;
9990
9991     if (isT) {
9992        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
9993           regN = INSNT0(3,0);
9994           regD = INSNT1(11,8);
9995           regM = INSNT1(3,0);
9996           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9997              gate = True;
9998        }
9999     } else {
10000        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
10001            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10002            INSNA(7,4)   == BITS4(0,0,0,1)) {
10003           regD = INSNA(15,12);
10004           regN = INSNA(19,16);
10005           regM = INSNA(3,0);
10006           if (regD != 15 && regN != 15 && regM != 15)
10007              gate = True;
10008        }
10009     }
10010
10011     if (gate) {
10012        IRTemp rNt   = newTemp(Ity_I32);
10013        IRTemp rMt   = newTemp(Ity_I32);
10014        IRTemp res_q = newTemp(Ity_I32);
10015
10016        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10017        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10018
10019        assign(res_q, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
10020        if (isT)
10021           putIRegT( regD, mkexpr(res_q), condT );
10022        else
10023           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10024
10025        DIP("uhadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10026        return True;
10027     }
10028     /* fall through */
10029   }
10030
10031   /* ----------------- shadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
10032   {
10033     UInt regD = 99, regN = 99, regM = 99;
10034     Bool gate = False;
10035
10036     if (isT) {
10037        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
10038           regN = INSNT0(3,0);
10039           regD = INSNT1(11,8);
10040           regM = INSNT1(3,0);
10041           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10042              gate = True;
10043        }
10044     } else {
10045        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
10046            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10047            INSNA(7,4)   == BITS4(1,0,0,1)) {
10048           regD = INSNA(15,12);
10049           regN = INSNA(19,16);
10050           regM = INSNA(3,0);
10051           if (regD != 15 && regN != 15 && regM != 15)
10052              gate = True;
10053        }
10054     }
10055
10056     if (gate) {
10057        IRTemp rNt   = newTemp(Ity_I32);
10058        IRTemp rMt   = newTemp(Ity_I32);
10059        IRTemp res_q = newTemp(Ity_I32);
10060
10061        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10062        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10063
10064        assign(res_q, binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
10065        if (isT)
10066           putIRegT( regD, mkexpr(res_q), condT );
10067        else
10068           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10069
10070        DIP("shadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10071        return True;
10072     }
10073     /* fall through */
10074   }
10075
10076   /* ------------------ qadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
10077   {
10078     UInt regD = 99, regN = 99, regM = 99;
10079     Bool gate = False;
10080
10081     if (isT) {
10082        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10083           regN = INSNT0(3,0);
10084           regD = INSNT1(11,8);
10085           regM = INSNT1(3,0);
10086           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10087              gate = True;
10088        }
10089     } else {
10090        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10091            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10092            INSNA(7,4)   == BITS4(0,0,0,1)) {
10093           regD = INSNA(15,12);
10094           regN = INSNA(19,16);
10095           regM = INSNA(3,0);
10096           if (regD != 15 && regN != 15 && regM != 15)
10097              gate = True;
10098        }
10099     }
10100
10101     if (gate) {
10102        IRTemp rNt   = newTemp(Ity_I32);
10103        IRTemp rMt   = newTemp(Ity_I32);
10104        IRTemp res_q = newTemp(Ity_I32);
10105
10106        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10107        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10108
10109        assign(res_q, binop(Iop_QAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
10110        if (isT)
10111           putIRegT( regD, mkexpr(res_q), condT );
10112        else
10113           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10114
10115        DIP("qadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10116        return True;
10117     }
10118     /* fall through */
10119   }
10120
10121   /* ------------------ qsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
10122   {
10123     UInt regD = 99, regN = 99, regM = 99;
10124     Bool gate = False;
10125
10126      if (isT) {
10127        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10128           regN = INSNT0(3,0);
10129           regD = INSNT1(11,8);
10130           regM = INSNT1(3,0);
10131           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10132              gate = True;
10133        }
10134     } else {
10135        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10136            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10137            INSNA(7,4)   == BITS4(0,1,1,1)) {
10138           regD = INSNA(15,12);
10139           regN = INSNA(19,16);
10140           regM = INSNA(3,0);
10141           if (regD != 15 && regN != 15 && regM != 15)
10142             gate = True;
10143        }
10144     }
10145
10146     if (gate) {
10147        IRTemp rNt   = newTemp(Ity_I32);
10148        IRTemp rMt   = newTemp(Ity_I32);
10149        IRTemp res_q = newTemp(Ity_I32);
10150
10151        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10152        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10153
10154        assign(res_q, binop(Iop_QSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
10155        if (isT)
10156           putIRegT( regD, mkexpr(res_q), condT );
10157        else
10158           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10159
10160        DIP("qsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10161        return True;
10162     }
10163     /* fall through */
10164   }
10165
10166   /* ------------------- qsax<c> <Rd>,<Rn>,<Rm> ------------------- */
10167   /* note: the hardware seems to construct the result differently
10168      from wot the manual says. */
10169   {
10170     UInt regD = 99, regN = 99, regM = 99;
10171     Bool gate = False;
10172
10173     if (isT) {
10174        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10175           regN = INSNT0(3,0);
10176           regD = INSNT1(11,8);
10177           regM = INSNT1(3,0);
10178           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10179              gate = True;
10180        }
10181     } else {
10182        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10183            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10184            INSNA(7,4)   == BITS4(0,1,0,1)) {
10185           regD = INSNA(15,12);
10186           regN = INSNA(19,16);
10187           regM = INSNA(3,0);
10188           if (regD != 15 && regN != 15 && regM != 15)
10189              gate = True;
10190        }
10191     }
10192
10193     if (gate) {
10194        IRTemp irt_regN     = newTemp(Ity_I32);
10195        IRTemp irt_regM     = newTemp(Ity_I32);
10196        IRTemp irt_sum      = newTemp(Ity_I32);
10197        IRTemp irt_diff     = newTemp(Ity_I32);
10198        IRTemp irt_sum_res  = newTemp(Ity_I32);
10199        IRTemp irt_diff_res = newTemp(Ity_I32);
10200
10201        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10202        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10203
10204        assign( irt_diff,
10205                binop( Iop_Sub32,
10206                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10207                       binop( Iop_Sar32,
10208                              binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
10209                              mkU8(16) ) ) );
10210        armSignedSatQ( irt_diff, 0x10, &irt_diff_res, NULL);
10211
10212        assign( irt_sum,
10213                binop( Iop_Add32,
10214                       binop( Iop_Sar32,
10215                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10216                              mkU8(16) ),
10217                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) )) );
10218        armSignedSatQ( irt_sum, 0x10, &irt_sum_res, NULL );
10219
10220        IRExpr* ire_result = binop( Iop_Or32,
10221                                    binop( Iop_Shl32, mkexpr(irt_diff_res),
10222                                           mkU8(16) ),
10223                                    binop( Iop_And32, mkexpr(irt_sum_res),
10224                                           mkU32(0xFFFF)) );
10225
10226        if (isT)
10227           putIRegT( regD, ire_result, condT );
10228        else
10229           putIRegA( regD, ire_result, condT, Ijk_Boring );
10230
10231        DIP( "qsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10232        return True;
10233     }
10234     /* fall through */
10235   }
10236
10237   /* ------------------- qasx<c> <Rd>,<Rn>,<Rm> ------------------- */
10238   {
10239     UInt regD = 99, regN = 99, regM = 99;
10240     Bool gate = False;
10241
10242     if (isT) {
10243        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10244           regN = INSNT0(3,0);
10245           regD = INSNT1(11,8);
10246           regM = INSNT1(3,0);
10247           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10248              gate = True;
10249        }
10250     } else {
10251        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10252            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10253            INSNA(7,4)   == BITS4(0,0,1,1)) {
10254           regD = INSNA(15,12);
10255           regN = INSNA(19,16);
10256           regM = INSNA(3,0);
10257           if (regD != 15 && regN != 15 && regM != 15)
10258              gate = True;
10259        }
10260     }
10261
10262     if (gate) {
10263        IRTemp irt_regN     = newTemp(Ity_I32);
10264        IRTemp irt_regM     = newTemp(Ity_I32);
10265        IRTemp irt_sum      = newTemp(Ity_I32);
10266        IRTemp irt_diff     = newTemp(Ity_I32);
10267        IRTemp irt_res_sum  = newTemp(Ity_I32);
10268        IRTemp irt_res_diff = newTemp(Ity_I32);
10269
10270        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10271        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10272
10273        assign( irt_diff,
10274                binop( Iop_Sub32,
10275                       binop( Iop_Sar32,
10276                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10277                              mkU8(16) ),
10278                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10279        armSignedSatQ( irt_diff, 0x10, &irt_res_diff, NULL );
10280
10281        assign( irt_sum,
10282                binop( Iop_Add32,
10283                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10284                       binop( Iop_Sar32,
10285                              binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10286                              mkU8(16) ) ) );
10287        armSignedSatQ( irt_sum, 0x10, &irt_res_sum, NULL );
10288
10289        IRExpr* ire_result
10290          = binop( Iop_Or32,
10291                   binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
10292                   binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
10293
10294        if (isT)
10295           putIRegT( regD, ire_result, condT );
10296        else
10297           putIRegA( regD, ire_result, condT, Ijk_Boring );
10298
10299        DIP( "qasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10300        return True;
10301     }
10302     /* fall through */
10303   }
10304
10305   /* ------------------- sasx<c> <Rd>,<Rn>,<Rm> ------------------- */
10306   {
10307     UInt regD = 99, regN = 99, regM = 99;
10308     Bool gate = False;
10309
10310     if (isT) {
10311        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
10312           regN = INSNT0(3,0);
10313           regD = INSNT1(11,8);
10314           regM = INSNT1(3,0);
10315           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10316              gate = True;
10317        }
10318     } else {
10319        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
10320            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10321            INSNA(7,4)   == BITS4(0,0,1,1)) {
10322           regD = INSNA(15,12);
10323           regN = INSNA(19,16);
10324           regM = INSNA(3,0);
10325           if (regD != 15 && regN != 15 && regM != 15)
10326              gate = True;
10327        }
10328     }
10329
10330     if (gate) {
10331        IRTemp irt_regN = newTemp(Ity_I32);
10332        IRTemp irt_regM = newTemp(Ity_I32);
10333        IRTemp irt_sum  = newTemp(Ity_I32);
10334        IRTemp irt_diff = newTemp(Ity_I32);
10335
10336        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10337        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10338
10339        assign( irt_diff,
10340                binop( Iop_Sub32,
10341                       binop( Iop_Sar32,
10342                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10343                              mkU8(16) ),
10344                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10345
10346        assign( irt_sum,
10347                binop( Iop_Add32,
10348                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10349                       binop( Iop_Sar32,
10350                              binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10351                              mkU8(16) ) ) );
10352
10353        IRExpr* ire_result
10354          = binop( Iop_Or32,
10355                   binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
10356                   binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
10357
10358        IRTemp ge10 = newTemp(Ity_I32);
10359        assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
10360        put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
10361        put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
10362
10363        IRTemp ge32 = newTemp(Ity_I32);
10364        assign(ge32, unop(Iop_Not32, mkexpr(irt_sum)));
10365        put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
10366        put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
10367
10368        if (isT)
10369           putIRegT( regD, ire_result, condT );
10370        else
10371           putIRegA( regD, ire_result, condT, Ijk_Boring );
10372
10373        DIP( "sasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10374        return True;
10375     }
10376     /* fall through */
10377   }
10378
10379   /* --------------- smuad, smuadx<c><Rd>,<Rn>,<Rm> --------------- */
   /* --------------- smusd, smusdx<c><Rd>,<Rn>,<Rm> --------------- */
10381   {
10382     UInt regD = 99, regN = 99, regM = 99, bitM = 99;
10383     Bool gate = False, isAD = False;
10384
10385     if (isT) {
10386        if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
10387            && (INSNT1(15,0) & 0xF0E0) == 0xF000) {
10388           regN = INSNT0(3,0);
10389           regD = INSNT1(11,8);
10390           regM = INSNT1(3,0);
10391           bitM = INSNT1(4,4);
10392           isAD = INSNT0(15,4) == 0xFB2;
10393           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10394              gate = True;
10395        }
10396     } else {
10397        if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
10398            INSNA(15,12) == BITS4(1,1,1,1)         &&
10399            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1) ) {
10400           regD = INSNA(19,16);
10401           regN = INSNA(3,0);
10402           regM = INSNA(11,8);
10403           bitM = INSNA(5,5);
10404           isAD = INSNA(6,6) == 0;
10405           if (regD != 15 && regN != 15 && regM != 15)
10406              gate = True;
10407        }
10408     }
10409
10410     if (gate) {
10411        IRTemp irt_regN    = newTemp(Ity_I32);
10412        IRTemp irt_regM    = newTemp(Ity_I32);
10413        IRTemp irt_prod_lo = newTemp(Ity_I32);
10414        IRTemp irt_prod_hi = newTemp(Ity_I32);
10415        IRTemp tmpM        = newTemp(Ity_I32);
10416
10417        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10418
10419        assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
10420        assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
10421
10422        assign( irt_prod_lo,
10423                binop( Iop_Mul32,
10424                       binop( Iop_Sar32,
10425                              binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
10426                              mkU8(16) ),
10427                       binop( Iop_Sar32,
10428                              binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
10429                              mkU8(16) ) ) );
10430        assign( irt_prod_hi, binop(Iop_Mul32,
10431                                   binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)),
10432                                   binop(Iop_Sar32, mkexpr(irt_regM), mkU8(16))) );
10433        IRExpr* ire_result
10434           = binop( isAD ? Iop_Add32 : Iop_Sub32,
10435                    mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) );
10436
10437        if (isT)
10438           putIRegT( regD, ire_result, condT );
10439        else
10440           putIRegA( regD, ire_result, condT, Ijk_Boring );
10441
10442        if (isAD) {
10443           or_into_QFLAG32(
10444              signed_overflow_after_Add32( ire_result,
10445                                           irt_prod_lo, irt_prod_hi ),
10446              condT
10447           );
10448        }
10449
10450        DIP("smu%cd%s%s r%u, r%u, r%u\n",
10451            isAD ? 'a' : 's',
10452            bitM ? "x" : "", nCC(conq), regD, regN, regM);
10453        return True;
10454     }
10455     /* fall through */
10456   }
10457
10458   /* --------------- smlad{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
10459   /* --------------- smlsd{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
10460   {
10461     UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
10462     Bool gate = False, isAD = False;
10463
10464     if (isT) {
10465       if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
10466           && INSNT1(7,5) == BITS3(0,0,0)) {
10467           regN = INSNT0(3,0);
10468           regD = INSNT1(11,8);
10469           regM = INSNT1(3,0);
10470           regA = INSNT1(15,12);
10471           bitM = INSNT1(4,4);
10472           isAD = INSNT0(15,4) == 0xFB2;
10473           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10474               && !isBadRegT(regA))
10475              gate = True;
10476        }
10477     } else {
10478        if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
10479            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
10480           regD = INSNA(19,16);
10481           regA = INSNA(15,12);
10482           regN = INSNA(3,0);
10483           regM = INSNA(11,8);
10484           bitM = INSNA(5,5);
10485           isAD = INSNA(6,6) == 0;
10486           if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10487              gate = True;
10488        }
10489     }
10490
10491     if (gate) {
10492        IRTemp irt_regN    = newTemp(Ity_I32);
10493        IRTemp irt_regM    = newTemp(Ity_I32);
10494        IRTemp irt_regA    = newTemp(Ity_I32);
10495        IRTemp irt_prod_lo = newTemp(Ity_I32);
10496        IRTemp irt_prod_hi = newTemp(Ity_I32);
10497        IRTemp irt_sum     = newTemp(Ity_I32);
10498        IRTemp tmpM        = newTemp(Ity_I32);
10499
10500        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10501        assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10502
10503        assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
10504        assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
10505
10506        assign( irt_prod_lo,
10507                binop(Iop_Mul32,
10508                      binop(Iop_Sar32,
10509                            binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10510                            mkU8(16)),
10511                      binop(Iop_Sar32,
10512                            binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10513                            mkU8(16))) );
10514        assign( irt_prod_hi,
10515                binop( Iop_Mul32,
10516                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10517                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10518        assign( irt_sum, binop( isAD ? Iop_Add32 : Iop_Sub32,
10519                                mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) ) );
10520
10521        IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_sum), mkexpr(irt_regA));
10522
10523        if (isT)
10524           putIRegT( regD, ire_result, condT );
10525        else
10526           putIRegA( regD, ire_result, condT, Ijk_Boring );
10527
10528        if (isAD) {
10529           or_into_QFLAG32(
10530              signed_overflow_after_Add32( mkexpr(irt_sum),
10531                                           irt_prod_lo, irt_prod_hi ),
10532              condT
10533           );
10534        }
10535
10536        or_into_QFLAG32(
10537           signed_overflow_after_Add32( ire_result, irt_sum, irt_regA ),
10538           condT
10539        );
10540
10541        DIP("sml%cd%s%s r%u, r%u, r%u, r%u\n",
10542            isAD ? 'a' : 's',
10543            bitM ? "x" : "", nCC(conq), regD, regN, regM, regA);
10544        return True;
10545     }
10546     /* fall through */
10547   }
10548
10549   /* ----- smlabb, smlabt, smlatb, smlatt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10550   {
10551     UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99, bitN = 99;
10552     Bool gate = False;
10553
10554     if (isT) {
10555        if (INSNT0(15,4) == 0xFB1 && INSNT1(7,6) == BITS2(0,0)) {
10556           regN = INSNT0(3,0);
10557           regD = INSNT1(11,8);
10558           regM = INSNT1(3,0);
10559           regA = INSNT1(15,12);
10560           bitM = INSNT1(4,4);
10561           bitN = INSNT1(5,5);
10562           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10563               && !isBadRegT(regA))
10564              gate = True;
10565        }
10566     } else {
10567        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
10568            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
10569           regD = INSNA(19,16);
10570           regN = INSNA(3,0);
10571           regM = INSNA(11,8);
10572           regA = INSNA(15,12);
10573           bitM = INSNA(6,6);
10574           bitN = INSNA(5,5);
10575           if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10576              gate = True;
10577        }
10578     }
10579
10580     if (gate) {
10581        IRTemp irt_regA = newTemp(Ity_I32);
10582        IRTemp irt_prod = newTemp(Ity_I32);
10583
10584        assign( irt_prod,
10585                binop(Iop_Mul32,
10586                      binop(Iop_Sar32,
10587                            binop(Iop_Shl32,
10588                                  isT ? getIRegT(regN) : getIRegA(regN),
10589                                  mkU8(bitN ? 0 : 16)),
10590                            mkU8(16)),
10591                      binop(Iop_Sar32,
10592                            binop(Iop_Shl32,
10593                                  isT ? getIRegT(regM) : getIRegA(regM),
10594                                  mkU8(bitM ? 0 : 16)),
10595                            mkU8(16))) );
10596
10597        assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10598
10599        IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_prod), mkexpr(irt_regA));
10600
10601        if (isT)
10602           putIRegT( regD, ire_result, condT );
10603        else
10604           putIRegA( regD, ire_result, condT, Ijk_Boring );
10605
10606        or_into_QFLAG32(
10607           signed_overflow_after_Add32( ire_result, irt_prod, irt_regA ),
10608           condT
10609        );
10610
10611        DIP( "smla%c%c%s r%u, r%u, r%u, r%u\n",
10612             bitN ? 't' : 'b', bitM ? 't' : 'b',
10613             nCC(conq), regD, regN, regM, regA );
10614        return True;
10615     }
10616     /* fall through */
10617   }
10618
10619   /* ----- smlalbb, smlalbt, smlaltb, smlaltt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10620   {
10621     UInt regDHi = 99, regN = 99, regM = 99, regDLo = 99, bitM = 99, bitN = 99;
10622     Bool gate = False;
10623
10624     if (isT) {
10625        if (INSNT0(15,4) == 0xFBC && INSNT1(7,6) == BITS2(1,0)) {
10626           regN   = INSNT0(3,0);
10627           regDHi = INSNT1(11,8);
10628           regM   = INSNT1(3,0);
10629           regDLo = INSNT1(15,12);
10630           bitM   = INSNT1(4,4);
10631           bitN   = INSNT1(5,5);
10632           if (!isBadRegT(regDHi) && !isBadRegT(regN) && !isBadRegT(regM)
10633               && !isBadRegT(regDLo) && regDHi != regDLo)
10634              gate = True;
10635        }
10636     } else {
10637        if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
10638            (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
10639           regDHi = INSNA(19,16);
10640           regN   = INSNA(3,0);
10641           regM   = INSNA(11,8);
10642           regDLo = INSNA(15,12);
10643           bitM   = INSNA(6,6);
10644           bitN   = INSNA(5,5);
10645           if (regDHi != 15 && regN != 15 && regM != 15 && regDLo != 15 &&
10646               regDHi != regDLo)
10647              gate = True;
10648        }
10649     }
10650
10651     if (gate) {
10652        IRTemp irt_regD  = newTemp(Ity_I64);
10653        IRTemp irt_prod  = newTemp(Ity_I64);
10654        IRTemp irt_res   = newTemp(Ity_I64);
10655        IRTemp irt_resHi = newTemp(Ity_I32);
10656        IRTemp irt_resLo = newTemp(Ity_I32);
10657
10658        assign( irt_prod,
10659                binop(Iop_MullS32,
10660                      binop(Iop_Sar32,
10661                            binop(Iop_Shl32,
10662                                  isT ? getIRegT(regN) : getIRegA(regN),
10663                                  mkU8(bitN ? 0 : 16)),
10664                            mkU8(16)),
10665                      binop(Iop_Sar32,
10666                            binop(Iop_Shl32,
10667                                  isT ? getIRegT(regM) : getIRegA(regM),
10668                                  mkU8(bitM ? 0 : 16)),
10669                            mkU8(16))) );
10670
10671        assign( irt_regD, binop(Iop_32HLto64,
10672                                isT ? getIRegT(regDHi) : getIRegA(regDHi),
10673                                isT ? getIRegT(regDLo) : getIRegA(regDLo)) );
10674        assign( irt_res, binop(Iop_Add64, mkexpr(irt_regD), mkexpr(irt_prod)) );
10675        assign( irt_resHi, unop(Iop_64HIto32, mkexpr(irt_res)) );
10676        assign( irt_resLo, unop(Iop_64to32, mkexpr(irt_res)) );
10677
10678        if (isT) {
10679           putIRegT( regDHi, mkexpr(irt_resHi), condT );
10680           putIRegT( regDLo, mkexpr(irt_resLo), condT );
10681        } else {
10682           putIRegA( regDHi, mkexpr(irt_resHi), condT, Ijk_Boring );
10683           putIRegA( regDLo, mkexpr(irt_resLo), condT, Ijk_Boring );
10684        }
10685
10686        DIP( "smlal%c%c%s r%u, r%u, r%u, r%u\n",
10687             bitN ? 't' : 'b', bitM ? 't' : 'b',
10688             nCC(conq), regDHi, regN, regM, regDLo );
10689        return True;
10690     }
10691     /* fall through */
10692   }
10693
10694   /* ----- smlawb, smlawt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10695   {
10696     UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
10697     Bool gate = False;
10698
10699     if (isT) {
10700        if (INSNT0(15,4) == 0xFB3 && INSNT1(7,5) == BITS3(0,0,0)) {
10701           regN = INSNT0(3,0);
10702           regD = INSNT1(11,8);
10703           regM = INSNT1(3,0);
10704           regA = INSNT1(15,12);
10705           bitM = INSNT1(4,4);
10706           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10707               && !isBadRegT(regA))
10708              gate = True;
10709        }
10710     } else {
10711        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
10712            (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,0,0)) {
10713           regD = INSNA(19,16);
10714           regN = INSNA(3,0);
10715           regM = INSNA(11,8);
10716           regA = INSNA(15,12);
10717           bitM = INSNA(6,6);
10718           if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10719              gate = True;
10720        }
10721     }
10722
10723     if (gate) {
10724        IRTemp irt_regA = newTemp(Ity_I32);
10725        IRTemp irt_prod = newTemp(Ity_I64);
10726
10727        assign( irt_prod,
10728                binop(Iop_MullS32,
10729                      isT ? getIRegT(regN) : getIRegA(regN),
10730                      binop(Iop_Sar32,
10731                            binop(Iop_Shl32,
10732                                  isT ? getIRegT(regM) : getIRegA(regM),
10733                                  mkU8(bitM ? 0 : 16)),
10734                            mkU8(16))) );
10735
10736        assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10737
10738        IRTemp prod32 = newTemp(Ity_I32);
10739        assign(prod32,
10740               binop(Iop_Or32,
10741                     binop(Iop_Shl32, unop(Iop_64HIto32, mkexpr(irt_prod)), mkU8(16)),
10742                     binop(Iop_Shr32, unop(Iop_64to32, mkexpr(irt_prod)), mkU8(16))
10743        ));
10744
10745        IRExpr* ire_result = binop(Iop_Add32, mkexpr(prod32), mkexpr(irt_regA));
10746
10747        if (isT)
10748           putIRegT( regD, ire_result, condT );
10749        else
10750           putIRegA( regD, ire_result, condT, Ijk_Boring );
10751
10752        or_into_QFLAG32(
10753           signed_overflow_after_Add32( ire_result, prod32, irt_regA ),
10754           condT
10755        );
10756
10757        DIP( "smlaw%c%s r%u, r%u, r%u, r%u\n",
10758             bitM ? 't' : 'b',
10759             nCC(conq), regD, regN, regM, regA );
10760        return True;
10761     }
10762     /* fall through */
10763   }
10764
10765   /* ------------------- sel<c> <Rd>,<Rn>,<Rm> -------------------- */
10766   /* fixme: fix up the test in v6media.c so that we can pass the ge
10767      flags as part of the test. */
10768   {
10769     UInt regD = 99, regN = 99, regM = 99;
10770     Bool gate = False;
10771
10772     if (isT) {
10773        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
10774           regN = INSNT0(3,0);
10775           regD = INSNT1(11,8);
10776           regM = INSNT1(3,0);
10777           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10778              gate = True;
10779        }
10780     } else {
10781        if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
10782            INSNA(11,8)  == BITS4(1,1,1,1)         &&
10783            INSNA(7,4)   == BITS4(1,0,1,1)) {
10784           regD = INSNA(15,12);
10785           regN = INSNA(19,16);
10786           regM = INSNA(3,0);
10787           if (regD != 15 && regN != 15 && regM != 15)
10788              gate = True;
10789        }
10790     }
10791
10792     if (gate) {
10793        IRTemp irt_ge_flag0 = newTemp(Ity_I32);
10794        IRTemp irt_ge_flag1 = newTemp(Ity_I32);
10795        IRTemp irt_ge_flag2 = newTemp(Ity_I32);
10796        IRTemp irt_ge_flag3 = newTemp(Ity_I32);
10797
10798        assign( irt_ge_flag0, get_GEFLAG32(0) );
10799        assign( irt_ge_flag1, get_GEFLAG32(1) );
10800        assign( irt_ge_flag2, get_GEFLAG32(2) );
10801        assign( irt_ge_flag3, get_GEFLAG32(3) );
10802
10803        IRExpr* ire_ge_flag0_or
10804          = binop(Iop_Or32, mkexpr(irt_ge_flag0),
10805                  binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag0)));
10806        IRExpr* ire_ge_flag1_or
10807          = binop(Iop_Or32, mkexpr(irt_ge_flag1),
10808                  binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag1)));
10809        IRExpr* ire_ge_flag2_or
10810          = binop(Iop_Or32, mkexpr(irt_ge_flag2),
10811                  binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag2)));
10812        IRExpr* ire_ge_flag3_or
10813          = binop(Iop_Or32, mkexpr(irt_ge_flag3),
10814                  binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag3)));
10815
10816        IRExpr* ire_ge_flags
10817          = binop( Iop_Or32,
10818                   binop(Iop_Or32,
10819                         binop(Iop_And32,
10820                               binop(Iop_Sar32, ire_ge_flag0_or, mkU8(31)),
10821                               mkU32(0x000000ff)),
10822                         binop(Iop_And32,
10823                               binop(Iop_Sar32, ire_ge_flag1_or, mkU8(31)),
10824                               mkU32(0x0000ff00))),
10825                   binop(Iop_Or32,
10826                         binop(Iop_And32,
10827                               binop(Iop_Sar32, ire_ge_flag2_or, mkU8(31)),
10828                               mkU32(0x00ff0000)),
10829                         binop(Iop_And32,
10830                               binop(Iop_Sar32, ire_ge_flag3_or, mkU8(31)),
10831                               mkU32(0xff000000))) );
10832
10833        IRExpr* ire_result
10834          = binop(Iop_Or32,
10835                  binop(Iop_And32,
10836                        isT ? getIRegT(regN) : getIRegA(regN),
10837                        ire_ge_flags ),
10838                  binop(Iop_And32,
10839                        isT ? getIRegT(regM) : getIRegA(regM),
10840                        unop(Iop_Not32, ire_ge_flags)));
10841
10842        if (isT)
10843           putIRegT( regD, ire_result, condT );
10844        else
10845           putIRegA( regD, ire_result, condT, Ijk_Boring );
10846
10847        DIP("sel%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10848        return True;
10849     }
10850     /* fall through */
10851   }
10852
10853   /* ----------------- uxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
10854   {
10855     UInt regD = 99, regN = 99, regM = 99, rotate = 99;
10856     Bool gate = False;
10857
10858     if (isT) {
10859        if (INSNT0(15,4) == 0xFA3 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
10860           regN   = INSNT0(3,0);
10861           regD   = INSNT1(11,8);
10862           regM   = INSNT1(3,0);
10863           rotate = INSNT1(5,4);
10864           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10865              gate = True;
10866        }
10867     } else {
10868        if (INSNA(27,20) == BITS8(0,1,1,0,1,1,0,0) &&
10869            INSNA(9,4)   == BITS6(0,0,0,1,1,1) ) {
10870           regD   = INSNA(15,12);
10871           regN   = INSNA(19,16);
10872           regM   = INSNA(3,0);
10873           rotate = INSNA(11,10);
10874           if (regD != 15 && regN != 15 && regM != 15)
10875             gate = True;
10876        }
10877     }
10878
10879     if (gate) {
10880        IRTemp irt_regN = newTemp(Ity_I32);
10881        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10882
10883        IRTemp irt_regM = newTemp(Ity_I32);
10884        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10885
10886        IRTemp irt_rot = newTemp(Ity_I32);
10887        assign( irt_rot, binop(Iop_And32,
10888                               genROR32(irt_regM, 8 * rotate),
10889                               mkU32(0x00FF00FF)) );
10890
10891        IRExpr* resLo
10892           = binop(Iop_And32,
10893                   binop(Iop_Add32, mkexpr(irt_regN), mkexpr(irt_rot)),
10894                   mkU32(0x0000FFFF));
10895
10896        IRExpr* resHi
10897           = binop(Iop_Add32,
10898                   binop(Iop_And32, mkexpr(irt_regN), mkU32(0xFFFF0000)),
10899                   binop(Iop_And32, mkexpr(irt_rot),  mkU32(0xFFFF0000)));
10900
10901        IRExpr* ire_result
10902           = binop( Iop_Or32, resHi, resLo );
10903
10904        if (isT)
10905           putIRegT( regD, ire_result, condT );
10906        else
10907           putIRegA( regD, ire_result, condT, Ijk_Boring );
10908
10909        DIP( "uxtab16%s r%u, r%u, r%u, ROR #%u\n",
10910             nCC(conq), regD, regN, regM, 8 * rotate );
10911        return True;
10912     }
10913     /* fall through */
10914   }
10915
10916   /* --------------- usad8  Rd,Rn,Rm    ---------------- */
10917   /* --------------- usada8 Rd,Rn,Rm,Ra ---------------- */
10918   {
10919     UInt rD = 99, rN = 99, rM = 99, rA = 99;
10920     Bool gate = False;
10921
10922     if (isT) {
10923       if (INSNT0(15,4) == 0xFB7 && INSNT1(7,4) == BITS4(0,0,0,0)) {
10924           rN = INSNT0(3,0);
10925           rA = INSNT1(15,12);
10926           rD = INSNT1(11,8);
10927           rM = INSNT1(3,0);
10928           if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && rA != 13)
10929              gate = True;
10930        }
10931     } else {
10932        if (INSNA(27,20) == BITS8(0,1,1,1,1,0,0,0) &&
10933            INSNA(7,4)   == BITS4(0,0,0,1) ) {
10934           rD = INSNA(19,16);
10935           rA = INSNA(15,12);
10936           rM = INSNA(11,8);
10937           rN = INSNA(3,0);
10938           if (rD != 15 && rN != 15 && rM != 15 /* but rA can be 15 */)
10939              gate = True;
10940        }
10941     }
10942     /* We allow rA == 15, to denote the usad8 (no accumulator) case. */
10943
10944     if (gate) {
10945        IRExpr* rNe = isT ? getIRegT(rN) : getIRegA(rN);
10946        IRExpr* rMe = isT ? getIRegT(rM) : getIRegA(rM);
10947        IRExpr* rAe = rA == 15 ? mkU32(0)
10948                               : (isT ? getIRegT(rA) : getIRegA(rA));
10949        IRExpr* res = binop(Iop_Add32,
10950                            binop(Iop_Sad8Ux4, rNe, rMe),
10951                            rAe);
10952        if (isT)
10953           putIRegT( rD, res, condT );
10954        else
10955           putIRegA( rD, res, condT, Ijk_Boring );
10956
10957        if (rA == 15) {
10958           DIP( "usad8%s r%u, r%u, r%u\n",
10959                nCC(conq), rD, rN, rM );
10960        } else {
10961           DIP( "usada8%s r%u, r%u, r%u, r%u\n",
10962                nCC(conq), rD, rN, rM, rA );
10963        }
10964        return True;
10965     }
10966     /* fall through */
10967   }
10968
10969   /* ------------------ qadd<c> <Rd>,<Rn>,<Rm> ------------------- */
10970   {
10971     UInt regD = 99, regN = 99, regM = 99;
10972     Bool gate = False;
10973
10974     if (isT) {
10975        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
10976           regN = INSNT0(3,0);
10977           regD = INSNT1(11,8);
10978           regM = INSNT1(3,0);
10979           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10980              gate = True;
10981        }
10982     } else {
10983        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
10984            INSNA(11,8)  == BITS4(0,0,0,0)         &&
10985            INSNA(7,4)   == BITS4(0,1,0,1)) {
10986           regD = INSNA(15,12);
10987           regN = INSNA(19,16);
10988           regM = INSNA(3,0);
10989           if (regD != 15 && regN != 15 && regM != 15)
10990              gate = True;
10991        }
10992     }
10993
10994     if (gate) {
10995        IRTemp rNt   = newTemp(Ity_I32);
10996        IRTemp rMt   = newTemp(Ity_I32);
10997        IRTemp res_q = newTemp(Ity_I32);
10998
10999        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11000        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11001
11002        assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rNt)));
11003        if (isT)
11004           putIRegT( regD, mkexpr(res_q), condT );
11005        else
11006           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11007
11008        or_into_QFLAG32(
11009           signed_overflow_after_Add32(
11010              binop(Iop_Add32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
11011           condT
11012        );
11013
11014        DIP("qadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11015        return True;
11016     }
11017     /* fall through */
11018   }
11019
11020   /* ------------------ qdadd<c> <Rd>,<Rm>,<Rn> ------------------- */
11021   {
11022     UInt regD = 99, regN = 99, regM = 99;
11023     Bool gate = False;
11024
11025     if (isT) {
11026        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF090) {
11027           regN = INSNT0(3,0);
11028           regD = INSNT1(11,8);
11029           regM = INSNT1(3,0);
11030           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11031              gate = True;
11032        }
11033     } else {
11034        if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
11035            INSNA(11,8)  == BITS4(0,0,0,0)         &&
11036            INSNA(7,4)   == BITS4(0,1,0,1)) {
11037           regD = INSNA(15,12);
11038           regN = INSNA(19,16);
11039           regM = INSNA(3,0);
11040           if (regD != 15 && regN != 15 && regM != 15)
11041              gate = True;
11042        }
11043     }
11044
11045     if (gate) {
11046        IRTemp rNt   = newTemp(Ity_I32);
11047        IRTemp rMt   = newTemp(Ity_I32);
11048        IRTemp rN_d  = newTemp(Ity_I32);
11049        IRTemp res_q = newTemp(Ity_I32);
11050
11051        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11052        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11053
11054        or_into_QFLAG32(
11055           signed_overflow_after_Add32(
11056              binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
11057           condT
11058        );
11059
11060        assign(rN_d,  binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
11061        assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rN_d)));
11062        if (isT)
11063           putIRegT( regD, mkexpr(res_q), condT );
11064        else
11065           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11066
11067        or_into_QFLAG32(
11068           signed_overflow_after_Add32(
11069              binop(Iop_Add32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
11070           condT
11071        );
11072
11073        DIP("qdadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11074        return True;
11075     }
11076     /* fall through */
11077   }
11078
11079   /* ------------------ qsub<c> <Rd>,<Rn>,<Rm> ------------------- */
11080   {
11081     UInt regD = 99, regN = 99, regM = 99;
11082     Bool gate = False;
11083
11084     if (isT) {
11085        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0A0) {
11086           regN = INSNT0(3,0);
11087           regD = INSNT1(11,8);
11088           regM = INSNT1(3,0);
11089           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11090              gate = True;
11091        }
11092     } else {
11093        if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
11094            INSNA(11,8)  == BITS4(0,0,0,0)         &&
11095            INSNA(7,4)   == BITS4(0,1,0,1)) {
11096           regD = INSNA(15,12);
11097           regN = INSNA(19,16);
11098           regM = INSNA(3,0);
11099           if (regD != 15 && regN != 15 && regM != 15)
11100              gate = True;
11101        }
11102     }
11103
11104     if (gate) {
11105        IRTemp rNt   = newTemp(Ity_I32);
11106        IRTemp rMt   = newTemp(Ity_I32);
11107        IRTemp res_q = newTemp(Ity_I32);
11108
11109        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11110        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11111
11112        assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rNt)));
11113        if (isT)
11114           putIRegT( regD, mkexpr(res_q), condT );
11115        else
11116           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11117
11118        or_into_QFLAG32(
11119           signed_overflow_after_Sub32(
11120              binop(Iop_Sub32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
11121           condT
11122        );
11123
11124        DIP("qsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11125        return True;
11126     }
11127     /* fall through */
11128   }
11129
11130   /* ------------------ qdsub<c> <Rd>,<Rm>,<Rn> ------------------- */
11131   {
11132     UInt regD = 99, regN = 99, regM = 99;
11133     Bool gate = False;
11134
11135     if (isT) {
11136        if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0B0) {
11137           regN = INSNT0(3,0);
11138           regD = INSNT1(11,8);
11139           regM = INSNT1(3,0);
11140           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11141              gate = True;
11142        }
11143     } else {
11144        if (INSNA(27,20) == BITS8(0,0,0,1,0,1,1,0) &&
11145            INSNA(11,8)  == BITS4(0,0,0,0)         &&
11146            INSNA(7,4)   == BITS4(0,1,0,1)) {
11147           regD = INSNA(15,12);
11148           regN = INSNA(19,16);
11149           regM = INSNA(3,0);
11150           if (regD != 15 && regN != 15 && regM != 15)
11151              gate = True;
11152        }
11153     }
11154
11155     if (gate) {
11156        IRTemp rNt   = newTemp(Ity_I32);
11157        IRTemp rMt   = newTemp(Ity_I32);
11158        IRTemp rN_d  = newTemp(Ity_I32);
11159        IRTemp res_q = newTemp(Ity_I32);
11160
11161        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11162        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11163
11164        or_into_QFLAG32(
11165           signed_overflow_after_Add32(
11166              binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
11167           condT
11168        );
11169
11170        assign(rN_d,  binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
11171        assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rN_d)));
11172        if (isT)
11173           putIRegT( regD, mkexpr(res_q), condT );
11174        else
11175           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11176
11177        or_into_QFLAG32(
11178           signed_overflow_after_Sub32(
11179              binop(Iop_Sub32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
11180           condT
11181        );
11182
11183        DIP("qdsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11184        return True;
11185     }
11186     /* fall through */
11187   }
11188
11189   /* ------------------ uqsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
11190   {
11191     UInt regD = 99, regN = 99, regM = 99;
11192     Bool gate = False;
11193
11194     if (isT) {
11195        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11196           regN = INSNT0(3,0);
11197           regD = INSNT1(11,8);
11198           regM = INSNT1(3,0);
11199           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11200              gate = True;
11201        }
11202     } else {
11203        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11204            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11205            INSNA(7,4)   == BITS4(0,1,1,1)) {
11206           regD = INSNA(15,12);
11207           regN = INSNA(19,16);
11208           regM = INSNA(3,0);
11209           if (regD != 15 && regN != 15 && regM != 15)
11210             gate = True;
11211        }
11212     }
11213
11214     if (gate) {
11215        IRTemp rNt   = newTemp(Ity_I32);
11216        IRTemp rMt   = newTemp(Ity_I32);
11217        IRTemp res_q = newTemp(Ity_I32);
11218
11219        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11220        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11221
11222        assign(res_q, binop(Iop_QSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
11223        if (isT)
11224           putIRegT( regD, mkexpr(res_q), condT );
11225        else
11226           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11227
11228        DIP("uqsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11229        return True;
11230     }
11231     /* fall through */
11232   }
11233
11234   /* ----------------- shadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
11235   {
11236     UInt regD = 99, regN = 99, regM = 99;
11237     Bool gate = False;
11238
11239     if (isT) {
11240        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11241           regN = INSNT0(3,0);
11242           regD = INSNT1(11,8);
11243           regM = INSNT1(3,0);
11244           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11245              gate = True;
11246        }
11247     } else {
11248        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11249            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11250            INSNA(7,4)   == BITS4(0,0,0,1)) {
11251           regD = INSNA(15,12);
11252           regN = INSNA(19,16);
11253           regM = INSNA(3,0);
11254           if (regD != 15 && regN != 15 && regM != 15)
11255              gate = True;
11256        }
11257     }
11258
11259     if (gate) {
11260        IRTemp rNt   = newTemp(Ity_I32);
11261        IRTemp rMt   = newTemp(Ity_I32);
11262        IRTemp res_q = newTemp(Ity_I32);
11263
11264        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11265        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11266
11267        assign(res_q, binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
11268        if (isT)
11269           putIRegT( regD, mkexpr(res_q), condT );
11270        else
11271           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11272
11273        DIP("shadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11274        return True;
11275     }
11276     /* fall through */
11277   }
11278
11279   /* ----------------- uhsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
11280   {
11281     UInt regD = 99, regN = 99, regM = 99;
11282     Bool gate = False;
11283
11284     if (isT) {
11285        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
11286           regN = INSNT0(3,0);
11287           regD = INSNT1(11,8);
11288           regM = INSNT1(3,0);
11289           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11290              gate = True;
11291        }
11292     } else {
11293        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
11294            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11295            INSNA(7,4)   == BITS4(1,1,1,1)) {
11296           regD = INSNA(15,12);
11297           regN = INSNA(19,16);
11298           regM = INSNA(3,0);
11299           if (regD != 15 && regN != 15 && regM != 15)
11300              gate = True;
11301        }
11302     }
11303
11304     if (gate) {
11305        IRTemp rNt   = newTemp(Ity_I32);
11306        IRTemp rMt   = newTemp(Ity_I32);
11307        IRTemp res_q = newTemp(Ity_I32);
11308
11309        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11310        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11311
11312        assign(res_q, binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
11313        if (isT)
11314           putIRegT( regD, mkexpr(res_q), condT );
11315        else
11316           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11317
11318        DIP("uhsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11319        return True;
11320     }
11321     /* fall through */
11322   }
11323
11324   /* ----------------- uhsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
11325   {
11326     UInt regD = 99, regN = 99, regM = 99;
11327     Bool gate = False;
11328
11329     if (isT) {
11330        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
11331           regN = INSNT0(3,0);
11332           regD = INSNT1(11,8);
11333           regM = INSNT1(3,0);
11334           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11335              gate = True;
11336        }
11337     } else {
11338        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
11339            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11340            INSNA(7,4)   == BITS4(0,1,1,1)) {
11341           regD = INSNA(15,12);
11342           regN = INSNA(19,16);
11343           regM = INSNA(3,0);
11344           if (regD != 15 && regN != 15 && regM != 15)
11345              gate = True;
11346        }
11347     }
11348
11349     if (gate) {
11350        IRTemp rNt   = newTemp(Ity_I32);
11351        IRTemp rMt   = newTemp(Ity_I32);
11352        IRTemp res_q = newTemp(Ity_I32);
11353
11354        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11355        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11356
11357        assign(res_q, binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
11358        if (isT)
11359           putIRegT( regD, mkexpr(res_q), condT );
11360        else
11361           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11362
11363        DIP("uhsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11364        return True;
11365     }
11366     /* fall through */
11367   }
11368
11369   /* ------------------ uqadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
11370   {
11371     UInt regD = 99, regN = 99, regM = 99;
11372     Bool gate = False;
11373
11374     if (isT) {
11375        if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11376           regN = INSNT0(3,0);
11377           regD = INSNT1(11,8);
11378           regM = INSNT1(3,0);
11379           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11380              gate = True;
11381        }
11382     } else {
11383        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11384            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11385            INSNA(7,4)   == BITS4(0,0,0,1)) {
11386           regD = INSNA(15,12);
11387           regN = INSNA(19,16);
11388           regM = INSNA(3,0);
11389           if (regD != 15 && regN != 15 && regM != 15)
11390              gate = True;
11391        }
11392     }
11393
11394     if (gate) {
11395        IRTemp rNt   = newTemp(Ity_I32);
11396        IRTemp rMt   = newTemp(Ity_I32);
11397        IRTemp res_q = newTemp(Ity_I32);
11398
11399        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11400        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11401
11402        assign(res_q, binop(Iop_QAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
11403        if (isT)
11404           putIRegT( regD, mkexpr(res_q), condT );
11405        else
11406           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11407
11408        DIP("uqadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11409        return True;
11410     }
11411     /* fall through */
11412   }
11413
11414   /* ------------------- uqsax<c> <Rd>,<Rn>,<Rm> ------------------- */
11415   {
11416     UInt regD = 99, regN = 99, regM = 99;
11417     Bool gate = False;
11418
11419     if (isT) {
11420        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11421           regN = INSNT0(3,0);
11422           regD = INSNT1(11,8);
11423           regM = INSNT1(3,0);
11424           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11425              gate = True;
11426        }
11427     } else {
11428        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11429            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11430            INSNA(7,4)   == BITS4(0,1,0,1)) {
11431           regD = INSNA(15,12);
11432           regN = INSNA(19,16);
11433           regM = INSNA(3,0);
11434           if (regD != 15 && regN != 15 && regM != 15)
11435              gate = True;
11436        }
11437     }
11438
11439     if (gate) {
11440        IRTemp irt_regN     = newTemp(Ity_I32);
11441        IRTemp irt_regM     = newTemp(Ity_I32);
11442        IRTemp irt_sum      = newTemp(Ity_I32);
11443        IRTemp irt_diff     = newTemp(Ity_I32);
11444        IRTemp irt_sum_res  = newTemp(Ity_I32);
11445        IRTemp irt_diff_res = newTemp(Ity_I32);
11446
11447        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11448        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11449
11450        assign( irt_diff,
11451                binop( Iop_Sub32,
11452                       binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11453                       binop( Iop_Shr32,
11454                              binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
11455                              mkU8(16) ) ) );
11456        armUnsignedSatQ( &irt_diff_res, NULL, irt_diff, 0x10);
11457
11458        assign( irt_sum,
11459                binop( Iop_Add32,
11460                       binop( Iop_Shr32,
11461                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11462                              mkU8(16) ),
11463                       binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) )) );
11464        armUnsignedSatQ( &irt_sum_res, NULL, irt_sum, 0x10 );
11465
11466        IRExpr* ire_result = binop( Iop_Or32,
11467                                    binop( Iop_Shl32, mkexpr(irt_diff_res),
11468                                           mkU8(16) ),
11469                                    binop( Iop_And32, mkexpr(irt_sum_res),
11470                                           mkU32(0xFFFF)) );
11471
11472        if (isT)
11473           putIRegT( regD, ire_result, condT );
11474        else
11475           putIRegA( regD, ire_result, condT, Ijk_Boring );
11476
11477        DIP( "uqsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11478        return True;
11479     }
11480     /* fall through */
11481   }
11482
11483   /* ------------------- uqasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11484   {
11485     UInt regD = 99, regN = 99, regM = 99;
11486     Bool gate = False;
11487
11488     if (isT) {
11489        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11490           regN = INSNT0(3,0);
11491           regD = INSNT1(11,8);
11492           regM = INSNT1(3,0);
11493           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11494              gate = True;
11495        }
11496     } else {
11497        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11498            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11499            INSNA(7,4)   == BITS4(0,0,1,1)) {
11500           regD = INSNA(15,12);
11501           regN = INSNA(19,16);
11502           regM = INSNA(3,0);
11503           if (regD != 15 && regN != 15 && regM != 15)
11504              gate = True;
11505        }
11506     }
11507
11508     if (gate) {
11509        IRTemp irt_regN     = newTemp(Ity_I32);
11510        IRTemp irt_regM     = newTemp(Ity_I32);
11511        IRTemp irt_sum      = newTemp(Ity_I32);
11512        IRTemp irt_diff     = newTemp(Ity_I32);
11513        IRTemp irt_res_sum  = newTemp(Ity_I32);
11514        IRTemp irt_res_diff = newTemp(Ity_I32);
11515
11516        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11517        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11518
11519        assign( irt_diff,
11520                binop( Iop_Sub32,
11521                       binop( Iop_Shr32,
11522                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11523                              mkU8(16) ),
11524                       binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11525        armUnsignedSatQ( &irt_res_diff, NULL, irt_diff, 0x10 );
11526
11527        assign( irt_sum,
11528                binop( Iop_Add32,
11529                       binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11530                       binop( Iop_Shr32,
11531                              binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
11532                              mkU8(16) ) ) );
11533        armUnsignedSatQ( &irt_res_sum, NULL, irt_sum, 0x10 );
11534
11535        IRExpr* ire_result
11536          = binop( Iop_Or32,
11537                   binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
11538                   binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
11539
11540        if (isT)
11541           putIRegT( regD, ire_result, condT );
11542        else
11543           putIRegA( regD, ire_result, condT, Ijk_Boring );
11544
11545        DIP( "uqasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11546        return True;
11547     }
11548     /* fall through */
11549   }
11550
11551   /* ------------------- usax<c> <Rd>,<Rn>,<Rm> ------------------- */
11552   {
11553     UInt regD = 99, regN = 99, regM = 99;
11554     Bool gate = False;
11555
11556     if (isT) {
11557        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
11558           regN = INSNT0(3,0);
11559           regD = INSNT1(11,8);
11560           regM = INSNT1(3,0);
11561           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11562              gate = True;
11563        }
11564     } else {
11565        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
11566            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11567            INSNA(7,4)   == BITS4(0,1,0,1)) {
11568           regD = INSNA(15,12);
11569           regN = INSNA(19,16);
11570           regM = INSNA(3,0);
11571           if (regD != 15 && regN != 15 && regM != 15)
11572              gate = True;
11573        }
11574     }
11575
11576     if (gate) {
11577        IRTemp irt_regN = newTemp(Ity_I32);
11578        IRTemp irt_regM = newTemp(Ity_I32);
11579        IRTemp irt_sum  = newTemp(Ity_I32);
11580        IRTemp irt_diff = newTemp(Ity_I32);
11581
11582        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11583        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11584
11585        assign( irt_sum,
11586                binop( Iop_Add32,
11587                       unop( Iop_16Uto32,
11588                             unop( Iop_32to16, mkexpr(irt_regN) )
11589                       ),
11590                       binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11591
11592        assign( irt_diff,
11593                binop( Iop_Sub32,
11594                       binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11595                       unop( Iop_16Uto32,
11596                             unop( Iop_32to16, mkexpr(irt_regM) )
11597                       )
11598                )
11599        );
11600
11601        IRExpr* ire_result
11602          = binop( Iop_Or32,
11603                   binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
11604                   binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
11605
11606        IRTemp ge10 = newTemp(Ity_I32);
11607        assign( ge10, IRExpr_ITE( binop( Iop_CmpLE32U,
11608                                         mkU32(0x10000), mkexpr(irt_sum) ),
11609                                  mkU32(1), mkU32(0) ) );
11610        put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
11611        put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
11612
11613        IRTemp ge32 = newTemp(Ity_I32);
11614        assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
11615        put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
11616        put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
11617
11618        if (isT)
11619           putIRegT( regD, ire_result, condT );
11620        else
11621           putIRegA( regD, ire_result, condT, Ijk_Boring );
11622
11623        DIP( "usax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11624        return True;
11625     }
11626     /* fall through */
11627   }
11628
11629   /* ------------------- uasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11630   {
11631     UInt regD = 99, regN = 99, regM = 99;
11632     Bool gate = False;
11633
11634     if (isT) {
11635        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
11636           regN = INSNT0(3,0);
11637           regD = INSNT1(11,8);
11638           regM = INSNT1(3,0);
11639           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11640              gate = True;
11641        }
11642     } else {
11643        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
11644            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11645            INSNA(7,4)   == BITS4(0,0,1,1)) {
11646           regD = INSNA(15,12);
11647           regN = INSNA(19,16);
11648           regM = INSNA(3,0);
11649           if (regD != 15 && regN != 15 && regM != 15)
11650              gate = True;
11651        }
11652     }
11653
11654     if (gate) {
11655        IRTemp irt_regN = newTemp(Ity_I32);
11656        IRTemp irt_regM = newTemp(Ity_I32);
11657        IRTemp irt_sum  = newTemp(Ity_I32);
11658        IRTemp irt_diff = newTemp(Ity_I32);
11659
11660        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11661        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11662
11663        assign( irt_diff,
11664                binop( Iop_Sub32,
11665                       unop( Iop_16Uto32,
11666                             unop( Iop_32to16, mkexpr(irt_regN) )
11667                       ),
11668                       binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11669
11670        assign( irt_sum,
11671                binop( Iop_Add32,
11672                       binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11673                       unop( Iop_16Uto32,
11674                             unop( Iop_32to16, mkexpr(irt_regM) )
11675                       ) ) );
11676
11677        IRExpr* ire_result
11678          = binop( Iop_Or32,
11679                   binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
11680                   binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
11681
11682        IRTemp ge10 = newTemp(Ity_I32);
11683        assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
11684        put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
11685        put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
11686
11687        IRTemp ge32 = newTemp(Ity_I32);
11688        assign( ge32, IRExpr_ITE( binop( Iop_CmpLE32U,
11689                                         mkU32(0x10000), mkexpr(irt_sum) ),
11690                                  mkU32(1), mkU32(0) ) );
11691        put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
11692        put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
11693
11694        if (isT)
11695           putIRegT( regD, ire_result, condT );
11696        else
11697           putIRegA( regD, ire_result, condT, Ijk_Boring );
11698
11699        DIP( "uasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11700        return True;
11701     }
11702     /* fall through */
11703   }
11704
11705   /* ------------------- ssax<c> <Rd>,<Rn>,<Rm> ------------------- */
11706   {
11707     UInt regD = 99, regN = 99, regM = 99;
11708     Bool gate = False;
11709
11710     if (isT) {
11711        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
11712           regN = INSNT0(3,0);
11713           regD = INSNT1(11,8);
11714           regM = INSNT1(3,0);
11715           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11716              gate = True;
11717        }
11718     } else {
11719        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
11720            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11721            INSNA(7,4)   == BITS4(0,1,0,1)) {
11722           regD = INSNA(15,12);
11723           regN = INSNA(19,16);
11724           regM = INSNA(3,0);
11725           if (regD != 15 && regN != 15 && regM != 15)
11726              gate = True;
11727        }
11728     }
11729
11730     if (gate) {
11731        IRTemp irt_regN = newTemp(Ity_I32);
11732        IRTemp irt_regM = newTemp(Ity_I32);
11733        IRTemp irt_sum  = newTemp(Ity_I32);
11734        IRTemp irt_diff = newTemp(Ity_I32);
11735
11736        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11737        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11738
11739        assign( irt_sum,
11740                binop( Iop_Add32,
11741                       binop( Iop_Sar32,
11742                              binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11743                              mkU8(16) ),
11744                       binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
11745
11746        assign( irt_diff,
11747                binop( Iop_Sub32,
11748                       binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
11749                       binop( Iop_Sar32,
11750                              binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
11751                              mkU8(16) ) ) );
11752
11753        IRExpr* ire_result
11754          = binop( Iop_Or32,
11755                   binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
11756                   binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
11757
11758        IRTemp ge10 = newTemp(Ity_I32);
11759        assign(ge10, unop(Iop_Not32, mkexpr(irt_sum)));
11760        put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
11761        put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
11762
11763        IRTemp ge32 = newTemp(Ity_I32);
11764        assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
11765        put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
11766        put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
11767
11768        if (isT)
11769           putIRegT( regD, ire_result, condT );
11770        else
11771           putIRegA( regD, ire_result, condT, Ijk_Boring );
11772
11773        DIP( "ssax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11774        return True;
11775     }
11776     /* fall through */
11777   }
11778
11779   /* ----------------- shsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
11780   {
11781     UInt regD = 99, regN = 99, regM = 99;
11782     Bool gate = False;
11783
11784     if (isT) {
11785        if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11786           regN = INSNT0(3,0);
11787           regD = INSNT1(11,8);
11788           regM = INSNT1(3,0);
11789           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11790              gate = True;
11791        }
11792     } else {
11793        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11794            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11795            INSNA(7,4)   == BITS4(1,1,1,1)) {
11796           regD = INSNA(15,12);
11797           regN = INSNA(19,16);
11798           regM = INSNA(3,0);
11799           if (regD != 15 && regN != 15 && regM != 15)
11800              gate = True;
11801        }
11802     }
11803
11804     if (gate) {
11805        IRTemp rNt   = newTemp(Ity_I32);
11806        IRTemp rMt   = newTemp(Ity_I32);
11807        IRTemp res_q = newTemp(Ity_I32);
11808
11809        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11810        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11811
11812        assign(res_q, binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
11813        if (isT)
11814           putIRegT( regD, mkexpr(res_q), condT );
11815        else
11816           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11817
11818        DIP("shsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11819        return True;
11820     }
11821     /* fall through */
11822   }
11823
11824   /* ----------------- sxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
11825   {
11826     UInt regD = 99, regN = 99, regM = 99, rotate = 99;
11827     Bool gate = False;
11828
11829     if (isT) {
11830        if (INSNT0(15,4) == 0xFA2 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
11831           regN   = INSNT0(3,0);
11832           regD   = INSNT1(11,8);
11833           regM   = INSNT1(3,0);
11834           rotate = INSNT1(5,4);
11835           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11836              gate = True;
11837        }
11838     } else {
11839        if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
11840            INSNA(9,4)   == BITS6(0,0,0,1,1,1) ) {
11841           regD   = INSNA(15,12);
11842           regN   = INSNA(19,16);
11843           regM   = INSNA(3,0);
11844           rotate = INSNA(11,10);
11845           if (regD != 15 && regN != 15 && regM != 15)
11846             gate = True;
11847        }
11848     }
11849
11850     if (gate) {
11851        IRTemp irt_regN = newTemp(Ity_I32);
11852        assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11853
11854        IRTemp irt_regM = newTemp(Ity_I32);
11855        assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11856
11857        IRTemp irt_rot = newTemp(Ity_I32);
11858        assign( irt_rot, genROR32(irt_regM, 8 * rotate) );
11859
11860        /* FIXME Maybe we can write this arithmetic in shorter form. */
11861        IRExpr* resLo
11862           = binop(Iop_And32,
11863                   binop(Iop_Add32,
11864                         mkexpr(irt_regN),
11865                         unop(Iop_16Uto32,
11866                              unop(Iop_8Sto16,
11867                                   unop(Iop_32to8, mkexpr(irt_rot))))),
11868                   mkU32(0x0000FFFF));
11869
11870        IRExpr* resHi
11871           = binop(Iop_And32,
11872                   binop(Iop_Add32,
11873                         mkexpr(irt_regN),
11874                         binop(Iop_Shl32,
11875                               unop(Iop_16Uto32,
11876                                    unop(Iop_8Sto16,
11877                                         unop(Iop_32to8,
11878                                              binop(Iop_Shr32,
11879                                                    mkexpr(irt_rot),
11880                                                    mkU8(16))))),
11881                               mkU8(16))),
11882                   mkU32(0xFFFF0000));
11883
11884        IRExpr* ire_result
11885           = binop( Iop_Or32, resHi, resLo );
11886
11887        if (isT)
11888           putIRegT( regD, ire_result, condT );
11889        else
11890           putIRegA( regD, ire_result, condT, Ijk_Boring );
11891
11892        DIP( "sxtab16%s r%u, r%u, r%u, ROR #%u\n",
11893             nCC(conq), regD, regN, regM, 8 * rotate );
11894        return True;
11895     }
11896     /* fall through */
11897   }
11898
11899   /* ----------------- shasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11900   {
11901     UInt regD = 99, regN = 99, regM = 99;
11902     Bool gate = False;
11903
11904     if (isT) {
11905        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11906           regN = INSNT0(3,0);
11907           regD = INSNT1(11,8);
11908           regM = INSNT1(3,0);
11909           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11910              gate = True;
11911        }
11912     } else {
11913        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11914            INSNA(11,8)  == BITS4(1,1,1,1)         &&
11915            INSNA(7,4)   == BITS4(0,0,1,1)) {
11916           regD = INSNA(15,12);
11917           regN = INSNA(19,16);
11918           regM = INSNA(3,0);
11919           if (regD != 15 && regN != 15 && regM != 15)
11920              gate = True;
11921        }
11922     }
11923
11924     if (gate) {
11925        IRTemp rNt   = newTemp(Ity_I32);
11926        IRTemp rMt   = newTemp(Ity_I32);
11927        IRTemp irt_diff  = newTemp(Ity_I32);
11928        IRTemp irt_sum   = newTemp(Ity_I32);
11929        IRTemp res_q = newTemp(Ity_I32);
11930
11931        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11932        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11933
11934        assign( irt_diff,
11935                binop(Iop_Sub32,
11936                      unop(Iop_16Sto32,
11937                           unop(Iop_32to16,
11938                                mkexpr(rNt)
11939                           )
11940                      ),
11941                      unop(Iop_16Sto32,
11942                           unop(Iop_32to16,
11943                                binop(Iop_Shr32,
11944                                      mkexpr(rMt), mkU8(16)
11945                                )
11946                           )
11947                      )
11948                )
11949        );
11950
11951        assign( irt_sum,
11952                binop(Iop_Add32,
11953                      unop(Iop_16Sto32,
11954                           unop(Iop_32to16,
11955                                binop(Iop_Shr32,
11956                                      mkexpr(rNt), mkU8(16)
11957                                )
11958                           )
11959                      ),
11960                      unop(Iop_16Sto32,
11961                           unop(Iop_32to16, mkexpr(rMt)
11962                           )
11963                      )
11964                )
11965        );
11966
11967        assign( res_q,
11968                binop(Iop_Or32,
11969                      unop(Iop_16Uto32,
11970                           unop(Iop_32to16,
11971                                binop(Iop_Shr32,
11972                                      mkexpr(irt_diff), mkU8(1)
11973                                )
11974                           )
11975                      ),
11976                      binop(Iop_Shl32,
11977                            binop(Iop_Shr32,
11978                                  mkexpr(irt_sum), mkU8(1)
11979                            ),
11980                            mkU8(16)
11981                     )
11982                )
11983        );
11984
11985        if (isT)
11986           putIRegT( regD, mkexpr(res_q), condT );
11987        else
11988           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11989
11990        DIP("shasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11991        return True;
11992     }
11993     /* fall through */
11994   }
11995
11996   /* ----------------- uhasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11997   {
11998     UInt regD = 99, regN = 99, regM = 99;
11999     Bool gate = False;
12000
12001     if (isT) {
12002        if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
12003           regN = INSNT0(3,0);
12004           regD = INSNT1(11,8);
12005           regM = INSNT1(3,0);
12006           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12007              gate = True;
12008        }
12009     } else {
12010        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
12011            INSNA(11,8)  == BITS4(1,1,1,1)         &&
12012            INSNA(7,4)   == BITS4(0,0,1,1)) {
12013           regD = INSNA(15,12);
12014           regN = INSNA(19,16);
12015           regM = INSNA(3,0);
12016           if (regD != 15 && regN != 15 && regM != 15)
12017              gate = True;
12018        }
12019     }
12020
12021     if (gate) {
12022        IRTemp rNt   = newTemp(Ity_I32);
12023        IRTemp rMt   = newTemp(Ity_I32);
12024        IRTemp irt_diff  = newTemp(Ity_I32);
12025        IRTemp irt_sum   = newTemp(Ity_I32);
12026        IRTemp res_q = newTemp(Ity_I32);
12027
12028        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12029        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12030
12031        assign( irt_diff,
12032                binop(Iop_Sub32,
12033                      unop(Iop_16Uto32,
12034                           unop(Iop_32to16,
12035                                mkexpr(rNt)
12036                           )
12037                      ),
12038                      unop(Iop_16Uto32,
12039                           unop(Iop_32to16,
12040                                binop(Iop_Shr32,
12041                                      mkexpr(rMt), mkU8(16)
12042                                )
12043                           )
12044                      )
12045                )
12046        );
12047
12048        assign( irt_sum,
12049                binop(Iop_Add32,
12050                      unop(Iop_16Uto32,
12051                           unop(Iop_32to16,
12052                                binop(Iop_Shr32,
12053                                      mkexpr(rNt), mkU8(16)
12054                                )
12055                           )
12056                      ),
12057                      unop(Iop_16Uto32,
12058                           unop(Iop_32to16, mkexpr(rMt)
12059                           )
12060                      )
12061                )
12062        );
12063
12064        assign( res_q,
12065                binop(Iop_Or32,
12066                      unop(Iop_16Uto32,
12067                           unop(Iop_32to16,
12068                                binop(Iop_Shr32,
12069                                      mkexpr(irt_diff), mkU8(1)
12070                                )
12071                           )
12072                      ),
12073                      binop(Iop_Shl32,
12074                            binop(Iop_Shr32,
12075                                  mkexpr(irt_sum), mkU8(1)
12076                            ),
12077                            mkU8(16)
12078                     )
12079                )
12080        );
12081
12082        if (isT)
12083           putIRegT( regD, mkexpr(res_q), condT );
12084        else
12085           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12086
12087        DIP("uhasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12088        return True;
12089     }
12090     /* fall through */
12091   }
12092
12093   /* ----------------- shsax<c> <Rd>,<Rn>,<Rm> ------------------- */
12094   {
12095     UInt regD = 99, regN = 99, regM = 99;
12096     Bool gate = False;
12097
12098     if (isT) {
12099        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
12100           regN = INSNT0(3,0);
12101           regD = INSNT1(11,8);
12102           regM = INSNT1(3,0);
12103           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12104              gate = True;
12105        }
12106     } else {
12107        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
12108            INSNA(11,8)  == BITS4(1,1,1,1)         &&
12109            INSNA(7,4)   == BITS4(0,1,0,1)) {
12110           regD = INSNA(15,12);
12111           regN = INSNA(19,16);
12112           regM = INSNA(3,0);
12113           if (regD != 15 && regN != 15 && regM != 15)
12114              gate = True;
12115        }
12116     }
12117
12118     if (gate) {
12119        IRTemp rNt   = newTemp(Ity_I32);
12120        IRTemp rMt   = newTemp(Ity_I32);
12121        IRTemp irt_diff  = newTemp(Ity_I32);
12122        IRTemp irt_sum   = newTemp(Ity_I32);
12123        IRTemp res_q = newTemp(Ity_I32);
12124
12125        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12126        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12127
12128        assign( irt_sum,
12129                binop(Iop_Add32,
12130                      unop(Iop_16Sto32,
12131                           unop(Iop_32to16,
12132                                mkexpr(rNt)
12133                           )
12134                      ),
12135                      unop(Iop_16Sto32,
12136                           unop(Iop_32to16,
12137                                binop(Iop_Shr32,
12138                                      mkexpr(rMt), mkU8(16)
12139                                )
12140                           )
12141                      )
12142                )
12143        );
12144
12145        assign( irt_diff,
12146                binop(Iop_Sub32,
12147                      unop(Iop_16Sto32,
12148                           unop(Iop_32to16,
12149                                binop(Iop_Shr32,
12150                                      mkexpr(rNt), mkU8(16)
12151                                )
12152                           )
12153                      ),
12154                      unop(Iop_16Sto32,
12155                           unop(Iop_32to16, mkexpr(rMt)
12156                           )
12157                      )
12158                )
12159        );
12160
12161        assign( res_q,
12162                binop(Iop_Or32,
12163                      unop(Iop_16Uto32,
12164                           unop(Iop_32to16,
12165                                binop(Iop_Shr32,
12166                                      mkexpr(irt_sum), mkU8(1)
12167                                )
12168                           )
12169                      ),
12170                      binop(Iop_Shl32,
12171                            binop(Iop_Shr32,
12172                                  mkexpr(irt_diff), mkU8(1)
12173                            ),
12174                            mkU8(16)
12175                     )
12176                )
12177        );
12178
12179        if (isT)
12180           putIRegT( regD, mkexpr(res_q), condT );
12181        else
12182           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12183
12184        DIP("shsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12185        return True;
12186     }
12187     /* fall through */
12188   }
12189
12190   /* ----------------- uhsax<c> <Rd>,<Rn>,<Rm> ------------------- */
12191   {
12192     UInt regD = 99, regN = 99, regM = 99;
12193     Bool gate = False;
12194
12195     if (isT) {
12196        if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
12197           regN = INSNT0(3,0);
12198           regD = INSNT1(11,8);
12199           regM = INSNT1(3,0);
12200           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12201              gate = True;
12202        }
12203     } else {
12204        if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
12205            INSNA(11,8)  == BITS4(1,1,1,1)         &&
12206            INSNA(7,4)   == BITS4(0,1,0,1)) {
12207           regD = INSNA(15,12);
12208           regN = INSNA(19,16);
12209           regM = INSNA(3,0);
12210           if (regD != 15 && regN != 15 && regM != 15)
12211              gate = True;
12212        }
12213     }
12214
12215     if (gate) {
12216        IRTemp rNt   = newTemp(Ity_I32);
12217        IRTemp rMt   = newTemp(Ity_I32);
12218        IRTemp irt_diff  = newTemp(Ity_I32);
12219        IRTemp irt_sum   = newTemp(Ity_I32);
12220        IRTemp res_q = newTemp(Ity_I32);
12221
12222        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12223        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12224
12225        assign( irt_sum,
12226                binop(Iop_Add32,
12227                      unop(Iop_16Uto32,
12228                           unop(Iop_32to16,
12229                                mkexpr(rNt)
12230                           )
12231                      ),
12232                      unop(Iop_16Uto32,
12233                           unop(Iop_32to16,
12234                                binop(Iop_Shr32,
12235                                      mkexpr(rMt), mkU8(16)
12236                                )
12237                           )
12238                      )
12239                )
12240        );
12241
12242        assign( irt_diff,
12243                binop(Iop_Sub32,
12244                      unop(Iop_16Uto32,
12245                           unop(Iop_32to16,
12246                                binop(Iop_Shr32,
12247                                      mkexpr(rNt), mkU8(16)
12248                                )
12249                           )
12250                      ),
12251                      unop(Iop_16Uto32,
12252                           unop(Iop_32to16, mkexpr(rMt)
12253                           )
12254                      )
12255                )
12256        );
12257
12258        assign( res_q,
12259                binop(Iop_Or32,
12260                      unop(Iop_16Uto32,
12261                           unop(Iop_32to16,
12262                                binop(Iop_Shr32,
12263                                      mkexpr(irt_sum), mkU8(1)
12264                                )
12265                           )
12266                      ),
12267                      binop(Iop_Shl32,
12268                            binop(Iop_Shr32,
12269                                  mkexpr(irt_diff), mkU8(1)
12270                            ),
12271                            mkU8(16)
12272                     )
12273                )
12274        );
12275
12276        if (isT)
12277           putIRegT( regD, mkexpr(res_q), condT );
12278        else
12279           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12280
12281        DIP("uhsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12282        return True;
12283     }
12284     /* fall through */
12285   }
12286
12287   /* ----------------- shsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
12288   {
12289     UInt regD = 99, regN = 99, regM = 99;
12290     Bool gate = False;
12291
12292     if (isT) {
12293        if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
12294           regN = INSNT0(3,0);
12295           regD = INSNT1(11,8);
12296           regM = INSNT1(3,0);
12297           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12298              gate = True;
12299        }
12300     } else {
12301        if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
12302            INSNA(11,8)  == BITS4(1,1,1,1)         &&
12303            INSNA(7,4)   == BITS4(0,1,1,1)) {
12304           regD = INSNA(15,12);
12305           regN = INSNA(19,16);
12306           regM = INSNA(3,0);
12307           if (regD != 15 && regN != 15 && regM != 15)
12308              gate = True;
12309        }
12310     }
12311
12312     if (gate) {
12313        IRTemp rNt   = newTemp(Ity_I32);
12314        IRTemp rMt   = newTemp(Ity_I32);
12315        IRTemp res_q = newTemp(Ity_I32);
12316
12317        assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12318        assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12319
12320        assign(res_q, binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
12321        if (isT)
12322           putIRegT( regD, mkexpr(res_q), condT );
12323        else
12324           putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12325
12326        DIP("shsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12327        return True;
12328     }
12329     /* fall through */
12330   }
12331
12332   /* ----------------- smmls{r}<c> <Rd>,<Rn>,<Rm>,<Ra> ------------------- */
12333   {
12334     UInt rD = 99, rN = 99, rM = 99, rA = 99;
12335     Bool round  = False;
12336     Bool gate   = False;
12337
12338     if (isT) {
12339        if (INSNT0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
12340            && INSNT0(6,4) == BITS3(1,1,0)
12341            && INSNT1(7,5) == BITS3(0,0,0)) {
12342           round = INSNT1(4,4);
12343           rA    = INSNT1(15,12);
12344           rD    = INSNT1(11,8);
12345           rM    = INSNT1(3,0);
12346           rN    = INSNT0(3,0);
12347           if (!isBadRegT(rD)
12348               && !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rA))
12349              gate = True;
12350        }
12351     } else {
12352        if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,1)
12353            && INSNA(15,12) != BITS4(1,1,1,1)
12354            && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(1,1,0,1)) {
12355           round = INSNA(5,5);
12356           rD    = INSNA(19,16);
12357           rA    = INSNA(15,12);
12358           rM    = INSNA(11,8);
12359           rN    = INSNA(3,0);
12360           if (rD != 15 && rM != 15 && rN != 15)
12361              gate = True;
12362        }
12363     }
12364     if (gate) {
12365        IRTemp irt_rA   = newTemp(Ity_I32);
12366        IRTemp irt_rN   = newTemp(Ity_I32);
12367        IRTemp irt_rM   = newTemp(Ity_I32);
12368        assign( irt_rA, isT ? getIRegT(rA) : getIRegA(rA) );
12369        assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
12370        assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
12371        IRExpr* res
12372        = unop(Iop_64HIto32,
12373               binop(Iop_Add64,
12374                     binop(Iop_Sub64,
12375                           binop(Iop_32HLto64, mkexpr(irt_rA), mkU32(0)),
12376                           binop(Iop_MullS32, mkexpr(irt_rN), mkexpr(irt_rM))),
12377                     mkU64(round ? 0x80000000ULL : 0ULL)));
12378        if (isT)
12379           putIRegT( rD, res, condT );
12380        else
12381           putIRegA(rD, res, condT, Ijk_Boring);
12382        DIP("smmls%s%s r%u, r%u, r%u, r%u\n",
12383            round ? "r" : "", nCC(conq), rD, rN, rM, rA);
12384        return True;
12385     }
12386     /* fall through */
12387   }
12388
12389   /* -------------- smlald{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
12390   {
12391     UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
12392     Bool m_swap = False;
12393     Bool gate   = False;
12394
12395     if (isT) {
12396        if (INSNT0(15,4) == 0xFBC &&
12397            (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0)) {
12398           rN     = INSNT0(3,0);
12399           rDlo   = INSNT1(15,12);
12400           rDhi   = INSNT1(11,8);
12401           rM     = INSNT1(3,0);
12402           m_swap = (INSNT1(4,4) & 1) == 1;
12403           if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
12404               && !isBadRegT(rM) && rDhi != rDlo)
12405              gate = True;
12406        }
12407     } else {
12408        if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0)
12409            && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
12410           rN     = INSNA(3,0);
12411           rDlo   = INSNA(15,12);
12412           rDhi   = INSNA(19,16);
12413           rM     = INSNA(11,8);
12414           m_swap = ( INSNA(5,5) & 1 ) == 1;
12415           if (rDlo != 15 && rDhi != 15
12416               && rN != 15 && rM != 15 && rDlo != rDhi)
12417              gate = True;
12418        }
12419     }
12420
12421     if (gate) {
12422        IRTemp irt_rM   = newTemp(Ity_I32);
12423        IRTemp irt_rN   = newTemp(Ity_I32);
12424        IRTemp irt_rDhi = newTemp(Ity_I32);
12425        IRTemp irt_rDlo = newTemp(Ity_I32);
12426        IRTemp op_2     = newTemp(Ity_I32);
12427        IRTemp pr_1     = newTemp(Ity_I64);
12428        IRTemp pr_2     = newTemp(Ity_I64);
12429        IRTemp result   = newTemp(Ity_I64);
12430        IRTemp resHi    = newTemp(Ity_I32);
12431        IRTemp resLo    = newTemp(Ity_I32);
12432        assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM));
12433        assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN));
12434        assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi));
12435        assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo));
12436        assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
12437        assign( pr_1, binop(Iop_MullS32,
12438                            unop(Iop_16Sto32,
12439                                 unop(Iop_32to16, mkexpr(irt_rN))
12440                            ),
12441                            unop(Iop_16Sto32,
12442                                 unop(Iop_32to16, mkexpr(op_2))
12443                            )
12444                      )
12445        );
12446        assign( pr_2, binop(Iop_MullS32,
12447                            binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
12448                            binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
12449                      )
12450        );
12451        assign( result, binop(Iop_Add64,
12452                              binop(Iop_Add64,
12453                                    mkexpr(pr_1),
12454                                    mkexpr(pr_2)
12455                              ),
12456                              binop(Iop_32HLto64,
12457                                    mkexpr(irt_rDhi),
12458                                    mkexpr(irt_rDlo)
12459                              )
12460                        )
12461        );
12462        assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
12463        assign( resLo, unop(Iop_64to32, mkexpr(result)) );
12464        if (isT) {
12465           putIRegT( rDhi, mkexpr(resHi), condT );
12466           putIRegT( rDlo, mkexpr(resLo), condT );
12467        } else {
12468           putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
12469           putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
12470        }
12471        DIP("smlald%c%s r%u, r%u, r%u, r%u\n",
12472            m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
12473        return True;
12474     }
12475     /* fall through */
12476   }
12477
12478   /* -------------- smlsld{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
12479   {
12480     UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
12481     Bool m_swap = False;
12482     Bool gate   = False;
12483
12484     if (isT) {
12485        if ((INSNT0(15,4) == 0xFBD &&
12486            (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0))) {
12487           rN     = INSNT0(3,0);
12488           rDlo   = INSNT1(15,12);
12489           rDhi   = INSNT1(11,8);
12490           rM     = INSNT1(3,0);
12491           m_swap = (INSNT1(4,4) & 1) == 1;
12492           if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN) &&
12493               !isBadRegT(rM) && rDhi != rDlo)
12494              gate = True;
12495        }
12496     } else {
12497        if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0) &&
12498            (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,1,0,1)) {
12499           rN     = INSNA(3,0);
12500           rDlo   = INSNA(15,12);
12501           rDhi   = INSNA(19,16);
12502           rM     = INSNA(11,8);
12503           m_swap = (INSNA(5,5) & 1) == 1;
12504           if (rDlo != 15 && rDhi != 15 &&
12505               rN != 15 && rM != 15 && rDlo != rDhi)
12506              gate = True;
12507        }
12508     }
12509     if (gate) {
12510        IRTemp irt_rM   = newTemp(Ity_I32);
12511        IRTemp irt_rN   = newTemp(Ity_I32);
12512        IRTemp irt_rDhi = newTemp(Ity_I32);
12513        IRTemp irt_rDlo = newTemp(Ity_I32);
12514        IRTemp op_2     = newTemp(Ity_I32);
12515        IRTemp pr_1     = newTemp(Ity_I64);
12516        IRTemp pr_2     = newTemp(Ity_I64);
12517        IRTemp result   = newTemp(Ity_I64);
12518        IRTemp resHi    = newTemp(Ity_I32);
12519        IRTemp resLo    = newTemp(Ity_I32);
12520        assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
12521        assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
12522        assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi) );
12523        assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo) );
12524        assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
12525        assign( pr_1, binop(Iop_MullS32,
12526                            unop(Iop_16Sto32,
12527                                 unop(Iop_32to16, mkexpr(irt_rN))
12528                            ),
12529                            unop(Iop_16Sto32,
12530                                 unop(Iop_32to16, mkexpr(op_2))
12531                            )
12532                      )
12533        );
12534        assign( pr_2, binop(Iop_MullS32,
12535                            binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
12536                            binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
12537                      )
12538        );
12539        assign( result, binop(Iop_Add64,
12540                              binop(Iop_Sub64,
12541                                    mkexpr(pr_1),
12542                                    mkexpr(pr_2)
12543                              ),
12544                              binop(Iop_32HLto64,
12545                                    mkexpr(irt_rDhi),
12546                                    mkexpr(irt_rDlo)
12547                              )
12548                        )
12549        );
12550        assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
12551        assign( resLo, unop(Iop_64to32, mkexpr(result)) );
12552        if (isT) {
12553           putIRegT( rDhi, mkexpr(resHi), condT );
12554           putIRegT( rDlo, mkexpr(resLo), condT );
12555        } else {
12556           putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
12557           putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
12558        }
12559        DIP("smlsld%c%s r%u, r%u, r%u, r%u\n",
12560            m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
12561        return True;
12562     }
12563     /* fall through */
12564   }
12565
12566   /* ---------- Doesn't match anything. ---------- */
12567   return False;
12568
12569#  undef INSNA
12570#  undef INSNT0
12571#  undef INSNT1
12572}
12573
12574
12575/*------------------------------------------------------------*/
12576/*--- LDMxx/STMxx helper (both ARM and Thumb32)            ---*/
12577/*------------------------------------------------------------*/
12578
12579/* Generate IR for LDMxx and STMxx.  This is complex.  Assumes it's
12580   unconditional, so the caller must produce a jump-around before
12581   calling this, if the insn is to be conditional.  Caller is
12582   responsible for all validation of parameters.  For LDMxx, if PC is
12583   amongst the values loaded, caller is also responsible for
12584   generating the jump. */
12585static void mk_ldm_stm ( Bool arm,     /* True: ARM, False: Thumb */
12586                         UInt rN,      /* base reg */
12587                         UInt bINC,    /* 1: inc,  0: dec */
12588                         UInt bBEFORE, /* 1: inc/dec before, 0: after */
12589                         UInt bW,      /* 1: writeback to Rn */
12590                         UInt bL,      /* 1: load, 0: store */
12591                         UInt regList )
12592{
12593   Int i, r, m, nRegs;
12594   IRTemp jk = Ijk_Boring;
12595
12596   /* Get hold of the old Rn value.  We might need to write its value
12597      to memory during a store, and if it's also the writeback
12598      register then we need to get its value now.  We can't treat it
12599      exactly like the other registers we're going to transfer,
12600      because for xxMDA and xxMDB writeback forms, the generated IR
12601      updates Rn in the guest state before any transfers take place.
12602      We have to do this as per comments below, in order that if Rn is
12603      the stack pointer then it always has a value is below or equal
12604      to any of the transfer addresses.  Ick. */
12605   IRTemp oldRnT = newTemp(Ity_I32);
12606   assign(oldRnT, arm ? getIRegA(rN) : getIRegT(rN));
12607
12608   IRTemp anchorT = newTemp(Ity_I32);
12609   /* The old (Addison-Wesley) ARM ARM seems to say that LDMxx/STMxx
12610      ignore the bottom two bits of the address.  However, Cortex-A8
12611      doesn't seem to care.  Hence: */
12612   /* No .. don't force alignment .. */
12613   /* assign(anchorT, binop(Iop_And32, mkexpr(oldRnT), mkU32(~3U))); */
12614   /* Instead, use the potentially misaligned address directly. */
12615   assign(anchorT, mkexpr(oldRnT));
12616
12617   IROp opADDorSUB = bINC ? Iop_Add32 : Iop_Sub32;
12618   // bINC == 1:  xxMIA, xxMIB
12619   // bINC == 0:  xxMDA, xxMDB
12620
12621   // For xxMDA and xxMDB, update Rn first if necessary.  We have
12622   // to do this first so that, for the common idiom of the transfers
12623   // faulting because we're pushing stuff onto a stack and the stack
12624   // is growing down onto allocate-on-fault pages (as Valgrind simulates),
12625   // we need to have the SP up-to-date "covering" (pointing below) the
12626   // transfer area.  For the same reason, if we are doing xxMIA or xxMIB,
12627   // do the transfer first, and then update rN afterwards.
12628   nRegs = 0;
12629   for (i = 0; i < 16; i++) {
12630     if ((regList & (1 << i)) != 0)
12631         nRegs++;
12632   }
12633   if (bW == 1 && !bINC) {
12634      IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
12635      if (arm)
12636         putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
12637      else
12638         putIRegT( rN, e, IRTemp_INVALID );
12639   }
12640
12641   // Make up a list of the registers to transfer, and their offsets
12642   // in memory relative to the anchor.  If the base reg (Rn) is part
12643   // of the transfer, then do it last for a load and first for a store.
12644   UInt xReg[16], xOff[16];
12645   Int  nX = 0;
12646   m = 0;
12647   for (i = 0; i < 16; i++) {
12648      r = bINC ? i : (15-i);
12649      if (0 == (regList & (1<<r)))
12650         continue;
12651      if (bBEFORE)
12652         m++;
12653      /* paranoia: check we aren't transferring the writeback
12654         register during a load. Should be assured by decode-point
12655         check above. */
12656      if (bW == 1 && bL == 1)
12657         vassert(r != rN);
12658
12659      xOff[nX] = 4 * m;
12660      xReg[nX] = r;
12661      nX++;
12662
12663      if (!bBEFORE)
12664         m++;
12665   }
12666   vassert(m == nRegs);
12667   vassert(nX == nRegs);
12668   vassert(nX <= 16);
12669
12670   if (bW == 0 && (regList & (1<<rN)) != 0) {
12671      /* Non-writeback, and basereg is to be transferred.  Do its
12672         transfer last for a load and first for a store.  Requires
12673         reordering xOff/xReg. */
12674      if (0) {
12675         vex_printf("\nREG_LIST_PRE: (rN=%d)\n", rN);
12676         for (i = 0; i < nX; i++)
12677            vex_printf("reg %d   off %d\n", xReg[i], xOff[i]);
12678         vex_printf("\n");
12679      }
12680
12681      vassert(nX > 0);
12682      for (i = 0; i < nX; i++) {
12683         if (xReg[i] == rN)
12684             break;
12685      }
12686      vassert(i < nX); /* else we didn't find it! */
12687      UInt tReg = xReg[i];
12688      UInt tOff = xOff[i];
12689      if (bL == 1) {
12690         /* load; make this transfer happen last */
12691         if (i < nX-1) {
12692            for (m = i+1; m < nX; m++) {
12693               xReg[m-1] = xReg[m];
12694               xOff[m-1] = xOff[m];
12695            }
12696            vassert(m == nX);
12697            xReg[m-1] = tReg;
12698            xOff[m-1] = tOff;
12699         }
12700      } else {
12701         /* store; make this transfer happen first */
12702         if (i > 0) {
12703            for (m = i-1; m >= 0; m--) {
12704               xReg[m+1] = xReg[m];
12705               xOff[m+1] = xOff[m];
12706            }
12707            vassert(m == -1);
12708            xReg[0] = tReg;
12709            xOff[0] = tOff;
12710         }
12711      }
12712
12713      if (0) {
12714         vex_printf("REG_LIST_POST:\n");
12715         for (i = 0; i < nX; i++)
12716            vex_printf("reg %d   off %d\n", xReg[i], xOff[i]);
12717         vex_printf("\n");
12718      }
12719   }
12720
12721   /* According to the Cortex A8 TRM Sec. 5.2.1, LDM(1) with r13 as the base
12722       register and PC in the register list is a return for purposes of branch
12723       prediction.
12724      The ARM ARM Sec. C9.10.1 further specifies that writeback must be enabled
12725       to be counted in event 0x0E (Procedure return).*/
12726   if (rN == 13 && bL == 1 && bINC && !bBEFORE && bW == 1) {
12727      jk = Ijk_Ret;
12728   }
12729
12730   /* Actually generate the transfers */
12731   for (i = 0; i < nX; i++) {
12732      r = xReg[i];
12733      if (bL == 1) {
12734         IRExpr* e = loadLE(Ity_I32,
12735                            binop(opADDorSUB, mkexpr(anchorT),
12736                                  mkU32(xOff[i])));
12737         if (arm) {
12738            putIRegA( r, e, IRTemp_INVALID, jk );
12739         } else {
12740            // no: putIRegT( r, e, IRTemp_INVALID );
12741            // putIRegT refuses to write to R15.  But that might happen.
12742            // Since this is uncond, and we need to be able to
12743            // write the PC, just use the low level put:
12744            llPutIReg( r, e );
12745         }
12746      } else {
12747         /* if we're storing Rn, make sure we use the correct
12748            value, as per extensive comments above */
12749         storeLE( binop(opADDorSUB, mkexpr(anchorT), mkU32(xOff[i])),
12750                  r == rN ? mkexpr(oldRnT)
12751                          : (arm ? getIRegA(r) : getIRegT(r) ) );
12752      }
12753   }
12754
12755   // If we are doing xxMIA or xxMIB,
12756   // do the transfer first, and then update rN afterwards.
12757   if (bW == 1 && bINC) {
12758      IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
12759      if (arm)
12760         putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
12761      else
12762         putIRegT( rN, e, IRTemp_INVALID );
12763   }
12764}
12765
12766
12767/*------------------------------------------------------------*/
12768/*--- VFP (CP 10 and 11) instructions                      ---*/
12769/*------------------------------------------------------------*/
12770
12771/* Both ARM and Thumb */
12772
12773/* Translate a CP10 or CP11 instruction.  If successful, returns
12774   True and *dres may or may not be updated.  If failure, returns
12775   False and doesn't change *dres nor create any IR.
12776
12777   The ARM and Thumb encodings are identical for the low 28 bits of
12778   the insn (yay!) and that's what the caller must supply, iow, imm28
12779   has the top 4 bits masked out.  Caller is responsible for
12780   determining whether the masked-out bits are valid for a CP10/11
12781   insn.  The rules for the top 4 bits are:
12782
12783     ARM: 0000 to 1110 allowed, and this is the gating condition.
12784     1111 (NV) is not allowed.
12785
12786     Thumb: must be 1110.  The gating condition is taken from
12787     ITSTATE in the normal way.
12788
12789   Conditionalisation:
12790
12791   Caller must supply an IRTemp 'condT' holding the gating condition,
12792   or IRTemp_INVALID indicating the insn is always executed.
12793
12794   Caller must also supply an ARMCondcode 'cond'.  This is only used
12795   for debug printing, no other purpose.  For ARM, this is simply the
12796   top 4 bits of the original instruction.  For Thumb, the condition
12797   is not (really) known until run time, and so ARMCondAL should be
12798   passed, only so that printing of these instructions does not show
12799   any condition.
12800
12801   Finally, the caller must indicate whether this occurs in ARM or
12802   Thumb code.
12803*/
12804static Bool decode_CP10_CP11_instruction (
12805               /*MOD*/DisResult* dres,
12806               UInt              insn28,
12807               IRTemp            condT,
12808               ARMCondcode       conq,
12809               Bool              isT
12810            )
12811{
12812#  define INSN(_bMax,_bMin)  SLICE_UInt(insn28, (_bMax), (_bMin))
12813
12814   vassert(INSN(31,28) == BITS4(0,0,0,0)); // caller's obligation
12815
12816   if (isT) {
12817      vassert(conq == ARMCondAL);
12818   } else {
12819      vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
12820   }
12821
12822   /* ----------------------------------------------------------- */
12823   /* -- VFP instructions -- double precision (mostly)         -- */
12824   /* ----------------------------------------------------------- */
12825
12826   /* --------------------- fldmx, fstmx --------------------- */
12827   /*
12828                                 31   27   23   19 15 11   7   0
12829                                         P U WL
12830      C4-100, C5-26  1  FSTMX    cond 1100 1000 Rn Dd 1011 offset
12831      C4-100, C5-28  2  FSTMIAX  cond 1100 1010 Rn Dd 1011 offset
12832      C4-100, C5-30  3  FSTMDBX  cond 1101 0010 Rn Dd 1011 offset
12833
12834      C4-42, C5-26   1  FLDMX    cond 1100 1001 Rn Dd 1011 offset
12835      C4-42, C5-28   2  FLDMIAX  cond 1100 1011 Rn Dd 1011 offset
12836      C4-42, C5-30   3  FLDMDBX  cond 1101 0011 Rn Dd 1011 offset
12837
12838      Regs transferred: Dd .. D(d + (offset-3)/2)
12839      offset must be odd, must not imply a reg > 15
12840      IA/DB: Rn is changed by (4 + 8 x # regs transferred)
12841
12842      case coding:
12843         1  at-Rn   (access at Rn)
12844         2  ia-Rn   (access at Rn, then Rn += 4+8n)
12845         3  db-Rn   (Rn -= 4+8n,   then access at Rn)
12846   */
12847   if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
12848       && INSN(11,8) == BITS4(1,0,1,1)) {
12849      UInt bP      = (insn28 >> 24) & 1;
12850      UInt bU      = (insn28 >> 23) & 1;
12851      UInt bW      = (insn28 >> 21) & 1;
12852      UInt bL      = (insn28 >> 20) & 1;
12853      UInt offset  = (insn28 >> 0) & 0xFF;
12854      UInt rN      = INSN(19,16);
12855      UInt dD      = (INSN(22,22) << 4) | INSN(15,12);
12856      UInt nRegs   = (offset - 1) / 2;
12857      UInt summary = 0;
12858      Int  i;
12859
12860      /**/ if (bP == 0 && bU == 1 && bW == 0) {
12861         summary = 1;
12862      }
12863      else if (bP == 0 && bU == 1 && bW == 1) {
12864         summary = 2;
12865      }
12866      else if (bP == 1 && bU == 0 && bW == 1) {
12867         summary = 3;
12868      }
12869      else goto after_vfp_fldmx_fstmx;
12870
12871      /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
12872      if (rN == 15 && (summary == 2 || summary == 3 || isT))
12873         goto after_vfp_fldmx_fstmx;
12874
12875      /* offset must be odd, and specify at least one register */
12876      if (0 == (offset & 1) || offset < 3)
12877         goto after_vfp_fldmx_fstmx;
12878
12879      /* can't transfer regs after D15 */
12880      if (dD + nRegs - 1 >= 32)
12881         goto after_vfp_fldmx_fstmx;
12882
12883      /* Now, we can't do a conditional load or store, since that very
12884         likely will generate an exception.  So we have to take a side
12885         exit at this point if the condition is false. */
12886      if (condT != IRTemp_INVALID) {
12887         if (isT)
12888            mk_skip_over_T32_if_cond_is_false( condT );
12889         else
12890            mk_skip_over_A32_if_cond_is_false( condT );
12891         condT = IRTemp_INVALID;
12892      }
12893      /* Ok, now we're unconditional.  Do the load or store. */
12894
12895      /* get the old Rn value */
12896      IRTemp rnT = newTemp(Ity_I32);
12897      assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
12898                           rN == 15));
12899
12900      /* make a new value for Rn, post-insn */
12901      IRTemp rnTnew = IRTemp_INVALID;
12902      if (summary == 2 || summary == 3) {
12903         rnTnew = newTemp(Ity_I32);
12904         assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
12905                              mkexpr(rnT),
12906                              mkU32(4 + 8 * nRegs)));
12907      }
12908
12909      /* decide on the base transfer address */
12910      IRTemp taT = newTemp(Ity_I32);
12911      assign(taT,  summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
12912
12913      /* update Rn if necessary -- in case 3, we're moving it down, so
12914         update before any memory reference, in order to keep Memcheck
12915         and V's stack-extending logic (on linux) happy */
12916      if (summary == 3) {
12917         if (isT)
12918            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
12919         else
12920            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
12921      }
12922
12923      /* generate the transfers */
12924      for (i = 0; i < nRegs; i++) {
12925         IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
12926         if (bL) {
12927            putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
12928         } else {
12929            storeLE(addr, getDReg(dD + i));
12930         }
12931      }
12932
12933      /* update Rn if necessary -- in case 2, we're moving it up, so
12934         update after any memory reference, in order to keep Memcheck
12935         and V's stack-extending logic (on linux) happy */
12936      if (summary == 2) {
12937         if (isT)
12938            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
12939         else
12940            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
12941      }
12942
12943      const HChar* nm = bL==1 ? "ld" : "st";
12944      switch (summary) {
12945         case 1:  DIP("f%smx%s r%u, {d%u-d%u}\n",
12946                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
12947                  break;
12948         case 2:  DIP("f%smiax%s r%u!, {d%u-d%u}\n",
12949                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
12950                  break;
12951         case 3:  DIP("f%smdbx%s r%u!, {d%u-d%u}\n",
12952                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
12953                  break;
12954         default: vassert(0);
12955      }
12956
12957      goto decode_success_vfp;
12958      /* FIXME alignment constraints? */
12959   }
12960
12961  after_vfp_fldmx_fstmx:
12962
12963   /* --------------------- fldmd, fstmd --------------------- */
12964   /*
12965                                 31   27   23   19 15 11   7   0
12966                                         P U WL
12967      C4-96, C5-26   1  FSTMD    cond 1100 1000 Rn Dd 1011 offset
12968      C4-96, C5-28   2  FSTMIAD  cond 1100 1010 Rn Dd 1011 offset
12969      C4-96, C5-30   3  FSTMDBD  cond 1101 0010 Rn Dd 1011 offset
12970
12971      C4-38, C5-26   1  FLDMD    cond 1100 1001 Rn Dd 1011 offset
12972      C4-38, C5-28   2  FLDMIAD  cond 1100 1011 Rn Dd 1011 offset
12973      C4-38, C5-30   3  FLDMDBD  cond 1101 0011 Rn Dd 1011 offset
12974
12975      Regs transferred: Dd .. D(d + (offset-2)/2)
12976      offset must be even, must not imply a reg > 31
12977      IA/DB: Rn is changed by (8 x # regs transferred)
12978
12979      case coding:
12980         1  at-Rn   (access at Rn)
12981         2  ia-Rn   (access at Rn, then Rn += 8n)
12982         3  db-Rn   (Rn -= 8n,     then access at Rn)
12983   */
12984   if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
12985       && INSN(11,8) == BITS4(1,0,1,1)) {
12986      UInt bP      = (insn28 >> 24) & 1;
12987      UInt bU      = (insn28 >> 23) & 1;
12988      UInt bW      = (insn28 >> 21) & 1;
12989      UInt bL      = (insn28 >> 20) & 1;
12990      UInt offset  = (insn28 >> 0) & 0xFF;
12991      UInt rN      = INSN(19,16);
12992      UInt dD      = (INSN(22,22) << 4) | INSN(15,12);
12993      UInt nRegs   = offset / 2;
12994      UInt summary = 0;
12995      Int  i;
12996
12997      /**/ if (bP == 0 && bU == 1 && bW == 0) {
12998         summary = 1;
12999      }
13000      else if (bP == 0 && bU == 1 && bW == 1) {
13001         summary = 2;
13002      }
13003      else if (bP == 1 && bU == 0 && bW == 1) {
13004         summary = 3;
13005      }
13006      else goto after_vfp_fldmd_fstmd;
13007
13008      /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
13009      if (rN == 15 && (summary == 2 || summary == 3 || isT))
13010         goto after_vfp_fldmd_fstmd;
13011
13012      /* offset must be even, and specify at least one register */
13013      if (1 == (offset & 1) || offset < 2)
13014         goto after_vfp_fldmd_fstmd;
13015
13016      /* can't transfer regs after D31 */
13017      if (dD + nRegs - 1 >= 32)
13018         goto after_vfp_fldmd_fstmd;
13019
13020      /* Now, we can't do a conditional load or store, since that very
13021         likely will generate an exception.  So we have to take a side
13022         exit at this point if the condition is false. */
13023      if (condT != IRTemp_INVALID) {
13024         if (isT)
13025            mk_skip_over_T32_if_cond_is_false( condT );
13026         else
13027            mk_skip_over_A32_if_cond_is_false( condT );
13028         condT = IRTemp_INVALID;
13029      }
13030      /* Ok, now we're unconditional.  Do the load or store. */
13031
13032      /* get the old Rn value */
13033      IRTemp rnT = newTemp(Ity_I32);
13034      assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
13035                           rN == 15));
13036
13037      /* make a new value for Rn, post-insn */
13038      IRTemp rnTnew = IRTemp_INVALID;
13039      if (summary == 2 || summary == 3) {
13040         rnTnew = newTemp(Ity_I32);
13041         assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
13042                              mkexpr(rnT),
13043                              mkU32(8 * nRegs)));
13044      }
13045
13046      /* decide on the base transfer address */
13047      IRTemp taT = newTemp(Ity_I32);
13048      assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
13049
13050      /* update Rn if necessary -- in case 3, we're moving it down, so
13051         update before any memory reference, in order to keep Memcheck
13052         and V's stack-extending logic (on linux) happy */
13053      if (summary == 3) {
13054         if (isT)
13055            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
13056         else
13057            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
13058      }
13059
13060      /* generate the transfers */
13061      for (i = 0; i < nRegs; i++) {
13062         IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
13063         if (bL) {
13064            putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
13065         } else {
13066            storeLE(addr, getDReg(dD + i));
13067         }
13068      }
13069
13070      /* update Rn if necessary -- in case 2, we're moving it up, so
13071         update after any memory reference, in order to keep Memcheck
13072         and V's stack-extending logic (on linux) happy */
13073      if (summary == 2) {
13074         if (isT)
13075            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
13076         else
13077            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
13078      }
13079
13080      const HChar* nm = bL==1 ? "ld" : "st";
13081      switch (summary) {
13082         case 1:  DIP("f%smd%s r%u, {d%u-d%u}\n",
13083                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
13084                  break;
13085         case 2:  DIP("f%smiad%s r%u!, {d%u-d%u}\n",
13086                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
13087                  break;
13088         case 3:  DIP("f%smdbd%s r%u!, {d%u-d%u}\n",
13089                      nm, nCC(conq), rN, dD, dD + nRegs - 1);
13090                  break;
13091         default: vassert(0);
13092      }
13093
13094      goto decode_success_vfp;
13095      /* FIXME alignment constraints? */
13096   }
13097
13098  after_vfp_fldmd_fstmd:
13099
13100   /* ------------------- fmrx, fmxr ------------------- */
13101   if (BITS8(1,1,1,0,1,1,1,1) == INSN(27,20)
13102       && BITS4(1,0,1,0) == INSN(11,8)
13103       && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
13104      UInt rD  = INSN(15,12);
13105      UInt reg = INSN(19,16);
13106      if (reg == BITS4(0,0,0,1)) {
13107         if (rD == 15) {
13108            IRTemp nzcvT = newTemp(Ity_I32);
13109            /* When rD is 15, we are copying the top 4 bits of FPSCR
13110               into CPSR.  That is, set the flags thunk to COPY and
13111               install FPSCR[31:28] as the value to copy. */
13112            assign(nzcvT, binop(Iop_And32,
13113                                IRExpr_Get(OFFB_FPSCR, Ity_I32),
13114                                mkU32(0xF0000000)));
13115            setFlags_D1(ARMG_CC_OP_COPY, nzcvT, condT);
13116            DIP("fmstat%s\n", nCC(conq));
13117         } else {
13118            /* Otherwise, merely transfer FPSCR to r0 .. r14. */
13119            IRExpr* e = IRExpr_Get(OFFB_FPSCR, Ity_I32);
13120            if (isT)
13121               putIRegT(rD, e, condT);
13122            else
13123               putIRegA(rD, e, condT, Ijk_Boring);
13124            DIP("fmrx%s r%u, fpscr\n", nCC(conq), rD);
13125         }
13126         goto decode_success_vfp;
13127      }
13128      /* fall through */
13129   }
13130
13131   if (BITS8(1,1,1,0,1,1,1,0) == INSN(27,20)
13132       && BITS4(1,0,1,0) == INSN(11,8)
13133       && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
13134      UInt rD  = INSN(15,12);
13135      UInt reg = INSN(19,16);
13136      if (reg == BITS4(0,0,0,1)) {
13137         putMiscReg32(OFFB_FPSCR,
13138                      isT ? getIRegT(rD) : getIRegA(rD), condT);
13139         DIP("fmxr%s fpscr, r%u\n", nCC(conq), rD);
13140         goto decode_success_vfp;
13141      }
13142      /* fall through */
13143   }
13144
13145   /* --------------------- vmov --------------------- */
13146   // VMOV dM, rD, rN
13147   if (0x0C400B10 == (insn28 & 0x0FF00FD0)) {
13148      UInt dM = INSN(3,0) | (INSN(5,5) << 4);
13149      UInt rD = INSN(15,12); /* lo32 */
13150      UInt rN = INSN(19,16); /* hi32 */
13151      if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))) {
13152         /* fall through */
13153      } else {
13154         putDReg(dM,
13155                 unop(Iop_ReinterpI64asF64,
13156                      binop(Iop_32HLto64,
13157                            isT ? getIRegT(rN) : getIRegA(rN),
13158                            isT ? getIRegT(rD) : getIRegA(rD))),
13159                 condT);
13160         DIP("vmov%s d%u, r%u, r%u\n", nCC(conq), dM, rD, rN);
13161         goto decode_success_vfp;
13162      }
13163      /* fall through */
13164   }
13165
13166   // VMOV rD, rN, dM
13167   if (0x0C500B10 == (insn28 & 0x0FF00FD0)) {
13168      UInt dM = INSN(3,0) | (INSN(5,5) << 4);
13169      UInt rD = INSN(15,12); /* lo32 */
13170      UInt rN = INSN(19,16); /* hi32 */
13171      if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))
13172          || rD == rN) {
13173         /* fall through */
13174      } else {
13175         IRTemp i64 = newTemp(Ity_I64);
13176         assign(i64, unop(Iop_ReinterpF64asI64, getDReg(dM)));
13177         IRExpr* hi32 = unop(Iop_64HIto32, mkexpr(i64));
13178         IRExpr* lo32 = unop(Iop_64to32,   mkexpr(i64));
13179         if (isT) {
13180            putIRegT(rN, hi32, condT);
13181            putIRegT(rD, lo32, condT);
13182         } else {
13183            putIRegA(rN, hi32, condT, Ijk_Boring);
13184            putIRegA(rD, lo32, condT, Ijk_Boring);
13185         }
13186         DIP("vmov%s r%u, r%u, d%u\n", nCC(conq), rD, rN, dM);
13187         goto decode_success_vfp;
13188      }
13189      /* fall through */
13190   }
13191
13192   // VMOV sD, sD+1, rN, rM
13193   if (0x0C400A10 == (insn28 & 0x0FF00FD0)) {
13194      UInt sD = (INSN(3,0) << 1) | INSN(5,5);
13195      UInt rN = INSN(15,12);
13196      UInt rM = INSN(19,16);
13197      if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
13198          || sD == 31) {
13199         /* fall through */
13200      } else {
13201         putFReg(sD,
13202                 unop(Iop_ReinterpI32asF32, isT ? getIRegT(rN) : getIRegA(rN)),
13203                 condT);
13204         putFReg(sD+1,
13205                 unop(Iop_ReinterpI32asF32, isT ? getIRegT(rM) : getIRegA(rM)),
13206                 condT);
13207         DIP("vmov%s, s%u, s%u, r%u, r%u\n",
13208              nCC(conq), sD, sD + 1, rN, rM);
13209         goto decode_success_vfp;
13210      }
13211   }
13212
13213   // VMOV rN, rM, sD, sD+1
13214   if (0x0C500A10 == (insn28 & 0x0FF00FD0)) {
13215      UInt sD = (INSN(3,0) << 1) | INSN(5,5);
13216      UInt rN = INSN(15,12);
13217      UInt rM = INSN(19,16);
13218      if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
13219          || sD == 31 || rN == rM) {
13220         /* fall through */
13221      } else {
13222         IRExpr* res0 = unop(Iop_ReinterpF32asI32, getFReg(sD));
13223         IRExpr* res1 = unop(Iop_ReinterpF32asI32, getFReg(sD+1));
13224         if (isT) {
13225            putIRegT(rN, res0, condT);
13226            putIRegT(rM, res1, condT);
13227         } else {
13228            putIRegA(rN, res0, condT, Ijk_Boring);
13229            putIRegA(rM, res1, condT, Ijk_Boring);
13230         }
13231         DIP("vmov%s, r%u, r%u, s%u, s%u\n",
13232             nCC(conq), rN, rM, sD, sD + 1);
13233         goto decode_success_vfp;
13234      }
13235   }
13236
13237   // VMOV rD[x], rT  (ARM core register to scalar)
13238   if (0x0E000B10 == (insn28 & 0x0F900F1F)) {
13239      UInt rD  = (INSN(7,7) << 4) | INSN(19,16);
13240      UInt rT  = INSN(15,12);
13241      UInt opc = (INSN(22,21) << 2) | INSN(6,5);
13242      UInt index;
13243      if (rT == 15 || (isT && rT == 13)) {
13244         /* fall through */
13245      } else {
13246         if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
13247            index = opc & 7;
13248            putDRegI64(rD, triop(Iop_SetElem8x8,
13249                                 getDRegI64(rD),
13250                                 mkU8(index),
13251                                 unop(Iop_32to8,
13252                                      isT ? getIRegT(rT) : getIRegA(rT))),
13253                           condT);
13254            DIP("vmov%s.8 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
13255            goto decode_success_vfp;
13256         }
13257         else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
13258            index = (opc >> 1) & 3;
13259            putDRegI64(rD, triop(Iop_SetElem16x4,
13260                                 getDRegI64(rD),
13261                                 mkU8(index),
13262                                 unop(Iop_32to16,
13263                                      isT ? getIRegT(rT) : getIRegA(rT))),
13264                           condT);
13265            DIP("vmov%s.16 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
13266            goto decode_success_vfp;
13267         }
13268         else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0)) {
13269            index = (opc >> 2) & 1;
13270            putDRegI64(rD, triop(Iop_SetElem32x2,
13271                                 getDRegI64(rD),
13272                                 mkU8(index),
13273                                 isT ? getIRegT(rT) : getIRegA(rT)),
13274                           condT);
13275            DIP("vmov%s.32 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
13276            goto decode_success_vfp;
13277         } else {
13278            /* fall through */
13279         }
13280      }
13281   }
13282
13283   // VMOV (scalar to ARM core register)
13284   // VMOV rT, dN[x]  (the D register index is held in local 'rN')
13285   if (0x0E100B10 == (insn28 & 0x0F100F1F)) {
13286      UInt rN  = (INSN(7,7) << 4) | INSN(19,16);
13287      UInt rT  = INSN(15,12);
13288      UInt U   = INSN(23,23);
13289      UInt opc = (INSN(22,21) << 2) | INSN(6,5);
13290      UInt index;
13291      if (rT == 15 || (isT && rT == 13)) {
13292         /* fall through */
13293      } else {
13294         if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
13295            index = opc & 7;
13296            IRExpr* e = unop(U ? Iop_8Uto32 : Iop_8Sto32,
13297                             binop(Iop_GetElem8x8,
13298                                   getDRegI64(rN),
13299                                   mkU8(index)));
13300            if (isT)
13301               putIRegT(rT, e, condT);
13302            else
13303               putIRegA(rT, e, condT, Ijk_Boring);
13304            DIP("vmov%s.%c8 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
13305                  rT, rN, index);
13306            goto decode_success_vfp;
13307         }
13308         else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
13309            index = (opc >> 1) & 3;
13310            IRExpr* e = unop(U ? Iop_16Uto32 : Iop_16Sto32,
13311                             binop(Iop_GetElem16x4,
13312                                   getDRegI64(rN),
13313                                   mkU8(index)));
13314            if (isT)
13315               putIRegT(rT, e, condT);
13316            else
13317               putIRegA(rT, e, condT, Ijk_Boring);
13318            DIP("vmov%s.%c16 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
13319                  rT, rN, index);
13320            goto decode_success_vfp;
13321         }
13322         else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0) && U == 0) {
13323            index = (opc >> 2) & 1;
13324            IRExpr* e = binop(Iop_GetElem32x2, getDRegI64(rN), mkU8(index));
13325            if (isT)
13326               putIRegT(rT, e, condT);
13327            else
13328               putIRegA(rT, e, condT, Ijk_Boring);
13329            DIP("vmov%s.32 r%u, d%u[%u]\n", nCC(conq), rT, rN, index);
13330            goto decode_success_vfp;
13331         } else {
13332            /* fall through */
13333         }
13334      }
13335   }
13336
13337   // VMOV.F32 sD, #imm
13338   // FCONSTS sD, #imm
13339   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
13340       && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,0)) {
13341      UInt rD   = (INSN(15,12) << 1) | INSN(22,22);
13342      UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
13343      UInt b    = (imm8 >> 6) & 1;
13344      UInt imm;
13345      imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,(imm8 >> 5) & 1) << 8)
13346             | ((imm8 & 0x1f) << 3);
13347      imm <<= 16;
13348      putFReg(rD, unop(Iop_ReinterpI32asF32, mkU32(imm)), condT);
13349      DIP("fconsts%s s%u #%u", nCC(conq), rD, imm8);
13350      goto decode_success_vfp;
13351   }
13352
13353   // VMOV.F64 dD, #imm
13354   // FCONSTD dD, #imm
13355   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
13356       && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,1)) {
13357      UInt rD   = INSN(15,12) | (INSN(22,22) << 4);
13358      UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
13359      UInt b    = (imm8 >> 6) & 1;
13360      ULong imm;
13361      imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,b) << 8)
13362             | BITS8(b,b,0,0,0,0,0,0) | (imm8 & 0x3f);
13363      imm <<= 48;
13364      putDReg(rD, unop(Iop_ReinterpI64asF64, mkU64(imm)), condT);
13365      DIP("fconstd%s d%u #%u", nCC(conq), rD, imm8);
13366      goto decode_success_vfp;
13367   }
13368
13369   /* ---------------------- vdup ------------------------- */
13370   // VDUP dD, rT
13371   // VDUP qD, rT
13372   if (BITS8(1,1,1,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,1))
13373       && BITS4(1,0,1,1) == INSN(11,8) && INSN(6,6) == 0 && INSN(4,4) == 1) {
13374      UInt rD   = (INSN(7,7) << 4) | INSN(19,16);
13375      UInt rT   = INSN(15,12);
13376      UInt Q    = INSN(21,21);
13377      UInt size = (INSN(22,22) << 1) | INSN(5,5);
13378      if (rT == 15 || (isT && rT == 13) || size == 3 || (Q && (rD & 1))) {
13379         /* fall through */
13380      } else {
13381         IRExpr* e = isT ? getIRegT(rT) : getIRegA(rT);
13382         if (Q) {
13383            rD >>= 1;
13384            switch (size) {
13385               case 0:
13386                  putQReg(rD, unop(Iop_Dup32x4, e), condT);
13387                  break;
13388               case 1:
13389                  putQReg(rD, unop(Iop_Dup16x8, unop(Iop_32to16, e)),
13390                              condT);
13391                  break;
13392               case 2:
13393                  putQReg(rD, unop(Iop_Dup8x16, unop(Iop_32to8, e)),
13394                              condT);
13395                  break;
13396               default:
13397                  vassert(0);
13398            }
13399            DIP("vdup.%u q%u, r%u\n", 32 / (1<<size), rD, rT);
13400         } else {
13401            switch (size) {
13402               case 0:
13403                  putDRegI64(rD, unop(Iop_Dup32x2, e), condT);
13404                  break;
13405               case 1:
13406                  putDRegI64(rD, unop(Iop_Dup16x4, unop(Iop_32to16, e)),
13407                               condT);
13408                  break;
13409               case 2:
13410                  putDRegI64(rD, unop(Iop_Dup8x8, unop(Iop_32to8, e)),
13411                               condT);
13412                  break;
13413               default:
13414                  vassert(0);
13415            }
13416            DIP("vdup.%u d%u, r%u\n", 32 / (1<<size), rD, rT);
13417         }
13418         goto decode_success_vfp;
13419      }
13420   }
13421
13422   /* --------------------- f{ld,st}d --------------------- */
13423   // FLDD, FSTD
13424   if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
13425       && BITS4(1,0,1,1) == INSN(11,8)) {
13426      UInt dD     = INSN(15,12) | (INSN(22,22) << 4);
13427      UInt rN     = INSN(19,16);
13428      UInt offset = (insn28 & 0xFF) << 2;
13429      UInt bU     = (insn28 >> 23) & 1; /* 1: +offset  0: -offset */
13430      UInt bL     = (insn28 >> 20) & 1; /* 1: load  0: store */
13431      /* make unconditional */
13432      if (condT != IRTemp_INVALID) {
13433         if (isT)
13434            mk_skip_over_T32_if_cond_is_false( condT );
13435         else
13436            mk_skip_over_A32_if_cond_is_false( condT );
13437         condT = IRTemp_INVALID;
13438      }
13439      IRTemp ea = newTemp(Ity_I32);
13440      assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
13441                       align4if(isT ? getIRegT(rN) : getIRegA(rN),
13442                                rN == 15),
13443                       mkU32(offset)));
13444      if (bL) {
13445         putDReg(dD, loadLE(Ity_F64,mkexpr(ea)), IRTemp_INVALID);
13446      } else {
13447         storeLE(mkexpr(ea), getDReg(dD));
13448      }
13449      DIP("f%sd%s d%u, [r%u, %c#%u]\n",
13450          bL ? "ld" : "st", nCC(conq), dD, rN,
13451          bU ? '+' : '-', offset);
13452      goto decode_success_vfp;
13453   }
13454
13455   /* --------------------- dp insns (D) --------------------- */
13456   if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
13457       && BITS4(1,0,1,1) == INSN(11,8)
13458       && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
13459      UInt    dM  = INSN(3,0)   | (INSN(5,5) << 4);       /* argR */
13460      UInt    dD  = INSN(15,12) | (INSN(22,22) << 4);   /* dst/acc */
13461      UInt    dN  = INSN(19,16) | (INSN(7,7) << 4);     /* argL */
13462      UInt    bP  = (insn28 >> 23) & 1;
13463      UInt    bQ  = (insn28 >> 21) & 1;
13464      UInt    bR  = (insn28 >> 20) & 1;
13465      UInt    bS  = (insn28 >> 6) & 1;
13466      UInt    opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
13467      IRExpr* rm  = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
13468      switch (opc) {
13469         case BITS4(0,0,0,0): /* MAC: d + n * m */
13470            putDReg(dD, triop(Iop_AddF64, rm,
13471                              getDReg(dD),
13472                              triop(Iop_MulF64, rm, getDReg(dN),
13473                                                    getDReg(dM))),
13474                        condT);
13475            DIP("fmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13476            goto decode_success_vfp;
13477         case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
13478            putDReg(dD, triop(Iop_AddF64, rm,
13479                              getDReg(dD),
13480                              unop(Iop_NegF64,
13481                                   triop(Iop_MulF64, rm, getDReg(dN),
13482                                                         getDReg(dM)))),
13483                        condT);
13484            DIP("fnmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13485            goto decode_success_vfp;
13486         case BITS4(0,0,1,0): /* MSC: - d + n * m */
13487            putDReg(dD, triop(Iop_AddF64, rm,
13488                              unop(Iop_NegF64, getDReg(dD)),
13489                              triop(Iop_MulF64, rm, getDReg(dN),
13490                                                    getDReg(dM))),
13491                        condT);
13492            DIP("fmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13493            goto decode_success_vfp;
13494         case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
13495            putDReg(dD, triop(Iop_AddF64, rm,
13496                              unop(Iop_NegF64, getDReg(dD)),
13497                              unop(Iop_NegF64,
13498                                   triop(Iop_MulF64, rm, getDReg(dN),
13499                                                         getDReg(dM)))),
13500                        condT);
13501            DIP("fnmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13502            goto decode_success_vfp;
13503         case BITS4(0,1,0,0): /* MUL: n * m */
13504            putDReg(dD, triop(Iop_MulF64, rm, getDReg(dN), getDReg(dM)),
13505                        condT);
13506            DIP("fmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13507            goto decode_success_vfp;
13508         case BITS4(0,1,0,1): /* NMUL: - n * m */
13509            putDReg(dD, unop(Iop_NegF64,
13510                             triop(Iop_MulF64, rm, getDReg(dN),
13511                                                   getDReg(dM))),
13512                    condT);
13513            DIP("fnmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13514            goto decode_success_vfp;
13515         case BITS4(0,1,1,0): /* ADD: n + m */
13516            putDReg(dD, triop(Iop_AddF64, rm, getDReg(dN), getDReg(dM)),
13517                        condT);
13518            DIP("faddd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13519            goto decode_success_vfp;
13520         case BITS4(0,1,1,1): /* SUB: n - m */
13521            putDReg(dD, triop(Iop_SubF64, rm, getDReg(dN), getDReg(dM)),
13522                        condT);
13523            DIP("fsubd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13524            goto decode_success_vfp;
13525         case BITS4(1,0,0,0): /* DIV: n / m */
13526            putDReg(dD, triop(Iop_DivF64, rm, getDReg(dN), getDReg(dM)),
13527                        condT);
13528            DIP("fdivd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
13529            goto decode_success_vfp;
13530         default:
13531            break;
13532      }
13533   }
13534
13535   /* --------------------- compares (D) --------------------- */
13536   /*          31   27   23   19   15 11   7    3
13537                 28   24   20   16 12    8    4    0
13538      FCMPD    cond 1110 1D11 0100 Dd 1011 0100 Dm
13539      FCMPED   cond 1110 1D11 0100 Dd 1011 1100 Dm
13540      FCMPZD   cond 1110 1D11 0101 Dd 1011 0100 0000
13541      FCMPZED  cond 1110 1D11 0101 Dd 1011 1100 0000
13542                                 Z         N
13543
13544      Z=0 Compare Dd vs Dm     and set FPSCR 31:28 accordingly
13545      Z=1 Compare Dd vs zero
13546
13547      N=1 generates Invalid Operation exn if either arg is any kind of NaN
13548      N=0 generates Invalid Operation exn if either arg is a signalling NaN
13549      (Not that we pay any attention to N here)
13550   */
13551   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
13552       && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
13553       && BITS4(1,0,1,1) == INSN(11,8)
13554       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
13555      UInt bZ = (insn28 >> 16) & 1;
13556      UInt bN = (insn28 >> 7) & 1;
13557      UInt dD = INSN(15,12) | (INSN(22,22) << 4);
13558      UInt dM = INSN(3,0) | (INSN(5,5) << 4);
13559      if (bZ && INSN(3,0) != 0) {
13560         /* does not decode; fall through */
13561      } else {
13562         IRTemp argL = newTemp(Ity_F64);
13563         IRTemp argR = newTemp(Ity_F64);
13564         IRTemp irRes = newTemp(Ity_I32);
13565         assign(argL, getDReg(dD));
13566         assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0)) : getDReg(dM));
13567         assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
13568
13569         IRTemp nzcv     = IRTemp_INVALID;
13570         IRTemp oldFPSCR = newTemp(Ity_I32);
13571         IRTemp newFPSCR = newTemp(Ity_I32);
13572
13573         /* This is where the fun starts.  We have to convert 'irRes'
13574            from an IR-convention return result (IRCmpF64Result) to an
13575            ARM-encoded (N,Z,C,V) group.  The final result is in the
13576            bottom 4 bits of 'nzcv'. */
13577         /* Map compare result from IR to ARM(nzcv) */
13578         /*
13579            FP cmp result | IR   | ARM(nzcv)
13580            --------------------------------
13581            UN              0x45   0011
13582            LT              0x01   1000
13583            GT              0x00   0010
13584            EQ              0x40   0110
13585         */
13586         nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
13587
13588         /* And update FPSCR accordingly */
13589         assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
13590         assign(newFPSCR,
13591                binop(Iop_Or32,
13592                      binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
13593                      binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
13594
13595         putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
13596
13597         if (bZ) {
13598            DIP("fcmpz%sd%s d%u\n", bN ? "e" : "", nCC(conq), dD);
13599         } else {
13600            DIP("fcmp%sd%s d%u, d%u\n", bN ? "e" : "", nCC(conq), dD, dM);
13601         }
13602         goto decode_success_vfp;
13603      }
13604      /* fall through */
13605   }
13606
13607   /* --------------------- unary (D) --------------------- */
13608   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
13609       && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
13610       && BITS4(1,0,1,1) == INSN(11,8)
13611       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
13612      UInt dD  = INSN(15,12) | (INSN(22,22) << 4);
13613      UInt dM  = INSN(3,0) | (INSN(5,5) << 4);
13614      UInt b16 = (insn28 >> 16) & 1;
13615      UInt b7  = (insn28 >> 7) & 1;
13616      /**/ if (b16 == 0 && b7 == 0) {
13617         // FCPYD
13618         putDReg(dD, getDReg(dM), condT);
13619         DIP("fcpyd%s d%u, d%u\n", nCC(conq), dD, dM);
13620         goto decode_success_vfp;
13621      }
13622      else if (b16 == 0 && b7 == 1) {
13623         // FABSD
13624         putDReg(dD, unop(Iop_AbsF64, getDReg(dM)), condT);
13625         DIP("fabsd%s d%u, d%u\n", nCC(conq), dD, dM);
13626         goto decode_success_vfp;
13627      }
13628      else if (b16 == 1 && b7 == 0) {
13629         // FNEGD
13630         putDReg(dD, unop(Iop_NegF64, getDReg(dM)), condT);
13631         DIP("fnegd%s d%u, d%u\n", nCC(conq), dD, dM);
13632         goto decode_success_vfp;
13633      }
13634      else if (b16 == 1 && b7 == 1) {
13635         // FSQRTD
13636         IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
13637         putDReg(dD, binop(Iop_SqrtF64, rm, getDReg(dM)), condT);
13638         DIP("fsqrtd%s d%u, d%u\n", nCC(conq), dD, dM);
13639         goto decode_success_vfp;
13640      }
13641      else
13642         vassert(0);
13643
13644      /* fall through */
13645   }
13646
13647   /* ----------------- I <-> D conversions ----------------- */
13648
13649   // F{S,U}ITOD dD, fM
13650   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
13651       && BITS4(1,0,0,0) == (INSN(19,16) & BITS4(1,1,1,1))
13652       && BITS4(1,0,1,1) == INSN(11,8)
13653       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
13654      UInt bM    = (insn28 >> 5) & 1;
13655      UInt fM    = (INSN(3,0) << 1) | bM;
13656      UInt dD    = INSN(15,12) | (INSN(22,22) << 4);
13657      UInt syned = (insn28 >> 7) & 1;
13658      if (syned) {
13659         // FSITOD
13660         putDReg(dD, unop(Iop_I32StoF64,
13661                          unop(Iop_ReinterpF32asI32, getFReg(fM))),
13662                 condT);
13663         DIP("fsitod%s d%u, s%u\n", nCC(conq), dD, fM);
13664      } else {
13665         // FUITOD
13666         putDReg(dD, unop(Iop_I32UtoF64,
13667                          unop(Iop_ReinterpF32asI32, getFReg(fM))),
13668                 condT);
13669         DIP("fuitod%s d%u, s%u\n", nCC(conq), dD, fM);
13670      }
13671      goto decode_success_vfp;
13672   }
13673
13674   // FTO{S,U}ID fD, dM
13675   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
13676       && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
13677       && BITS4(1,0,1,1) == INSN(11,8)
13678       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
13679      UInt   bD    = (insn28 >> 22) & 1;
13680      UInt   fD    = (INSN(15,12) << 1) | bD;
13681      UInt   dM    = INSN(3,0) | (INSN(5,5) << 4);
13682      UInt   bZ    = (insn28 >> 7) & 1;
13683      UInt   syned = (insn28 >> 16) & 1;
13684      IRTemp rmode = newTemp(Ity_I32);
13685      assign(rmode, bZ ? mkU32(Irrm_ZERO)
13686                       : mkexpr(mk_get_IR_rounding_mode()));
13687      if (syned) {
13688         // FTOSID
13689         putFReg(fD, unop(Iop_ReinterpI32asF32,
13690                          binop(Iop_F64toI32S, mkexpr(rmode),
13691                                getDReg(dM))),
13692                 condT);
13693         DIP("ftosi%sd%s s%u, d%u\n", bZ ? "z" : "",
13694             nCC(conq), fD, dM);
13695      } else {
13696         // FTOUID
13697         putFReg(fD, unop(Iop_ReinterpI32asF32,
13698                          binop(Iop_F64toI32U, mkexpr(rmode),
13699                                getDReg(dM))),
13700                 condT);
13701         DIP("ftoui%sd%s s%u, d%u\n", bZ ? "z" : "",
13702             nCC(conq), fD, dM);
13703      }
13704      goto decode_success_vfp;
13705   }
13706
13707   /* ----------------------------------------------------------- */
13708   /* -- VFP instructions -- single precision                  -- */
13709   /* ----------------------------------------------------------- */
13710
13711   /* --------------------- fldms, fstms --------------------- */
13712   /*
13713                                 31   27   23   19 15 11   7   0
13714                                         P UDWL
13715      C4-98, C5-26   1  FSTMS    cond 1100 1x00 Rn Fd 1010 offset
13716      C4-98, C5-28   2  FSTMIAS  cond 1100 1x10 Rn Fd 1010 offset
13717      C4-98, C5-30   3  FSTMDBS  cond 1101 0x10 Rn Fd 1010 offset
13718
13719      C4-40, C5-26   1  FLDMS    cond 1100 1x01 Rn Fd 1010 offset
13720      C4-40, C5-26   2  FLDMIAS  cond 1100 1x11 Rn Fd 1010 offset
13721      C4-40, C5-26   3  FLDMDBS  cond 1101 0x11 Rn Fd 1010 offset
13722
13723      Regs transferred: F(Fd:D) .. F(Fd:D + offset - 1)
13724      offset must not imply a reg > 31
13725      IA/DB: Rn is changed by (4 x # regs transferred)
13726
13727      case coding:
13728         1  at-Rn   (access at Rn)
13729         2  ia-Rn   (access at Rn, then Rn += 4n)
13730         3  db-Rn   (Rn -= 4n,     then access at Rn)
13731   */
13732   if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
13733       && INSN(11,8) == BITS4(1,0,1,0)) {
13734      UInt bP      = (insn28 >> 24) & 1;
13735      UInt bU      = (insn28 >> 23) & 1;
13736      UInt bW      = (insn28 >> 21) & 1;
13737      UInt bL      = (insn28 >> 20) & 1;
13738      UInt bD      = (insn28 >> 22) & 1;
13739      UInt offset  = (insn28 >> 0) & 0xFF;
13740      UInt rN      = INSN(19,16);
13741      UInt fD      = (INSN(15,12) << 1) | bD;
13742      UInt nRegs   = offset;
13743      UInt summary = 0;
13744      Int  i;
13745
13746      /**/ if (bP == 0 && bU == 1 && bW == 0) {
13747         summary = 1;
13748      }
13749      else if (bP == 0 && bU == 1 && bW == 1) {
13750         summary = 2;
13751      }
13752      else if (bP == 1 && bU == 0 && bW == 1) {
13753         summary = 3;
13754      }
13755      else goto after_vfp_fldms_fstms;
13756
13757      /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
13758      if (rN == 15 && (summary == 2 || summary == 3 || isT))
13759         goto after_vfp_fldms_fstms;
13760
13761      /* offset must specify at least one register */
13762      if (offset < 1)
13763         goto after_vfp_fldms_fstms;
13764
13765      /* can't transfer regs after S31 */
13766      if (fD + nRegs - 1 >= 32)
13767         goto after_vfp_fldms_fstms;
13768
13769      /* Now, we can't do a conditional load or store, since that very
13770         likely will generate an exception.  So we have to take a side
13771         exit at this point if the condition is false. */
13772      if (condT != IRTemp_INVALID) {
13773         if (isT)
13774            mk_skip_over_T32_if_cond_is_false( condT );
13775         else
13776            mk_skip_over_A32_if_cond_is_false( condT );
13777         condT = IRTemp_INVALID;
13778      }
13779      /* Ok, now we're unconditional.  Do the load or store. */
13780
13781      /* get the old Rn value */
13782      IRTemp rnT = newTemp(Ity_I32);
13783      assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
13784                           rN == 15));
13785
13786      /* make a new value for Rn, post-insn */
13787      IRTemp rnTnew = IRTemp_INVALID;
13788      if (summary == 2 || summary == 3) {
13789         rnTnew = newTemp(Ity_I32);
13790         assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
13791                              mkexpr(rnT),
13792                              mkU32(4 * nRegs)));
13793      }
13794
13795      /* decide on the base transfer address */
13796      IRTemp taT = newTemp(Ity_I32);
13797      assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
13798
13799      /* update Rn if necessary -- in case 3, we're moving it down, so
13800         update before any memory reference, in order to keep Memcheck
13801         and V's stack-extending logic (on linux) happy */
13802      if (summary == 3) {
13803         if (isT)
13804            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
13805         else
13806            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
13807      }
13808
13809      /* generate the transfers */
13810      for (i = 0; i < nRegs; i++) {
13811         IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(4*i));
13812         if (bL) {
13813            putFReg(fD + i, loadLE(Ity_F32, addr), IRTemp_INVALID);
13814         } else {
13815            storeLE(addr, getFReg(fD + i));
13816         }
13817      }
13818
13819      /* update Rn if necessary -- in case 2, we're moving it up, so
13820         update after any memory reference, in order to keep Memcheck
13821         and V's stack-extending logic (on linux) happy */
13822      if (summary == 2) {
13823         if (isT)
13824            putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
13825         else
13826            putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
13827      }
13828
13829      const HChar* nm = bL==1 ? "ld" : "st";
13830      switch (summary) {
13831         case 1:  DIP("f%sms%s r%u, {s%u-s%u}\n",
13832                      nm, nCC(conq), rN, fD, fD + nRegs - 1);
13833                  break;
13834         case 2:  DIP("f%smias%s r%u!, {s%u-s%u}\n",
13835                      nm, nCC(conq), rN, fD, fD + nRegs - 1);
13836                  break;
13837         case 3:  DIP("f%smdbs%s r%u!, {s%u-s%u}\n",
13838                      nm, nCC(conq), rN, fD, fD + nRegs - 1);
13839                  break;
13840         default: vassert(0);
13841      }
13842
13843      goto decode_success_vfp;
13844      /* FIXME alignment constraints? */
13845   }
13846
13847  after_vfp_fldms_fstms:
13848
13849   /* --------------------- fmsr, fmrs --------------------- */
13850   if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
13851       && BITS4(1,0,1,0) == INSN(11,8)
13852       && BITS4(0,0,0,0) == INSN(3,0)
13853       && BITS4(0,0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
13854      UInt rD  = INSN(15,12);
13855      UInt b7  = (insn28 >> 7) & 1;
13856      UInt fN  = (INSN(19,16) << 1) | b7;
13857      UInt b20 = (insn28 >> 20) & 1;
13858      if (rD == 15) {
13859         /* fall through */
13860         /* Let's assume that no sane person would want to do
13861            floating-point transfers to or from the program counter,
13862            and simply decline to decode the instruction.  The ARM ARM
13863            doesn't seem to explicitly disallow this case, though. */
13864      } else {
13865         if (b20) {
13866            IRExpr* res = unop(Iop_ReinterpF32asI32, getFReg(fN));
13867            if (isT)
13868               putIRegT(rD, res, condT);
13869            else
13870               putIRegA(rD, res, condT, Ijk_Boring);
13871            DIP("fmrs%s r%u, s%u\n", nCC(conq), rD, fN);
13872         } else {
13873            putFReg(fN, unop(Iop_ReinterpI32asF32,
13874                             isT ? getIRegT(rD) : getIRegA(rD)),
13875                        condT);
13876            DIP("fmsr%s s%u, r%u\n", nCC(conq), fN, rD);
13877         }
13878         goto decode_success_vfp;
13879      }
13880      /* fall through */
13881   }
13882
13883   /* --------------------- f{ld,st}s --------------------- */
13884   // FLDS, FSTS
13885   if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
13886       && BITS4(1,0,1,0) == INSN(11,8)) {
13887      UInt bD     = (insn28 >> 22) & 1;
13888      UInt fD     = (INSN(15,12) << 1) | bD;
13889      UInt rN     = INSN(19,16);
13890      UInt offset = (insn28 & 0xFF) << 2;
13891      UInt bU     = (insn28 >> 23) & 1; /* 1: +offset  0: -offset */
13892      UInt bL     = (insn28 >> 20) & 1; /* 1: load  0: store */
13893      /* make unconditional */
13894      if (condT != IRTemp_INVALID) {
13895         if (isT)
13896            mk_skip_over_T32_if_cond_is_false( condT );
13897         else
13898            mk_skip_over_A32_if_cond_is_false( condT );
13899         condT = IRTemp_INVALID;
13900      }
13901      IRTemp ea = newTemp(Ity_I32);
13902      assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
13903                       align4if(isT ? getIRegT(rN) : getIRegA(rN),
13904                                rN == 15),
13905                       mkU32(offset)));
13906      if (bL) {
13907         putFReg(fD, loadLE(Ity_F32,mkexpr(ea)), IRTemp_INVALID);
13908      } else {
13909         storeLE(mkexpr(ea), getFReg(fD));
13910      }
13911      DIP("f%ss%s s%u, [r%u, %c#%u]\n",
13912          bL ? "ld" : "st", nCC(conq), fD, rN,
13913          bU ? '+' : '-', offset);
13914      goto decode_success_vfp;
13915   }
13916
13917   /* --------------------- dp insns (F) --------------------- */
13918   if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
13919       && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
13920       && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
13921      UInt    bM  = (insn28 >> 5) & 1;
13922      UInt    bD  = (insn28 >> 22) & 1;
13923      UInt    bN  = (insn28 >> 7) & 1;
13924      UInt    fM  = (INSN(3,0) << 1) | bM;   /* argR */
13925      UInt    fD  = (INSN(15,12) << 1) | bD; /* dst/acc */
13926      UInt    fN  = (INSN(19,16) << 1) | bN; /* argL */
13927      UInt    bP  = (insn28 >> 23) & 1;
13928      UInt    bQ  = (insn28 >> 21) & 1;
13929      UInt    bR  = (insn28 >> 20) & 1;
13930      UInt    bS  = (insn28 >> 6) & 1;
13931      UInt    opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
13932      IRExpr* rm  = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
13933      switch (opc) {
13934         case BITS4(0,0,0,0): /* MAC: d + n * m */
13935            putFReg(fD, triop(Iop_AddF32, rm,
13936                              getFReg(fD),
13937                              triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
13938                        condT);
13939            DIP("fmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
13940            goto decode_success_vfp;
13941         case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
13942            putFReg(fD, triop(Iop_AddF32, rm,
13943                              getFReg(fD),
13944                              unop(Iop_NegF32,
13945                                   triop(Iop_MulF32, rm, getFReg(fN),
13946                                                         getFReg(fM)))),
13947                        condT);
13948            DIP("fnmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
13949            goto decode_success_vfp;
13950         case BITS4(0,0,1,0): /* MSC: - d + n * m */
13951            putFReg(fD, triop(Iop_AddF32, rm,
13952                              unop(Iop_NegF32, getFReg(fD)),
13953                              triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
13954                        condT);
13955            DIP("fmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
13956            goto decode_success_vfp;
13957         case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
13958            putFReg(fD, triop(Iop_AddF32, rm,
13959                              unop(Iop_NegF32, getFReg(fD)),
13960                              unop(Iop_NegF32,
13961                                   triop(Iop_MulF32, rm,
13962                                                     getFReg(fN),
13963                                                    getFReg(fM)))),
13964                        condT);
13965            DIP("fnmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
13966            goto decode_success_vfp;
13967         case BITS4(0,1,0,0): /* MUL: n * m */
13968            putFReg(fD, triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM)),
13969                        condT);
13970            DIP("fmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
13971            goto decode_success_vfp;
13972         case BITS4(0,1,0,1): /* NMUL: - n * m */
13973            putFReg(fD, unop(Iop_NegF32,
13974                             triop(Iop_MulF32, rm, getFReg(fN),
13975                                                   getFReg(fM))),
13976                    condT);
13977            DIP("fnmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
13978            goto decode_success_vfp;
13979         case BITS4(0,1,1,0): /* ADD: n + m */
13980            putFReg(fD, triop(Iop_AddF32, rm, getFReg(fN), getFReg(fM)),
13981                        condT);
13982            DIP("fadds%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
13983            goto decode_success_vfp;
13984         case BITS4(0,1,1,1): /* SUB: n - m */
13985            putFReg(fD, triop(Iop_SubF32, rm, getFReg(fN), getFReg(fM)),
13986                        condT);
13987            DIP("fsubs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
13988            goto decode_success_vfp;
13989         case BITS4(1,0,0,0): /* DIV: n / m */
13990            putFReg(fD, triop(Iop_DivF32, rm, getFReg(fN), getFReg(fM)),
13991                        condT);
13992            DIP("fdivs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
13993            goto decode_success_vfp;
13994         default:
13995            break;
13996      }
13997   }
13998
13999   /* --------------------- compares (S) --------------------- */
14000   /*          31   27   23   19   15 11   7    3
14001                 28   24   20   16 12    8    4    0
14002      FCMPS    cond 1110 1D11 0100 Fd 1010 01M0 Fm
14003      FCMPES   cond 1110 1D11 0100 Fd 1010 11M0 Fm
14004      FCMPZS   cond 1110 1D11 0101 Fd 1010 0100 0000
14005      FCMPZES  cond 1110 1D11 0101 Fd 1010 1100 0000
14006                                 Z         N
14007
14008      Z=0 Compare Fd:D vs Fm:M     and set FPSCR 31:28 accordingly
14009      Z=1 Compare Fd:D vs zero
14010
14011      N=1 generates Invalid Operation exn if either arg is any kind of NaN
14012      N=0 generates Invalid Operation exn if either arg is a signalling NaN
14013      (Not that we pay any attention to N here)
14014   */
14015   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14016       && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
14017       && BITS4(1,0,1,0) == INSN(11,8)
14018       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
14019      UInt bZ = (insn28 >> 16) & 1;
14020      UInt bN = (insn28 >> 7) & 1;
14021      UInt bD = (insn28 >> 22) & 1;
14022      UInt bM = (insn28 >> 5) & 1;
14023      UInt fD = (INSN(15,12) << 1) | bD;
14024      UInt fM = (INSN(3,0) << 1) | bM;
14025      if (bZ && (INSN(3,0) != 0 || (INSN(7,4) & 3) != 0)) {
14026         /* does not decode; fall through */
14027      } else {
14028         IRTemp argL = newTemp(Ity_F64);
14029         IRTemp argR = newTemp(Ity_F64);
14030         IRTemp irRes = newTemp(Ity_I32);
14031
14032         assign(argL, unop(Iop_F32toF64, getFReg(fD)));
14033         assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0))
14034                         : unop(Iop_F32toF64, getFReg(fM)));
14035         assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
14036
14037         IRTemp nzcv     = IRTemp_INVALID;
14038         IRTemp oldFPSCR = newTemp(Ity_I32);
14039         IRTemp newFPSCR = newTemp(Ity_I32);
14040
14041         /* This is where the fun starts.  We have to convert 'irRes'
14042            from an IR-convention return result (IRCmpF64Result) to an
14043            ARM-encoded (N,Z,C,V) group.  The final result is in the
14044            bottom 4 bits of 'nzcv'. */
14045         /* Map compare result from IR to ARM(nzcv) */
14046         /*
14047            FP cmp result | IR   | ARM(nzcv)
14048            --------------------------------
14049            UN              0x45   0011
14050            LT              0x01   1000
14051            GT              0x00   0010
14052            EQ              0x40   0110
14053         */
14054         nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
14055
14056         /* And update FPSCR accordingly */
14057         assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
14058         assign(newFPSCR,
14059                binop(Iop_Or32,
14060                      binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
14061                      binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
14062
14063         putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
14064
14065         if (bZ) {
14066            DIP("fcmpz%ss%s s%u\n", bN ? "e" : "", nCC(conq), fD);
14067         } else {
14068            DIP("fcmp%ss%s s%u, s%u\n", bN ? "e" : "",
14069                nCC(conq), fD, fM);
14070         }
14071         goto decode_success_vfp;
14072      }
14073      /* fall through */
14074   }
14075
14076   /* --------------------- unary (S) --------------------- */
14077   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14078       && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
14079       && BITS4(1,0,1,0) == INSN(11,8)
14080       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
14081      UInt bD = (insn28 >> 22) & 1;
14082      UInt bM = (insn28 >> 5) & 1;
14083      UInt fD  = (INSN(15,12) << 1) | bD;
14084      UInt fM  = (INSN(3,0) << 1) | bM;
14085      UInt b16 = (insn28 >> 16) & 1;
14086      UInt b7  = (insn28 >> 7) & 1;
14087      /**/ if (b16 == 0 && b7 == 0) {
14088         // FCPYS
14089         putFReg(fD, getFReg(fM), condT);
14090         DIP("fcpys%s s%u, s%u\n", nCC(conq), fD, fM);
14091         goto decode_success_vfp;
14092      }
14093      else if (b16 == 0 && b7 == 1) {
14094         // FABSS
14095         putFReg(fD, unop(Iop_AbsF32, getFReg(fM)), condT);
14096         DIP("fabss%s s%u, s%u\n", nCC(conq), fD, fM);
14097         goto decode_success_vfp;
14098      }
14099      else if (b16 == 1 && b7 == 0) {
14100         // FNEGS
14101         putFReg(fD, unop(Iop_NegF32, getFReg(fM)), condT);
14102         DIP("fnegs%s s%u, s%u\n", nCC(conq), fD, fM);
14103         goto decode_success_vfp;
14104      }
14105      else if (b16 == 1 && b7 == 1) {
14106         // FSQRTS
14107         IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
14108         putFReg(fD, binop(Iop_SqrtF32, rm, getFReg(fM)), condT);
14109         DIP("fsqrts%s s%u, s%u\n", nCC(conq), fD, fM);
14110         goto decode_success_vfp;
14111      }
14112      else
14113         vassert(0);
14114
14115      /* fall through */
14116   }
14117
14118   /* ----------------- I <-> S conversions ----------------- */
14119
14120   // F{S,U}ITOS fD, fM
14121   /* These are more complex than FSITOD/FUITOD.  In the D cases, a 32
14122      bit int will always fit within the 53 bit mantissa, so there's
14123      no possibility of a loss of precision, but that's obviously not
14124      the case here.  Hence this case possibly requires rounding, and
14125      so it drags in the current rounding mode. */
14126   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14127       && BITS4(1,0,0,0) == INSN(19,16)
14128       && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
14129       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
14130      UInt bM    = (insn28 >> 5) & 1;
14131      UInt bD    = (insn28 >> 22) & 1;
14132      UInt fM    = (INSN(3,0) << 1) | bM;
14133      UInt fD    = (INSN(15,12) << 1) | bD;
14134      UInt syned = (insn28 >> 7) & 1;
14135      IRTemp rmode = newTemp(Ity_I32);
14136      assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
14137      if (syned) {
14138         // FSITOS
14139         putFReg(fD, binop(Iop_F64toF32,
14140                           mkexpr(rmode),
14141                           unop(Iop_I32StoF64,
14142                                unop(Iop_ReinterpF32asI32, getFReg(fM)))),
14143                 condT);
14144         DIP("fsitos%s s%u, s%u\n", nCC(conq), fD, fM);
14145      } else {
14146         // FUITOS
14147         putFReg(fD, binop(Iop_F64toF32,
14148                           mkexpr(rmode),
14149                           unop(Iop_I32UtoF64,
14150                                unop(Iop_ReinterpF32asI32, getFReg(fM)))),
14151                 condT);
14152         DIP("fuitos%s s%u, s%u\n", nCC(conq), fD, fM);
14153      }
14154      goto decode_success_vfp;
14155   }
14156
14157   // FTO{S,U}IS fD, fM
14158   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14159       && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
14160       && BITS4(1,0,1,0) == INSN(11,8)
14161       && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
14162      UInt   bM    = (insn28 >> 5) & 1;
14163      UInt   bD    = (insn28 >> 22) & 1;
14164      UInt   fD    = (INSN(15,12) << 1) | bD;
14165      UInt   fM    = (INSN(3,0) << 1) | bM;
14166      UInt   bZ    = (insn28 >> 7) & 1;
14167      UInt   syned = (insn28 >> 16) & 1;
14168      IRTemp rmode = newTemp(Ity_I32);
14169      assign(rmode, bZ ? mkU32(Irrm_ZERO)
14170                       : mkexpr(mk_get_IR_rounding_mode()));
14171      if (syned) {
14172         // FTOSIS
14173         putFReg(fD, unop(Iop_ReinterpI32asF32,
14174                          binop(Iop_F64toI32S, mkexpr(rmode),
14175                                unop(Iop_F32toF64, getFReg(fM)))),
14176                 condT);
14177         DIP("ftosi%ss%s s%u, d%u\n", bZ ? "z" : "",
14178             nCC(conq), fD, fM);
14179         goto decode_success_vfp;
14180      } else {
14181         // FTOUIS
14182         putFReg(fD, unop(Iop_ReinterpI32asF32,
14183                          binop(Iop_F64toI32U, mkexpr(rmode),
14184                                unop(Iop_F32toF64, getFReg(fM)))),
14185                 condT);
14186         DIP("ftoui%ss%s s%u, d%u\n", bZ ? "z" : "",
14187             nCC(conq), fD, fM);
14188         goto decode_success_vfp;
14189      }
14190   }
14191
14192   /* ----------------- S <-> D conversions ----------------- */
14193
14194   // FCVTDS
14195   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14196       && BITS4(0,1,1,1) == INSN(19,16)
14197       && BITS4(1,0,1,0) == INSN(11,8)
14198       && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
14199      UInt dD = INSN(15,12) | (INSN(22,22) << 4);
14200      UInt bM = (insn28 >> 5) & 1;
14201      UInt fM = (INSN(3,0) << 1) | bM;
14202      putDReg(dD, unop(Iop_F32toF64, getFReg(fM)), condT);
14203      DIP("fcvtds%s d%u, s%u\n", nCC(conq), dD, fM);
14204      goto decode_success_vfp;
14205   }
14206
14207   // FCVTSD
14208   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14209       && BITS4(0,1,1,1) == INSN(19,16)
14210       && BITS4(1,0,1,1) == INSN(11,8)
14211       && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
14212      UInt   bD    = (insn28 >> 22) & 1;
14213      UInt   fD    = (INSN(15,12) << 1) | bD;
14214      UInt   dM    = INSN(3,0) | (INSN(5,5) << 4);
14215      IRTemp rmode = newTemp(Ity_I32);
14216      assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
14217      putFReg(fD, binop(Iop_F64toF32, mkexpr(rmode), getDReg(dM)),
14218                  condT);
14219      DIP("fcvtsd%s s%u, d%u\n", nCC(conq), fD, dM);
14220      goto decode_success_vfp;
14221   }
14222
14223   /* --------------- VCVT fixed<->floating, VFP --------------- */
14224   /*          31   27   23   19   15 11   7    3
14225                 28   24   20   16 12    8    4    0
14226
14227               cond 1110 1D11 1p1U Vd 101f x1i0 imm4
14228
14229      VCVT<c>.<Td>.F64 <Dd>, <Dd>, #fbits
14230      VCVT<c>.<Td>.F32 <Sd>, <Sd>, #fbits
14231      VCVT<c>.F64.<Td> <Dd>, <Dd>, #fbits
14232      VCVT<c>.F32.<Td> <Sd>, <Sd>, #fbits
14233      are of this form.  We only handle a subset of the cases though.
14234   */
14235   if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14236       && BITS4(1,0,1,0) == (INSN(19,16) & BITS4(1,0,1,0))
14237       && BITS3(1,0,1) == INSN(11,9)
14238       && BITS3(1,0,0) == (INSN(6,4) & BITS3(1,0,1))) {
14239      UInt bD        = INSN(22,22);
14240      UInt bOP       = INSN(18,18);
14241      UInt bU        = INSN(16,16);
14242      UInt Vd        = INSN(15,12);
14243      UInt bSF       = INSN(8,8);
14244      UInt bSX       = INSN(7,7);
14245      UInt bI        = INSN(5,5);
14246      UInt imm4      = INSN(3,0);
14247      Bool to_fixed  = bOP == 1;
14248      Bool dp_op     = bSF == 1;
14249      Bool unsyned   = bU == 1;
14250      UInt size      = bSX == 0 ? 16 : 32;
14251      Int  frac_bits = size - ((imm4 << 1) | bI);
14252      UInt d         = dp_op  ? ((bD << 4) | Vd)  : ((Vd << 1) | bD);
14253      if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && size == 32) {
14254         /* dp_op == 0 : VCVT.F32.{S,U}32 S[d], S[d], #frac_bits */
14255         /* dp_op == 1 : VCVT.F64.{S,U}32 D[d], D[d], #frac_bits */
14256         /* This generates really horrible code.  We could potentially
14257            do much better. */
14258         IRTemp rmode = newTemp(Ity_I32);
14259         assign(rmode, mkU32(Irrm_NEAREST)); // per the spec
14260         IRTemp src32 = newTemp(Ity_I32);
14261         if (dp_op == 0) {
14262            assign(src32,  unop(Iop_ReinterpF32asI32, getFReg(d)));
14263         } else {
14264            /* Example code sequence of using vcvt.f64.s32. The s32 value is
14265               initialized in s14 but loaded via d7 (s14 is the low half of
14266               d7), so we need to decode the register using getDReg instead of
14267               getFReg. Since the conversion size is from s32 to f64, we also
14268               need to explicitly extract the low half of i64 here.
14269
14270               81a0:       ee07 2a10       vmov            s14, r2
14271               81a4:       eeba 7bef       vcvt.f64.s32    d7, d7, #1
14272             */
14273            IRTemp src64 = newTemp(Ity_I64);
14274            assign(src64,  unop(Iop_ReinterpF64asI64, getDReg(d)));
14275            assign(src32, unop(Iop_64to32, mkexpr(src64)));
14276         }
14277         IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
14278                                mkexpr(src32 ) );
14279         IRTemp scale = newTemp(Ity_F64);
14280         assign(scale, unop(Iop_I32UtoF64, mkU32( 1 << (frac_bits-1) )));
14281         IRExpr* rm     = mkU32(Irrm_NEAREST);
14282         IRExpr* resF64 = triop(Iop_DivF64,
14283                                rm, as_F64,
14284                                triop(Iop_AddF64, rm, mkexpr(scale),
14285                                                      mkexpr(scale)));
14286         if (dp_op == 0) {
14287            IRExpr* resF32 = binop(Iop_F64toF32, mkexpr(rmode), resF64);
14288            putFReg(d, resF32, condT);
14289            DIP("vcvt.f32.%c32, s%u, s%u, #%d\n",
14290                unsyned ? 'u' : 's', d, d, frac_bits);
14291         } else {
14292            putDReg(d, resF64, condT);
14293            DIP("vcvt.f64.%c32, d%u, d%u, #%d\n",
14294                unsyned ? 'u' : 's', d, d, frac_bits);
14295         }
14296         goto decode_success_vfp;
14297      }
14298      if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && dp_op
14299                                            && size == 32) {
14300         /* VCVT.F64.{S,U}32 D[d], D[d], #frac_bits */
14301         /* This generates really horrible code.  We could potentially
14302            do much better. */
14303         IRTemp src32 = newTemp(Ity_I32);
14304         assign(src32, unop(Iop_64to32, getDRegI64(d)));
14305         IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
14306                                mkexpr(src32 ) );
14307         IRTemp scale = newTemp(Ity_F64);
14308         assign(scale, unop(Iop_I32UtoF64, mkU32( 1 << (frac_bits-1) )));
14309         IRExpr* rm     = mkU32(Irrm_NEAREST);
14310         IRExpr* resF64 = triop(Iop_DivF64,
14311                                rm, as_F64,
14312                                triop(Iop_AddF64, rm, mkexpr(scale),
14313                                                      mkexpr(scale)));
14314         putDReg(d, resF64, condT);
14315         DIP("vcvt.f64.%c32, d%u, d%u, #%d\n",
14316             unsyned ? 'u' : 's', d, d, frac_bits);
14317         goto decode_success_vfp;
14318      }
14319      if (frac_bits >= 1 && frac_bits <= 32 && to_fixed && dp_op
14320                                            && size == 32) {
14321         /* VCVT.{S,U}32.F64 D[d], D[d], #frac_bits */
14322         IRTemp srcF64 = newTemp(Ity_F64);
14323         assign(srcF64, getDReg(d));
14324         IRTemp scale = newTemp(Ity_F64);
14325         assign(scale, unop(Iop_I32UtoF64, mkU32( 1 << (frac_bits-1) )));
14326         IRTemp scaledF64 = newTemp(Ity_F64);
14327         IRExpr* rm = mkU32(Irrm_NEAREST);
14328         assign(scaledF64, triop(Iop_MulF64,
14329                                 rm, mkexpr(srcF64),
14330                                 triop(Iop_AddF64, rm, mkexpr(scale),
14331                                                       mkexpr(scale))));
14332         IRTemp rmode = newTemp(Ity_I32);
14333         assign(rmode, mkU32(Irrm_ZERO)); // as per the spec
14334         IRTemp asI32 = newTemp(Ity_I32);
14335         assign(asI32, binop(unsyned ? Iop_F64toI32U : Iop_F64toI32S,
14336                             mkexpr(rmode), mkexpr(scaledF64)));
14337         putDRegI64(d, unop(unsyned ? Iop_32Uto64 : Iop_32Sto64,
14338                            mkexpr(asI32)), condT);
14339         goto decode_success_vfp;
14340      }
14341      /* fall through */
14342   }
14343
14344   /* FAILURE */
14345   return False;
14346
14347  decode_success_vfp:
14348   /* Check that any accepted insn really is a CP10 or CP11 insn, iow,
14349      assert that we aren't accepting, in this fn, insns that actually
14350      should be handled somewhere else. */
14351   vassert(INSN(11,9) == BITS3(1,0,1)); // 11:8 = 1010 or 1011
14352   return True;
14353
14354#  undef INSN
14355}
14356
14357
14358/*------------------------------------------------------------*/
14359/*--- Instructions in NV (never) space                     ---*/
14360/*------------------------------------------------------------*/
14361
14362/* ARM only */
14363/* Translate a NV space instruction.  If successful, returns True and
14364   *dres may or may not be updated.  If failure, returns False and
14365   doesn't change *dres nor create any IR.
14366
14367   Note that all NEON instructions (in ARM mode) are handled through
14368   here, since they are all in NV space.
14369*/
14370static Bool decode_NV_instruction ( /*MOD*/DisResult* dres,
14371                                    VexArchInfo* archinfo,
14372                                    UInt insn )
14373{
14374#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
14375#  define INSN_COND          SLICE_UInt(insn, 31, 28)
14376
14377   HChar dis_buf[128];
14378
14379   // Should only be called for NV instructions
14380   vassert(BITS4(1,1,1,1) == INSN_COND);
14381
14382   /* ------------------------ pld ------------------------ */
14383   if (BITS8(0,1,0,1, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
14384       && BITS4(1,1,1,1) == INSN(15,12)) {
14385      UInt rN    = INSN(19,16);
14386      UInt imm12 = INSN(11,0);
14387      UInt bU    = INSN(23,23);
14388      DIP("pld [r%u, #%c%u]\n", rN, bU ? '+' : '-', imm12);
14389      return True;
14390   }
14391
14392   if (BITS8(0,1,1,1, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
14393       && BITS4(1,1,1,1) == INSN(15,12)
14394       && 0 == INSN(4,4)) {
14395      UInt rN   = INSN(19,16);
14396      UInt rM   = INSN(3,0);
14397      UInt imm5 = INSN(11,7);
14398      UInt sh2  = INSN(6,5);
14399      UInt bU   = INSN(23,23);
14400      if (rM != 15) {
14401         IRExpr* eaE = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
14402                                                       sh2, imm5, dis_buf);
14403         IRTemp eaT = newTemp(Ity_I32);
14404         /* Bind eaE to a temp merely for debugging-vex purposes, so we
14405            can check it's a plausible decoding.  It will get removed
14406            by iropt a little later on. */
14407         vassert(eaE);
14408         assign(eaT, eaE);
14409         DIP("pld %s\n", dis_buf);
14410         return True;
14411      }
14412      /* fall through */
14413   }
14414
14415   /* ------------------------ pli ------------------------ */
14416   if (BITS8(0,1,0,0, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
14417       && BITS4(1,1,1,1) == INSN(15,12)) {
14418      UInt rN    = INSN(19,16);
14419      UInt imm12 = INSN(11,0);
14420      UInt bU    = INSN(23,23);
14421      DIP("pli [r%u, #%c%u]\n", rN, bU ? '+' : '-', imm12);
14422      return True;
14423   }
14424
14425   /* --------------------- Interworking branches --------------------- */
14426
14427   // BLX (1), viz, unconditional branch and link to R15+simm24
14428   // and set CPSR.T = 1, that is, switch to Thumb mode
14429   if (INSN(31,25) == BITS7(1,1,1,1,1,0,1)) {
14430      UInt bitH   = INSN(24,24);
14431      Int  uimm24 = INSN(23,0);
14432      Int  simm24 = (((uimm24 << 8) >> 8) << 2) + (bitH << 1);
14433      /* Now this is a bit tricky.  Since we're decoding an ARM insn,
14434         it is implies that CPSR.T == 0.  Hence the current insn's
14435         address is guaranteed to be of the form X--(30)--X00.  So, no
14436         need to mask any bits off it.  But need to set the lowest bit
14437         to 1 to denote we're in Thumb mode after this, since
14438         guest_R15T has CPSR.T as the lowest bit.  And we can't chase
14439         into the call, so end the block at this point. */
14440      UInt dst = guest_R15_curr_instr_notENC + 8 + (simm24 | 1);
14441      putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
14442                    IRTemp_INVALID/*because AL*/, Ijk_Boring );
14443      llPutIReg(15, mkU32(dst));
14444      dres->jk_StopHere = Ijk_Call;
14445      dres->whatNext    = Dis_StopHere;
14446      DIP("blx 0x%x (and switch to Thumb mode)\n", dst - 1);
14447      return True;
14448   }
14449
14450   /* ------------------- v7 barrier insns ------------------- */
14451   switch (insn) {
14452      case 0xF57FF06F: /* ISB */
14453         stmt( IRStmt_MBE(Imbe_Fence) );
14454         DIP("ISB\n");
14455         return True;
14456      case 0xF57FF04F: /* DSB sy */
14457      case 0xF57FF04E: /* DSB st */
14458      case 0xF57FF04B: /* DSB ish */
14459      case 0xF57FF04A: /* DSB ishst */
14460      case 0xF57FF047: /* DSB nsh */
14461      case 0xF57FF046: /* DSB nshst */
14462      case 0xF57FF043: /* DSB osh */
14463      case 0xF57FF042: /* DSB oshst */
14464         stmt( IRStmt_MBE(Imbe_Fence) );
14465         DIP("DSB\n");
14466         return True;
14467      case 0xF57FF05F: /* DMB sy */
14468      case 0xF57FF05E: /* DMB st */
14469      case 0xF57FF05B: /* DMB ish */
14470      case 0xF57FF05A: /* DMB ishst */
14471      case 0xF57FF057: /* DMB nsh */
14472      case 0xF57FF056: /* DMB nshst */
14473      case 0xF57FF053: /* DMB osh */
14474      case 0xF57FF052: /* DMB oshst */
14475         stmt( IRStmt_MBE(Imbe_Fence) );
14476         DIP("DMB\n");
14477         return True;
14478      default:
14479         break;
14480   }
14481
14482   /* ------------------- CLREX ------------------ */
14483   if (insn == 0xF57FF01F) {
14484      /* AFAICS, this simply cancels a (all?) reservations made by a
14485         (any?) preceding LDREX(es).  Arrange to hand it through to
14486         the back end. */
14487      stmt( IRStmt_MBE(Imbe_CancelReservation) );
14488      DIP("clrex\n");
14489      return True;
14490   }
14491
14492   /* ------------------- NEON ------------------- */
14493   if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
14494      Bool ok_neon = decode_NEON_instruction(
14495                        dres, insn, IRTemp_INVALID/*unconditional*/,
14496                        False/*!isT*/
14497                     );
14498      if (ok_neon)
14499         return True;
14500   }
14501
14502   // unrecognised
14503   return False;
14504
14505#  undef INSN_COND
14506#  undef INSN
14507}
14508
14509
14510/*------------------------------------------------------------*/
14511/*--- Disassemble a single ARM instruction                 ---*/
14512/*------------------------------------------------------------*/
14513
14514/* Disassemble a single ARM instruction into IR.  The instruction is
14515   located in host memory at guest_instr, and has (decoded) guest IP
14516   of guest_R15_curr_instr_notENC, which will have been set before the
14517   call here. */
14518
14519static
14520DisResult disInstr_ARM_WRK (
14521             Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
14522             Bool         resteerCisOk,
14523             void*        callback_opaque,
14524             UChar*       guest_instr,
14525             VexArchInfo* archinfo,
14526             VexAbiInfo*  abiinfo,
14527             Bool         sigill_diag
14528          )
14529{
14530   // A macro to fish bits out of 'insn'.
14531#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
14532#  define INSN_COND          SLICE_UInt(insn, 31, 28)
14533
14534   DisResult dres;
14535   UInt      insn;
14536   //Bool      allow_VFP = False;
14537   //UInt      hwcaps = archinfo->hwcaps;
14538   IRTemp    condT; /* :: Ity_I32 */
14539   UInt      summary;
14540   HChar     dis_buf[128];  // big enough to hold LDMIA etc text
14541
14542   /* What insn variants are we supporting today? */
14543   //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
14544   // etc etc
14545
14546   /* Set result defaults. */
14547   dres.whatNext    = Dis_Continue;
14548   dres.len         = 4;
14549   dres.continueAt  = 0;
14550   dres.jk_StopHere = Ijk_INVALID;
14551
14552   /* Set default actions for post-insn handling of writes to r15, if
14553      required. */
14554   r15written = False;
14555   r15guard   = IRTemp_INVALID; /* unconditional */
14556   r15kind    = Ijk_Boring;
14557
14558   /* At least this is simple on ARM: insns are all 4 bytes long, and
14559      4-aligned.  So just fish the whole thing out of memory right now
14560      and have done. */
14561   insn = getUIntLittleEndianly( guest_instr );
14562
14563   if (0) vex_printf("insn: 0x%x\n", insn);
14564
14565   DIP("\t(arm) 0x%x:  ", (UInt)guest_R15_curr_instr_notENC);
14566
14567   vassert(0 == (guest_R15_curr_instr_notENC & 3));
14568
14569   /* ----------------------------------------------------------- */
14570
14571   /* Spot "Special" instructions (see comment at top of file). */
14572   {
14573      UChar* code = (UChar*)guest_instr;
14574      /* Spot the 16-byte preamble:
14575
14576         e1a0c1ec  mov r12, r12, ROR #3
14577         e1a0c6ec  mov r12, r12, ROR #13
14578         e1a0ceec  mov r12, r12, ROR #29
14579         e1a0c9ec  mov r12, r12, ROR #19
14580      */
14581      UInt word1 = 0xE1A0C1EC;
14582      UInt word2 = 0xE1A0C6EC;
14583      UInt word3 = 0xE1A0CEEC;
14584      UInt word4 = 0xE1A0C9EC;
14585      if (getUIntLittleEndianly(code+ 0) == word1 &&
14586          getUIntLittleEndianly(code+ 4) == word2 &&
14587          getUIntLittleEndianly(code+ 8) == word3 &&
14588          getUIntLittleEndianly(code+12) == word4) {
14589         /* Got a "Special" instruction preamble.  Which one is it? */
14590         if (getUIntLittleEndianly(code+16) == 0xE18AA00A
14591                                               /* orr r10,r10,r10 */) {
14592            /* R3 = client_request ( R4 ) */
14593            DIP("r3 = client_request ( %%r4 )\n");
14594            llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
14595            dres.jk_StopHere = Ijk_ClientReq;
14596            dres.whatNext    = Dis_StopHere;
14597            goto decode_success;
14598         }
14599         else
14600         if (getUIntLittleEndianly(code+16) == 0xE18BB00B
14601                                               /* orr r11,r11,r11 */) {
14602            /* R3 = guest_NRADDR */
14603            DIP("r3 = guest_NRADDR\n");
14604            dres.len = 20;
14605            llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
14606            goto decode_success;
14607         }
14608         else
14609         if (getUIntLittleEndianly(code+16) == 0xE18CC00C
14610                                               /* orr r12,r12,r12 */) {
14611            /*  branch-and-link-to-noredir R4 */
14612            DIP("branch-and-link-to-noredir r4\n");
14613            llPutIReg(14, mkU32( guest_R15_curr_instr_notENC + 20) );
14614            llPutIReg(15, llGetIReg(4));
14615            dres.jk_StopHere = Ijk_NoRedir;
14616            dres.whatNext    = Dis_StopHere;
14617            goto decode_success;
14618         }
14619         else
14620         if (getUIntLittleEndianly(code+16) == 0xE1899009
14621                                               /* orr r9,r9,r9 */) {
14622            /* IR injection */
14623            DIP("IR injection\n");
14624            vex_inject_ir(irsb, Iend_LE);
14625            // Invalidate the current insn. The reason is that the IRop we're
14626            // injecting here can change. In which case the translation has to
14627            // be redone. For ease of handling, we simply invalidate all the
14628            // time.
14629            stmt(IRStmt_Put(OFFB_TISTART, mkU32(guest_R15_curr_instr_notENC)));
14630            stmt(IRStmt_Put(OFFB_TILEN,   mkU32(20)));
14631            llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
14632            dres.whatNext    = Dis_StopHere;
14633            dres.jk_StopHere = Ijk_TInval;
14634            goto decode_success;
14635         }
14636         /* We don't know what it is.  Set opc1/opc2 so decode_failure
14637            can print the insn following the Special-insn preamble. */
14638         insn = getUIntLittleEndianly(code+16);
14639         goto decode_failure;
14640         /*NOTREACHED*/
14641      }
14642
14643   }
14644
14645   /* ----------------------------------------------------------- */
14646
14647   /* Main ARM instruction decoder starts here. */
14648
14649   /* Deal with the condition.  Strategy is to merely generate a
14650      condition temporary at this point (or IRTemp_INVALID, meaning
14651      unconditional).  We leave it to lower-level instruction decoders
14652      to decide whether they can generate straight-line code, or
14653      whether they must generate a side exit before the instruction.
14654      condT :: Ity_I32 and is always either zero or one. */
14655   condT = IRTemp_INVALID;
14656   switch ( (ARMCondcode)INSN_COND ) {
14657      case ARMCondNV: {
14658         // Illegal instruction prior to v5 (see ARM ARM A3-5), but
14659         // some cases are acceptable
14660         Bool ok = decode_NV_instruction(&dres, archinfo, insn);
14661         if (ok)
14662            goto decode_success;
14663         else
14664            goto decode_failure;
14665      }
14666      case ARMCondAL: // Always executed
14667         break;
14668      case ARMCondEQ: case ARMCondNE: case ARMCondHS: case ARMCondLO:
14669      case ARMCondMI: case ARMCondPL: case ARMCondVS: case ARMCondVC:
14670      case ARMCondHI: case ARMCondLS: case ARMCondGE: case ARMCondLT:
14671      case ARMCondGT: case ARMCondLE:
14672         condT = newTemp(Ity_I32);
14673         assign( condT, mk_armg_calculate_condition( INSN_COND ));
14674         break;
14675   }
14676
14677   /* ----------------------------------------------------------- */
14678   /* -- ARMv5 integer instructions                            -- */
14679   /* ----------------------------------------------------------- */
14680
14681   /* ---------------- Data processing ops ------------------- */
14682
14683   if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0))
14684       && !(INSN(25,25) == 0 && INSN(7,7) == 1 && INSN(4,4) == 1)) {
14685      IRTemp  shop = IRTemp_INVALID; /* shifter operand */
14686      IRTemp  shco = IRTemp_INVALID; /* shifter carry out */
14687      UInt    rD   = (insn >> 12) & 0xF; /* 15:12 */
14688      UInt    rN   = (insn >> 16) & 0xF; /* 19:16 */
14689      UInt    bitS = (insn >> 20) & 1; /* 20:20 */
14690      IRTemp  rNt  = IRTemp_INVALID;
14691      IRTemp  res  = IRTemp_INVALID;
14692      IRTemp  oldV = IRTemp_INVALID;
14693      IRTemp  oldC = IRTemp_INVALID;
14694      const HChar*  name = NULL;
14695      IROp    op   = Iop_INVALID;
14696      Bool    ok;
14697
14698      switch (INSN(24,21)) {
14699
14700         /* --------- ADD, SUB, AND, OR --------- */
14701         case BITS4(0,1,0,0): /* ADD:  Rd = Rn + shifter_operand */
14702            name = "add"; op = Iop_Add32; goto rd_eq_rn_op_SO;
14703         case BITS4(0,0,1,0): /* SUB:  Rd = Rn - shifter_operand */
14704            name = "sub"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
14705         case BITS4(0,0,1,1): /* RSB:  Rd = shifter_operand - Rn */
14706            name = "rsb"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
14707         case BITS4(0,0,0,0): /* AND:  Rd = Rn & shifter_operand */
14708            name = "and"; op = Iop_And32; goto rd_eq_rn_op_SO;
14709         case BITS4(1,1,0,0): /* OR:   Rd = Rn | shifter_operand */
14710            name = "orr"; op = Iop_Or32; goto rd_eq_rn_op_SO;
14711         case BITS4(0,0,0,1): /* EOR:  Rd = Rn ^ shifter_operand */
14712            name = "eor"; op = Iop_Xor32; goto rd_eq_rn_op_SO;
14713         case BITS4(1,1,1,0): /* BIC:  Rd = Rn & ~shifter_operand */
14714            name = "bic"; op = Iop_And32; goto rd_eq_rn_op_SO;
14715         rd_eq_rn_op_SO: {
14716            Bool isRSB = False;
14717            Bool isBIC = False;
14718            switch (INSN(24,21)) {
14719               case BITS4(0,0,1,1):
14720                  vassert(op == Iop_Sub32); isRSB = True; break;
14721               case BITS4(1,1,1,0):
14722                  vassert(op == Iop_And32); isBIC = True; break;
14723               default:
14724                  break;
14725            }
14726            rNt = newTemp(Ity_I32);
14727            assign(rNt, getIRegA(rN));
14728            ok = mk_shifter_operand(
14729                    INSN(25,25), INSN(11,0),
14730                    &shop, bitS ? &shco : NULL, dis_buf
14731                 );
14732            if (!ok)
14733               break;
14734            res = newTemp(Ity_I32);
14735            // compute the main result
14736            if (isRSB) {
14737               // reverse-subtract: shifter_operand - Rn
14738               vassert(op == Iop_Sub32);
14739               assign(res, binop(op, mkexpr(shop), mkexpr(rNt)) );
14740            } else if (isBIC) {
14741               // andn: shifter_operand & ~Rn
14742               vassert(op == Iop_And32);
14743               assign(res, binop(op, mkexpr(rNt),
14744                                     unop(Iop_Not32, mkexpr(shop))) );
14745            } else {
14746               // normal: Rn op shifter_operand
14747               assign(res, binop(op, mkexpr(rNt), mkexpr(shop)) );
14748            }
14749            // but don't commit it until after we've finished
14750            // all necessary reads from the guest state
14751            if (bitS
14752                && (op == Iop_And32 || op == Iop_Or32 || op == Iop_Xor32)) {
14753               oldV = newTemp(Ity_I32);
14754               assign( oldV, mk_armg_calculate_flag_v() );
14755            }
14756            // can't safely read guest state after here
14757            // now safe to put the main result
14758            putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
14759            // XXXX!! not safe to read any guest state after
14760            // this point (I think the code below doesn't do that).
14761            if (!bitS)
14762               vassert(shco == IRTemp_INVALID);
14763            /* Update the flags thunk if necessary */
14764            if (bitS) {
14765               vassert(shco != IRTemp_INVALID);
14766               switch (op) {
14767                  case Iop_Add32:
14768                     setFlags_D1_D2( ARMG_CC_OP_ADD, rNt, shop, condT );
14769                     break;
14770                  case Iop_Sub32:
14771                     if (isRSB) {
14772                        setFlags_D1_D2( ARMG_CC_OP_SUB, shop, rNt, condT );
14773                     } else {
14774                        setFlags_D1_D2( ARMG_CC_OP_SUB, rNt, shop, condT );
14775                     }
14776                     break;
14777                  case Iop_And32: /* BIC and AND set the flags the same */
14778                  case Iop_Or32:
14779                  case Iop_Xor32:
14780                     // oldV has been read just above
14781                     setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
14782                                        res, shco, oldV, condT );
14783                     break;
14784                  default:
14785                     vassert(0);
14786               }
14787            }
14788            DIP("%s%s%s r%u, r%u, %s\n",
14789                name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
14790            goto decode_success;
14791         }
14792
14793         /* --------- MOV, MVN --------- */
14794         case BITS4(1,1,0,1):   /* MOV: Rd = shifter_operand */
14795         case BITS4(1,1,1,1): { /* MVN: Rd = not(shifter_operand) */
14796            Bool isMVN = INSN(24,21) == BITS4(1,1,1,1);
14797            IRTemp jk = Ijk_Boring;
14798            if (rN != 0)
14799               break; /* rN must be zero */
14800            ok = mk_shifter_operand(
14801                    INSN(25,25), INSN(11,0),
14802                    &shop, bitS ? &shco : NULL, dis_buf
14803                 );
14804            if (!ok)
14805               break;
14806            res = newTemp(Ity_I32);
14807            assign( res, isMVN ? unop(Iop_Not32, mkexpr(shop))
14808                               : mkexpr(shop) );
14809            if (bitS) {
14810               vassert(shco != IRTemp_INVALID);
14811               oldV = newTemp(Ity_I32);
14812               assign( oldV, mk_armg_calculate_flag_v() );
14813            } else {
14814               vassert(shco == IRTemp_INVALID);
14815            }
14816            /* According to the Cortex A8 TRM Sec. 5.2.1, MOV PC, r14 is a
14817                return for purposes of branch prediction. */
14818            if (!isMVN && INSN(11,0) == 14) {
14819              jk = Ijk_Ret;
14820            }
14821            // can't safely read guest state after here
14822            putIRegA( rD, mkexpr(res), condT, jk );
14823            /* Update the flags thunk if necessary */
14824            if (bitS) {
14825               setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
14826                                  res, shco, oldV, condT );
14827            }
14828            DIP("%s%s%s r%u, %s\n",
14829                isMVN ? "mvn" : "mov",
14830                nCC(INSN_COND), bitS ? "s" : "", rD, dis_buf );
14831            goto decode_success;
14832         }
14833
14834         /* --------- CMP --------- */
14835         case BITS4(1,0,1,0):   /* CMP:  (void) Rn - shifter_operand */
14836         case BITS4(1,0,1,1): { /* CMN:  (void) Rn + shifter_operand */
14837            Bool isCMN = INSN(24,21) == BITS4(1,0,1,1);
14838            if (rD != 0)
14839               break; /* rD must be zero */
14840            if (bitS == 0)
14841               break; /* if S (bit 20) is not set, it's not CMP/CMN */
14842            rNt = newTemp(Ity_I32);
14843            assign(rNt, getIRegA(rN));
14844            ok = mk_shifter_operand(
14845                    INSN(25,25), INSN(11,0),
14846                    &shop, NULL, dis_buf
14847                 );
14848            if (!ok)
14849               break;
14850            // can't safely read guest state after here
14851            /* Update the flags thunk. */
14852            setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
14853                            rNt, shop, condT );
14854            DIP("%s%s r%u, %s\n",
14855                isCMN ? "cmn" : "cmp",
14856                nCC(INSN_COND), rN, dis_buf );
14857            goto decode_success;
14858         }
14859
14860         /* --------- TST --------- */
14861         case BITS4(1,0,0,0):   /* TST:  (void) Rn & shifter_operand */
14862         case BITS4(1,0,0,1): { /* TEQ:  (void) Rn ^ shifter_operand */
14863            Bool isTEQ = INSN(24,21) == BITS4(1,0,0,1);
14864            if (rD != 0)
14865               break; /* rD must be zero */
14866            if (bitS == 0)
14867               break; /* if S (bit 20) is not set, it's not TST/TEQ */
14868            rNt = newTemp(Ity_I32);
14869            assign(rNt, getIRegA(rN));
14870            ok = mk_shifter_operand(
14871                    INSN(25,25), INSN(11,0),
14872                    &shop, &shco, dis_buf
14873                 );
14874            if (!ok)
14875               break;
14876            /* Update the flags thunk. */
14877            res = newTemp(Ity_I32);
14878            assign( res, binop(isTEQ ? Iop_Xor32 : Iop_And32,
14879                               mkexpr(rNt), mkexpr(shop)) );
14880            oldV = newTemp(Ity_I32);
14881            assign( oldV, mk_armg_calculate_flag_v() );
14882            // can't safely read guest state after here
14883            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
14884                               res, shco, oldV, condT );
14885            DIP("%s%s r%u, %s\n",
14886                isTEQ ? "teq" : "tst",
14887                nCC(INSN_COND), rN, dis_buf );
14888            goto decode_success;
14889         }
14890
14891         /* --------- ADC, SBC, RSC --------- */
14892         case BITS4(0,1,0,1): /* ADC:  Rd = Rn + shifter_operand + oldC */
14893            name = "adc"; goto rd_eq_rn_op_SO_op_oldC;
14894         case BITS4(0,1,1,0): /* SBC:  Rd = Rn - shifter_operand - (oldC ^ 1) */
14895            name = "sbc"; goto rd_eq_rn_op_SO_op_oldC;
14896         case BITS4(0,1,1,1): /* RSC:  Rd = shifter_operand - Rn - (oldC ^ 1) */
14897            name = "rsc"; goto rd_eq_rn_op_SO_op_oldC;
14898         rd_eq_rn_op_SO_op_oldC: {
14899            // FIXME: shco isn't used for anything.  Get rid of it.
14900            rNt = newTemp(Ity_I32);
14901            assign(rNt, getIRegA(rN));
14902            ok = mk_shifter_operand(
14903                    INSN(25,25), INSN(11,0),
14904                    &shop, bitS ? &shco : NULL, dis_buf
14905                 );
14906            if (!ok)
14907               break;
14908            oldC = newTemp(Ity_I32);
14909            assign( oldC, mk_armg_calculate_flag_c() );
14910            res = newTemp(Ity_I32);
14911            // compute the main result
14912            switch (INSN(24,21)) {
14913               case BITS4(0,1,0,1): /* ADC */
14914                  assign(res,
14915                         binop(Iop_Add32,
14916                               binop(Iop_Add32, mkexpr(rNt), mkexpr(shop)),
14917                               mkexpr(oldC) ));
14918                  break;
14919               case BITS4(0,1,1,0): /* SBC */
14920                  assign(res,
14921                         binop(Iop_Sub32,
14922                               binop(Iop_Sub32, mkexpr(rNt), mkexpr(shop)),
14923                               binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
14924                  break;
14925               case BITS4(0,1,1,1): /* RSC */
14926                  assign(res,
14927                         binop(Iop_Sub32,
14928                               binop(Iop_Sub32, mkexpr(shop), mkexpr(rNt)),
14929                               binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
14930                  break;
14931               default:
14932                  vassert(0);
14933            }
14934            // but don't commit it until after we've finished
14935            // all necessary reads from the guest state
14936            // now safe to put the main result
14937            putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
14938            // XXXX!! not safe to read any guest state after
14939            // this point (I think the code below doesn't do that).
14940            if (!bitS)
14941               vassert(shco == IRTemp_INVALID);
14942            /* Update the flags thunk if necessary */
14943            if (bitS) {
14944               vassert(shco != IRTemp_INVALID);
14945               switch (INSN(24,21)) {
14946                  case BITS4(0,1,0,1): /* ADC */
14947                     setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
14948                                        rNt, shop, oldC, condT );
14949                     break;
14950                  case BITS4(0,1,1,0): /* SBC */
14951                     setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
14952                                        rNt, shop, oldC, condT );
14953                     break;
14954                  case BITS4(0,1,1,1): /* RSC */
14955                     setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
14956                                        shop, rNt, oldC, condT );
14957                     break;
14958                  default:
14959                     vassert(0);
14960               }
14961            }
14962            DIP("%s%s%s r%u, r%u, %s\n",
14963                name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
14964            goto decode_success;
14965         }
14966
14967         default:
14968            vassert(0);
14969      }
14970   } /* if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0)) */
14971
14972   /* --------------------- Load/store (ubyte & word) -------- */
14973   // LDR STR LDRB STRB
14974   /*                 31   27   23   19 15 11    6   4 3  # highest bit
14975                        28   24   20 16 12
14976      A5-20   1 | 16  cond 0101 UB0L Rn Rd imm12
14977      A5-22   1 | 32  cond 0111 UBOL Rn Rd imm5  sh2 0 Rm
14978      A5-24   2 | 16  cond 0101 UB1L Rn Rd imm12
14979      A5-26   2 | 32  cond 0111 UB1L Rn Rd imm5  sh2 0 Rm
14980      A5-28   3 | 16  cond 0100 UB0L Rn Rd imm12
14981      A5-32   3 | 32  cond 0110 UB0L Rn Rd imm5  sh2 0 Rm
14982   */
14983   /* case coding:
14984             1   at-ea               (access at ea)
14985             2   at-ea-then-upd      (access at ea, then Rn = ea)
14986             3   at-Rn-then-upd      (access at Rn, then Rn = ea)
14987      ea coding
14988             16  Rn +/- imm12
14989             32  Rn +/- Rm sh2 imm5
14990   */
14991   /* Quickly skip over all of this for hopefully most instructions */
14992   if ((INSN(27,24) & BITS4(1,1,0,0)) != BITS4(0,1,0,0))
14993      goto after_load_store_ubyte_or_word;
14994
14995   summary = 0;
14996
14997   /**/ if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 0) {
14998      summary = 1 | 16;
14999   }
15000   else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 0
15001                                          && INSN(4,4) == 0) {
15002      summary = 1 | 32;
15003   }
15004   else if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 1) {
15005      summary = 2 | 16;
15006   }
15007   else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 1
15008                                          && INSN(4,4) == 0) {
15009      summary = 2 | 32;
15010   }
15011   else if (INSN(27,24) == BITS4(0,1,0,0) && INSN(21,21) == 0) {
15012      summary = 3 | 16;
15013   }
15014   else if (INSN(27,24) == BITS4(0,1,1,0) && INSN(21,21) == 0
15015                                          && INSN(4,4) == 0) {
15016      summary = 3 | 32;
15017   }
15018   else goto after_load_store_ubyte_or_word;
15019
15020   { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
15021     UInt rD = (insn >> 12) & 0xF; /* 15:12 */
15022     UInt rM = (insn >> 0)  & 0xF; /*  3:0  */
15023     UInt bU = (insn >> 23) & 1;      /* 23 */
15024     UInt bB = (insn >> 22) & 1;      /* 22 */
15025     UInt bL = (insn >> 20) & 1;      /* 20 */
15026     UInt imm12 = (insn >> 0) & 0xFFF; /* 11:0 */
15027     UInt imm5  = (insn >> 7) & 0x1F;  /* 11:7 */
15028     UInt sh2   = (insn >> 5) & 3;     /* 6:5 */
15029
15030     /* Skip some invalid cases, which would lead to two competing
15031        updates to the same register, or which are otherwise
15032        disallowed by the spec. */
15033     switch (summary) {
15034        case 1 | 16:
15035           break;
15036        case 1 | 32:
15037           if (rM == 15) goto after_load_store_ubyte_or_word;
15038           break;
15039        case 2 | 16: case 3 | 16:
15040           if (rN == 15) goto after_load_store_ubyte_or_word;
15041           if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
15042           break;
15043        case 2 | 32: case 3 | 32:
15044           if (rM == 15) goto after_load_store_ubyte_or_word;
15045           if (rN == 15) goto after_load_store_ubyte_or_word;
15046           if (rN == rM) goto after_load_store_ubyte_or_word;
15047           if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
15048           break;
15049        default:
15050           vassert(0);
15051     }
15052
15053     /* compute the effective address.  Bind it to a tmp since we
15054        may need to use it twice. */
15055     IRExpr* eaE = NULL;
15056     switch (summary & 0xF0) {
15057        case 16:
15058           eaE = mk_EA_reg_plusminus_imm12( rN, bU, imm12, dis_buf );
15059           break;
15060        case 32:
15061           eaE = mk_EA_reg_plusminus_shifted_reg( rN, bU, rM, sh2, imm5,
15062                                                  dis_buf );
15063           break;
15064     }
15065     vassert(eaE);
15066     IRTemp eaT = newTemp(Ity_I32);
15067     assign(eaT, eaE);
15068
15069     /* get the old Rn value */
15070     IRTemp rnT = newTemp(Ity_I32);
15071     assign(rnT, getIRegA(rN));
15072
15073     /* decide on the transfer address */
15074     IRTemp taT = IRTemp_INVALID;
15075     switch (summary & 0x0F) {
15076        case 1: case 2: taT = eaT; break;
15077        case 3:         taT = rnT; break;
15078     }
15079     vassert(taT != IRTemp_INVALID);
15080
15081     if (bL == 0) {
15082       /* Store.  If necessary, update the base register before the
15083          store itself, so that the common idiom of "str rX, [sp,
15084          #-4]!" (store rX at sp-4, then do new sp = sp-4, a.k.a "push
15085          rX") doesn't cause Memcheck to complain that the access is
15086          below the stack pointer.  Also, not updating sp before the
15087          store confuses Valgrind's dynamic stack-extending logic.  So
15088          do it before the store.  Hence we need to snarf the store
15089          data before doing the basereg update. */
15090
15091        /* get hold of the data to be stored */
15092        IRTemp rDt = newTemp(Ity_I32);
15093        assign(rDt, getIRegA(rD));
15094
15095        /* Update Rn if necessary. */
15096        switch (summary & 0x0F) {
15097           case 2: case 3:
15098              putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
15099              break;
15100        }
15101
15102        /* generate the transfer */
15103        if (bB == 0) { // word store
15104           storeGuardedLE( mkexpr(taT), mkexpr(rDt), condT );
15105        } else { // byte store
15106           vassert(bB == 1);
15107           storeGuardedLE( mkexpr(taT), unop(Iop_32to8, mkexpr(rDt)), condT );
15108        }
15109
15110     } else {
15111        /* Load */
15112        vassert(bL == 1);
15113
15114        /* generate the transfer */
15115        if (bB == 0) { // word load
15116           IRTemp jk = Ijk_Boring;
15117           /* According to the Cortex A8 TRM Sec. 5.2.1, LDR(1) with r13 as the
15118               base register and PC as the destination register is a return for
15119               purposes of branch prediction.
15120              The ARM ARM Sec. C9.10.1 further specifies that it must use a
15121               post-increment by immediate addressing mode to be counted in
15122               event 0x0E (Procedure return).*/
15123           if (rN == 13 && summary == (3 | 16) && bB == 0) {
15124              jk = Ijk_Ret;
15125           }
15126           IRTemp tD = newTemp(Ity_I32);
15127           loadGuardedLE( tD, ILGop_Ident32,
15128                          mkexpr(taT), llGetIReg(rD), condT );
15129           /* "rD == 15 ? condT : IRTemp_INVALID": simply
15130              IRTemp_INVALID would be correct in all cases here, and
15131              for the non-r15 case it generates better code, by
15132              avoiding two tests of the cond (since it is already
15133              tested by loadGuardedLE).  However, the logic at the end
15134              of this function, that deals with writes to r15, has an
15135              optimisation which depends on seeing whether or not the
15136              write is conditional.  Hence in this particular case we
15137              let it "see" the guard condition. */
15138           putIRegA( rD, mkexpr(tD),
15139                     rD == 15 ? condT : IRTemp_INVALID, jk );
15140        } else { // byte load
15141           vassert(bB == 1);
15142           IRTemp tD = newTemp(Ity_I32);
15143           loadGuardedLE( tD, ILGop_8Uto32, mkexpr(taT), llGetIReg(rD), condT );
15144           /* No point in similar 3rd arg complexity here, since we
15145              can't sanely write anything to r15 like this. */
15146           putIRegA( rD, mkexpr(tD), IRTemp_INVALID, Ijk_Boring );
15147        }
15148
15149        /* Update Rn if necessary. */
15150        switch (summary & 0x0F) {
15151           case 2: case 3:
15152              // should be assured by logic above:
15153              if (bL == 1)
15154                 vassert(rD != rN); /* since we just wrote rD */
15155              putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
15156              break;
15157        }
15158     }
15159
15160     switch (summary & 0x0F) {
15161        case 1:  DIP("%sr%s%s r%u, %s\n",
15162                     bL == 0 ? "st" : "ld",
15163                     bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
15164                 break;
15165        case 2:  DIP("%sr%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
15166                     bL == 0 ? "st" : "ld",
15167                     bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
15168                 break;
15169        case 3:  DIP("%sr%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
15170                     bL == 0 ? "st" : "ld",
15171                     bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
15172                 break;
15173        default: vassert(0);
15174     }
15175
15176     /* XXX deal with alignment constraints */
15177
15178     goto decode_success;
15179
15180     /* Complications:
15181
15182        For all loads: if the Amode specifies base register
15183        writeback, and the same register is specified for Rd and Rn,
15184        the results are UNPREDICTABLE.
15185
15186        For all loads and stores: if R15 is written, branch to
15187        that address afterwards.
15188
15189        STRB: straightforward
15190        LDRB: loaded data is zero extended
15191        STR:  lowest 2 bits of address are ignored
15192        LDR:  if the lowest 2 bits of the address are nonzero
15193              then the loaded value is rotated right by 8 * the lowest 2 bits
15194     */
15195   }
15196
15197  after_load_store_ubyte_or_word:
15198
15199   /* --------------------- Load/store (sbyte & hword) -------- */
15200   // LDRH LDRSH STRH LDRSB
        // This clause handles the halfword store and the unsigned-halfword,
        // signed-halfword and signed-byte loads.  If the encoding matches
        // and passes the validity checks below, IR is emitted and control
        // goes to decode_success.  Every rejection path jumps to
        // after_load_store_sbyte_or_hword before any IR has been emitted.
15201   /*                 31   27   23   19 15 11   7    3     # highest bit
15202                        28   24   20 16 12    8    4    0
15203      A5-36   1 | 16  cond 0001 U10L Rn Rd im4h 1SH1 im4l
15204      A5-38   1 | 32  cond 0001 U00L Rn Rd 0000 1SH1 Rm
15205      A5-40   2 | 16  cond 0001 U11L Rn Rd im4h 1SH1 im4l
15206      A5-42   2 | 32  cond 0001 U01L Rn Rd 0000 1SH1 Rm
15207      A5-44   3 | 16  cond 0000 U10L Rn Rd im4h 1SH1 im4l
15208      A5-46   3 | 32  cond 0000 U00L Rn Rd 0000 1SH1 Rm
15209   */
15210   /* case coding:
15211             1   at-ea               (access at ea)
15212             2   at-ea-then-upd      (access at ea, then Rn = ea)
15213             3   at-Rn-then-upd      (access at Rn, then Rn = ea)
15214      ea coding
15215             16  Rn +/- imm8
15216             32  Rn +/- Rm
15217   */
15218   /* Quickly skip over all of this for hopefully most instructions */
        /* (that is: require bits 27:25 == 000; bit 24 is examined below) */
15219   if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
15220      goto after_load_store_sbyte_or_hword;
15221
15222   /* Check the "1SH1" thing. */
        /* (bits 7 and 4 must both be set; S and H are extracted later) */
15223   if ((INSN(7,4) & BITS4(1,0,0,1)) != BITS4(1,0,0,1))
15224      goto after_load_store_sbyte_or_hword;
15225
15226   summary = 0;
15227
        /* Classify the addressing mode from INSN(27,24) and bits 22:21,
           following the table above: the low nibble of summary is the case
           coding and the 16/32 bit is the ea coding.  The two combinations
           not in the table (27:24 == 0000 with bit 21 set) are rejected. */
15228   /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,0)) {
15229      summary = 1 | 16;
15230   }
15231   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,0)) {
15232      summary = 1 | 32;
15233   }
15234   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,1)) {
15235      summary = 2 | 16;
15236   }
15237   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,1)) {
15238      summary = 2 | 32;
15239   }
15240   else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(1,0)) {
15241      summary = 3 | 16;
15242   }
15243   else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(0,0)) {
15244      summary = 3 | 32;
15245   }
15246   else goto after_load_store_sbyte_or_hword;
15247
15248   { UInt rN   = (insn >> 16) & 0xF; /* 19:16 */
15249     UInt rD   = (insn >> 12) & 0xF; /* 15:12 */
15250     UInt rM   = (insn >> 0)  & 0xF; /*  3:0  */
15251     UInt bU   = (insn >> 23) & 1;   /* 23 U=1 offset+, U=0 offset- */
15252     UInt bL   = (insn >> 20) & 1;   /* 20 L=1 load, L=0 store */
15253     UInt bH   = (insn >> 5) & 1;    /* H=1 halfword, H=0 byte */
15254     UInt bS   = (insn >> 6) & 1;    /* S=1 signed, S=0 unsigned */
15255     UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
15256
15257     /* Skip combinations that are either meaningless or already
15258        handled by main word-or-unsigned-byte load-store
15259        instructions. */
15260     if (bS == 0 && bH == 0) /* "unsigned byte" */
15261        goto after_load_store_sbyte_or_hword;
15262     if (bS == 1 && bL == 0) /* "signed store" */
15263        goto after_load_store_sbyte_or_hword;
15264
15265     /* Require 11:8 == 0 for Rn +/- Rm cases */
          /* (imm8 & 0xF0 holds bits 11:8, which the table shows as 0000
             for the register-offset forms) */
15266     if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
15267        goto after_load_store_sbyte_or_hword;
15268
15269     /* Skip some invalid cases, which would lead to two competing
15270        updates to the same register, or which are otherwise
15271        disallowed by the spec. */
15272     switch (summary) {
15273        case 1 | 16:
15274           break;
15275        case 1 | 32:
15276           if (rM == 15) goto after_load_store_sbyte_or_hword;
15277           break;
15278        case 2 | 16: case 3 | 16:
15279           if (rN == 15) goto after_load_store_sbyte_or_hword;
15280           if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
15281           break;
15282        case 2 | 32: case 3 | 32:
15283           if (rM == 15) goto after_load_store_sbyte_or_hword;
15284           if (rN == 15) goto after_load_store_sbyte_or_hword;
15285           if (rN == rM) goto after_load_store_sbyte_or_hword;
15286           if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
15287           break;
15288        default:
                /* not reachable: summary is one of the six values set above */
15289           vassert(0);
15290     }
15291
15292     /* If this is a branch, make it unconditional at this point.
15293        Doing conditional branches in-line is too complex (for now).
15294        Note that you'd have to be insane to use any of these loads to
15295        do a branch, since they only load 16 bits at most, but we
15296        handle it just in case. */
15297     if (bL == 1 && rD == 15 && condT != IRTemp_INVALID) {
15298        // go uncond
15299        mk_skip_over_A32_if_cond_is_false( condT );
15300        condT = IRTemp_INVALID;
15301        // now uncond
15302     }
15303
15304     /* compute the effective address.  Bind it to a tmp since we
15305        may need to use it twice. */
15306     IRExpr* eaE = NULL;
15307     switch (summary & 0xF0) {
15308        case 16:
15309           eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
15310           break;
15311        case 32:
15312           eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
15313           break;
15314     }
15315     vassert(eaE);
15316     IRTemp eaT = newTemp(Ity_I32);
15317     assign(eaT, eaE);
15318
15319     /* get the old Rn value */
15320     IRTemp rnT = newTemp(Ity_I32);
15321     assign(rnT, getIRegA(rN));
15322
15323     /* decide on the transfer address */
15324     IRTemp taT = IRTemp_INVALID;
15325     switch (summary & 0x0F) {
15326        case 1: case 2: taT = eaT; break;
15327        case 3:         taT = rnT; break;
15328     }
15329     vassert(taT != IRTemp_INVALID);
15330
15331     /* ll previous value of rD, for dealing with conditional loads */
          /* (handed to loadGuardedLE below as its fourth argument;
             presumably the value the load yields when condT is false --
             confirm against loadGuardedLE) */
15332     IRTemp llOldRd = newTemp(Ity_I32);
15333     assign(llOldRd, llGetIReg(rD));
15334
15335     /* halfword store  H 1  L 0  S 0
15336        uhalf load      H 1  L 1  S 0
15337        shalf load      H 1  L 1  S 1
15338        sbyte load      H 0  L 1  S 1
15339     */
          /* 'name' is the mnemonic used only for the DIP trace at the end. */
15340     const HChar* name = NULL;
15341     /* generate the transfer */
          /* For the three loads, condT is passed to loadGuardedLE and the
             following write to rD is given IRTemp_INVALID as its guard. */
15342     /**/ if (bH == 1 && bL == 0 && bS == 0) { // halfword store
15343        storeGuardedLE( mkexpr(taT),
15344                        unop(Iop_32to16, getIRegA(rD)), condT );
15345        name = "strh";
15346     }
15347     else if (bH == 1 && bL == 1 && bS == 0) { // uhalf load
15348        IRTemp newRd = newTemp(Ity_I32);
15349        loadGuardedLE( newRd, ILGop_16Uto32,
15350                       mkexpr(taT), mkexpr(llOldRd), condT );
15351        putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
15352        name = "ldrh";
15353     }
15354     else if (bH == 1 && bL == 1 && bS == 1) { // shalf load
15355        IRTemp newRd = newTemp(Ity_I32);
15356        loadGuardedLE( newRd, ILGop_16Sto32,
15357                       mkexpr(taT), mkexpr(llOldRd), condT );
15358        putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
15359        name = "ldrsh";
15360     }
15361     else if (bH == 0 && bL == 1 && bS == 1) { // sbyte load
15362        IRTemp newRd = newTemp(Ity_I32);
15363        loadGuardedLE( newRd, ILGop_8Sto32,
15364                       mkexpr(taT), mkexpr(llOldRd), condT );
15365        putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
15366        name = "ldrsb";
15367     }
15368     else
15369        vassert(0); // should be assured by logic above
15370
15371     /* Update Rn if necessary. */
          /* (cases 2 and 3 are the writeback forms; the new Rn is always
             the effective address, and the write is guarded by condT) */
15372     switch (summary & 0x0F) {
15373        case 2: case 3:
15374           // should be assured by logic above:
15375           if (bL == 1)
15376              vassert(rD != rN); /* since we just wrote rD */
15377           putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
15378           break;
15379     }
15380
          /* Emit the disassembly trace; dis_buf was filled in by the
             mk_EA_* helper called above. */
15381     switch (summary & 0x0F) {
15382        case 1:  DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
15383                 break;
15384        case 2:  DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
15385                     name, nCC(INSN_COND), rD, dis_buf);
15386                 break;
15387        case 3:  DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
15388                     name, nCC(INSN_COND), rD, dis_buf);
15389                 break;
15390        default: vassert(0);
15391     }
15392
15393     /* XXX deal with alignment constraints */
15394
15395     goto decode_success;
15396
15397     /* Complications:
15398
15399        For all loads: if the Amode specifies base register
15400        writeback, and the same register is specified for Rd and Rn,
15401        the results are UNPREDICTABLE.
15402
15403        For all loads and stores: if R15 is written, branch to
15404        that address afterwards.
15405
15406        Misaligned halfword stores => Unpredictable
15407        Misaligned halfword loads  => Unpredictable
15408     */
15409   }
15410
15411  after_load_store_sbyte_or_hword:
15412
15413   /* --------------------- Load/store multiple -------------- */
15414   // LD/STMIA LD/STMIB LD/STMDA LD/STMDB
15415   // Remarkably complex and difficult to get right
        // On a match that passes the checks below, the instruction is first
        // made unconditional (a side exit is taken when the condition is
        // false) and mk_ldm_stm then generates the transfers.  Rejected
        // encodings go to after_load_store_multiple with no IR emitted.
15416   // match 27:20 as 100XX0WL
15417   if (BITS8(1,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,1,0,0))) {
15418      // A5-50 LD/STMIA  cond 1000 10WL Rn RegList
15419      // A5-51 LD/STMIB  cond 1001 10WL Rn RegList
15420      // A5-53 LD/STMDA  cond 1000 00WL Rn RegList
15421      // A5-53 LD/STMDB  cond 1001 00WL Rn RegList
15422      //                   28   24   20 16       0
15423
15424      UInt bINC    = (insn >> 23) & 1;  /* 1 = increment (IA/IB), 0 = decrement (DA/DB) */
15425      UInt bBEFORE = (insn >> 24) & 1;  /* 1 = before (IB/DB), 0 = after (IA/DA) */
15426
15427      UInt bL      = (insn >> 20) & 1;  /* load=1, store=0 */
15428      UInt bW      = (insn >> 21) & 1;  /* Rn wback=1, no wback=0 */
15429      UInt rN      = (insn >> 16) & 0xF;
15430      UInt regList = insn & 0xFFFF;     /* bits 15:0; tested below with (1 << reg) */
15431      /* Skip some invalid cases, which would lead to two competing
15432         updates to the same register, or which are otherwise
15433         disallowed by the spec.  Note the test above has required
15434         that S == 0, since that looks like a kernel-mode only thing.
15435         Done by forcing the real pattern, viz 100XXSWL to actually be
15436         100XX0WL. */
15437      if (rN == 15) goto after_load_store_multiple;
15438      // reglist can't be empty
15439      if (regList == 0) goto after_load_store_multiple;
15440      // if requested to writeback Rn, and this is a load instruction,
15441      // then Rn can't appear in RegList, since we'd have two competing
15442      // new values for Rn.  We do however accept this case for store
15443      // instructions.
15444      if (bW == 1 && bL == 1 && ((1 << rN) & regList) > 0)
15445         goto after_load_store_multiple;
15446
15447      /* Now, we can't do a conditional load or store, since that very
15448         likely will generate an exception.  So we have to take a side
15449         exit at this point if the condition is false. */
15450      if (condT != IRTemp_INVALID) {
15451         mk_skip_over_A32_if_cond_is_false( condT );
15452         condT = IRTemp_INVALID;
15453      }
15454
15455      /* Ok, now we're unconditional.  Generate the IR. */
15456      mk_ldm_stm( True/*arm*/, rN, bINC, bBEFORE, bW, bL, regList );
15457
           /* Trace as e.g. "ldmia"/"stmdb" plus condition suffix; the
              register list is printed as a raw 16-bit mask. */
15458      DIP("%sm%c%c%s r%u%s, {0x%04x}\n",
15459          bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
15460          nCC(INSN_COND),
15461          rN, bW ? "!" : "", regList);
15462
15463      goto decode_success;
15464   }
15465
15466  after_load_store_multiple:
15467
15468   /* --------------------- Control flow --------------------- */
15469   // B, BL (Branch, or Branch-and-Link, to immediate offset)
15470   //
        // Matches 27:25 == 101; bit 24 selects BL.  The target is this
        // instruction's address + 8 + (sign-extended imm24 * 4).  For BL,
        // r14 is set (under condT) to the address of the next instruction.
        // An unconditional branch either continues decoding at the target,
        // if resteerOkFn allows it, or ends the block there.  A conditional
        // branch may speculatively follow one arm (only when !link);
        // otherwise the block ends with a conditional exit to the target
        // and a fall-through to the next instruction.
15471   if (BITS8(1,0,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))) {
15472      UInt link   = (insn >> 24) & 1;
15473      UInt uimm24 = insn & ((1<<24)-1);
           /* Sign-extend the 24-bit offset and scale it by 4 entirely in
              unsigned arithmetic: XOR-ing with, and then subtracting, the
              sign bit (bit 23) gives the two's-complement value, wrapping
              at the width of UInt.  This replaces a shift-left/shift-right
              sequence on a signed Int, which shifted into the sign bit and
              left-shifted negative values (undefined behaviour in C). */
15474      UInt soff   = ((uimm24 ^ 0x800000U) - 0x800000U) << 2;
15475      UInt dst    = guest_R15_curr_instr_notENC + 8
15476                    + soff;
15477      IRJumpKind jk = link ? Ijk_Call : Ijk_Boring;
15478      if (link) {
15479         putIRegA(14, mkU32(guest_R15_curr_instr_notENC + 4),
15480                      condT, Ijk_Boring);
15481      }
15482      if (condT == IRTemp_INVALID) {
15483         /* unconditional transfer to 'dst'.  See if we can simply
15484            continue tracing at the destination. */
15485         if (resteerOkFn( callback_opaque, (Addr64)dst )) {
15486            /* yes */
15487            dres.whatNext   = Dis_ResteerU;
15488            dres.continueAt = (Addr64)dst;
15489         } else {
15490            /* no; terminate the SB at this point. */
15491            llPutIReg(15, mkU32(dst));
15492            dres.jk_StopHere = jk;
15493            dres.whatNext    = Dis_StopHere;
15494         }
15495         DIP("b%s 0x%x\n", link ? "l" : "", dst);
15496      } else {
15497         /* conditional transfer to 'dst' */
              /* 'comment' is appended to the DIP trace to record which
                 speculation, if any, was chosen. */
15498         const HChar* comment = "";
15499
15500         /* First see if we can do some speculative chasing into one
15501            arm or the other.  Be conservative and only chase if
15502            !link, that is, this is a normal conditional branch to a
15503            known destination. */
15504         if (!link
15505             && resteerCisOk
15506             && vex_control.guest_chase_cond
15507             && dst < guest_R15_curr_instr_notENC
15508             && resteerOkFn( callback_opaque, (Addr64)(Addr32)dst) ) {
15509            /* Speculation: assume this backward branch is taken.  So
15510               we need to emit a side-exit to the insn following this
15511               one, on the negation of the condition, and continue at
15512               the branch target address (dst). */
15513            stmt( IRStmt_Exit( unop(Iop_Not1,
15514                                    unop(Iop_32to1, mkexpr(condT))),
15515                               Ijk_Boring,
15516                               IRConst_U32(guest_R15_curr_instr_notENC+4),
15517                               OFFB_R15T ));
15518            dres.whatNext   = Dis_ResteerC;
15519            dres.continueAt = (Addr64)(Addr32)dst;
15520            comment = "(assumed taken)";
15521         }
15522         else
15523         if (!link
15524             && resteerCisOk
15525             && vex_control.guest_chase_cond
15526             && dst >= guest_R15_curr_instr_notENC
15527             && resteerOkFn( callback_opaque,
15528                             (Addr64)(Addr32)
15529                                     (guest_R15_curr_instr_notENC+4)) ) {
15530            /* Speculation: assume this forward branch is not taken.
15531               So we need to emit a side-exit to dst (the dest) and
15532               continue disassembling at the insn immediately
15533               following this one. */
15534            stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
15535                               Ijk_Boring,
15536                               IRConst_U32(dst),
15537                               OFFB_R15T ));
15538            dres.whatNext   = Dis_ResteerC;
15539            dres.continueAt = (Addr64)(Addr32)
15540                                      (guest_R15_curr_instr_notENC+4);
15541            comment = "(assumed not taken)";
15542         }
15543         else {
15544            /* Conservative default translation - end the block at
15545               this point. */
                 /* The side exit carries jk (Ijk_Call for BL); the
                    fall-through to the next instruction is Ijk_Boring. */
15546            stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
15547                               jk, IRConst_U32(dst), OFFB_R15T ));
15548            llPutIReg(15, mkU32(guest_R15_curr_instr_notENC + 4));
15549            dres.jk_StopHere = Ijk_Boring;
15550            dres.whatNext    = Dis_StopHere;
15551         }
15552         DIP("b%s%s 0x%x %s\n", link ? "l" : "", nCC(INSN_COND),
15553             dst, comment);
15554      }
15555      goto decode_success;
15556   }
15557
15558   // BX, BLX (Branch-and-Exchange, or Branch-Link-and-Exchange, to a register)
15559   // NB: interworking branch
        // Always ends the block: R15 is set from rM and decoding stops.
        // A conditional instance is handled by first emitting a skip over
        // the instruction when the condition is false.
15560   if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
15561       && INSN(19,12) == BITS8(1,1,1,1,1,1,1,1)
15562       && (INSN(11,4) == BITS8(1,1,1,1,0,0,1,1)
15563           || INSN(11,4) == BITS8(1,1,1,1,0,0,0,1))) {
15564      IRTemp  dst = newTemp(Ity_I32);
           /* bit 1 of INSN(11,4) (insn bit 5) is the only difference
              between the two accepted patterns: 1 = blx, 0 = bx */
15565      UInt    link = (INSN(11,4) >> 1) & 1;
15566      UInt    rM   = INSN(3,0);
15567      // we don't decode the case (link && rM == 15), as that's
15568      // Unpredictable.
15569      if (!(link && rM == 15)) {
15570         if (condT != IRTemp_INVALID) {
15571            mk_skip_over_A32_if_cond_is_false( condT );
15572         }
15573         // rM contains an interworking address exactly as we require
15574         // (with continuation CPSR.T in bit 0), so we can use it
15575         // as-is, with no masking.
              // (dst is read before r14 is written below)
15576         assign( dst, getIRegA(rM) );
15577         if (link) {
15578            putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
15579                      IRTemp_INVALID/*because AL*/, Ijk_Boring );
15580         }
15581         llPutIReg(15, mkexpr(dst));
              /* Jump kind: a call for blx; for bx, a return when the
                 target register is r14, otherwise an ordinary jump. */
15582         dres.jk_StopHere = link ? Ijk_Call
15583                                 : (rM == 14 ? Ijk_Ret : Ijk_Boring);
15584         dres.whatNext    = Dis_StopHere;
              /* condT itself was not reset above, so it still tells us
                 whether to print a condition suffix. */
15585         if (condT == IRTemp_INVALID) {
15586            DIP("b%sx r%u\n", link ? "l" : "", rM);
15587         } else {
15588            DIP("b%sx%s r%u\n", link ? "l" : "", nCC(INSN_COND), rM);
15589         }
15590         goto decode_success;
15591      }
15592      /* else: (link && rM == 15): just fall through */
15593   }
15594
15595   /* --- NB: the immediate-offset ARM interworking branch (BLX imm)
15596      is in NV space, hence is handled elsewhere by decode_NV_instruction.
15597      ---
15598   */
15599
15600   /* --------------------- Clz --------------------- */
15601   // CLZ
        // Count leading zeroes of rM into rD; the write to rD is guarded
        // by condT.
15602   if (INSN(27,20) == BITS8(0,0,0,1,0,1,1,0)
15603       && INSN(19,16) == BITS4(1,1,1,1)
15604       && INSN(11,4) == BITS8(1,1,1,1,0,0,0,1)) {
15605      UInt rD = INSN(15,12);
15606      UInt rM = INSN(3,0);
15607      IRTemp arg = newTemp(Ity_I32);
15608      IRTemp res = newTemp(Ity_I32);
15609      assign(arg, getIRegA(rM));
           /* A zero argument is special-cased to give 32 rather than being
              passed to Iop_Clz32 (presumably because Iop_Clz32 has no
              defined result for zero -- confirm against the IR docs). */
15610      assign(res, IRExpr_ITE(
15611                     binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
15612                     mkU32(32),
15613                     unop(Iop_Clz32, mkexpr(arg))
15614            ));
15615      putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
15616      DIP("clz%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
15617      goto decode_success;
15618   }
15619
15620   /* --------------------- Mul etc --------------------- */
15621   // MUL
        // rD = rM * rS (low 32 bits), optionally setting flags (bit 20).
        // Matches 27:21 == 0000000, 15:12 == 0000 and 7:4 == 1001.
        // Encodings using r15 are not decoded and fall through.
15622   if (BITS8(0,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
15623       && INSN(15,12) == BITS4(0,0,0,0)
15624       && INSN(7,4) == BITS4(1,0,0,1)) {
15625      UInt bitS = (insn >> 20) & 1; /* 20:20 */
15626      UInt rD = INSN(19,16);
15627      UInt rS = INSN(11,8);
15628      UInt rM = INSN(3,0);
15629      if (rD == 15 || rM == 15 || rS == 15) {
15630         /* Unpredictable; don't decode; fall through */
15631      } else {
15632         IRTemp argL = newTemp(Ity_I32);
15633         IRTemp argR = newTemp(Ity_I32);
15634         IRTemp res  = newTemp(Ity_I32);
15635         IRTemp oldC = IRTemp_INVALID;
15636         IRTemp oldV = IRTemp_INVALID;
15637         assign( argL, getIRegA(rM));
15638         assign( argR, getIRegA(rS));
15639         assign( res, binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) );
              /* For the flag-setting form, capture the current C and V
                 flags before any guest state is modified. */
15640         if (bitS) {
15641            oldC = newTemp(Ity_I32);
15642            assign(oldC, mk_armg_calculate_flag_c());
15643            oldV = newTemp(Ity_I32);
15644            assign(oldV, mk_armg_calculate_flag_v());
15645         }
15646         // now update guest state
15647         putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
15648         if (bitS) {
                 /* pair = (oldC << 1) | oldV, handed to the flags thunk
                    together with the result (presumably so that C and V
                    are carried through unchanged -- confirm against the
                    ARMG_CC_OP_MUL helper). */
15649            IRTemp pair = newTemp(Ity_I32);
15650            assign( pair, binop(Iop_Or32,
15651                                binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
15652                                mkexpr(oldV)) );
15653            setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
15654         }
              /* Note: the non-flag-setting form prints a space where the
                 's' would be. */
15655         DIP("mul%c%s r%u, r%u, r%u\n",
15656             bitS ? 's' : ' ', nCC(INSN_COND), rD, rM, rS);
15657         goto decode_success;
15658      }
15659      /* fall through */
15660   }
15661
15662   /* --------------------- Integer Divides --------------------- */
15663   // SDIV
        // rD = rN / rM, signed 32-bit division via Iop_DivS32; the write
        // to rD is guarded by condT.  Encodings using r15 are not decoded
        // and fall through.
        // NOTE(review): no special handling of a zero divisor is done
        // here; the result is whatever Iop_DivS32 produces.
15664   if (BITS8(0,1,1,1,0,0,0,1) == INSN(27,20)
15665       && INSN(15,12) == BITS4(1,1,1,1)
15666       && INSN(7,4) == BITS4(0,0,0,1)) {
15667      UInt rD = INSN(19,16);
15668      UInt rM = INSN(11,8);
15669      UInt rN = INSN(3,0);
15670      if (rD == 15 || rM == 15 || rN == 15) {
15671         /* Unpredictable; don't decode; fall through */
15672      } else {
15673         IRTemp res  = newTemp(Ity_I32);
15674         IRTemp argL = newTemp(Ity_I32);
15675         IRTemp argR = newTemp(Ity_I32);
15676         assign(argL, getIRegA(rN));
15677         assign(argR, getIRegA(rM));
15678         assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
15679         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
              /* Print the condition suffix, as the other clauses do, so
                 that a conditional sdiv is traced as such. */
15680         DIP("sdiv%s r%u, r%u, r%u\n", nCC(INSN_COND), rD, rN, rM);
15681         goto decode_success;
15682      }
           /* fall through */
15683   }
15684
15685   // UDIV
        // rD = rN / rM, unsigned 32-bit division via Iop_DivU32; the write
        // to rD is guarded by condT.  Encodings using r15 are not decoded
        // and fall through.
        // NOTE(review): no special handling of a zero divisor is done
        // here; the result is whatever Iop_DivU32 produces.
15686   if (BITS8(0,1,1,1,0,0,1,1) == INSN(27,20)
15687       && INSN(15,12) == BITS4(1,1,1,1)
15688       && INSN(7,4) == BITS4(0,0,0,1)) {
15689      UInt rD = INSN(19,16);
15690      UInt rM = INSN(11,8);
15691      UInt rN = INSN(3,0);
15692      if (rD == 15 || rM == 15 || rN == 15) {
15693         /* Unpredictable; don't decode; fall through */
15694      } else {
15695         IRTemp res  = newTemp(Ity_I32);
15696         IRTemp argL = newTemp(Ity_I32);
15697         IRTemp argR = newTemp(Ity_I32);
15698         assign(argL, getIRegA(rN));
15699         assign(argR, getIRegA(rM));
15700         assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
15701         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
              /* Print the condition suffix, as the other clauses do, so
                 that a conditional udiv is traced as such. */
15702         DIP("udiv%s r%u, r%u, r%u\n", nCC(INSN_COND), rD, rN, rM);
15703         goto decode_success;
15704      }
           /* fall through */
15705   }
15706
15707   // MLA, MLS
        // MLA: rD = rN + rM * rS;  MLS: rD = rN - rM * rS.
        // Matches 27:20 == 00000x1x with 7:4 == 1001, where bit 22 selects
        // MLS and bit 20 is the S (set flags) bit.  Flag-setting MLS and
        // encodings using r15 are not decoded and fall through.
15708   if (BITS8(0,0,0,0,0,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
15709       && INSN(7,4) == BITS4(1,0,0,1)) {
15710      UInt bitS  = (insn >> 20) & 1; /* 20:20 */
15711      UInt isMLS = (insn >> 22) & 1; /* 22:22 */
15712      UInt rD = INSN(19,16);
15713      UInt rN = INSN(15,12);
15714      UInt rS = INSN(11,8);
15715      UInt rM = INSN(3,0);
15716      if (bitS == 1 && isMLS == 1) {
15717         /* This isn't allowed (MLS that sets flags).  don't decode;
15718            fall through */
15719      }
15720      else
15721      if (rD == 15 || rM == 15 || rS == 15 || rN == 15) {
15722         /* Unpredictable; don't decode; fall through */
15723      } else {
15724         IRTemp argL = newTemp(Ity_I32);
15725         IRTemp argR = newTemp(Ity_I32);
15726         IRTemp argP = newTemp(Ity_I32);
15727         IRTemp res  = newTemp(Ity_I32);
15728         IRTemp oldC = IRTemp_INVALID;
15729         IRTemp oldV = IRTemp_INVALID;
15730         assign( argL, getIRegA(rM));
15731         assign( argR, getIRegA(rS));
15732         assign( argP, getIRegA(rN));
              /* res = argP +/- (argL * argR): the accumulator is the left
                 operand, so MLS computes rN - rM*rS. */
15733         assign( res, binop(isMLS ? Iop_Sub32 : Iop_Add32,
15734                            mkexpr(argP),
15735                            binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) ));
              /* For the flag-setting form, capture the current C and V
                 flags before any guest state is modified. */
15736         if (bitS) {
15737            vassert(!isMLS); // guaranteed above
15738            oldC = newTemp(Ity_I32);
15739            assign(oldC, mk_armg_calculate_flag_c());
15740            oldV = newTemp(Ity_I32);
15741            assign(oldV, mk_armg_calculate_flag_v());
15742         }
15743         // now update guest state
15744         putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
15745         if (bitS) {
                 /* pair = (oldC << 1) | oldV, handed to the flags thunk
                    together with the result, using the same
                    ARMG_CC_OP_MUL operation as plain MUL. */
15746            IRTemp pair = newTemp(Ity_I32);
15747            assign( pair, binop(Iop_Or32,
15748                                binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
15749                                mkexpr(oldV)) );
15750            setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
15751         }
              /* Traced as "mla"/"mls"; the non-flag-setting form prints a
                 space where the 's' would be. */
15752         DIP("ml%c%c%s r%u, r%u, r%u, r%u\n",
15753             isMLS ? 's' : 'a', bitS ? 's' : ' ',
15754             nCC(INSN_COND), rD, rM, rS, rN);
15755         goto decode_success;
15756      }
15757      /* fall through */
15758   }
15759
15760   // SMULL, UMULL
        // rDhi:rDlo = rM * rS as a full 64-bit product, signed or unsigned.
        // Matches 27:20 == 00001x0x with 7:4 == 1001, where bit 22 selects
        // the signed form and bit 20 is the S (set flags) bit.  Encodings
        // using r15, or with rDhi == rDlo, are not decoded and fall through.
15761   if (BITS8(0,0,0,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
15762       && INSN(7,4) == BITS4(1,0,0,1)) {
15763      UInt bitS = (insn >> 20) & 1; /* 20:20 */
15764      UInt rDhi = INSN(19,16);
15765      UInt rDlo = INSN(15,12);
15766      UInt rS   = INSN(11,8);
15767      UInt rM   = INSN(3,0);
15768      UInt isS  = (INSN(27,20) >> 2) & 1; /* 22:22 */
15769      if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo)  {
15770         /* Unpredictable; don't decode; fall through */
15771      } else {
15772         IRTemp argL  = newTemp(Ity_I32);
15773         IRTemp argR  = newTemp(Ity_I32);
15774         IRTemp res   = newTemp(Ity_I64);
15775         IRTemp resHi = newTemp(Ity_I32);
15776         IRTemp resLo = newTemp(Ity_I32);
15777         IRTemp oldC  = IRTemp_INVALID;
15778         IRTemp oldV  = IRTemp_INVALID;
15779         IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
15780         assign( argL, getIRegA(rM));
15781         assign( argR, getIRegA(rS));
              /* 64-bit product, then split into its two 32-bit halves */
15782         assign( res, binop(mulOp, mkexpr(argL), mkexpr(argR)) );
15783         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
15784         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
              /* For the flag-setting form, capture the current C and V
                 flags before any guest state is modified. */
15785         if (bitS) {
15786            oldC = newTemp(Ity_I32);
15787            assign(oldC, mk_armg_calculate_flag_c());
15788            oldV = newTemp(Ity_I32);
15789            assign(oldV, mk_armg_calculate_flag_v());
15790         }
15791         // now update guest state
15792         putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
15793         putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
15794         if (bitS) {
                 /* pair = (oldC << 1) | oldV, handed to the flags thunk
                    together with both result halves. */
15795            IRTemp pair = newTemp(Ity_I32);
15796            assign( pair, binop(Iop_Or32,
15797                                binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
15798                                mkexpr(oldV)) );
15799            setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
15800         }
              /* Traced as "smull"/"umull" with operands in the order
                 rDlo, rDhi, rM, rS; the non-flag-setting form prints a
                 space where the 's' would be. */
15801         DIP("%cmull%c%s r%u, r%u, r%u, r%u\n",
15802             isS ? 's' : 'u', bitS ? 's' : ' ',
15803             nCC(INSN_COND), rDlo, rDhi, rM, rS);
15804         goto decode_success;
15805      }
15806      /* fall through */
15807   }
15808
15809   // SMLAL, UMLAL
15810   if (BITS8(0,0,0,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
15811       && INSN(7,4) == BITS4(1,0,0,1)) {
15812      UInt bitS = (insn >> 20) & 1; /* 20:20 */
15813      UInt rDhi = INSN(19,16);
15814      UInt rDlo = INSN(15,12);
15815      UInt rS   = INSN(11,8);
15816      UInt rM   = INSN(3,0);
15817      UInt isS  = (INSN(27,20) >> 2) & 1; /* 22:22 */
15818      if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo)  {
15819         /* Unpredictable; don't decode; fall through */
15820      } else {
15821         IRTemp argL  = newTemp(Ity_I32);
15822         IRTemp argR  = newTemp(Ity_I32);
15823         IRTemp old   = newTemp(Ity_I64);
15824         IRTemp res   = newTemp(Ity_I64);
15825         IRTemp resHi = newTemp(Ity_I32);
15826         IRTemp resLo = newTemp(Ity_I32);
15827         IRTemp oldC  = IRTemp_INVALID;
15828         IRTemp oldV  = IRTemp_INVALID;
15829         IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
15830         assign( argL, getIRegA(rM));
15831         assign( argR, getIRegA(rS));
15832         assign( old, binop(Iop_32HLto64, getIRegA(rDhi), getIRegA(rDlo)) );
15833         assign( res, binop(Iop_Add64,
15834                            mkexpr(old),
15835                            binop(mulOp, mkexpr(argL), mkexpr(argR))) );
15836         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
15837         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
15838         if (bitS) {
15839            oldC = newTemp(Ity_I32);
15840            assign(oldC, mk_armg_calculate_flag_c());
15841            oldV = newTemp(Ity_I32);
15842            assign(oldV, mk_armg_calculate_flag_v());
15843         }
15844         // now update guest state
15845         putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
15846         putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
15847         if (bitS) {
15848            IRTemp pair = newTemp(Ity_I32);
15849            assign( pair, binop(Iop_Or32,
15850                                binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
15851                                mkexpr(oldV)) );
15852            setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
15853         }
15854         DIP("%cmlal%c%s r%u, r%u, r%u, r%u\n",
15855             isS ? 's' : 'u', bitS ? 's' : ' ', nCC(INSN_COND),
15856             rDlo, rDhi, rM, rS);
15857         goto decode_success;
15858      }
15859      /* fall through */
15860   }
15861
15862   // UMAAL
15863   if (BITS8(0,0,0,0,0,1,0,0) == INSN(27,20) && INSN(7,4) == BITS4(1,0,0,1)) {
15864      UInt rDhi = INSN(19,16);
15865      UInt rDlo = INSN(15,12);
15866      UInt rM   = INSN(11,8);
15867      UInt rN   = INSN(3,0);
15868      if (rDlo == 15 || rDhi == 15 || rN == 15 || rM == 15 || rDhi == rDlo)  {
15869         /* Unpredictable; don't decode; fall through */
15870      } else {
15871         IRTemp argN   = newTemp(Ity_I32);
15872         IRTemp argM   = newTemp(Ity_I32);
15873         IRTemp argDhi = newTemp(Ity_I32);
15874         IRTemp argDlo = newTemp(Ity_I32);
15875         IRTemp res    = newTemp(Ity_I64);
15876         IRTemp resHi  = newTemp(Ity_I32);
15877         IRTemp resLo  = newTemp(Ity_I32);
15878         assign( argN,   getIRegA(rN) );
15879         assign( argM,   getIRegA(rM) );
15880         assign( argDhi, getIRegA(rDhi) );
15881         assign( argDlo, getIRegA(rDlo) );
15882         assign( res,
15883                 binop(Iop_Add64,
15884                       binop(Iop_Add64,
15885                             binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
15886                             unop(Iop_32Uto64, mkexpr(argDhi))),
15887                       unop(Iop_32Uto64, mkexpr(argDlo))) );
15888         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
15889         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
15890         // now update guest state
15891         putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
15892         putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
15893         DIP("umaal %s r%u, r%u, r%u, r%u\n",
15894             nCC(INSN_COND), rDlo, rDhi, rN, rM);
15895         goto decode_success;
15896      }
15897      /* fall through */
15898   }
15899
15900   /* --------------------- Msr etc --------------------- */
15901
15902   // MSR apsr, #imm
15903   if (INSN(27,20) == BITS8(0,0,1,1,0,0,1,0)
15904       && INSN(17,12) == BITS6(0,0,1,1,1,1)) {
15905      UInt write_ge    = INSN(18,18);
15906      UInt write_nzcvq = INSN(19,19);
15907      if (write_nzcvq || write_ge) {
15908         UInt   imm = (INSN(11,0) >> 0) & 0xFF;
15909         UInt   rot = 2 * ((INSN(11,0) >> 8) & 0xF);
15910         IRTemp immT = newTemp(Ity_I32);
15911         vassert(rot <= 30);
15912         imm = ROR32(imm, rot);
15913         assign(immT, mkU32(imm));
15914         desynthesise_APSR( write_nzcvq, write_ge, immT, condT );
15915         DIP("msr%s cpsr%s%sf, #0x%08x\n", nCC(INSN_COND),
15916             write_nzcvq ? "f" : "", write_ge ? "g" : "", imm);
15917         goto decode_success;
15918      }
15919      /* fall through */
15920   }
15921
15922   // MSR apsr, reg
15923   if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
15924       && INSN(17,12) == BITS6(0,0,1,1,1,1)
15925       && INSN(11,4) == BITS8(0,0,0,0,0,0,0,0)) {
15926      UInt rN          = INSN(3,0);
15927      UInt write_ge    = INSN(18,18);
15928      UInt write_nzcvq = INSN(19,19);
15929      if (rN != 15 && (write_nzcvq || write_ge)) {
15930         IRTemp rNt = newTemp(Ity_I32);
15931         assign(rNt, getIRegA(rN));
15932         desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
15933         DIP("msr%s cpsr_%s%s, r%u\n", nCC(INSN_COND),
15934             write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
15935         goto decode_success;
15936      }
15937      /* fall through */
15938   }
15939
15940   // MRS rD, cpsr
15941   if ((insn & 0x0FFF0FFF) == 0x010F0000) {
15942      UInt rD   = INSN(15,12);
15943      if (rD != 15) {
15944         IRTemp apsr = synthesise_APSR();
15945         putIRegA( rD, mkexpr(apsr), condT, Ijk_Boring );
15946         DIP("mrs%s r%u, cpsr\n", nCC(INSN_COND), rD);
15947         goto decode_success;
15948      }
15949      /* fall through */
15950   }
15951
15952   /* --------------------- Svc --------------------- */
15953   if (BITS8(1,1,1,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))) {
15954      UInt imm24 = (insn >> 0) & 0xFFFFFF;
15955      if (imm24 == 0) {
15956         /* A syscall.  We can't do this conditionally, hence: */
15957         if (condT != IRTemp_INVALID) {
15958            mk_skip_over_A32_if_cond_is_false( condT );
15959         }
15960         // AL after here
15961         llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 4 ));
15962         dres.jk_StopHere = Ijk_Sys_syscall;
15963         dres.whatNext    = Dis_StopHere;
15964         DIP("svc%s #0x%08x\n", nCC(INSN_COND), imm24);
15965         goto decode_success;
15966      }
15967      /* fall through */
15968   }
15969
15970   /* ------------------------ swp ------------------------ */
15971
15972   // SWP, SWPB
15973   if (BITS8(0,0,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15974       && BITS4(0,0,0,0) == INSN(11,8)
15975       && BITS4(1,0,0,1) == INSN(7,4)) {
15976      UInt   rN   = INSN(19,16);
15977      UInt   rD   = INSN(15,12);
15978      UInt   rM   = INSN(3,0);
15979      IRTemp tRn  = newTemp(Ity_I32);
15980      IRTemp tNew = newTemp(Ity_I32);
15981      IRTemp tOld = IRTemp_INVALID;
15982      IRTemp tSC1 = newTemp(Ity_I1);
15983      UInt   isB  = (insn >> 22) & 1;
15984
15985      if (rD == 15 || rN == 15 || rM == 15 || rN == rM || rN == rD) {
15986         /* undecodable; fall through */
15987      } else {
15988         /* make unconditional */
15989         if (condT != IRTemp_INVALID) {
15990            mk_skip_over_A32_if_cond_is_false( condT );
15991            condT = IRTemp_INVALID;
15992         }
15993         /* Ok, now we're unconditional.  Generate a LL-SC loop. */
15994         assign(tRn, getIRegA(rN));
15995         assign(tNew, getIRegA(rM));
15996         if (isB) {
15997            /* swpb */
15998            tOld = newTemp(Ity_I8);
15999            stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
16000                              NULL/*=>isLL*/) );
16001            stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
16002                              unop(Iop_32to8, mkexpr(tNew))) );
16003         } else {
16004            /* swp */
16005            tOld = newTemp(Ity_I32);
16006            stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
16007                              NULL/*=>isLL*/) );
16008            stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
16009                              mkexpr(tNew)) );
16010         }
16011         stmt( IRStmt_Exit(unop(Iop_Not1, mkexpr(tSC1)),
16012                           /*Ijk_NoRedir*/Ijk_Boring,
16013                           IRConst_U32(guest_R15_curr_instr_notENC),
16014                           OFFB_R15T ));
16015         putIRegA(rD, isB ? unop(Iop_8Uto32, mkexpr(tOld)) : mkexpr(tOld),
16016                      IRTemp_INVALID, Ijk_Boring);
16017         DIP("swp%s%s r%u, r%u, [r%u]\n",
16018             isB ? "b" : "", nCC(INSN_COND), rD, rM, rN);
16019         goto decode_success;
16020      }
16021      /* fall through */
16022   }
16023
16024   /* ----------------------------------------------------------- */
16025   /* -- ARMv6 instructions                                    -- */
16026   /* ----------------------------------------------------------- */
16027
16028   /* ------------------- {ldr,str}ex{,b,h,d} ------------------- */
16029
16030   // LDREXD, LDREX, LDREXH, LDREXB
16031   if (0x01900F9F == (insn & 0x0F900FFF)) {
16032      UInt   rT    = INSN(15,12);
16033      UInt   rN    = INSN(19,16);
16034      IRType ty    = Ity_INVALID;
16035      IROp   widen = Iop_INVALID;
16036      const HChar* nm = NULL;
16037      Bool   valid = True;
16038      switch (INSN(22,21)) {
16039         case 0: nm = "";  ty = Ity_I32; break;
16040         case 1: nm = "d"; ty = Ity_I64; break;
16041         case 2: nm = "b"; ty = Ity_I8;  widen = Iop_8Uto32; break;
16042         case 3: nm = "h"; ty = Ity_I16; widen = Iop_16Uto32; break;
16043         default: vassert(0);
16044      }
16045      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
16046         if (rT == 15 || rN == 15)
16047            valid = False;
16048      } else {
16049         vassert(ty == Ity_I64);
16050         if ((rT & 1) == 1 || rT == 14 || rN == 15)
16051            valid = False;
16052      }
16053      if (valid) {
16054         IRTemp res;
16055         /* make unconditional */
16056         if (condT != IRTemp_INVALID) {
16057           mk_skip_over_A32_if_cond_is_false( condT );
16058           condT = IRTemp_INVALID;
16059         }
16060         /* Ok, now we're unconditional.  Do the load. */
16061         res = newTemp(ty);
16062         // FIXME: assumes little-endian guest
16063         stmt( IRStmt_LLSC(Iend_LE, res, getIRegA(rN),
16064                           NULL/*this is a load*/) );
16065         if (ty == Ity_I64) {
16066            // FIXME: assumes little-endian guest
16067            putIRegA(rT+0, unop(Iop_64to32, mkexpr(res)),
16068                           IRTemp_INVALID, Ijk_Boring);
16069            putIRegA(rT+1, unop(Iop_64HIto32, mkexpr(res)),
16070                           IRTemp_INVALID, Ijk_Boring);
16071            DIP("ldrex%s%s r%u, r%u, [r%u]\n",
16072                nm, nCC(INSN_COND), rT+0, rT+1, rN);
16073         } else {
16074            putIRegA(rT, widen == Iop_INVALID
16075                            ? mkexpr(res) : unop(widen, mkexpr(res)),
16076                     IRTemp_INVALID, Ijk_Boring);
16077            DIP("ldrex%s%s r%u, [r%u]\n", nm, nCC(INSN_COND), rT, rN);
16078         }
16079         goto decode_success;
16080      }
16081      /* undecodable; fall through */
16082   }
16083
16084   // STREXD, STREX, STREXH, STREXB
16085   if (0x01800F90 == (insn & 0x0F900FF0)) {
16086      UInt   rT     = INSN(3,0);
16087      UInt   rN     = INSN(19,16);
16088      UInt   rD     = INSN(15,12);
16089      IRType ty     = Ity_INVALID;
16090      IROp   narrow = Iop_INVALID;
16091      const HChar* nm = NULL;
16092      Bool   valid  = True;
16093      switch (INSN(22,21)) {
16094         case 0: nm = "";  ty = Ity_I32; break;
16095         case 1: nm = "d"; ty = Ity_I64; break;
16096         case 2: nm = "b"; ty = Ity_I8;  narrow = Iop_32to8; break;
16097         case 3: nm = "h"; ty = Ity_I16; narrow = Iop_32to16; break;
16098         default: vassert(0);
16099      }
16100      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
16101         if (rD == 15 || rN == 15 || rT == 15
16102             || rD == rN || rD == rT)
16103            valid = False;
16104      } else {
16105         vassert(ty == Ity_I64);
16106         if (rD == 15 || (rT & 1) == 1 || rT == 14 || rN == 15
16107             || rD == rN || rD == rT || rD == rT+1)
16108            valid = False;
16109      }
16110      if (valid) {
16111         IRTemp resSC1, resSC32, data;
16112         /* make unconditional */
16113         if (condT != IRTemp_INVALID) {
16114            mk_skip_over_A32_if_cond_is_false( condT );
16115            condT = IRTemp_INVALID;
16116         }
16117         /* Ok, now we're unconditional.  Do the store. */
16118         data = newTemp(ty);
16119         assign(data,
16120                ty == Ity_I64
16121                   // FIXME: assumes little-endian guest
16122                   ? binop(Iop_32HLto64, getIRegA(rT+1), getIRegA(rT+0))
16123                   : narrow == Iop_INVALID
16124                      ? getIRegA(rT)
16125                      : unop(narrow, getIRegA(rT)));
16126         resSC1 = newTemp(Ity_I1);
16127         // FIXME: assumes little-endian guest
16128         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegA(rN), mkexpr(data)) );
16129
16130         /* Set rD to 1 on failure, 0 on success.  Currently we have
16131            resSC1 == 0 on failure, 1 on success. */
16132         resSC32 = newTemp(Ity_I32);
16133         assign(resSC32,
16134                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
16135
16136         putIRegA(rD, mkexpr(resSC32),
16137                      IRTemp_INVALID, Ijk_Boring);
16138         if (ty == Ity_I64) {
16139            DIP("strex%s%s r%u, r%u, r%u, [r%u]\n",
16140                nm, nCC(INSN_COND), rD, rT, rT+1, rN);
16141         } else {
16142            DIP("strex%s%s r%u, r%u, [r%u]\n",
16143                nm, nCC(INSN_COND), rD, rT, rN);
16144         }
16145         goto decode_success;
16146      }
16147      /* fall through */
16148   }
16149
16150   /* --------------------- movw, movt --------------------- */
16151   if (0x03000000 == (insn & 0x0FF00000)
16152       || 0x03400000 == (insn & 0x0FF00000)) /* pray for CSE */ {
16153      UInt rD    = INSN(15,12);
16154      UInt imm16 = (insn & 0xFFF) | ((insn >> 4) & 0x0000F000);
16155      UInt isT   = (insn >> 22) & 1;
16156      if (rD == 15) {
16157         /* forget it */
16158      } else {
16159         if (isT) {
16160            putIRegA(rD,
16161                     binop(Iop_Or32,
16162                           binop(Iop_And32, getIRegA(rD), mkU32(0xFFFF)),
16163                           mkU32(imm16 << 16)),
16164                     condT, Ijk_Boring);
16165            DIP("movt%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
16166            goto decode_success;
16167         } else {
16168            putIRegA(rD, mkU32(imm16), condT, Ijk_Boring);
16169            DIP("movw%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
16170            goto decode_success;
16171         }
16172      }
16173      /* fall through */
16174   }
16175
16176   /* ----------- uxtb, sxtb, uxth, sxth, uxtb16, sxtb16 ----------- */
16177   /* FIXME: this is an exact duplicate of the Thumb version.  They
16178      should be commoned up. */
16179   if (BITS8(0,1,1,0,1, 0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,0))
16180       && BITS4(1,1,1,1) == INSN(19,16)
16181       && BITS4(0,1,1,1) == INSN(7,4)
16182       && BITS4(0,0, 0,0) == (INSN(11,8) & BITS4(0,0,1,1))) {
16183      UInt subopc = INSN(27,20) & BITS8(0,0,0,0,0, 1,1,1);
16184      if (subopc != BITS4(0,0,0,1) && subopc != BITS4(0,1,0,1)) {
16185         Int    rot  = (INSN(11,8) >> 2) & 3;
16186         UInt   rM   = INSN(3,0);
16187         UInt   rD   = INSN(15,12);
16188         IRTemp srcT = newTemp(Ity_I32);
16189         IRTemp rotT = newTemp(Ity_I32);
16190         IRTemp dstT = newTemp(Ity_I32);
16191         const HChar* nm = "???";
16192         assign(srcT, getIRegA(rM));
16193         assign(rotT, genROR32(srcT, 8 * rot)); /* 0, 8, 16 or 24 only */
16194         switch (subopc) {
16195            case BITS4(0,1,1,0): // UXTB
16196               assign(dstT, unop(Iop_8Uto32, unop(Iop_32to8, mkexpr(rotT))));
16197               nm = "uxtb";
16198               break;
16199            case BITS4(0,0,1,0): // SXTB
16200               assign(dstT, unop(Iop_8Sto32, unop(Iop_32to8, mkexpr(rotT))));
16201               nm = "sxtb";
16202               break;
16203            case BITS4(0,1,1,1): // UXTH
16204               assign(dstT, unop(Iop_16Uto32, unop(Iop_32to16, mkexpr(rotT))));
16205               nm = "uxth";
16206               break;
16207            case BITS4(0,0,1,1): // SXTH
16208               assign(dstT, unop(Iop_16Sto32, unop(Iop_32to16, mkexpr(rotT))));
16209               nm = "sxth";
16210               break;
16211            case BITS4(0,1,0,0): // UXTB16
16212               assign(dstT, binop(Iop_And32, mkexpr(rotT), mkU32(0x00FF00FF)));
16213               nm = "uxtb16";
16214               break;
16215            case BITS4(0,0,0,0): { // SXTB16
16216               IRTemp lo32 = newTemp(Ity_I32);
16217               IRTemp hi32 = newTemp(Ity_I32);
16218               assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
16219               assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
16220               assign(
16221                  dstT,
16222                  binop(Iop_Or32,
16223                        binop(Iop_And32,
16224                              unop(Iop_8Sto32,
16225                                   unop(Iop_32to8, mkexpr(lo32))),
16226                              mkU32(0xFFFF)),
16227                        binop(Iop_Shl32,
16228                              unop(Iop_8Sto32,
16229                                   unop(Iop_32to8, mkexpr(hi32))),
16230                              mkU8(16))
16231               ));
16232               nm = "sxtb16";
16233               break;
16234            }
16235            default:
16236               vassert(0); // guarded by "if" above
16237         }
16238         putIRegA(rD, mkexpr(dstT), condT, Ijk_Boring);
16239         DIP("%s%s r%u, r%u, ROR #%u\n", nm, nCC(INSN_COND), rD, rM, rot);
16240         goto decode_success;
16241      }
16242      /* fall through */
16243   }
16244
16245   /* ------------------- bfi, bfc ------------------- */
16246   if (BITS8(0,1,1,1,1,1,0, 0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
16247       && BITS4(0, 0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
16248      UInt rD  = INSN(15,12);
16249      UInt rN  = INSN(3,0);
16250      UInt msb = (insn >> 16) & 0x1F; /* 20:16 */
16251      UInt lsb = (insn >> 7) & 0x1F;  /* 11:7 */
16252      if (rD == 15 || msb < lsb) {
16253         /* undecodable; fall through */
16254      } else {
16255         IRTemp src    = newTemp(Ity_I32);
16256         IRTemp olddst = newTemp(Ity_I32);
16257         IRTemp newdst = newTemp(Ity_I32);
16258         UInt   mask = 1 << (msb - lsb);
16259         mask = (mask - 1) + mask;
16260         vassert(mask != 0); // guaranteed by "msb < lsb" check above
16261         mask <<= lsb;
16262
16263         assign(src, rN == 15 ? mkU32(0) : getIRegA(rN));
16264         assign(olddst, getIRegA(rD));
16265         assign(newdst,
16266                binop(Iop_Or32,
16267                   binop(Iop_And32,
16268                         binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
16269                         mkU32(mask)),
16270                   binop(Iop_And32,
16271                         mkexpr(olddst),
16272                         mkU32(~mask)))
16273               );
16274
16275         putIRegA(rD, mkexpr(newdst), condT, Ijk_Boring);
16276
16277         if (rN == 15) {
16278            DIP("bfc%s r%u, #%u, #%u\n",
16279                nCC(INSN_COND), rD, lsb, msb-lsb+1);
16280         } else {
16281            DIP("bfi%s r%u, r%u, #%u, #%u\n",
16282                nCC(INSN_COND), rD, rN, lsb, msb-lsb+1);
16283         }
16284         goto decode_success;
16285      }
16286      /* fall through */
16287   }
16288
16289   /* ------------------- {u,s}bfx ------------------- */
16290   if (BITS8(0,1,1,1,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
16291       && BITS4(0,1,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
16292      UInt rD  = INSN(15,12);
16293      UInt rN  = INSN(3,0);
16294      UInt wm1 = (insn >> 16) & 0x1F; /* 20:16 */
16295      UInt lsb = (insn >> 7) & 0x1F;  /* 11:7 */
16296      UInt msb = lsb + wm1;
16297      UInt isU = (insn >> 22) & 1;    /* 22:22 */
16298      if (rD == 15 || rN == 15 || msb >= 32) {
16299         /* undecodable; fall through */
16300      } else {
16301         IRTemp src  = newTemp(Ity_I32);
16302         IRTemp tmp  = newTemp(Ity_I32);
16303         IRTemp res  = newTemp(Ity_I32);
16304         UInt   mask = ((1 << wm1) - 1) + (1 << wm1);
16305         vassert(msb >= 0 && msb <= 31);
16306         vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
16307
16308         assign(src, getIRegA(rN));
16309         assign(tmp, binop(Iop_And32,
16310                           binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
16311                           mkU32(mask)));
16312         assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
16313                           binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
16314                           mkU8(31-wm1)));
16315
16316         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
16317
16318         DIP("%s%s r%u, r%u, #%u, #%u\n",
16319             isU ? "ubfx" : "sbfx",
16320             nCC(INSN_COND), rD, rN, lsb, wm1 + 1);
16321         goto decode_success;
16322      }
16323      /* fall through */
16324   }
16325
16326   /* --------------------- Load/store doubleword ------------- */
16327   // LDRD STRD
16328   /*                 31   27   23   19 15 11   7    3     # highest bit
16329                        28   24   20 16 12    8    4    0
16330      A5-36   1 | 16  cond 0001 U100 Rn Rd im4h 11S1 im4l
16331      A5-38   1 | 32  cond 0001 U000 Rn Rd 0000 11S1 Rm
16332      A5-40   2 | 16  cond 0001 U110 Rn Rd im4h 11S1 im4l
16333      A5-42   2 | 32  cond 0001 U010 Rn Rd 0000 11S1 Rm
16334      A5-44   3 | 16  cond 0000 U100 Rn Rd im4h 11S1 im4l
16335      A5-46   3 | 32  cond 0000 U000 Rn Rd 0000 11S1 Rm
16336   */
16337   /* case coding:
16338             1   at-ea               (access at ea)
16339             2   at-ea-then-upd      (access at ea, then Rn = ea)
16340             3   at-Rn-then-upd      (access at Rn, then Rn = ea)
16341      ea coding
16342             16  Rn +/- imm8
16343             32  Rn +/- Rm
16344   */
16345   /* Quickly skip over all of this for hopefully most instructions */
16346   if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
16347      goto after_load_store_doubleword;
16348
16349   /* Check the "11S1" thing. */
16350   if ((INSN(7,4) & BITS4(1,1,0,1)) != BITS4(1,1,0,1))
16351      goto after_load_store_doubleword;
16352
16353   summary = 0;
16354
16355   /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,0,0)) {
16356      summary = 1 | 16;
16357   }
16358   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,0,0)) {
16359      summary = 1 | 32;
16360   }
16361   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,1,0)) {
16362      summary = 2 | 16;
16363   }
16364   else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,1,0)) {
16365      summary = 2 | 32;
16366   }
16367   else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(1,0,0)) {
16368      summary = 3 | 16;
16369   }
16370   else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(0,0,0)) {
16371      summary = 3 | 32;
16372   }
16373   else goto after_load_store_doubleword;
16374
16375   { UInt rN   = (insn >> 16) & 0xF; /* 19:16 */
16376     UInt rD   = (insn >> 12) & 0xF; /* 15:12 */
16377     UInt rM   = (insn >> 0)  & 0xF; /*  3:0  */
16378     UInt bU   = (insn >> 23) & 1;   /* 23 U=1 offset+, U=0 offset- */
16379     UInt bS   = (insn >> 5) & 1;    /* S=1 store, S=0 load */
16380     UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
16381
16382     /* Require rD to be an even numbered register */
16383     if ((rD & 1) != 0)
16384        goto after_load_store_doubleword;
16385
16386     /* Require 11:8 == 0 for Rn +/- Rm cases */
16387     if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
16388        goto after_load_store_doubleword;
16389
16390     /* Skip some invalid cases, which would lead to two competing
16391        updates to the same register, or which are otherwise
16392        disallowed by the spec. */
16393     switch (summary) {
16394        case 1 | 16:
16395           break;
16396        case 1 | 32:
16397           if (rM == 15) goto after_load_store_doubleword;
16398           break;
16399        case 2 | 16: case 3 | 16:
16400           if (rN == 15) goto after_load_store_doubleword;
16401           if (bS == 0 && (rN == rD || rN == rD+1))
16402              goto after_load_store_doubleword;
16403           break;
16404        case 2 | 32: case 3 | 32:
16405           if (rM == 15) goto after_load_store_doubleword;
16406           if (rN == 15) goto after_load_store_doubleword;
16407           if (rN == rM) goto after_load_store_doubleword;
16408           if (bS == 0 && (rN == rD || rN == rD+1))
16409              goto after_load_store_doubleword;
16410           break;
16411        default:
16412           vassert(0);
16413     }
16414
16415     /* If this is a branch, make it unconditional at this point.
16416        Doing conditional branches in-line is too complex (for
16417        now). */
16418     vassert((rD & 1) == 0); /* from tests above */
16419     if (bS == 0 && rD+1 == 15 && condT != IRTemp_INVALID) {
16420        // go uncond
16421        mk_skip_over_A32_if_cond_is_false( condT );
16422        condT = IRTemp_INVALID;
16423        // now uncond
16424     }
16425
16426     /* compute the effective address.  Bind it to a tmp since we
16427        may need to use it twice. */
16428     IRExpr* eaE = NULL;
16429     switch (summary & 0xF0) {
16430        case 16:
16431           eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
16432           break;
16433        case 32:
16434           eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
16435           break;
16436     }
16437     vassert(eaE);
16438     IRTemp eaT = newTemp(Ity_I32);
16439     assign(eaT, eaE);
16440
16441     /* get the old Rn value */
16442     IRTemp rnT = newTemp(Ity_I32);
16443     assign(rnT, getIRegA(rN));
16444
16445     /* decide on the transfer address */
16446     IRTemp taT = IRTemp_INVALID;
16447     switch (summary & 0x0F) {
16448        case 1: case 2: taT = eaT; break;
16449        case 3:         taT = rnT; break;
16450     }
16451     vassert(taT != IRTemp_INVALID);
16452
16453     /* XXX deal with alignment constraints */
16454     /* XXX: but the A8 doesn't seem to trap for misaligned loads, so,
16455        ignore alignment issues for the time being. */
16456
16457     /* For almost all cases, we do the writeback after the transfers.
16458        However, that leaves the stack "uncovered" in this case:
16459           strd    rD, [sp, #-8]
16460        In which case, do the writeback to SP now, instead of later.
16461        This is bad in that it makes the insn non-restartable if the
16462        accesses fault, but at least keeps Memcheck happy. */
16463     Bool writeback_already_done = False;
16464     if (bS == 1 /*store*/ && summary == (2 | 16)
16465         && rN == 13 && rN != rD && rN != rD+1
16466         && bU == 0/*minus*/ && imm8 == 8) {
16467        putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
16468        writeback_already_done = True;
16469     }
16470
16471     /* doubleword store  S 1
16472        doubleword load   S 0
16473     */
16474     const HChar* name = NULL;
16475     /* generate the transfers */
16476     if (bS == 1) { // doubleword store
16477        storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(0)),
16478                        getIRegA(rD+0), condT );
16479        storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(4)),
16480                        getIRegA(rD+1), condT );
16481        name = "strd";
16482     } else { // doubleword load
16483        IRTemp oldRd0 = newTemp(Ity_I32);
16484        IRTemp oldRd1 = newTemp(Ity_I32);
16485        assign(oldRd0, llGetIReg(rD+0));
16486        assign(oldRd1, llGetIReg(rD+1));
16487        IRTemp newRd0 = newTemp(Ity_I32);
16488        IRTemp newRd1 = newTemp(Ity_I32);
16489        loadGuardedLE( newRd0, ILGop_Ident32,
16490                       binop(Iop_Add32, mkexpr(taT), mkU32(0)),
16491                       mkexpr(oldRd0), condT );
16492        putIRegA( rD+0, mkexpr(newRd0), IRTemp_INVALID, Ijk_Boring );
16493        loadGuardedLE( newRd1, ILGop_Ident32,
16494                       binop(Iop_Add32, mkexpr(taT), mkU32(4)),
16495                       mkexpr(oldRd1), condT );
16496        putIRegA( rD+1, mkexpr(newRd1), IRTemp_INVALID, Ijk_Boring );
16497        name = "ldrd";
16498     }
16499
16500     /* Update Rn if necessary. */
16501     switch (summary & 0x0F) {
16502        case 2: case 3:
16503           // should be assured by logic above:
16504           vassert(rN != 15); /* from checks above */
16505           if (bS == 0) {
16506              vassert(rD+0 != rN); /* since we just wrote rD+0 */
16507              vassert(rD+1 != rN); /* since we just wrote rD+1 */
16508           }
16509           if (!writeback_already_done)
16510              putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
16511           break;
16512     }
16513
16514     switch (summary & 0x0F) {
16515        case 1:  DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
16516                 break;
16517        case 2:  DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
16518                     name, nCC(INSN_COND), rD, dis_buf);
16519                 break;
16520        case 3:  DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
16521                     name, nCC(INSN_COND), rD, dis_buf);
16522                 break;
16523        default: vassert(0);
16524     }
16525
16526     goto decode_success;
16527   }
16528
16529  after_load_store_doubleword:
16530
16531   /* ------------------- {s,u}xtab ------------- */
16532   if (BITS8(0,1,1,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
16533       && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
16534       && BITS4(0,1,1,1) == INSN(7,4)) {
16535      UInt rN  = INSN(19,16);
16536      UInt rD  = INSN(15,12);
16537      UInt rM  = INSN(3,0);
16538      UInt rot = (insn >> 10) & 3;
16539      UInt isU = INSN(22,22);
16540      if (rN == 15/*it's {S,U}XTB*/ || rD == 15 || rM == 15) {
16541         /* undecodable; fall through */
16542      } else {
16543         IRTemp srcL = newTemp(Ity_I32);
16544         IRTemp srcR = newTemp(Ity_I32);
16545         IRTemp res  = newTemp(Ity_I32);
16546         assign(srcR, getIRegA(rM));
16547         assign(srcL, getIRegA(rN));
16548         assign(res,  binop(Iop_Add32,
16549                            mkexpr(srcL),
16550                            unop(isU ? Iop_8Uto32 : Iop_8Sto32,
16551                                 unop(Iop_32to8,
16552                                      genROR32(srcR, 8 * rot)))));
16553         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
16554         DIP("%cxtab%s r%u, r%u, r%u, ror #%u\n",
16555             isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
16556         goto decode_success;
16557      }
16558      /* fall through */
16559   }
16560
16561   /* ------------------- {s,u}xtah ------------- */
16562   if (BITS8(0,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
16563       && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
16564       && BITS4(0,1,1,1) == INSN(7,4)) {
16565      UInt rN  = INSN(19,16);
16566      UInt rD  = INSN(15,12);
16567      UInt rM  = INSN(3,0);
16568      UInt rot = (insn >> 10) & 3;
16569      UInt isU = INSN(22,22);
16570      if (rN == 15/*it's {S,U}XTH*/ || rD == 15 || rM == 15) {
16571         /* undecodable; fall through */
16572      } else {
16573         IRTemp srcL = newTemp(Ity_I32);
16574         IRTemp srcR = newTemp(Ity_I32);
16575         IRTemp res  = newTemp(Ity_I32);
16576         assign(srcR, getIRegA(rM));
16577         assign(srcL, getIRegA(rN));
16578         assign(res,  binop(Iop_Add32,
16579                            mkexpr(srcL),
16580                            unop(isU ? Iop_16Uto32 : Iop_16Sto32,
16581                                 unop(Iop_32to16,
16582                                      genROR32(srcR, 8 * rot)))));
16583         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
16584
16585         DIP("%cxtah%s r%u, r%u, r%u, ror #%u\n",
16586             isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
16587         goto decode_success;
16588      }
16589      /* fall through */
16590   }
16591
16592   /* ------------------- rev16, rev ------------------ */
16593   if (INSN(27,16) == 0x6BF
16594       && (INSN(11,4) == 0xFB/*rev16*/ || INSN(11,4) == 0xF3/*rev*/)) {
16595      Bool isREV = INSN(11,4) == 0xF3;
16596      UInt rM    = INSN(3,0);
16597      UInt rD    = INSN(15,12);
16598      if (rM != 15 && rD != 15) {
16599         IRTemp rMt = newTemp(Ity_I32);
16600         assign(rMt, getIRegA(rM));
16601         IRTemp res = isREV ? gen_REV(rMt) : gen_REV16(rMt);
16602         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
16603         DIP("rev%s%s r%u, r%u\n", isREV ? "" : "16",
16604             nCC(INSN_COND), rD, rM);
16605         goto decode_success;
16606      }
16607   }
16608
16609   /* ------------------- revsh ----------------------- */
16610   if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xFB) {
16611      UInt rM = INSN(3,0);
16612      UInt rD = INSN(15,12);
16613      if (rM != 15 && rD != 15) {
16614         IRTemp irt_rM  = newTemp(Ity_I32);
16615         IRTemp irt_hi  = newTemp(Ity_I32);
16616         IRTemp irt_low = newTemp(Ity_I32);
16617         IRTemp irt_res = newTemp(Ity_I32);
16618         assign(irt_rM, getIRegA(rM));
16619         assign(irt_hi,
16620                binop(Iop_Sar32,
16621                      binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
16622                      mkU8(16)
16623                )
16624         );
16625         assign(irt_low,
16626                binop(Iop_And32,
16627                      binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
16628                      mkU32(0xFF)
16629                )
16630         );
16631         assign(irt_res,
16632                binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
16633         );
16634         putIRegA(rD, mkexpr(irt_res), condT, Ijk_Boring);
16635         DIP("revsh%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
16636         goto decode_success;
16637      }
16638   }
16639
16640   /* ------------------- rbit ------------------ */
16641   if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xF3) {
16642      UInt rD = INSN(15,12);
16643      UInt rM = INSN(3,0);
16644      if (rD != 15 && rM != 15) {
16645         IRTemp arg = newTemp(Ity_I32);
16646         assign(arg, getIRegA(rM));
16647         IRTemp res = gen_BITREV(arg);
16648         putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
16649         DIP("rbit r%u, r%u\n", rD, rM);
16650         goto decode_success;
16651      }
16652   }
16653
16654   /* ------------------- smmul ------------------ */
16655   if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
16656       && INSN(15,12) == BITS4(1,1,1,1)
16657       && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
16658      UInt bitR = INSN(5,5);
16659      UInt rD = INSN(19,16);
16660      UInt rM = INSN(11,8);
16661      UInt rN = INSN(3,0);
16662      if (rD != 15 && rM != 15 && rN != 15) {
16663         IRExpr* res
16664         = unop(Iop_64HIto32,
16665                binop(Iop_Add64,
16666                      binop(Iop_MullS32, getIRegA(rN), getIRegA(rM)),
16667                      mkU64(bitR ? 0x80000000ULL : 0ULL)));
16668         putIRegA(rD, res, condT, Ijk_Boring);
16669         DIP("smmul%s%s r%u, r%u, r%u\n",
16670             nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM);
16671         goto decode_success;
16672      }
16673   }
16674
16675   /* ------------------- smmla ------------------ */
16676   if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
16677       && INSN(15,12) != BITS4(1,1,1,1)
16678       && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
16679      UInt bitR = INSN(5,5);
16680      UInt rD = INSN(19,16);
16681      UInt rA = INSN(15,12);
16682      UInt rM = INSN(11,8);
16683      UInt rN = INSN(3,0);
16684      if (rD != 15 && rM != 15 && rN != 15) {
16685         IRExpr* res
16686         = unop(Iop_64HIto32,
16687                binop(Iop_Add64,
16688                      binop(Iop_Add64,
16689                            binop(Iop_32HLto64, getIRegA(rA), mkU32(0)),
16690                            binop(Iop_MullS32, getIRegA(rN), getIRegA(rM))),
16691                      mkU64(bitR ? 0x80000000ULL : 0ULL)));
16692         putIRegA(rD, res, condT, Ijk_Boring);
16693         DIP("smmla%s%s r%u, r%u, r%u, r%u\n",
16694             nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM, rA);
16695         goto decode_success;
16696      }
16697   }
16698
16699   /* ------------------- smmla ------------------ */
16700   if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
16701       && INSN(15,12) != BITS4(1,1,1,1)
16702       && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
16703      UInt bitR = INSN(5,5);
16704      UInt rD = INSN(19,16);
16705      UInt rA = INSN(15,12);
16706      UInt rM = INSN(11,8);
16707      UInt rN = INSN(3,0);
16708      if (rD != 15 && rM != 15 && rN != 15) {
16709         IRExpr* res
16710         = unop(Iop_64HIto32,
16711                binop(Iop_Add64,
16712                      binop(Iop_Add64,
16713                            binop(Iop_32HLto64, getIRegA(rA), mkU32(0)),
16714                            binop(Iop_MullS32, getIRegA(rN), getIRegA(rM))),
16715                      mkU64(bitR ? 0x80000000ULL : 0ULL)));
16716         putIRegA(rD, res, condT, Ijk_Boring);
16717         DIP("smmla%s%s r%u, r%u, r%u, r%u\n",
16718             nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM, rA);
16719         goto decode_success;
16720      }
16721   }
16722
16723   /* ------------------- NOP ------------------ */
16724   if (0x0320F000 == (insn & 0x0FFFFFFF)) {
16725      DIP("nop%s\n", nCC(INSN_COND));
16726      goto decode_success;
16727   }
16728
16729   /* -------------- (A1) LDRT reg+/-#imm12 -------------- */
16730   /* Load Register Unprivileged:
16731      ldrt<c> Rt, [Rn] {, #+/-imm12}
16732   */
16733   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,1) ) {
16734      UInt rT     = INSN(15,12);
16735      UInt rN     = INSN(19,16);
16736      UInt imm12  = INSN(11,0);
16737      UInt bU     = INSN(23,23);
16738      Bool valid  = True;
16739      if (rT == 15 || rN == 15 || rN == rT) valid = False;
16740      if (valid) {
16741         IRTemp newRt = newTemp(Ity_I32);
16742         loadGuardedLE( newRt,
16743                        ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
16744         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
16745         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16746                             getIRegA(rN), mkU32(imm12));
16747         putIRegA(rN, erN, condT, Ijk_Boring);
16748         DIP("ldrt%s r%u, [r%u], #%c%u\n",
16749             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
16750         goto decode_success;
16751      }
16752   }
16753
16754   /* -------------- (A2) LDRT reg+/-reg with shift -------------- */
16755   /* Load Register Unprivileged:
16756      ldrt<c> Rt, [Rn], +/-Rm{, shift}
16757   */
16758   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,1)
16759        && INSN(4,4) == 0 ) {
16760      UInt rT     = INSN(15,12);
16761      UInt rN     = INSN(19,16);
16762      UInt rM     = INSN(3,0);
16763      UInt imm5   = INSN(11,7);
16764      UInt bU     = INSN(23,23);
16765      UInt type   = INSN(6,5);
16766      Bool valid  = True;
16767      if (rT == 15 || rN == 15 || rN == rT || rM == 15
16768          /* || (ArchVersion() < 6 && rM == rN) */)
16769         valid = False;
16770      if (valid) {
16771         IRTemp newRt = newTemp(Ity_I32);
16772         loadGuardedLE( newRt,
16773                        ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
16774         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
16775         // dis_buf generated is slightly bogus, in fact.
16776         IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
16777                                                       type, imm5, dis_buf);
16778         putIRegA(rN, erN, condT, Ijk_Boring);
16779         DIP("ldrt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
16780         goto decode_success;
16781      }
16782   }
16783
16784   /* -------------- (A1) LDRBT reg+/-#imm12 -------------- */
16785   /* Load Register Byte Unprivileged:
16786      ldrbt<c> Rt, [Rn], #+/-imm12
16787   */
16788   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,1) ) {
16789      UInt rT     = INSN(15,12);
16790      UInt rN     = INSN(19,16);
16791      UInt imm12  = INSN(11,0);
16792      UInt bU     = INSN(23,23);
16793      Bool valid  = True;
16794      if (rT == 15 || rN == 15 || rN == rT) valid = False;
16795      if (valid) {
16796         IRTemp newRt = newTemp(Ity_I32);
16797         loadGuardedLE( newRt,
16798                        ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
16799         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
16800         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16801                             getIRegA(rN), mkU32(imm12));
16802         putIRegA(rN, erN, condT, Ijk_Boring);
16803         DIP("ldrbt%s r%u, [r%u], #%c%u\n",
16804             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
16805         goto decode_success;
16806      }
16807   }
16808
16809   /* -------------- (A2) LDRBT reg+/-reg with shift -------------- */
16810   /* Load Register Byte Unprivileged:
16811      ldrbt<c> Rt, [Rn], +/-Rm{, shift}
16812   */
16813   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,1)
16814        && INSN(4,4) == 0 ) {
16815      UInt rT     = INSN(15,12);
16816      UInt rN     = INSN(19,16);
16817      UInt rM     = INSN(3,0);
16818      UInt imm5   = INSN(11,7);
16819      UInt bU     = INSN(23,23);
16820      UInt type   = INSN(6,5);
16821      Bool valid  = True;
16822      if (rT == 15 || rN == 15 || rN == rT || rM == 15
16823          /* || (ArchVersion() < 6 && rM == rN) */)
16824         valid = False;
16825      if (valid) {
16826         IRTemp newRt = newTemp(Ity_I32);
16827         loadGuardedLE( newRt,
16828                        ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
16829         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
16830         // dis_buf generated is slightly bogus, in fact.
16831         IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
16832                                                       type, imm5, dis_buf);
16833         putIRegA(rN, erN, condT, Ijk_Boring);
16834         DIP("ldrbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
16835         goto decode_success;
16836      }
16837   }
16838
16839   /* -------------- (A1) LDRHT reg+#imm8 -------------- */
16840   /* Load Register Halfword Unprivileged:
16841      ldrht<c> Rt, [Rn] {, #+/-imm8}
16842   */
16843   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
16844       && INSN(7,4) == BITS4(1,0,1,1) ) {
16845      UInt rT    = INSN(15,12);
16846      UInt rN    = INSN(19,16);
16847      UInt bU    = INSN(23,23);
16848      UInt imm4H = INSN(11,8);
16849      UInt imm4L = INSN(3,0);
16850      UInt imm8  = (imm4H << 4) | imm4L;
16851      Bool valid = True;
16852      if (rT == 15 || rN == 15 || rN == rT)
16853         valid = False;
16854      if (valid) {
16855         IRTemp newRt = newTemp(Ity_I32);
16856         loadGuardedLE( newRt,
16857                        ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
16858         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
16859         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16860                             getIRegA(rN), mkU32(imm8));
16861         putIRegA(rN, erN, condT, Ijk_Boring);
16862         DIP("ldrht%s r%u, [r%u], #%c%u\n",
16863             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
16864         goto decode_success;
16865      }
16866   }
16867
16868   /* -------------- (A2) LDRHT reg+/-reg -------------- */
16869   /* Load Register Halfword Unprivileged:
16870      ldrht<c> Rt, [Rn], +/-Rm
16871   */
16872   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
16873       && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
16874      UInt rT    = INSN(15,12);
16875      UInt rN    = INSN(19,16);
16876      UInt rM    = INSN(3,0);
16877      UInt bU    = INSN(23,23);
16878      Bool valid = True;
16879      if (rT == 15 || rN == 15 || rN == rT || rM == 15)
16880         valid = False;
16881      if (valid) {
16882         IRTemp newRt = newTemp(Ity_I32);
16883         loadGuardedLE( newRt,
16884                        ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
16885         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
16886         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16887                             getIRegA(rN), getIRegA(rM));
16888         putIRegA(rN, erN, condT, Ijk_Boring);
16889         DIP("ldrht%s r%u, [r%u], %cr%u\n",
16890             nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
16891         goto decode_success;
16892      }
16893   }
16894
16895   /* -------------- (A1) LDRSHT reg+#imm8 -------------- */
16896   /* Load Register Signed Halfword Unprivileged:
16897      ldrsht<c> Rt, [Rn] {, #+/-imm8}
16898   */
16899   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
16900       && INSN(7,4) == BITS4(1,1,1,1)) {
16901      UInt rT    = INSN(15,12);
16902      UInt rN    = INSN(19,16);
16903      UInt bU    = INSN(23,23);
16904      UInt imm4H = INSN(11,8);
16905      UInt imm4L = INSN(3,0);
16906      UInt imm8  = (imm4H << 4) | imm4L;
16907      Bool valid = True;
16908      if (rN == 15 || rT == 15 || rN == rT)
16909         valid = False;
16910      if (valid) {
16911         IRTemp newRt = newTemp(Ity_I32);
16912         loadGuardedLE( newRt,
16913                        ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
16914         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
16915         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16916                             getIRegA(rN), mkU32(imm8));
16917         putIRegA(rN, erN, condT, Ijk_Boring);
16918         DIP("ldrsht%s r%u, [r%u], #%c%u\n",
16919             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
16920         goto decode_success;
16921      }
16922   }
16923
16924   /* -------------- (A2) LDRSHT reg+/-reg -------------- */
16925   /* Load Register Signed Halfword Unprivileged:
16926      ldrsht<c> Rt, [Rn], +/-Rm
16927   */
16928   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
16929       && INSN(11,4) == BITS8(0,0,0,0,1,1,1,1)) {
16930      UInt rT    = INSN(15,12);
16931      UInt rN    = INSN(19,16);
16932      UInt rM    = INSN(3,0);
16933      UInt bU    = INSN(23,23);
16934      Bool valid = True;
16935      if (rN == 15 || rT == 15 || rN == rT || rM == 15)
16936         valid = False;
16937      if (valid) {
16938         IRTemp newRt = newTemp(Ity_I32);
16939         loadGuardedLE( newRt,
16940                        ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
16941         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
16942         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16943                             getIRegA(rN), getIRegA(rM));
16944         putIRegA(rN, erN, condT, Ijk_Boring);
16945         DIP("ldrsht%s r%u, [r%u], %cr%u\n",
16946             nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
16947         goto decode_success;
16948      }
16949   }
16950
16951   /* -------------- (A1) LDRSBT reg+#imm8 -------------- */
16952   /* Load Register Signed Byte Unprivileged:
16953      ldrsbt<c> Rt, [Rn] {, #+/-imm8}
16954   */
16955   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
16956       && INSN(7,4) == BITS4(1,1,0,1)) {
16957      UInt rT    = INSN(15,12);
16958      UInt rN    = INSN(19,16);
16959      UInt bU    = INSN(23,23);
16960      UInt imm4H = INSN(11,8);
16961      UInt imm4L = INSN(3,0);
16962      UInt imm8  = (imm4H << 4) | imm4L;
16963      Bool valid = True;
16964      if (rT == 15 || rN == 15 || rN == rT)
16965         valid = False;
16966      if (valid) {
16967         IRTemp newRt = newTemp(Ity_I32);
16968         loadGuardedLE( newRt,
16969                        ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
16970         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
16971         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16972                             getIRegA(rN), mkU32(imm8));
16973         putIRegA(rN, erN, condT, Ijk_Boring);
16974         DIP("ldrsbt%s r%u, [r%u], #%c%u\n",
16975             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
16976         goto decode_success;
16977      }
16978   }
16979
16980   /* -------------- (A2) LDRSBT reg+/-reg -------------- */
16981   /* Load Register Signed Byte Unprivileged:
16982      ldrsbt<c> Rt, [Rn], +/-Rm
16983   */
16984   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
16985       && INSN(11,4) == BITS8(0,0,0,0,1,1,0,1)) {
16986      UInt rT    = INSN(15,12);
16987      UInt rN    = INSN(19,16);
16988      UInt bU    = INSN(23,23);
16989      UInt rM    = INSN(3,0);
16990      Bool valid = True;
16991      if (rT == 15 || rN == 15 || rN == rT || rM == 15)
16992         valid = False;
16993      if (valid) {
16994         IRTemp newRt = newTemp(Ity_I32);
16995         loadGuardedLE( newRt,
16996                        ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
16997         putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
16998         IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
16999                             getIRegA(rN), getIRegA(rM));
17000         putIRegA(rN, erN, condT, Ijk_Boring);
17001         DIP("ldrsbt%s r%u, [r%u], %cr%u\n",
17002             nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
17003         goto decode_success;
17004      }
17005   }
17006
17007   /* -------------- (A1) STRBT reg+#imm12 -------------- */
17008   /* Store Register Byte Unprivileged:
17009      strbt<c> Rt, [Rn], #+/-imm12
17010   */
17011   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,0) ) {
17012      UInt rT     = INSN(15,12);
17013      UInt rN     = INSN(19,16);
17014      UInt imm12  = INSN(11,0);
17015      UInt bU     = INSN(23,23);
17016      Bool valid = True;
17017      if (rT == 15 || rN == 15 || rN == rT) valid = False;
17018      if (valid) {
17019         IRExpr* address = getIRegA(rN);
17020         IRExpr* data = unop(Iop_32to8, getIRegA(rT));
17021         storeGuardedLE( address, data, condT);
17022         IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
17023                               getIRegA(rN), mkU32(imm12));
17024         putIRegA(rN, newRn, condT, Ijk_Boring);
17025         DIP("strbt%s r%u, [r%u], #%c%u\n",
17026             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
17027         goto decode_success;
17028      }
17029   }
17030
17031   /* -------------- (A2) STRBT reg+/-reg -------------- */
17032   /* Store Register Byte Unprivileged:
17033      strbt<c> Rt, [Rn], +/-Rm{, shift}
17034   */
17035   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,0)
17036       && INSN(4,4) == 0) {
17037      UInt rT     = INSN(15,12);
17038      UInt rN     = INSN(19,16);
17039      UInt imm5   = INSN(11,7);
17040      UInt type   = INSN(6,5);
17041      UInt rM     = INSN(3,0);
17042      UInt bU     = INSN(23,23);
17043      Bool valid  = True;
17044      if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
17045      if (valid) {
17046         IRExpr* address = getIRegA(rN);
17047         IRExpr* data = unop(Iop_32to8, getIRegA(rT));
17048         storeGuardedLE( address, data, condT);
17049         // dis_buf generated is slightly bogus, in fact.
17050         IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
17051                                                       type, imm5, dis_buf);
17052         putIRegA(rN, erN, condT, Ijk_Boring);
17053         DIP("strbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
17054         goto decode_success;
17055      }
17056   }
17057
17058   /* -------------- (A1) STRHT reg+#imm8 -------------- */
17059   /* Store Register Halfword Unprivileged:
17060      strht<c> Rt, [Rn], #+/-imm8
17061   */
17062   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,0)
17063       && INSN(7,4) == BITS4(1,0,1,1) ) {
17064      UInt rT    = INSN(15,12);
17065      UInt rN    = INSN(19,16);
17066      UInt imm4H = INSN(11,8);
17067      UInt imm4L = INSN(3,0);
17068      UInt imm8  = (imm4H << 4) | imm4L;
17069      UInt bU    = INSN(23,23);
17070      Bool valid = True;
17071      if (rT == 15 || rN == 15 || rN == rT) valid = False;
17072      if (valid) {
17073         IRExpr* address = getIRegA(rN);
17074         IRExpr* data = unop(Iop_32to16, getIRegA(rT));
17075         storeGuardedLE( address, data, condT);
17076         IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
17077                               getIRegA(rN), mkU32(imm8));
17078         putIRegA(rN, newRn, condT, Ijk_Boring);
17079         DIP("strht%s r%u, [r%u], #%c%u\n",
17080             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
17081         goto decode_success;
17082      }
17083   }
17084
17085   /* -------------- (A2) STRHT reg+reg -------------- */
17086   /* Store Register Halfword Unprivileged:
17087      strht<c> Rt, [Rn], +/-Rm
17088   */
17089   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,0)
17090       && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
17091      UInt rT    = INSN(15,12);
17092      UInt rN    = INSN(19,16);
17093      UInt rM    = INSN(3,0);
17094      UInt bU    = INSN(23,23);
17095      Bool valid = True;
17096      if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
17097      if (valid) {
17098         IRExpr* address = getIRegA(rN);
17099         IRExpr* data = unop(Iop_32to16, getIRegA(rT));
17100         storeGuardedLE( address, data, condT);
17101         IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
17102                               getIRegA(rN), getIRegA(rM));
17103         putIRegA(rN, newRn, condT, Ijk_Boring);
17104         DIP("strht%s r%u, [r%u], %cr%u\n",
17105             nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
17106         goto decode_success;
17107      }
17108   }
17109
17110   /* -------------- (A1) STRT reg+imm12 -------------- */
17111   /* Store Register Unprivileged:
17112      strt<c> Rt, [Rn], #+/-imm12
17113   */
17114   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,0) ) {
17115      UInt rT    = INSN(15,12);
17116      UInt rN    = INSN(19,16);
17117      UInt imm12 = INSN(11,0);
17118      UInt bU    = INSN(23,23);
17119      Bool valid = True;
17120      if (rN == 15 || rN == rT) valid = False;
17121      if (valid) {
17122         IRExpr* address = getIRegA(rN);
17123         storeGuardedLE( address, getIRegA(rT), condT);
17124         IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
17125                               getIRegA(rN), mkU32(imm12));
17126         putIRegA(rN, newRn, condT, Ijk_Boring);
17127         DIP("strt%s r%u, [r%u], %c%u\n",
17128             nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
17129         goto decode_success;
17130      }
17131   }
17132
17133   /* -------------- (A2) STRT reg+reg -------------- */
17134   /* Store Register Unprivileged:
17135      strt<c> Rt, [Rn], +/-Rm{, shift}
17136   */
17137   if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,0)
17138       && INSN(4,4) == 0 ) {
17139      UInt rT    = INSN(15,12);
17140      UInt rN    = INSN(19,16);
17141      UInt rM    = INSN(3,0);
17142      UInt type  = INSN(6,5);
17143      UInt imm5  = INSN(11,7);
17144      UInt bU    = INSN(23,23);
17145      Bool valid = True;
17146      if (rN == 15 || rN == rT || rM == 15) valid = False;
17147      /* FIXME We didn't do:
17148         if ArchVersion() < 6 && rM == rN then UNPREDICTABLE */
17149      if (valid) {
17150         storeGuardedLE( getIRegA(rN), getIRegA(rT), condT);
17151         // dis_buf generated is slightly bogus, in fact.
17152         IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
17153                                                       type, imm5, dis_buf);
17154         putIRegA(rN, erN, condT, Ijk_Boring);
17155         DIP("strt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
17156         goto decode_success;
17157      }
17158   }
17159
17160   /* ----------------------------------------------------------- */
17161   /* -- ARMv7 instructions                                    -- */
17162   /* ----------------------------------------------------------- */
17163
17164   /* -------------- read CP15 TPIDRURO register ------------- */
17165   /* mrc     p15, 0, r0, c13, c0, 3  up to
17166      mrc     p15, 0, r14, c13, c0, 3
17167   */
17168   /* I don't know whether this is really v7-only.  But anyway, we
17169      have to support it since arm-linux uses TPIDRURO as a thread
17170      state register. */
17171   if (0x0E1D0F70 == (insn & 0x0FFF0FFF)) {
17172      UInt rD = INSN(15,12);
17173      if (rD <= 14) {
17174         /* skip r15, that's too stupid to handle */
17175         putIRegA(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32),
17176                      condT, Ijk_Boring);
17177         DIP("mrc%s p15,0, r%u, c13, c0, 3\n", nCC(INSN_COND), rD);
17178         goto decode_success;
17179      }
17180      /* fall through */
17181   }
17182
17183   /* Handle various kinds of barriers.  This is rather indiscriminate
17184      in the sense that they are all turned into an IR Fence, which
17185      means we don't know which they are, so the back end has to
17186      re-emit them all when it comes across an IR Fence.
17187   */
17188   /* v6 */ /* mcr 15, 0, rT, c7, c10, 5 */
17189   if (0xEE070FBA == (insn & 0xFFFF0FFF)) {
17190      UInt rT = INSN(15,12);
17191      if (rT <= 14) {
17192         /* mcr 15, 0, rT, c7, c10, 5 (v6) equiv to DMB (v7).  Data
17193            Memory Barrier -- ensures ordering of memory accesses. */
17194         stmt( IRStmt_MBE(Imbe_Fence) );
17195         DIP("mcr 15, 0, r%u, c7, c10, 5 (data memory barrier)\n", rT);
17196         goto decode_success;
17197      }
17198      /* fall through */
17199   }
17200   /* other flavours of barrier */
17201   switch (insn) {
17202      case 0xEE070F9A: /* v6 */
17203         /* mcr 15, 0, r0, c7, c10, 4 (v6) equiv to DSB (v7).  Data
17204            Synch Barrier -- ensures completion of memory accesses. */
17205         stmt( IRStmt_MBE(Imbe_Fence) );
17206         DIP("mcr 15, 0, r0, c7, c10, 4 (data synch barrier)\n");
17207         goto decode_success;
17208      case 0xEE070F95: /* v6 */
17209         /* mcr 15, 0, r0, c7, c5, 4 (v6) equiv to ISB (v7).
17210            Instruction Synchronisation Barrier (or Flush Prefetch
17211            Buffer) -- a pipe flush, I think.  I suspect we could
17212            ignore those, but to be on the safe side emit a fence
17213            anyway. */
17214         stmt( IRStmt_MBE(Imbe_Fence) );
17215         DIP("mcr 15, 0, r0, c7, c5, 4 (insn synch barrier)\n");
17216         goto decode_success;
17217      default:
17218         break;
17219   }
17220
17221   /* ----------------------------------------------------------- */
17222   /* -- VFP (CP 10, CP 11) instructions (in ARM mode)         -- */
17223   /* ----------------------------------------------------------- */
17224
17225   if (INSN_COND != ARMCondNV) {
17226      Bool ok_vfp = decode_CP10_CP11_instruction (
17227                       &dres, INSN(27,0), condT, INSN_COND,
17228                       False/*!isT*/
17229                    );
17230      if (ok_vfp)
17231         goto decode_success;
17232   }
17233
17234   /* ----------------------------------------------------------- */
17235   /* -- NEON instructions (in ARM mode)                       -- */
17236   /* ----------------------------------------------------------- */
17237
17238   /* These are all in NV space, and so are taken care of (far) above,
17239      by a call from this function to decode_NV_instruction(). */
17240
17241   /* ----------------------------------------------------------- */
17242   /* -- v6 media instructions (in ARM mode)                   -- */
17243   /* ----------------------------------------------------------- */
17244
17245   { Bool ok_v6m = decode_V6MEDIA_instruction(
17246                       &dres, INSN(27,0), condT, INSN_COND,
17247                       False/*!isT*/
17248                   );
17249     if (ok_v6m)
17250        goto decode_success;
17251   }
17252
17253   /* ----------------------------------------------------------- */
17254   /* -- Undecodable                                           -- */
17255   /* ----------------------------------------------------------- */
17256
17257   goto decode_failure;
17258   /*NOTREACHED*/
17259
17260  decode_failure:
17261   /* All decode failures end up here. */
17262   if (sigill_diag) {
17263      vex_printf("disInstr(arm): unhandled instruction: "
17264                 "0x%x\n", insn);
17265      vex_printf("                 cond=%d(0x%x) 27:20=%u(0x%02x) "
17266                                   "4:4=%d "
17267                                   "3:0=%u(0x%x)\n",
17268                 (Int)INSN_COND, (UInt)INSN_COND,
17269                 (Int)INSN(27,20), (UInt)INSN(27,20),
17270                 (Int)INSN(4,4),
17271                 (Int)INSN(3,0), (UInt)INSN(3,0) );
17272   }
17273
17274   /* Tell the dispatcher that this insn cannot be decoded, and so has
17275      not been executed, and (is currently) the next to be executed.
17276      R15 should be up-to-date since it is made so at the start of each
17277      insn, but nevertheless be paranoid and update it again right
17278      now. */
17279   vassert(0 == (guest_R15_curr_instr_notENC & 3));
17280   llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) );
17281   dres.whatNext    = Dis_StopHere;
17282   dres.jk_StopHere = Ijk_NoDecode;
17283   dres.len         = 0;
17284   return dres;
17285
17286  decode_success:
17287   /* All decode successes end up here. */
17288   DIP("\n");
17289
17290   vassert(dres.len == 4 || dres.len == 20);
17291
17292   /* Now then.  Do we have an implicit jump to r15 to deal with? */
17293   if (r15written) {
17294      /* If we have a jump to deal with, we assume that there's been no
17295         other competing branch stuff previously generated for this
17296         insn.  That's reasonable, in the sense that the ARM insn set
17297         appears to declare as "Unpredictable" any instruction which
17298         generates more than one possible new value for r15.  Hence
17299         just assert.  The decoders themselves should check against
17300         all such instructions which are thusly Unpredictable, and
17301         decline to decode them.  Hence we should never get here if we
17302         have competing new values for r15, and hence it is safe to
17303         assert here. */
17304      vassert(dres.whatNext == Dis_Continue);
17305      vassert(irsb->next == NULL);
17306      vassert(irsb->jumpkind == Ijk_Boring);
17307      /* If r15 is unconditionally written, terminate the block by
17308         jumping to it.  If it's conditionally written, still
17309         terminate the block (a shame, but we can't do side exits to
17310         arbitrary destinations), but first jump to the next
17311         instruction if the condition doesn't hold. */
17312      /* We can't use getIReg(15) to get the destination, since that
17313         will produce r15+8, which isn't what we want.  Must use
17314         llGetIReg(15) instead. */
17315      if (r15guard == IRTemp_INVALID) {
17316         /* unconditional */
17317      } else {
17318         /* conditional */
17319         stmt( IRStmt_Exit(
17320                  unop(Iop_32to1,
17321                       binop(Iop_Xor32,
17322                             mkexpr(r15guard), mkU32(1))),
17323                  r15kind,
17324                  IRConst_U32(guest_R15_curr_instr_notENC + 4),
17325                  OFFB_R15T
17326         ));
17327      }
17328      /* This seems crazy, but we're required to finish the insn with
17329         a write to the guest PC.  As usual we rely on ir_opt to tidy
17330         up later. */
17331      llPutIReg(15, llGetIReg(15));
17332      dres.whatNext    = Dis_StopHere;
17333      dres.jk_StopHere = r15kind;
17334   } else {
17335      /* Set up the end-state in the normal way. */
17336      switch (dres.whatNext) {
17337         case Dis_Continue:
17338            llPutIReg(15, mkU32(dres.len + guest_R15_curr_instr_notENC));
17339            break;
17340         case Dis_ResteerU:
17341         case Dis_ResteerC:
17342            llPutIReg(15, mkU32(dres.continueAt));
17343            break;
17344         case Dis_StopHere:
17345            break;
17346         default:
17347            vassert(0);
17348      }
17349   }
17350
17351   return dres;
17352
17353#  undef INSN_COND
17354#  undef INSN
17355}
17356
17357
17358/*------------------------------------------------------------*/
17359/*--- Disassemble a single Thumb2 instruction              ---*/
17360/*------------------------------------------------------------*/
17361
17362static const UChar it_length_table[256]; /* fwds; indexed by the low 8 bits of a candidate IT insn, gives its # guarded insns (1..4) */
17363
17364/* NB: in Thumb mode we do fetches of regs with getIRegT, which
17365   automagically adds 4 to fetches of r15.  However, writes to regs
17366   are done with putIRegT, which disallows writes to r15.  Hence any
17367   r15 writes and associated jumps have to be done "by hand". */
17368
17369/* Disassemble a single Thumb instruction into IR.  The instruction is
17370   located in host memory at guest_instr, and has (decoded) guest IP
17371   of guest_R15_curr_instr_notENC, which will have been set before the
17372   call here. */
17373
17374static
17375DisResult disInstr_THUMB_WRK (
17376             Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
17377             Bool         resteerCisOk,
17378             void*        callback_opaque,
17379             UChar*       guest_instr,
17380             VexArchInfo* archinfo,
17381             VexAbiInfo*  abiinfo,
17382             Bool         sigill_diag
17383          )
17384{
17385   /* A macro to fish bits out of insn0.  There's also INSN1, to fish
17386      bits out of insn1, but that's defined only after the end of the
17387      16-bit insn decoder, so as to stop it mistakenly being used
17388      therein. */
17389#  define INSN0(_bMax,_bMin)  SLICE_UInt(((UInt)insn0), (_bMax), (_bMin))
17390
17391   DisResult dres;
17392   UShort    insn0; /*  first 16 bits of the insn */
17393   UShort    insn1; /* second 16 bits of the insn */
17394   //Bool      allow_VFP = False;
17395   //UInt      hwcaps = archinfo->hwcaps;
17396   HChar     dis_buf[128];  // big enough to hold LDMIA etc text
17397
17398   /* Summary result of the ITxxx backwards analysis: False == safe
17399      but suboptimal. */
17400   Bool guaranteedUnconditional = False;
17401
17402   /* What insn variants are we supporting today? */
17403   //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
17404   // etc etc
17405
17406   /* Set result defaults. */
17407   dres.whatNext    = Dis_Continue;
17408   dres.len         = 2;
17409   dres.continueAt  = 0;
17410   dres.jk_StopHere = Ijk_INVALID;
17411
17412   /* Set default actions for post-insn handling of writes to r15, if
17413      required. */
17414   r15written = False;
17415   r15guard   = IRTemp_INVALID; /* unconditional */
17416   r15kind    = Ijk_Boring;
17417
17418   /* Insns could be 2 or 4 bytes long.  Just get the first 16 bits at
17419      this point.  If we need the second 16, get them later.  We can't
17420      get them both out immediately because it risks a fault (very
17421      unlikely, but ..) if the second 16 bits aren't actually
17422      necessary. */
17423   insn0 = getUShortLittleEndianly( guest_instr );
17424   insn1 = 0; /* We'll get it later, once we know we need it. */
17425
17426   /* Similarly, will set this later. */
17427   IRTemp old_itstate = IRTemp_INVALID;
17428
17429   if (0) vex_printf("insn: 0x%x\n", insn0);
17430
17431   DIP("\t(thumb) 0x%x:  ", (UInt)guest_R15_curr_instr_notENC);
17432
17433   vassert(0 == (guest_R15_curr_instr_notENC & 1));
17434
17435   /* ----------------------------------------------------------- */
17436   /* Spot "Special" instructions (see comment at top of file). */
17437   {
17438      UChar* code = (UChar*)guest_instr;
17439      /* Spot the 16-byte preamble:
17440
17441         ea4f 0cfc  mov.w   ip, ip, ror #3
17442         ea4f 3c7c  mov.w   ip, ip, ror #13
17443         ea4f 7c7c  mov.w   ip, ip, ror #29
17444         ea4f 4cfc  mov.w   ip, ip, ror #19
17445      */
17446      UInt word1 = 0x0CFCEA4F;
17447      UInt word2 = 0x3C7CEA4F;
17448      UInt word3 = 0x7C7CEA4F;
17449      UInt word4 = 0x4CFCEA4F;
17450      if (getUIntLittleEndianly(code+ 0) == word1 &&
17451          getUIntLittleEndianly(code+ 4) == word2 &&
17452          getUIntLittleEndianly(code+ 8) == word3 &&
17453          getUIntLittleEndianly(code+12) == word4) {
17454         /* Got a "Special" instruction preamble.  Which one is it? */
17455         // 0x 0A 0A EA 4A
17456         if (getUIntLittleEndianly(code+16) == 0x0A0AEA4A
17457                                               /* orr.w r10,r10,r10 */) {
17458            /* R3 = client_request ( R4 ) */
17459            DIP("r3 = client_request ( %%r4 )\n");
17460            llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
17461            dres.jk_StopHere = Ijk_ClientReq;
17462            dres.whatNext    = Dis_StopHere;
17463            goto decode_success;
17464         }
17465         else
17466         // 0x 0B 0B EA 4B
17467         if (getUIntLittleEndianly(code+16) == 0x0B0BEA4B
17468                                               /* orr r11,r11,r11 */) {
17469            /* R3 = guest_NRADDR */
17470            DIP("r3 = guest_NRADDR\n");
17471            dres.len = 20;
17472            llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
17473            goto decode_success;
17474         }
17475         else
17476         // 0x 0C 0C EA 4C
17477         if (getUIntLittleEndianly(code+16) == 0x0C0CEA4C
17478                                               /* orr r12,r12,r12 */) {
17479            /*  branch-and-link-to-noredir R4 */
17480            DIP("branch-and-link-to-noredir r4\n");
17481            llPutIReg(14, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
17482            llPutIReg(15, getIRegT(4));
17483            dres.jk_StopHere = Ijk_NoRedir;
17484            dres.whatNext    = Dis_StopHere;
17485            goto decode_success;
17486         }
17487         else
17488         // 0x 09 09 EA 49
17489         if (getUIntLittleEndianly(code+16) == 0x0909EA49
17490                                               /* orr r9,r9,r9 */) {
17491            /* IR injection */
17492            DIP("IR injection\n");
17493            vex_inject_ir(irsb, Iend_LE);
17494            // Invalidate the current insn. The reason is that the IRop we're
17495            // injecting here can change. In which case the translation has to
17496            // be redone. For ease of handling, we simply invalidate all the
17497            // time.
17498            stmt(IRStmt_Put(OFFB_TISTART, mkU32(guest_R15_curr_instr_notENC)));
17499            stmt(IRStmt_Put(OFFB_TILEN,   mkU32(20)));
17500            llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
17501            dres.whatNext    = Dis_StopHere;
17502            dres.jk_StopHere = Ijk_TInval;
17503            goto decode_success;
17504         }
17505         /* We don't know what it is.  Set insn0 so decode_failure
17506            can print the insn following the Special-insn preamble. */
17507         insn0 = getUShortLittleEndianly(code+16);
17508         goto decode_failure;
17509         /*NOTREACHED*/
17510      }
17511
17512   }
17513
17514   /* ----------------------------------------------------------- */
17515
17516   /* Main Thumb instruction decoder starts here.  It's a series of
17517      switches which examine ever longer bit sequences at the MSB of
17518      the instruction word, first for 16-bit insns, then for 32-bit
17519      insns. */
17520
17521   /* --- BEGIN ITxxx optimisation analysis --- */
17522   /* This is a crucial optimisation for the ITState boilerplate that
17523      follows.  Examine the 9 halfwords preceding this instruction,
17524      and if we are absolutely sure that none of them constitute an
17525      'it' instruction, then we can be sure that this instruction is
17526      not under the control of any 'it' instruction, and so
17527      guest_ITSTATE must be zero.  So write zero into ITSTATE right
17528      now, so that iropt can fold out almost all of the resulting
17529      junk.
17530
17531      If we aren't sure, we can always safely skip this step.  So be a
17532      bit conservative about it: only poke around in the same page as
17533      this instruction, lest we get a fault from the previous page
17534      that would not otherwise have happened.  The saving grace is
17535      that such skipping is pretty rare -- it only happens,
17536      statistically, 18/4096ths of the time, so is judged unlikely to
17537      be a performance problem.
17538
17539      FIXME: do better.  Take into account the number of insns covered
17540      by any IT insns we find, to rule out cases where an IT clearly
17541      cannot cover this instruction.  This would improve behaviour for
17542      branch targets immediately following an IT-guarded group that is
17543      not of full length.  Eg, (and completely ignoring issues of 16-
17544      vs 32-bit insn length):
17545
17546             ite cond
17547             insn1
17548             insn2
17549      label: insn3
17550             insn4
17551
17552      The 'it' only conditionalises insn1 and insn2.  However, the
17553      current analysis is conservative and considers insn3 and insn4
17554      also possibly guarded.  Hence if 'label:' is the start of a hot
17555      loop we will get a big performance hit.
17556   */
17557   {
17558      /* Summary result of this analysis: False == safe but
17559         suboptimal. */
17560      vassert(guaranteedUnconditional == False);
17561
17562      UInt pc = guest_R15_curr_instr_notENC;
17563      vassert(0 == (pc & 1));
17564
17565      UInt pageoff = pc & 0xFFF;
17566      if (pageoff >= 18) {
17567         /* It's safe to poke about in the 9 halfwords preceding this
17568            insn.  So, have a look at them. */
17569         guaranteedUnconditional = True; /* assume no 'it' insn found,
17570                                            till we do */
17571         UShort* hwp = (UShort*)(HWord)pc;
17572         Int i;
17573         for (i = -1; i >= -9; i--) {
17574            /* We're in the same page.  (True, but commented out due
17575               to expense.) */
17576            /*
17577            vassert( ( ((UInt)(&hwp[i])) & 0xFFFFF000 )
17578                      == ( pc & 0xFFFFF000 ) );
17579            */
17580            /* All valid IT instructions must have the form 0xBFxy,
17581               where x can be anything, but y must be nonzero.  Find
17582               the number of insns covered by it (1 .. 4) and check to
17583               see if it can possibly reach up to the instruction in
17584               question.  Some (x,y) combinations mean UNPREDICTABLE,
17585               and the table is constructed to be conservative by
17586               returning 4 for those cases, so the analysis is safe
17587               even if the code uses unpredictable IT instructions (in
17588               which case its authors are nuts, but hey.)  */
17589            UShort hwp_i = hwp[i];
17590            if (UNLIKELY((hwp_i & 0xFF00) == 0xBF00 && (hwp_i & 0xF) != 0)) {
17591               /* might be an 'it' insn. */
17592               /* # guarded insns */
17593               Int n_guarded = (Int)it_length_table[hwp_i & 0xFF];
17594               vassert(n_guarded >= 1 && n_guarded <= 4);
17595               if (n_guarded * 2 /* # guarded HWs, worst case */
17596                   > (-(i+1)))   /* -(i+1): # remaining HWs after the IT */
17597                   /* -(i+0) also seems to work, even though I think
17598                      it's wrong.  I don't understand that. */
17599                  guaranteedUnconditional = False;
17600               break;
17601            }
17602         }
17603      }
17604   }
17605   /* --- END ITxxx optimisation analysis --- */
17606
17607   /* Generate the guarding condition for this insn, by examining
17608      ITSTATE.  Assign it to condT.  Also, generate new
17609      values for ITSTATE ready for stuffing back into the
17610      guest state, but don't actually do the Put yet, since it will
17611      need to be stuffed back in only after the instruction gets to a
17612      point where it is sure to complete.  Mostly we let the code at
17613      decode_success handle this, but in cases where the insn contains
17614      a side exit, we have to update them before the exit. */
17615
17616   /* If the ITxxx optimisation analysis above could not prove that
17617      this instruction is guaranteed unconditional, we insert a
17618      lengthy IR preamble to compute the guarding condition at
17619      runtime.  If it can prove it (which obviously we hope is the
17620      normal case) then we insert a minimal preamble, which is
17621      equivalent to setting guest_ITSTATE to zero and then folding
17622      that through the full preamble (which completely disappears). */
17623
17624   IRTemp condT              = IRTemp_INVALID;
17625   IRTemp cond_AND_notInIT_T = IRTemp_INVALID;
17626
17627   IRTemp new_itstate        = IRTemp_INVALID;
17628   vassert(old_itstate == IRTemp_INVALID);
17629
17630   if (guaranteedUnconditional) {
17631      /* BEGIN "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
17632
17633      // ITSTATE = 0 :: I32
17634      IRTemp z32 = newTemp(Ity_I32);
17635      assign(z32, mkU32(0));
17636      put_ITSTATE(z32);
17637
17638      // old_itstate = 0 :: I32
17639      //
17640      // old_itstate = get_ITSTATE();
17641      old_itstate = z32; /* 0 :: I32 */
17642
17643      // new_itstate = old_itstate >> 8
17644      //             = 0 >> 8
17645      //             = 0 :: I32
17646      //
17647      // new_itstate = newTemp(Ity_I32);
17648      // assign(new_itstate,
17649      //        binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
17650      new_itstate = z32;
17651
17652      // ITSTATE = 0 :: I32(again)
17653      //
17654      // put_ITSTATE(new_itstate);
17655
17656      // condT1 = calc_cond_dyn( xor(and(old_itstate,0xF0), 0xE0) )
17657      //        = calc_cond_dyn( xor(0,0xE0) )
17658      //        = calc_cond_dyn ( 0xE0 )
17659      //        = 1 :: I32
17660      // Not that this matters, since the computed value is not used:
17661      // see condT folding below
17662      //
17663      // IRTemp condT1 = newTemp(Ity_I32);
17664      // assign(condT1,
17665      //        mk_armg_calculate_condition_dyn(
17666      //           binop(Iop_Xor32,
17667      //                 binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
17668      //                 mkU32(0xE0))
17669      //       )
17670      // );
17671
17672      // condT = 32to8(and32(old_itstate,0xF0)) == 0  ? 1  : condT1
17673      //       = 32to8(and32(0,0xF0)) == 0  ? 1  : condT1
17674      //       = 32to8(0) == 0  ? 1  : condT1
17675      //       = 0 == 0  ? 1  : condT1
17676      //       = 1
17677      //
17678      // condT = newTemp(Ity_I32);
17679      // assign(condT, IRExpr_ITE(
17680      //                  unop(Iop_32to8, binop(Iop_And32,
17681      //                                        mkexpr(old_itstate),
17682      //                                        mkU32(0xF0))),
17683      //                  mkexpr(condT1),
17684      //                  mkU32(1))
17685      //       ));
17686      condT = newTemp(Ity_I32);
17687      assign(condT, mkU32(1));
17688
17689      // notInITt = xor32(and32(old_itstate, 1), 1)
17690      //          = xor32(and32(0, 1), 1)
17691      //          = xor32(0, 1)
17692      //          = 1 :: I32
17693      //
17694      // IRTemp notInITt = newTemp(Ity_I32);
17695      // assign(notInITt,
17696      //        binop(Iop_Xor32,
17697      //              binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
17698      //              mkU32(1)));
17699
17700      // cond_AND_notInIT_T = and32(notInITt, condT)
17701      //                    = and32(1, 1)
17702      //                    = 1
17703      //
17704      // cond_AND_notInIT_T = newTemp(Ity_I32);
17705      // assign(cond_AND_notInIT_T,
17706      //        binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
17707      cond_AND_notInIT_T = condT; /* 1 :: I32 */
17708
17709      /* END "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
17710   } else {
17711      /* BEGIN { STANDARD PREAMBLE; } */
17712
17713      old_itstate = get_ITSTATE();
17714
17715      new_itstate = newTemp(Ity_I32);
17716      assign(new_itstate,
17717             binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
17718
17719      put_ITSTATE(new_itstate);
17720
17721      /* Same strategy as for ARM insns: generate a condition
17722         temporary at this point (or IRTemp_INVALID, meaning
17723         unconditional).  We leave it to lower-level instruction
17724         decoders to decide whether they can generate straight-line
17725         code, or whether they must generate a side exit before the
17726         instruction.  condT :: Ity_I32 and is always either zero or
17727         one. */
17728      IRTemp condT1 = newTemp(Ity_I32);
17729      assign(condT1,
17730             mk_armg_calculate_condition_dyn(
17731                binop(Iop_Xor32,
17732                      binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
17733                      mkU32(0xE0))
17734            )
17735      );
17736
17737      /* This is a bit complex, but needed to make Memcheck understand
17738         that, if the condition in old_itstate[7:4] denotes AL (that
17739         is, if this instruction is to be executed unconditionally),
17740         then condT does not depend on the results of calling the
17741         helper.
17742
17743         We test explicitly for old_itstate[7:4] == AL ^ 0xE, and in
17744         that case set condT directly to 1.  Else we use the results
17745         of the helper.  Since old_itstate is always defined and
17746         because Memcheck does lazy V-bit propagation through ITE,
17747         this will cause condT to always be a defined 1 if the
17748         condition is 'AL'.  From an execution semantics point of view
17749         this is irrelevant since we're merely duplicating part of the
17750         behaviour of the helper.  But it makes it clear to Memcheck,
17751         in this case, that condT does not in fact depend on the
17752         contents of the condition code thunk.  Without it, we get
17753         quite a lot of false errors.
17754
17755         So, just to clarify: from a straight semantics point of view,
17756         we can simply do "assign(condT, mkexpr(condT1))", and the
17757         simulator still runs fine.  It's just that we get loads of
17758         false errors from Memcheck. */
17759      condT = newTemp(Ity_I32);
17760      assign(condT, IRExpr_ITE(
17761                       binop(Iop_CmpNE32, binop(Iop_And32,
17762                                                mkexpr(old_itstate),
17763                                                mkU32(0xF0)),
17764                                          mkU32(0)),
17765                       mkexpr(condT1),
17766                       mkU32(1)
17767            ));
17768
17769      /* Something we don't have in ARM: generate a 0 or 1 value
17770         indicating whether or not we are in an IT block (NB: 0 = in
17771         IT block, 1 = not in IT block).  This is used to gate
17772         condition code updates in 16-bit Thumb instructions. */
17773      IRTemp notInITt = newTemp(Ity_I32);
17774      assign(notInITt,
17775             binop(Iop_Xor32,
17776                   binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
17777                   mkU32(1)));
17778
17779      /* Compute 'condT && notInITt' -- that is, the instruction is
17780         going to execute, and we're not in an IT block.  This is the
17781         gating condition for updating condition codes in 16-bit Thumb
17782         instructions, except for CMP, CMN and TST. */
17783      cond_AND_notInIT_T = newTemp(Ity_I32);
17784      assign(cond_AND_notInIT_T,
17785             binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
17786      /* END { STANDARD PREAMBLE; } */
17787   }
17788
17789
17790   /* At this point:
17791      * ITSTATE has been updated
17792      * condT holds the guarding condition for this instruction (0 or 1),
17793      * notInITt is 1 if we're in "normal" code, 0 if in an IT block
17794      * cond_AND_notInIT_T is the AND of the above two.
17795
17796      If the instruction proper can't trap, then there's nothing else
17797      to do w.r.t. ITSTATE -- just go and generate IR for the
17798      insn, taking into account the guarding condition.
17799
17800      If, however, the instruction might trap, then we must back up
17801      ITSTATE to the old value, and re-update it after the potentially
17802      trapping IR section.  A trap can happen either via a memory
17803      reference or because we need to throw SIGILL.
17804
17805      If an instruction has a side exit, we need to be sure that any
17806      ITSTATE backup is re-updated before the side exit.
17807   */
17808
17809   /* ----------------------------------------------------------- */
17810   /* --                                                       -- */
17811   /* -- Thumb 16-bit integer instructions                     -- */
17812   /* --                                                       -- */
17813   /* -- IMPORTANT: references to insn1 or INSN1 are           -- */
17814   /* --            not allowed in this section                -- */
17815   /* --                                                       -- */
17816   /* ----------------------------------------------------------- */
17817
17818   /* 16-bit instructions inside an IT block, apart from CMP, CMN and
17819      TST, do not set the condition codes.  Hence we must dynamically
17820      test for this case for every condition code update. */
17821
17822   IROp   anOp   = Iop_INVALID;
17823   const HChar* anOpNm = NULL;
17824
17825   /* ================ 16-bit 15:6 cases ================ */
17826
17827   switch (INSN0(15,6)) {
17828
17829   case 0x10a:   // CMP
17830   case 0x10b: { // CMN
17831      /* ---------------- CMP Rn, Rm ---------------- */
17832      Bool   isCMN = INSN0(15,6) == 0x10b;
17833      UInt   rN    = INSN0(2,0);
17834      UInt   rM    = INSN0(5,3);
17835      IRTemp argL  = newTemp(Ity_I32);
17836      IRTemp argR  = newTemp(Ity_I32);
17837      assign( argL, getIRegT(rN) );
17838      assign( argR, getIRegT(rM) );
17839      /* Update flags regardless of whether in an IT block or not. */
17840      setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
17841                      argL, argR, condT );
17842      DIP("%s r%u, r%u\n", isCMN ? "cmn" : "cmp", rN, rM);
17843      goto decode_success;
17844   }
17845
17846   case 0x108: {
17847      /* ---------------- TST Rn, Rm ---------------- */
17848      UInt   rN   = INSN0(2,0);
17849      UInt   rM   = INSN0(5,3);
17850      IRTemp oldC = newTemp(Ity_I32);
17851      IRTemp oldV = newTemp(Ity_I32);
17852      IRTemp res  = newTemp(Ity_I32);
17853      assign( oldC, mk_armg_calculate_flag_c() );
17854      assign( oldV, mk_armg_calculate_flag_v() );
17855      assign( res,  binop(Iop_And32, getIRegT(rN), getIRegT(rM)) );
17856      /* Update flags regardless of whether in an IT block or not. */
17857      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
17858      DIP("tst r%u, r%u\n", rN, rM);
17859      goto decode_success;
17860   }
17861
17862   case 0x109: {
17863      /* ---------------- NEGS Rd, Rm ---------------- */
17864      /* Rd = -Rm */
17865      UInt   rM   = INSN0(5,3);
17866      UInt   rD   = INSN0(2,0);
17867      IRTemp arg  = newTemp(Ity_I32);
17868      IRTemp zero = newTemp(Ity_I32);
17869      assign(arg, getIRegT(rM));
17870      assign(zero, mkU32(0));
17871      // rD can never be r15
17872      putIRegT(rD, binop(Iop_Sub32, mkexpr(zero), mkexpr(arg)), condT);
17873      setFlags_D1_D2( ARMG_CC_OP_SUB, zero, arg, cond_AND_notInIT_T);
17874      DIP("negs r%u, r%u\n", rD, rM);
17875      goto decode_success;
17876   }
17877
17878   case 0x10F: {
17879      /* ---------------- MVNS Rd, Rm ---------------- */
17880      /* Rd = ~Rm */
17881      UInt   rM   = INSN0(5,3);
17882      UInt   rD   = INSN0(2,0);
17883      IRTemp oldV = newTemp(Ity_I32);
17884      IRTemp oldC = newTemp(Ity_I32);
17885      IRTemp res  = newTemp(Ity_I32);
17886      assign( oldV, mk_armg_calculate_flag_v() );
17887      assign( oldC, mk_armg_calculate_flag_c() );
17888      assign(res, unop(Iop_Not32, getIRegT(rM)));
17889      // rD can never be r15
17890      putIRegT(rD, mkexpr(res), condT);
17891      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
17892                         cond_AND_notInIT_T );
17893      DIP("mvns r%u, r%u\n", rD, rM);
17894      goto decode_success;
17895   }
17896
17897   case 0x10C:
17898      /* ---------------- ORRS Rd, Rm ---------------- */
17899      anOp = Iop_Or32; anOpNm = "orr"; goto and_orr_eor_mul;
17900   case 0x100:
17901      /* ---------------- ANDS Rd, Rm ---------------- */
17902      anOp = Iop_And32; anOpNm = "and"; goto and_orr_eor_mul;
17903   case 0x101:
17904      /* ---------------- EORS Rd, Rm ---------------- */
17905      anOp = Iop_Xor32; anOpNm = "eor"; goto and_orr_eor_mul;
17906   case 0x10d:
17907      /* ---------------- MULS Rd, Rm ---------------- */
17908      anOp = Iop_Mul32; anOpNm = "mul"; goto and_orr_eor_mul;
17909   and_orr_eor_mul: {
17910      /* Rd = Rd `op` Rm */
17911      UInt   rM   = INSN0(5,3);
17912      UInt   rD   = INSN0(2,0);
17913      IRTemp res  = newTemp(Ity_I32);
17914      IRTemp oldV = newTemp(Ity_I32);
17915      IRTemp oldC = newTemp(Ity_I32);
17916      assign( oldV, mk_armg_calculate_flag_v() );
17917      assign( oldC, mk_armg_calculate_flag_c() );
17918      assign( res, binop(anOp, getIRegT(rD), getIRegT(rM) ));
17919      // not safe to read guest state after here
17920      // rD can never be r15
17921      putIRegT(rD, mkexpr(res), condT);
17922      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
17923                         cond_AND_notInIT_T );
17924      DIP("%s r%u, r%u\n", anOpNm, rD, rM);
17925      goto decode_success;
17926   }
17927
17928   case 0x10E: {
17929      /* ---------------- BICS Rd, Rm ---------------- */
17930      /* Rd = Rd & ~Rm */
17931      UInt   rM   = INSN0(5,3);
17932      UInt   rD   = INSN0(2,0);
17933      IRTemp res  = newTemp(Ity_I32);
17934      IRTemp oldV = newTemp(Ity_I32);
17935      IRTemp oldC = newTemp(Ity_I32);
17936      assign( oldV, mk_armg_calculate_flag_v() );
17937      assign( oldC, mk_armg_calculate_flag_c() );
17938      assign( res, binop(Iop_And32, getIRegT(rD),
17939                                    unop(Iop_Not32, getIRegT(rM) )));
17940      // not safe to read guest state after here
17941      // rD can never be r15
17942      putIRegT(rD, mkexpr(res), condT);
17943      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
17944                         cond_AND_notInIT_T );
17945      DIP("bics r%u, r%u\n", rD, rM);
17946      goto decode_success;
17947   }
17948
17949   case 0x105: {
17950      /* ---------------- ADCS Rd, Rm ---------------- */
17951      /* Rd = Rd + Rm + oldC */
17952      UInt   rM   = INSN0(5,3);
17953      UInt   rD   = INSN0(2,0);
17954      IRTemp argL = newTemp(Ity_I32);
17955      IRTemp argR = newTemp(Ity_I32);
17956      IRTemp oldC = newTemp(Ity_I32);
17957      IRTemp res  = newTemp(Ity_I32);
17958      assign(argL, getIRegT(rD));
17959      assign(argR, getIRegT(rM));
17960      assign(oldC, mk_armg_calculate_flag_c());
17961      assign(res, binop(Iop_Add32,
17962                        binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
17963                        mkexpr(oldC)));
17964      // rD can never be r15
17965      putIRegT(rD, mkexpr(res), condT);
17966      setFlags_D1_D2_ND( ARMG_CC_OP_ADC, argL, argR, oldC,
17967                         cond_AND_notInIT_T );
17968      DIP("adcs r%u, r%u\n", rD, rM);
17969      goto decode_success;
17970   }
17971
17972   case 0x106: {
17973      /* ---------------- SBCS Rd, Rm ---------------- */
17974      /* Rd = Rd - Rm - (oldC ^ 1) */
17975      UInt   rM   = INSN0(5,3);
17976      UInt   rD   = INSN0(2,0);
17977      IRTemp argL = newTemp(Ity_I32);
17978      IRTemp argR = newTemp(Ity_I32);
17979      IRTemp oldC = newTemp(Ity_I32);
17980      IRTemp res  = newTemp(Ity_I32);
17981      assign(argL, getIRegT(rD));
17982      assign(argR, getIRegT(rM));
17983      assign(oldC, mk_armg_calculate_flag_c());
17984      assign(res, binop(Iop_Sub32,
17985                        binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
17986                        binop(Iop_Xor32, mkexpr(oldC), mkU32(1))));
17987      // rD can never be r15
17988      putIRegT(rD, mkexpr(res), condT);
17989      setFlags_D1_D2_ND( ARMG_CC_OP_SBB, argL, argR, oldC,
17990                         cond_AND_notInIT_T );
17991      DIP("sbcs r%u, r%u\n", rD, rM);
17992      goto decode_success;
17993   }
17994
17995   case 0x2CB: {
17996      /* ---------------- UXTB Rd, Rm ---------------- */
17997      /* Rd = 8Uto32(Rm) */
17998      UInt rM = INSN0(5,3);
17999      UInt rD = INSN0(2,0);
18000      putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFF)),
18001                   condT);
18002      DIP("uxtb r%u, r%u\n", rD, rM);
18003      goto decode_success;
18004   }
18005
18006   case 0x2C9: {
18007      /* ---------------- SXTB Rd, Rm ---------------- */
18008      /* Rd = 8Sto32(Rm) */
18009      UInt rM = INSN0(5,3);
18010      UInt rD = INSN0(2,0);
18011      putIRegT(rD, binop(Iop_Sar32,
18012                         binop(Iop_Shl32, getIRegT(rM), mkU8(24)),
18013                         mkU8(24)),
18014                   condT);
18015      DIP("sxtb r%u, r%u\n", rD, rM);
18016      goto decode_success;
18017   }
18018
18019   case 0x2CA: {
18020      /* ---------------- UXTH Rd, Rm ---------------- */
18021      /* Rd = 16Uto32(Rm) */
18022      UInt rM = INSN0(5,3);
18023      UInt rD = INSN0(2,0);
18024      putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFFFF)),
18025                   condT);
18026      DIP("uxth r%u, r%u\n", rD, rM);
18027      goto decode_success;
18028   }
18029
18030   case 0x2C8: {
18031      /* ---------------- SXTH Rd, Rm ---------------- */
18032      /* Rd = 16Sto32(Rm) */
18033      UInt rM = INSN0(5,3);
18034      UInt rD = INSN0(2,0);
18035      putIRegT(rD, binop(Iop_Sar32,
18036                         binop(Iop_Shl32, getIRegT(rM), mkU8(16)),
18037                         mkU8(16)),
18038                   condT);
18039      DIP("sxth r%u, r%u\n", rD, rM);
18040      goto decode_success;
18041   }
18042
18043   case 0x102:   // LSLS
18044   case 0x103:   // LSRS
18045   case 0x104:   // ASRS
18046   case 0x107: { // RORS
18047      /* ---------------- LSLS Rs, Rd ---------------- */
18048      /* ---------------- LSRS Rs, Rd ---------------- */
18049      /* ---------------- ASRS Rs, Rd ---------------- */
18050      /* ---------------- RORS Rs, Rd ---------------- */
18051      /* Rd = Rd `op` Rs, and set flags */
18052      UInt   rS   = INSN0(5,3);
18053      UInt   rD   = INSN0(2,0);
18054      IRTemp oldV = newTemp(Ity_I32);
18055      IRTemp rDt  = newTemp(Ity_I32);
18056      IRTemp rSt  = newTemp(Ity_I32);
18057      IRTemp res  = newTemp(Ity_I32);
18058      IRTemp resC = newTemp(Ity_I32);
18059      const HChar* wot  = "???";
18060      assign(rSt, getIRegT(rS));
18061      assign(rDt, getIRegT(rD));
18062      assign(oldV, mk_armg_calculate_flag_v());
18063      /* Does not appear to be the standard 'how' encoding. */
18064      switch (INSN0(15,6)) {
18065         case 0x102:
18066            compute_result_and_C_after_LSL_by_reg(
18067               dis_buf, &res, &resC, rDt, rSt, rD, rS
18068            );
18069            wot = "lsl";
18070            break;
18071         case 0x103:
18072            compute_result_and_C_after_LSR_by_reg(
18073               dis_buf, &res, &resC, rDt, rSt, rD, rS
18074            );
18075            wot = "lsr";
18076            break;
18077         case 0x104:
18078            compute_result_and_C_after_ASR_by_reg(
18079               dis_buf, &res, &resC, rDt, rSt, rD, rS
18080            );
18081            wot = "asr";
18082            break;
18083         case 0x107:
18084            compute_result_and_C_after_ROR_by_reg(
18085               dis_buf, &res, &resC, rDt, rSt, rD, rS
18086            );
18087            wot = "ror";
18088            break;
18089         default:
18090            /*NOTREACHED*/vassert(0);
18091      }
18092      // not safe to read guest state after this point
18093      putIRegT(rD, mkexpr(res), condT);
18094      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
18095                         cond_AND_notInIT_T );
18096      DIP("%ss r%u, r%u\n", wot, rS, rD);
18097      goto decode_success;
18098   }
18099
18100   case 0x2E8:   // REV
18101   case 0x2E9: { // REV16
18102      /* ---------------- REV   Rd, Rm ---------------- */
18103      /* ---------------- REV16 Rd, Rm ---------------- */
18104      UInt rM = INSN0(5,3);
18105      UInt rD = INSN0(2,0);
18106      Bool isREV = INSN0(15,6) == 0x2E8;
18107      IRTemp arg = newTemp(Ity_I32);
18108      assign(arg, getIRegT(rM));
18109      IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
18110      putIRegT(rD, mkexpr(res), condT);
18111      DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM);
18112      goto decode_success;
18113   }
18114
18115   case 0x2EB: { // REVSH
18116      /* ---------------- REVSH Rd, Rn ---------------- */
18117      UInt rM = INSN0(5,3);
18118      UInt rD = INSN0(2,0);
18119      IRTemp irt_rM  = newTemp(Ity_I32);
18120      IRTemp irt_hi  = newTemp(Ity_I32);
18121      IRTemp irt_low = newTemp(Ity_I32);
18122      IRTemp irt_res = newTemp(Ity_I32);
18123      assign(irt_rM, getIRegT(rM));
18124      assign(irt_hi,
18125             binop(Iop_Sar32,
18126                   binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
18127                   mkU8(16)
18128             )
18129      );
18130      assign(irt_low,
18131             binop(Iop_And32,
18132                   binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
18133                   mkU32(0xFF)
18134             )
18135      );
18136      assign(irt_res,
18137             binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
18138      );
18139      putIRegT(rD, mkexpr(irt_res), condT);
18140      DIP("revsh r%u, r%u\n", rD, rM);
18141      goto decode_success;
18142   }
18143
18144   default:
18145      break; /* examine the next shortest prefix */
18146
18147   }
18148
18149
18150   /* ================ 16-bit 15:7 cases ================ */
18151
18152   switch (INSN0(15,7)) {
18153
18154   case BITS9(1,0,1,1,0,0,0,0,0): {
18155      /* ------------ ADD SP, #imm7 * 4 ------------ */
18156      UInt uimm7 = INSN0(6,0);
18157      putIRegT(13, binop(Iop_Add32, getIRegT(13), mkU32(uimm7 * 4)),
18158                   condT);
18159      DIP("add sp, #%u\n", uimm7 * 4);
18160      goto decode_success;
18161   }
18162
18163   case BITS9(1,0,1,1,0,0,0,0,1): {
18164      /* ------------ SUB SP, #imm7 * 4 ------------ */
18165      UInt uimm7 = INSN0(6,0);
18166      putIRegT(13, binop(Iop_Sub32, getIRegT(13), mkU32(uimm7 * 4)),
18167                   condT);
18168      DIP("sub sp, #%u\n", uimm7 * 4);
18169      goto decode_success;
18170   }
18171
18172   case BITS9(0,1,0,0,0,1,1,1,0): {
18173      /* ---------------- BX rM ---------------- */
18174      /* Branch to reg, and optionally switch modes.  Reg contains a
18175         suitably encoded address therefore (w CPSR.T at the bottom).
18176         Have to special-case r15, as usual. */
18177      UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
18178      if (BITS3(0,0,0) == INSN0(2,0)) {
18179         IRTemp dst = newTemp(Ity_I32);
18180         gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
18181         mk_skip_over_T16_if_cond_is_false(condT);
18182         condT = IRTemp_INVALID;
18183         // now uncond
18184         if (rM <= 14) {
18185            assign( dst, getIRegT(rM) );
18186         } else {
18187            vassert(rM == 15);
18188            assign( dst, mkU32(guest_R15_curr_instr_notENC + 4) );
18189         }
18190         llPutIReg(15, mkexpr(dst));
18191         dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
18192         dres.whatNext    = Dis_StopHere;
18193         DIP("bx r%u (possibly switch to ARM mode)\n", rM);
18194         goto decode_success;
18195      }
18196      break;
18197   }
18198
18199   /* ---------------- BLX rM ---------------- */
18200   /* Branch and link to interworking address in rM. */
18201   case BITS9(0,1,0,0,0,1,1,1,1): {
18202      if (BITS3(0,0,0) == INSN0(2,0)) {
18203         UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
18204         IRTemp dst = newTemp(Ity_I32);
18205         if (rM <= 14) {
18206            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
18207            mk_skip_over_T16_if_cond_is_false(condT);
18208            condT = IRTemp_INVALID;
18209            // now uncond
18210            /* We're returning to Thumb code, hence "| 1" */
18211            assign( dst, getIRegT(rM) );
18212            putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ),
18213                          IRTemp_INVALID );
18214            llPutIReg(15, mkexpr(dst));
18215            dres.jk_StopHere = Ijk_Call;
18216            dres.whatNext    = Dis_StopHere;
18217            DIP("blx r%u (possibly switch to ARM mode)\n", rM);
18218            goto decode_success;
18219         }
18220         /* else unpredictable, fall through */
18221      }
18222      break;
18223   }
18224
18225   default:
18226      break; /* examine the next shortest prefix */
18227
18228   }
18229
18230
18231   /* ================ 16-bit 15:8 cases ================ */
18232
18233   switch (INSN0(15,8)) {
18234
18235   case BITS8(1,1,0,1,1,1,1,1): {
18236      /* ---------------- SVC ---------------- */
18237      UInt imm8 = INSN0(7,0);
18238      if (imm8 == 0) {
18239         /* A syscall.  We can't do this conditionally, hence: */
18240         mk_skip_over_T16_if_cond_is_false( condT );
18241         // FIXME: what if we have to back up and restart this insn?
18242         // then ITSTATE will be wrong (we'll have it as "used")
18243         // when it isn't.  Correct is to save ITSTATE in a
18244         // stash pseudo-reg, and back up from that if we have to
18245         // restart.
18246         // uncond after here
18247         llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ));
18248         dres.jk_StopHere = Ijk_Sys_syscall;
18249         dres.whatNext    = Dis_StopHere;
18250         DIP("svc #0x%08x\n", imm8);
18251         goto decode_success;
18252      }
18253      /* else fall through */
18254      break;
18255   }
18256
18257   case BITS8(0,1,0,0,0,1,0,0): {
18258      /* ---------------- ADD(HI) Rd, Rm ---------------- */
18259      UInt h1 = INSN0(7,7);
18260      UInt h2 = INSN0(6,6);
18261      UInt rM = (h2 << 3) | INSN0(5,3);
18262      UInt rD = (h1 << 3) | INSN0(2,0);
18263      //if (h1 == 0 && h2 == 0) { // Original T1 was more restrictive
18264      if (rD == 15 && rM == 15) {
18265         // then it's invalid
18266      } else {
18267         IRTemp res = newTemp(Ity_I32);
18268         assign( res, binop(Iop_Add32, getIRegT(rD), getIRegT(rM) ));
18269         if (rD != 15) {
18270            putIRegT( rD, mkexpr(res), condT );
18271         } else {
18272            /* Only allowed outside or last-in IT block; SIGILL if not so. */
18273            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
18274            /* jump over insn if not selected */
18275            mk_skip_over_T16_if_cond_is_false(condT);
18276            condT = IRTemp_INVALID;
18277            // now uncond
18278            /* non-interworking branch */
18279            llPutIReg(15, binop(Iop_Or32, mkexpr(res), mkU32(1)));
18280            dres.jk_StopHere = Ijk_Boring;
18281            dres.whatNext    = Dis_StopHere;
18282         }
18283         DIP("add(hi) r%u, r%u\n", rD, rM);
18284         goto decode_success;
18285      }
18286      break;
18287   }
18288
18289   case BITS8(0,1,0,0,0,1,0,1): {
18290      /* ---------------- CMP(HI) Rd, Rm ---------------- */
18291      UInt h1 = INSN0(7,7);
18292      UInt h2 = INSN0(6,6);
18293      UInt rM = (h2 << 3) | INSN0(5,3);
18294      UInt rN = (h1 << 3) | INSN0(2,0);
18295      if (h1 != 0 || h2 != 0) {
18296         IRTemp argL  = newTemp(Ity_I32);
18297         IRTemp argR  = newTemp(Ity_I32);
18298         assign( argL, getIRegT(rN) );
18299         assign( argR, getIRegT(rM) );
18300         /* Update flags regardless of whether in an IT block or not. */
18301         setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
18302         DIP("cmphi r%u, r%u\n", rN, rM);
18303         goto decode_success;
18304      }
18305      break;
18306   }
18307
18308   case BITS8(0,1,0,0,0,1,1,0): {
18309      /* ---------------- MOV(HI) Rd, Rm ---------------- */
18310      UInt h1 = INSN0(7,7);
18311      UInt h2 = INSN0(6,6);
18312      UInt rM = (h2 << 3) | INSN0(5,3);
18313      UInt rD = (h1 << 3) | INSN0(2,0);
18314      /* The old ARM ARM seems to disallow the case where both Rd and
18315         Rm are "low" registers, but newer versions allow it. */
18316      if (1 /*h1 != 0 || h2 != 0*/) {
18317         IRTemp val = newTemp(Ity_I32);
18318         assign( val, getIRegT(rM) );
18319         if (rD != 15) {
18320            putIRegT( rD, mkexpr(val), condT );
18321         } else {
18322            /* Only allowed outside or last-in IT block; SIGILL if not so. */
18323            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
18324            /* jump over insn if not selected */
18325            mk_skip_over_T16_if_cond_is_false(condT);
18326            condT = IRTemp_INVALID;
18327            // now uncond
18328            /* non-interworking branch */
18329            llPutIReg(15, binop(Iop_Or32, mkexpr(val), mkU32(1)));
18330            dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
18331            dres.whatNext    = Dis_StopHere;
18332         }
18333         DIP("mov r%u, r%u\n", rD, rM);
18334         goto decode_success;
18335      }
18336      break;
18337   }
18338
18339   case BITS8(1,0,1,1,1,1,1,1): {
18340      /* ---------------- IT (if-then) ---------------- */
18341      UInt firstcond = INSN0(7,4);
18342      UInt mask = INSN0(3,0);
18343      UInt newITSTATE = 0;
18344      /* This is the ITSTATE represented as described in
18345         libvex_guest_arm.h.  It is not the ARM ARM representation. */
18346      HChar c1 = '.';
18347      HChar c2 = '.';
18348      HChar c3 = '.';
18349      Bool valid = compute_ITSTATE( &newITSTATE, &c1, &c2, &c3,
18350                                    firstcond, mask );
18351      if (valid && firstcond != 0xF/*NV*/) {
18352         /* Not allowed in an IT block; SIGILL if so. */
18353         gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
18354
18355         IRTemp t = newTemp(Ity_I32);
18356         assign(t, mkU32(newITSTATE));
18357         put_ITSTATE(t);
18358
18359         DIP("it%c%c%c %s\n", c1, c2, c3, nCC(firstcond));
18360         goto decode_success;
18361      }
18362      break;
18363   }
18364
18365   case BITS8(1,0,1,1,0,0,0,1):
18366   case BITS8(1,0,1,1,0,0,1,1):
18367   case BITS8(1,0,1,1,1,0,0,1):
18368   case BITS8(1,0,1,1,1,0,1,1): {
18369      /* ---------------- CB{N}Z ---------------- */
18370      UInt rN    = INSN0(2,0);
18371      UInt bOP   = INSN0(11,11);
18372      UInt imm32 = (INSN0(9,9) << 6) | (INSN0(7,3) << 1);
18373      gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
18374      /* It's a conditional branch forward. */
18375      IRTemp kond = newTemp(Ity_I1);
18376      assign( kond, binop(bOP ? Iop_CmpNE32 : Iop_CmpEQ32,
18377                          getIRegT(rN), mkU32(0)) );
18378
18379      vassert(0 == (guest_R15_curr_instr_notENC & 1));
18380      /* Looks like the nearest insn we can branch to is the one after
18381         next.  That makes sense, as there's no point in being able to
18382         encode a conditional branch to the next instruction. */
18383      UInt dst = (guest_R15_curr_instr_notENC + 4 + imm32) | 1;
18384      stmt(IRStmt_Exit( mkexpr(kond),
18385                        Ijk_Boring,
18386                        IRConst_U32(toUInt(dst)),
18387                        OFFB_R15T ));
18388      DIP("cb%s r%u, 0x%x\n", bOP ? "nz" : "z", rN, dst - 1);
18389      goto decode_success;
18390   }
18391
18392   default:
18393      break; /* examine the next shortest prefix */
18394
18395   }
18396
18397
18398   /* ================ 16-bit 15:9 cases ================ */
18399
18400   switch (INSN0(15,9)) {
18401
18402   case BITS7(1,0,1,1,0,1,0): {
18403      /* ---------------- PUSH ---------------- */
18404      /* This is a bit like STMxx, but way simpler. Complications we
18405         don't have to deal with:
18406         * SP being one of the transferred registers
18407         * direction (increment vs decrement)
18408         * before-vs-after-ness
18409      */
18410      Int  i, nRegs;
18411      UInt bitR    = INSN0(8,8);
18412      UInt regList = INSN0(7,0);
18413      if (bitR) regList |= (1 << 14);
18414
18415      /* At least one register must be transferred, else result is
18416         UNPREDICTABLE. */
18417      if (regList != 0) {
18418         /* Since we can't generate a guaranteed non-trapping IR
18419            sequence, (1) jump over the insn if it is gated false, and
18420            (2) back out the ITSTATE update. */
18421         mk_skip_over_T16_if_cond_is_false(condT);
18422         condT = IRTemp_INVALID;
18423         put_ITSTATE(old_itstate);
18424         // now uncond
18425
18426         nRegs = 0;
18427         for (i = 0; i < 16; i++) {
18428            if ((regList & (1 << i)) != 0)
18429               nRegs++;
18430         }
18431         vassert(nRegs >= 1 && nRegs <= 9);
18432
18433         /* Move SP down first of all, so we're "covered".  And don't
18434            mess with its alignment. */
18435         IRTemp newSP = newTemp(Ity_I32);
18436         assign(newSP, binop(Iop_Sub32, getIRegT(13), mkU32(4 * nRegs)));
18437         putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
18438
18439         /* Generate a transfer base address as a forced-aligned
18440            version of the final SP value. */
18441         IRTemp base = newTemp(Ity_I32);
18442         assign(base, binop(Iop_And32, mkexpr(newSP), mkU32(~3)));
18443
18444         /* Now the transfers */
18445         nRegs = 0;
18446         for (i = 0; i < 16; i++) {
18447            if ((regList & (1 << i)) != 0) {
18448               storeLE( binop(Iop_Add32, mkexpr(base), mkU32(4 * nRegs)),
18449                        getIRegT(i) );
18450               nRegs++;
18451            }
18452         }
18453
18454         /* Reinstate the ITSTATE update. */
18455         put_ITSTATE(new_itstate);
18456
18457         DIP("push {%s0x%04x}\n", bitR ? "lr," : "", regList & 0xFF);
18458         goto decode_success;
18459      }
18460      break;
18461   }
18462
18463   case BITS7(1,0,1,1,1,1,0): {
18464      /* ---------------- POP ---------------- */
18465      Int  i, nRegs;
18466      UInt bitR    = INSN0(8,8);
18467      UInt regList = INSN0(7,0);
18468
18469      /* At least one register must be transferred, else result is
18470         UNPREDICTABLE. */
18471      if (regList != 0 || bitR) {
18472         /* Since we can't generate a guaranteed non-trapping IR
18473            sequence, (1) jump over the insn if it is gated false, and
18474            (2) back out the ITSTATE update. */
18475         mk_skip_over_T16_if_cond_is_false(condT);
18476         condT = IRTemp_INVALID;
18477         put_ITSTATE(old_itstate);
18478         // now uncond
18479
18480         nRegs = 0;
18481         for (i = 0; i < 8; i++) {
18482            if ((regList & (1 << i)) != 0)
18483               nRegs++;
18484         }
18485         vassert(nRegs >= 0 && nRegs <= 8);
18486         vassert(bitR == 0 || bitR == 1);
18487
18488         IRTemp oldSP = newTemp(Ity_I32);
18489         assign(oldSP, getIRegT(13));
18490
18491         /* Generate a transfer base address as a forced-aligned
18492            version of the original SP value. */
18493         IRTemp base = newTemp(Ity_I32);
18494         assign(base, binop(Iop_And32, mkexpr(oldSP), mkU32(~3)));
18495
18496         /* Compute a new value for SP, but don't install it yet, so
18497            that we're "covered" until all the transfers are done.
18498            And don't mess with its alignment. */
18499         IRTemp newSP = newTemp(Ity_I32);
18500         assign(newSP, binop(Iop_Add32, mkexpr(oldSP),
18501                                        mkU32(4 * (nRegs + bitR))));
18502
18503         /* Now the transfers, not including PC */
18504         nRegs = 0;
18505         for (i = 0; i < 8; i++) {
18506            if ((regList & (1 << i)) != 0) {
18507               putIRegT(i, loadLE( Ity_I32,
18508                                   binop(Iop_Add32, mkexpr(base),
18509                                                    mkU32(4 * nRegs))),
18510                           IRTemp_INVALID );
18511               nRegs++;
18512            }
18513         }
18514
18515         IRTemp newPC = IRTemp_INVALID;
18516         if (bitR) {
18517            newPC = newTemp(Ity_I32);
18518            assign( newPC, loadLE( Ity_I32,
18519                                   binop(Iop_Add32, mkexpr(base),
18520                                                    mkU32(4 * nRegs))));
18521         }
18522
18523         /* Now we can safely install the new SP value */
18524         putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
18525
18526         /* Reinstate the ITSTATE update. */
18527         put_ITSTATE(new_itstate);
18528
18529         /* now, do we also have to do a branch?  If so, it turns out
18530            that the new PC value is encoded exactly as we need it to
18531            be -- with CPSR.T in the bottom bit.  So we can simply use
18532            it as is, no need to mess with it.  Note, therefore, this
18533            is an interworking return. */
18534         if (bitR) {
18535            llPutIReg(15, mkexpr(newPC));
18536            dres.jk_StopHere = Ijk_Ret;
18537            dres.whatNext    = Dis_StopHere;
18538         }
18539
18540         DIP("pop {%s0x%04x}\n", bitR ? "pc," : "", regList & 0xFF);
18541         goto decode_success;
18542      }
18543      break;
18544   }
18545
18546   case BITS7(0,0,0,1,1,1,0):   /* ADDS */
18547   case BITS7(0,0,0,1,1,1,1): { /* SUBS */
18548      /* ---------------- ADDS Rd, Rn, #uimm3 ---------------- */
18549      /* ---------------- SUBS Rd, Rn, #uimm3 ---------------- */
18550      UInt   uimm3 = INSN0(8,6);
18551      UInt   rN    = INSN0(5,3);
18552      UInt   rD    = INSN0(2,0);
18553      UInt   isSub = INSN0(9,9);
18554      IRTemp argL  = newTemp(Ity_I32);
18555      IRTemp argR  = newTemp(Ity_I32);
18556      assign( argL, getIRegT(rN) );
18557      assign( argR, mkU32(uimm3) );
18558      putIRegT(rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
18559                         mkexpr(argL), mkexpr(argR)),
18560                   condT);
18561      setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
18562                      argL, argR, cond_AND_notInIT_T );
18563      DIP("%s r%u, r%u, #%u\n", isSub ? "subs" : "adds", rD, rN, uimm3);
18564      goto decode_success;
18565   }
18566
18567   case BITS7(0,0,0,1,1,0,0):   /* ADDS */
18568   case BITS7(0,0,0,1,1,0,1): { /* SUBS */
18569      /* ---------------- ADDS Rd, Rn, Rm ---------------- */
18570      /* ---------------- SUBS Rd, Rn, Rm ---------------- */
18571      UInt   rM    = INSN0(8,6);
18572      UInt   rN    = INSN0(5,3);
18573      UInt   rD    = INSN0(2,0);
18574      UInt   isSub = INSN0(9,9);
18575      IRTemp argL  = newTemp(Ity_I32);
18576      IRTemp argR  = newTemp(Ity_I32);
18577      assign( argL, getIRegT(rN) );
18578      assign( argR, getIRegT(rM) );
18579      putIRegT( rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
18580                          mkexpr(argL), mkexpr(argR)),
18581                    condT );
18582      setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
18583                      argL, argR, cond_AND_notInIT_T );
18584      DIP("%s r%u, r%u, r%u\n", isSub ? "subs" : "adds", rD, rN, rM);
18585      goto decode_success;
18586   }
18587
18588   case BITS7(0,1,0,1,0,0,0):   /* STR */
18589   case BITS7(0,1,0,1,1,0,0): { /* LDR */
18590      /* ------------- LDR Rd, [Rn, Rm] ------------- */
18591      /* ------------- STR Rd, [Rn, Rm] ------------- */
18592      /* LDR/STR Rd, [Rn + Rm] */
18593      UInt    rD   = INSN0(2,0);
18594      UInt    rN   = INSN0(5,3);
18595      UInt    rM   = INSN0(8,6);
18596      UInt    isLD = INSN0(11,11);
18597
18598      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
18599      put_ITSTATE(old_itstate); // backout
18600      if (isLD) {
18601         IRTemp tD = newTemp(Ity_I32);
18602         loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
18603         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18604      } else {
18605         storeGuardedLE(ea, getIRegT(rD), condT);
18606      }
18607      put_ITSTATE(new_itstate); // restore
18608
18609      DIP("%s r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
18610      goto decode_success;
18611   }
18612
18613   case BITS7(0,1,0,1,0,0,1):
18614   case BITS7(0,1,0,1,1,0,1): {
18615      /* ------------- LDRH Rd, [Rn, Rm] ------------- */
18616      /* ------------- STRH Rd, [Rn, Rm] ------------- */
18617      /* LDRH/STRH Rd, [Rn + Rm] */
18618      UInt    rD   = INSN0(2,0);
18619      UInt    rN   = INSN0(5,3);
18620      UInt    rM   = INSN0(8,6);
18621      UInt    isLD = INSN0(11,11);
18622
18623      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
18624      put_ITSTATE(old_itstate); // backout
18625      if (isLD) {
18626         IRTemp tD = newTemp(Ity_I32);
18627         loadGuardedLE(tD, ILGop_16Uto32, ea, llGetIReg(rD), condT);
18628         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18629      } else {
18630         storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
18631      }
18632      put_ITSTATE(new_itstate); // restore
18633
18634      DIP("%sh r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
18635      goto decode_success;
18636   }
18637
18638   case BITS7(0,1,0,1,1,1,1): {
18639      /* ------------- LDRSH Rd, [Rn, Rm] ------------- */
18640      /* LDRSH Rd, [Rn + Rm] */
18641      UInt    rD = INSN0(2,0);
18642      UInt    rN = INSN0(5,3);
18643      UInt    rM = INSN0(8,6);
18644
18645      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
18646      put_ITSTATE(old_itstate); // backout
18647      IRTemp tD = newTemp(Ity_I32);
18648      loadGuardedLE(tD, ILGop_16Sto32, ea, llGetIReg(rD), condT);
18649      putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18650      put_ITSTATE(new_itstate); // restore
18651
18652      DIP("ldrsh r%u, [r%u, r%u]\n", rD, rN, rM);
18653      goto decode_success;
18654   }
18655
18656   case BITS7(0,1,0,1,0,1,1): {
18657      /* ------------- LDRSB Rd, [Rn, Rm] ------------- */
18658      /* LDRSB Rd, [Rn + Rm] */
18659      UInt    rD = INSN0(2,0);
18660      UInt    rN = INSN0(5,3);
18661      UInt    rM = INSN0(8,6);
18662
18663      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
18664      put_ITSTATE(old_itstate); // backout
18665      IRTemp tD = newTemp(Ity_I32);
18666      loadGuardedLE(tD, ILGop_8Sto32, ea, llGetIReg(rD), condT);
18667      putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18668      put_ITSTATE(new_itstate); // restore
18669
18670      DIP("ldrsb r%u, [r%u, r%u]\n", rD, rN, rM);
18671      goto decode_success;
18672   }
18673
18674   case BITS7(0,1,0,1,0,1,0):
18675   case BITS7(0,1,0,1,1,1,0): {
18676      /* ------------- LDRB Rd, [Rn, Rm] ------------- */
18677      /* ------------- STRB Rd, [Rn, Rm] ------------- */
18678      /* LDRB/STRB Rd, [Rn + Rm] */
18679      UInt    rD   = INSN0(2,0);
18680      UInt    rN   = INSN0(5,3);
18681      UInt    rM   = INSN0(8,6);
18682      UInt    isLD = INSN0(11,11);
18683
18684      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
18685      put_ITSTATE(old_itstate); // backout
18686      if (isLD) {
18687         IRTemp tD = newTemp(Ity_I32);
18688         loadGuardedLE(tD, ILGop_8Uto32, ea, llGetIReg(rD), condT);
18689         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18690      } else {
18691         storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
18692      }
18693      put_ITSTATE(new_itstate); // restore
18694
18695      DIP("%sb r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
18696      goto decode_success;
18697   }
18698
18699   default:
18700      break; /* examine the next shortest prefix */
18701
18702   }
18703
18704
18705   /* ================ 16-bit 15:11 cases ================ */
18706
18707   switch (INSN0(15,11)) {
18708
18709   case BITS5(0,0,1,1,0):
18710   case BITS5(0,0,1,1,1): {
18711      /* ---------------- ADDS Rn, #uimm8 ---------------- */
18712      /* ---------------- SUBS Rn, #uimm8 ---------------- */
18713      UInt   isSub = INSN0(11,11);
18714      UInt   rN    = INSN0(10,8);
18715      UInt   uimm8 = INSN0(7,0);
18716      IRTemp argL  = newTemp(Ity_I32);
18717      IRTemp argR  = newTemp(Ity_I32);
18718      assign( argL, getIRegT(rN) );
18719      assign( argR, mkU32(uimm8) );
18720      putIRegT( rN, binop(isSub ? Iop_Sub32 : Iop_Add32,
18721                          mkexpr(argL), mkexpr(argR)), condT );
18722      setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
18723                      argL, argR, cond_AND_notInIT_T );
18724      DIP("%s r%u, #%u\n", isSub ? "subs" : "adds", rN, uimm8);
18725      goto decode_success;
18726   }
18727
18728   case BITS5(1,0,1,0,0): {
18729      /* ---------------- ADD rD, PC, #imm8 * 4 ---------------- */
18730      /* a.k.a. ADR */
18731      /* rD = align4(PC) + imm8 * 4 */
18732      UInt rD   = INSN0(10,8);
18733      UInt imm8 = INSN0(7,0);
18734      putIRegT(rD, binop(Iop_Add32,
18735                         binop(Iop_And32, getIRegT(15), mkU32(~3U)),
18736                         mkU32(imm8 * 4)),
18737                   condT);
18738      DIP("add r%u, pc, #%u\n", rD, imm8 * 4);
18739      goto decode_success;
18740   }
18741
18742   case BITS5(1,0,1,0,1): {
18743      /* ---------------- ADD rD, SP, #imm8 * 4 ---------------- */
18744      UInt rD   = INSN0(10,8);
18745      UInt imm8 = INSN0(7,0);
18746      putIRegT(rD, binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4)),
18747                   condT);
18748      DIP("add r%u, r13, #%u\n", rD, imm8 * 4);
18749      goto decode_success;
18750   }
18751
18752   case BITS5(0,0,1,0,1): {
18753      /* ---------------- CMP Rn, #uimm8 ---------------- */
18754      UInt   rN    = INSN0(10,8);
18755      UInt   uimm8 = INSN0(7,0);
18756      IRTemp argL  = newTemp(Ity_I32);
18757      IRTemp argR  = newTemp(Ity_I32);
18758      assign( argL, getIRegT(rN) );
18759      assign( argR, mkU32(uimm8) );
18760      /* Update flags regardless of whether in an IT block or not. */
18761      setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
18762      DIP("cmp r%u, #%u\n", rN, uimm8);
18763      goto decode_success;
18764   }
18765
18766   case BITS5(0,0,1,0,0): {
18767      /* -------------- (T1) MOVS Rn, #uimm8 -------------- */
18768      UInt   rD    = INSN0(10,8);
18769      UInt   uimm8 = INSN0(7,0);
18770      IRTemp oldV  = newTemp(Ity_I32);
18771      IRTemp oldC  = newTemp(Ity_I32);
18772      IRTemp res   = newTemp(Ity_I32);
18773      assign( oldV, mk_armg_calculate_flag_v() );
18774      assign( oldC, mk_armg_calculate_flag_c() );
18775      assign( res, mkU32(uimm8) );
18776      putIRegT(rD, mkexpr(res), condT);
18777      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
18778                         cond_AND_notInIT_T );
18779      DIP("movs r%u, #%u\n", rD, uimm8);
18780      goto decode_success;
18781   }
18782
18783   case BITS5(0,1,0,0,1): {
18784      /* ------------- LDR Rd, [PC, #imm8 * 4] ------------- */
18785      /* LDR Rd, [align4(PC) + imm8 * 4] */
18786      UInt   rD   = INSN0(10,8);
18787      UInt   imm8 = INSN0(7,0);
18788      IRTemp ea   = newTemp(Ity_I32);
18789
18790      assign(ea, binop(Iop_Add32,
18791                       binop(Iop_And32, getIRegT(15), mkU32(~3U)),
18792                       mkU32(imm8 * 4)));
18793      put_ITSTATE(old_itstate); // backout
18794      IRTemp tD = newTemp(Ity_I32);
18795      loadGuardedLE( tD, ILGop_Ident32, mkexpr(ea), llGetIReg(rD), condT );
18796      putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18797      put_ITSTATE(new_itstate); // restore
18798
18799      DIP("ldr r%u, [pc, #%u]\n", rD, imm8 * 4);
18800      goto decode_success;
18801   }
18802
18803   case BITS5(0,1,1,0,0):   /* STR */
18804   case BITS5(0,1,1,0,1): { /* LDR */
18805      /* ------------- LDR Rd, [Rn, #imm5 * 4] ------------- */
18806      /* ------------- STR Rd, [Rn, #imm5 * 4] ------------- */
18807      /* LDR/STR Rd, [Rn + imm5 * 4] */
           /* Word load/store at rN + imm5*4.  Bit 11 of the instruction
              selects load (1) or store (0).  Both memory operations are
              guarded by condT. */
18808      UInt    rD   = INSN0(2,0);
18809      UInt    rN   = INSN0(5,3);
18810      UInt    imm5 = INSN0(10,6);
18811      UInt    isLD = INSN0(11,11);
18812
18813      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 4));
           /* ITSTATE is old_itstate while the memory access is emitted. */
18814      put_ITSTATE(old_itstate); // backout
18815      if (isLD) {
18816         IRTemp tD = newTemp(Ity_I32);
              /* Guarded load with the current rD as alternative operand,
                 followed by an unguarded register write. */
18817         loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
18818         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18819      } else {
18820         storeGuardedLE( ea, getIRegT(rD), condT );
18821      }
18822      put_ITSTATE(new_itstate); // restore
18823
18824      DIP("%s r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 4);
18825      goto decode_success;
18826   }
18827
18828   case BITS5(1,0,0,0,0):   /* STRH */
18829   case BITS5(1,0,0,0,1): { /* LDRH */
18830      /* ------------- LDRH Rd, [Rn, #imm5 * 2] ------------- */
18831      /* ------------- STRH Rd, [Rn, #imm5 * 2] ------------- */
18832      /* LDRH/STRH Rd, [Rn + imm5 * 2] */
           /* Halfword load/store at rN + imm5*2.  Bit 11 selects load (1)
              or store (0).  Loads use ILGop_16Uto32; stores narrow rD
              with Iop_32to16.  Both are guarded by condT. */
18833      UInt    rD   = INSN0(2,0);
18834      UInt    rN   = INSN0(5,3);
18835      UInt    imm5 = INSN0(10,6);
18836      UInt    isLD = INSN0(11,11);
18837
18838      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 2));
           /* ITSTATE is old_itstate while the memory access is emitted. */
18839      put_ITSTATE(old_itstate); // backout
18840      if (isLD) {
18841         IRTemp tD = newTemp(Ity_I32);
              /* Guarded load with the current rD as alternative operand,
                 followed by an unguarded register write. */
18842         loadGuardedLE( tD, ILGop_16Uto32, ea, llGetIReg(rD), condT );
18843         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18844      } else {
18845         storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
18846      }
18847      put_ITSTATE(new_itstate); // restore
18848
18849      DIP("%sh r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 2);
18850      goto decode_success;
18851   }
18852
18853   case BITS5(0,1,1,1,0):   /* STRB */
18854   case BITS5(0,1,1,1,1): { /* LDRB */
18855      /* ------------- LDRB Rd, [Rn, #imm5] ------------- */
18856      /* ------------- STRB Rd, [Rn, #imm5] ------------- */
18857      /* LDRB/STRB Rd, [Rn + imm5] */
           /* Byte load/store at rN + imm5 (unscaled).  Bit 11 selects load
              (1) or store (0).  Loads use ILGop_8Uto32; stores narrow rD
              with Iop_32to8.  Both are guarded by condT. */
18858      UInt    rD   = INSN0(2,0);
18859      UInt    rN   = INSN0(5,3);
18860      UInt    imm5 = INSN0(10,6);
18861      UInt    isLD = INSN0(11,11);
18862
18863      IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5));
           /* ITSTATE is old_itstate while the memory access is emitted. */
18864      put_ITSTATE(old_itstate); // backout
18865      if (isLD) {
18866         IRTemp tD = newTemp(Ity_I32);
              /* Guarded load with the current rD as alternative operand,
                 followed by an unguarded register write. */
18867         loadGuardedLE( tD, ILGop_8Uto32, ea, llGetIReg(rD), condT );
18868         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18869      } else {
18870         storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
18871      }
18872      put_ITSTATE(new_itstate); // restore
18873
18874      DIP("%sb r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5);
18875      goto decode_success;
18876   }
18877
18878   case BITS5(1,0,0,1,0):   /* STR */
18879   case BITS5(1,0,0,1,1): { /* LDR */
18880      /* ------------- LDR Rd, [SP, #imm8 * 4] ------------- */
18881      /* ------------- STR Rd, [SP, #imm8 * 4] ------------- */
18882      /* LDR/STR Rd, [SP + imm8 * 4] */
           /* Word load/store at r13 + imm8*4.  Bit 11 selects load (1) or
              store (0).  Both memory operations are guarded by condT. */
18883      UInt rD    = INSN0(10,8);
18884      UInt imm8  = INSN0(7,0);
18885      UInt isLD  = INSN0(11,11);
18886
18887      IRExpr* ea = binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4));
           /* ITSTATE is old_itstate while the memory access is emitted. */
18888      put_ITSTATE(old_itstate); // backout
18889      if (isLD) {
18890         IRTemp tD = newTemp(Ity_I32);
              /* Guarded load with the current rD as alternative operand,
                 followed by an unguarded register write. */
18891         loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
18892         putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
18893      } else {
18894         storeGuardedLE(ea, getIRegT(rD), condT);
18895      }
18896      put_ITSTATE(new_itstate); // restore
18897
18898      DIP("%s r%u, [sp, #%u]\n", isLD ? "ldr" : "str", rD, imm8 * 4);
18899      goto decode_success;
18900   }
18901
18902   case BITS5(1,1,0,0,1): {
18903      /* ------------- LDMIA Rn!, {reglist} ------------- */
           /* Loads each of r0..r7 named in the 8-bit list from consecutive
              words starting at (rN with low two bits cleared), lowest
              register first.  The instruction is made unconditional by
              emitting a skip-over on condT; an empty list is rejected by
              falling out of the case via 'break'. */
18904      Int i, nRegs = 0;
18905      UInt rN   = INSN0(10,8);
18906      UInt list = INSN0(7,0);
18907      /* Empty lists aren't allowed. */
18908      if (list != 0) {
18909         mk_skip_over_T16_if_cond_is_false(condT);
18910         condT = IRTemp_INVALID;
18911         put_ITSTATE(old_itstate);
18912         // now uncond
18913
18914         IRTemp oldRn = newTemp(Ity_I32);
18915         IRTemp base  = newTemp(Ity_I32);
18916         assign(oldRn, getIRegT(rN));
18917         assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
18918         for (i = 0; i < 8; i++) {
18919            if (0 == (list & (1 << i)))
18920               continue;
18921            nRegs++;
                 /* nRegs counts registers seen so far, so the k-th listed
                    register is loaded from base + 4*(k-1). */
18922            putIRegT(
18923               i, loadLE(Ity_I32,
18924                         binop(Iop_Add32, mkexpr(base),
18925                                          mkU32(nRegs * 4 - 4))),
18926               IRTemp_INVALID
18927            );
18928         }
18929         /* Only do the writeback for rN if it isn't in the list of
18930            registers to be transferred. */
18931         if (0 == (list & (1 << rN))) {
18932            putIRegT(rN,
18933                     binop(Iop_Add32, mkexpr(oldRn),
18934                                      mkU32(nRegs * 4)),
18935                     IRTemp_INVALID
18936            );
18937         }
18938
18939         /* Reinstate the ITSTATE update. */
18940         put_ITSTATE(new_itstate);
18941
              /* Print the "!" only when writeback actually happens, i.e.
                 when rN is not in the list (same test as above). */
18942         DIP("ldmia r%u%s, {0x%04x}\n", rN, (list & (1 << rN)) ? "" : "!", list);
18943         goto decode_success;
18944      }
18945      break;
18946   }
18947
18948   case BITS5(1,1,0,0,0): {
18949      /* ------------- STMIA Rn!, {reglist} ------------- */
           /* Stores each of r0..r7 named in the 8-bit list to consecutive
              words starting at (rN with low two bits cleared), lowest
              register first, then always writes rN + 4*nRegs back to rN.
              The instruction is made unconditional by emitting a
              skip-over on condT.  Invalid encodings fall out of the case
              via 'break'. */
18950      Int i, nRegs = 0;
18951      UInt rN   = INSN0(10,8);
18952      UInt list = INSN0(7,0);
18953      /* Empty lists aren't allowed.  Also, if rN is in the list then
18954         it must be the lowest numbered register in the list. */
18955      Bool valid = list != 0;
18956      if (valid && 0 != (list & (1 << rN))) {
              /* Any listed register numbered below rN invalidates it. */
18957         for (i = 0; i < rN; i++) {
18958            if (0 != (list & (1 << i)))
18959               valid = False;
18960         }
18961      }
18962      if (valid) {
18963         mk_skip_over_T16_if_cond_is_false(condT);
18964         condT = IRTemp_INVALID;
18965         put_ITSTATE(old_itstate);
18966         // now uncond
18967
18968         IRTemp oldRn = newTemp(Ity_I32);
18969         IRTemp base = newTemp(Ity_I32);
18970         assign(oldRn, getIRegT(rN));
18971         assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
18972         for (i = 0; i < 8; i++) {
18973            if (0 == (list & (1 << i)))
18974               continue;
18975            nRegs++;
                 /* The k-th listed register is stored at base + 4*(k-1). */
18976            storeLE( binop(Iop_Add32, mkexpr(base), mkU32(nRegs * 4 - 4)),
18977                     getIRegT(i) );
18978         }
18979         /* Always do the writeback. */
18980         putIRegT(rN,
18981                  binop(Iop_Add32, mkexpr(oldRn),
18982                                   mkU32(nRegs * 4)),
18983                  IRTemp_INVALID);
18984
18985         /* Reinstate the ITSTATE update. */
18986         put_ITSTATE(new_itstate);
18987
18988         DIP("stmia r%u!, {0x%04x}\n", rN, list);
18989         goto decode_success;
18990      }
18991      break;
18992   }
18993
18994   case BITS5(0,0,0,0,0):   /* LSLS */
18995   case BITS5(0,0,0,0,1):   /* LSRS */
18996   case BITS5(0,0,0,1,0): { /* ASRS */
18997      /* ---------------- LSLS Rd, Rm, #imm5 ---------------- */
18998      /* ---------------- LSRS Rd, Rm, #imm5 ---------------- */
18999      /* ---------------- ASRS Rd, Rm, #imm5 ---------------- */
           /* Shifts rM by the 5-bit immediate, writes the result to rD
              (guarded by condT) and sets the flags thunk as a LOGIC op
              using the carry-out produced by the shift helper and the V
              value captured beforehand.  The flag update is guarded by
              cond_AND_notInIT_T rather than condT. */
19000      UInt   rD   = INSN0(2,0);
19001      UInt   rM   = INSN0(5,3);
19002      UInt   imm5 = INSN0(10,6);
19003      IRTemp res  = newTemp(Ity_I32);
19004      IRTemp resC = newTemp(Ity_I32);
19005      IRTemp rMt  = newTemp(Ity_I32);
19006      IRTemp oldV = newTemp(Ity_I32);
19007      const HChar* wot  = "???";
19008      assign(rMt, getIRegT(rM));
19009      assign(oldV, mk_armg_calculate_flag_v());
19010      /* Looks like INSN0(12,11) are the standard 'how' encoding.
19011         Could compactify if the ROR case later appears. */
           /* Each helper fills in res and resC; the text it writes to
              dis_buf is not used (see the DIP below). */
19012      switch (INSN0(15,11)) {
19013         case BITS5(0,0,0,0,0):
19014            compute_result_and_C_after_LSL_by_imm5(
19015               dis_buf, &res, &resC, rMt, imm5, rM
19016            );
19017            wot = "lsl";
19018            break;
19019         case BITS5(0,0,0,0,1):
19020            compute_result_and_C_after_LSR_by_imm5(
19021               dis_buf, &res, &resC, rMt, imm5, rM
19022            );
19023            wot = "lsr";
19024            break;
19025         case BITS5(0,0,0,1,0):
19026            compute_result_and_C_after_ASR_by_imm5(
19027               dis_buf, &res, &resC, rMt, imm5, rM
19028            );
19029            wot = "asr";
19030            break;
19031         default:
19032            /*NOTREACHED*/vassert(0);
19033      }
19034      // not safe to read guest state after this point
19035      putIRegT(rD, mkexpr(res), condT);
19036      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
19037                         cond_AND_notInIT_T );
19038      /* ignore buf and roll our own output */
19039      DIP("%ss r%u, r%u, #%u\n", wot, rD, rM, imm5);
19040      goto decode_success;
19041   }
19042
19043   case BITS5(1,1,1,0,0): {
19044      /* ---------------- B #simm11 ---------------- */
           /* Unconditional branch.  The 11-bit field is sign-extended and
              doubled to give a byte offset relative to the address of
              this instruction plus 4.  The block ends here with an
              Ijk_Boring jump; r15 is written with bit 0 set (Thumb). */
19045      Int  simm11 = INSN0(10,0);
           /* Sign-extend from bit 10 and multiply by 2.  The left shift is
              done on an unsigned value so that shifting a 1 into the sign
              bit is not undefined behaviour; the right shift of the
              signed result is relied on to be arithmetic, as before. */
19046           simm11 = ((Int)((UInt)simm11 << 21)) >> 20;
19047      UInt dst    = simm11 + guest_R15_curr_instr_notENC + 4;
19048      /* Only allowed outside or last-in IT block; SIGILL if not so. */
19049      gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19050      // and skip this insn if not selected; being cleverer is too
19051      // difficult
19052      mk_skip_over_T16_if_cond_is_false(condT);
19053      condT = IRTemp_INVALID;
19054      // now uncond
19055      llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
19056      dres.jk_StopHere = Ijk_Boring;
19057      dres.whatNext    = Dis_StopHere;
19058      DIP("b 0x%x\n", dst);
19059      goto decode_success;
19060   }
19061
19062   default:
19063      break; /* examine the next shortest prefix */
19064
19065   }
19066
19067
19068   /* ================ 16-bit 15:12 cases ================ */
19069
19070   switch (INSN0(15,12)) {
19071
19072   case BITS4(1,1,0,1): {
19073      /* ---------------- Bcond #simm8 ---------------- */
           /* Conditional branch.  Emits a side exit to the target that is
              taken when the condition evaluates to true, then falls
              through to the next instruction (current + 2).  Condition
              codes AL and NV are not handled here; they leave the case
              via 'break'. */
19074      UInt cond  = INSN0(11,8);
19075      Int  simm8 = INSN0(7,0);
           /* Sign-extend from bit 7 and multiply by 2.  The left shift is
              done on an unsigned value so that shifting a 1 into the sign
              bit is not undefined behaviour; the right shift of the
              signed result is relied on to be arithmetic, as before. */
19076           simm8 = ((Int)((UInt)simm8 << 24)) >> 23;
19077      UInt dst   = simm8 + guest_R15_curr_instr_notENC + 4;
19078      if (cond != ARMCondAL && cond != ARMCondNV) {
19079         /* Not allowed in an IT block; SIGILL if so. */
19080         gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
19081
19082         IRTemp kondT = newTemp(Ity_I32);
19083         assign( kondT, mk_armg_calculate_condition(cond) );
19084         stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
19085                            Ijk_Boring,
19086                            IRConst_U32(dst | 1/*CPSR.T*/),
19087                            OFFB_R15T ));
19088         llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2)
19089                              | 1 /*CPSR.T*/ ));
19090         dres.jk_StopHere = Ijk_Boring;
19091         dres.whatNext    = Dis_StopHere;
19092         DIP("b%s 0x%x\n", nCC(cond), dst);
19093         goto decode_success;
19094      }
19095      break;
19096   }
19097
19098   default:
19099      break; /* hmm, nothing matched */
19100
19101   }
19102
19103   /* ================ 16-bit misc cases ================ */
19104
19105   switch (INSN0(15,0)) {
           /* Exact-match decode of whole 16-bit encodings.  Anything not
              listed drops out of the switch to the decoders below. */
19106      case 0xBF00:
19107         /* ------ NOP ------ */
              /* No IR is emitted. */
19108         DIP("nop\n");
19109         goto decode_success;
19110      case 0xBF20:
19111         /* ------ WFE ------ */
19112         /* WFE gets used as a spin-loop hint.  Do the usual thing,
19113            which is to continue after yielding. */
              /* Side exit of kind Ijk_Yield, guarded by condT, whose
                 destination is the next instruction (current + 2) with
                 the Thumb bit set. */
19114         stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
19115                            Ijk_Yield,
19116                            IRConst_U32((guest_R15_curr_instr_notENC + 2)
19117                                        | 1 /*CPSR.T*/),
19118                            OFFB_R15T ));
19119         DIP("wfe\n");
19120         goto decode_success;
19121      case 0xBF40:
19122         /* ------ SEV ------ */
19123         /* Treat this as a no-op.  Any matching WFEs won't really
19124            cause the host CPU to snooze; they just cause V to try to
19125            run some other thread for a while.  So there's no point in
19126            really doing anything for SEV. */
19127         DIP("sev\n");
19128         goto decode_success;
19129      default:
19130         break; /* fall through */
19131   }
19132
19133   /* ----------------------------------------------------------- */
19134   /* --                                                       -- */
19135   /* -- Thumb 32-bit integer instructions                     -- */
19136   /* --                                                       -- */
19137   /* ----------------------------------------------------------- */
19138
19139#  define INSN1(_bMax,_bMin)  SLICE_UInt(((UInt)insn1), (_bMax), (_bMin))
        /* INSN1(hi,lo) extracts a bit field from the second halfword
           (insn1), which is fetched just below. */
19140
19141   /* second 16 bits of the instruction, if any */
        /* insn1 is expected to still be zero here; it is read from the
           two bytes following the first halfword. */
19142   vassert(insn1 == 0);
19143   insn1 = getUShortLittleEndianly( guest_instr+2 );
19144
19145   anOp   = Iop_INVALID; /* paranoia */
19146   anOpNm = NULL;        /* paranoia */
19147
19148   /* Change result defaults to suit 32-bit insns. */
        /* The asserts check that dres still holds the values expected for
           a 16-bit instruction before the length is changed to 4. */
19149   vassert(dres.whatNext   == Dis_Continue);
19150   vassert(dres.len        == 2);
19151   vassert(dres.continueAt == 0);
19152   dres.len = 4;
19153
19154   /* ---------------- BL/BLX simm26 ---------------- */
        /* Decodes the 32-bit BL (stay in Thumb) and BLX (switch to ARM)
           immediate calls.  r14 receives the address of the following
           instruction with bit 0 set, r15 receives the target, and the
           block ends with an Ijk_Call jump.  A BLX encoding with
           insn1[0] == 1 is rejected and falls through to later decoders. */
19155   if (BITS5(1,1,1,1,0) == INSN0(15,11) && BITS2(1,1) == INSN1(15,14)) {
19156      UInt isBL = INSN1(12,12);
19157      UInt bS   = INSN0(10,10);
19158      UInt bJ1  = INSN1(13,13);
19159      UInt bJ2  = INSN1(11,11);
19160      UInt bI1  = 1 ^ (bJ1 ^ bS);
19161      UInt bI2  = 1 ^ (bJ2 ^ bS);
           /* Assemble S:I1:I2:insn0[9:0]:insn1[10:0]:0, a 25-bit quantity
              whose sign bit (S) sits at bit 24. */
19162      Int simm25
19163         =   (bS          << (1 + 1 + 10 + 11 + 1))
19164           | (bI1         << (1 + 10 + 11 + 1))
19165           | (bI2         << (10 + 11 + 1))
19166           | (INSN0(9,0)  << (11 + 1))
19167           | (INSN1(10,0) << 1);
           /* Sign-extend from bit 24.  The left shift is done on an
              unsigned value so that shifting a 1 into the sign bit is not
              undefined behaviour; the right shift of the signed result is
              relied on to be arithmetic, as before. */
19168      simm25 = ((Int)((UInt)simm25 << 7)) >> 7;
19169
19170      vassert(0 == (guest_R15_curr_instr_notENC & 1));
19171      UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
19172
19173      /* One further validity case to check: in the case of BLX
19174         (not-BL), that insn1[0] must be zero. */
19175      Bool valid = True;
19176      if (isBL == 0 && INSN1(0,0) == 1) valid = False;
19177      if (valid) {
19178         /* Only allowed outside or last-in IT block; SIGILL if not so. */
19179         gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19180         // and skip this insn if not selected; being cleverer is too
19181         // difficult
19182         mk_skip_over_T32_if_cond_is_false(condT);
19183         condT = IRTemp_INVALID;
19184         // now uncond
19185
19186         /* We're returning to Thumb code, hence "| 1" */
19187         putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 4) | 1 ),
19188                   IRTemp_INVALID);
19189         if (isBL) {
19190            /* BL: unconditional T -> T call */
19191            /* we're calling Thumb code, hence "| 1" */
19192            llPutIReg(15, mkU32( dst | 1 ));
19193            DIP("bl 0x%x (stay in Thumb mode)\n", dst);
19194         } else {
19195            /* BLX: unconditional T -> A call */
19196            /* we're calling ARM code, hence "& ~3" to align to a
19197               valid ARM insn address */
19198            llPutIReg(15, mkU32( dst & ~3 ));
19199            DIP("blx 0x%x (switch to ARM mode)\n", dst & ~3);
19200         }
19201         dres.whatNext    = Dis_StopHere;
19202         dres.jk_StopHere = Ijk_Call;
19203         goto decode_success;
19204      }
19205   }
19206
19207   /* ---------------- {LD,ST}M{IA,DB} ---------------- */
        /* Decodes the 32-bit load/store-multiple forms.  Statically
           invalid encodings are rejected (falling through to later
           decoders); valid ones are made unconditional with a skip-over
           on condT and handed to mk_ldm_stm.  A load that includes r15
           ends the block with an Ijk_Ret jump. */
19208   if (0x3a2 == INSN0(15,6) // {LD,ST}MIA
19209       || 0x3a4 == INSN0(15,6)) { // {LD,ST}MDB
19210      UInt bW      = INSN0(5,5); /* writeback Rn ? */
19211      UInt bL      = INSN0(4,4);
19212      UInt rN      = INSN0(3,0);
19213      UInt bP      = INSN1(15,15); /* reglist entry for r15 */
19214      UInt bM      = INSN1(14,14); /* reglist entry for r14 */
19215      UInt rLmost  = INSN1(12,0);  /* reglist entry for r0 .. 12 */
19216      UInt rL13    = INSN1(13,13); /* must be zero */
19217      UInt regList = 0;
19218      Bool valid   = True;
19219
           /* IA is "increment, after"; the DB encoding (0x3a4) flips both
              flags to "decrement, before". */
19220      UInt bINC    = 1;
19221      UInt bBEFORE = 0;
19222      if (INSN0(15,6) == 0x3a4) {
19223         bINC    = 0;
19224         bBEFORE = 1;
19225      }
19226
19227      /* detect statically invalid cases, and construct the final
19228         reglist */
19229      if (rL13 == 1)
19230         valid = False;
19231
           /* bL == 1 is the load form: r15 and r14 may appear in the list,
              but not both.  In the store form r15 may not be listed at
              all.  Both forms reject rN == 15, lists with fewer than two
              registers, and writeback when rN is itself in the list. */
19232      if (bL == 1) {
19233         regList = (bP << 15) | (bM << 14) | rLmost;
19234         if (rN == 15)                       valid = False;
19235         if (popcount32(regList) < 2)        valid = False;
19236         if (bP == 1 && bM == 1)             valid = False;
19237         if (bW == 1 && (regList & (1<<rN))) valid = False;
19238      } else {
19239         regList = (bM << 14) | rLmost;
19240         if (bP == 1)                        valid = False;
19241         if (rN == 15)                       valid = False;
19242         if (popcount32(regList) < 2)        valid = False;
19243         if (bW == 1 && (regList & (1<<rN))) valid = False;
19244      }
19245
19246      if (valid) {
19247         if (bL == 1 && bP == 1) {
19248            // We'll be writing the PC.  Hence:
19249            /* Only allowed outside or last-in IT block; SIGILL if not so. */
19250            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19251         }
19252
19253         /* Go uncond: */
19254         mk_skip_over_T32_if_cond_is_false(condT);
19255         condT = IRTemp_INVALID;
19256         // now uncond
19257
19258         /* Generate the IR.  This might generate a write to R15. */
19259         mk_ldm_stm(False/*!arm*/, rN, bINC, bBEFORE, bW, bL, regList);
19260
19261         if (bL == 1 && (regList & (1<<15))) {
19262            // If we wrote to R15, we have an interworking return to
19263            // deal with.
19264            llPutIReg(15, llGetIReg(15));
19265            dres.jk_StopHere = Ijk_Ret;
19266            dres.whatNext    = Dis_StopHere;
19267         }
19268
19269         DIP("%sm%c%c r%u%s, {0x%04x}\n",
19270              bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
19271              rN, bW ? "!" : "", regList);
19272
19273         goto decode_success;
19274      }
19275   }
19276
19277   /* -------------- (T3) ADD{S}.W Rd, Rn, #constT -------------- */
        /* rD := rN + expanded immediate, guarded by condT; the flags thunk
           is set as an ADD when the S bit (insn0 bit 4) is 1. */
19278   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19279       && INSN0(9,5) == BITS5(0,1,0,0,0)
19280       && INSN1(15,15) == 0) {
19281      UInt bS = INSN0(4,4);
19282      UInt rN = INSN0(3,0);
19283      UInt rD = INSN1(11,8);
19284      Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
19285      /* but allow "add.w reg, sp, #constT" for reg != PC */
19286      if (!valid && rD <= 14 && rN == 13)
19287         valid = True;
19288      if (valid) {
19289         IRTemp argL  = newTemp(Ity_I32);
19290         IRTemp argR  = newTemp(Ity_I32);
19291         IRTemp res   = newTemp(Ity_I32);
              /* NULL is passed where other decoders pass &updC --
                 presumably the carry-update indication is not needed for
                 an arithmetic op; confirm against the helper. */
19292         UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
19293         assign(argL, getIRegT(rN));
19294         assign(argR, mkU32(imm32));
19295         assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
19296         putIRegT(rD, mkexpr(res), condT);
19297         if (bS == 1)
19298            setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
19299         DIP("add%s.w r%u, r%u, #%u\n",
19300             bS == 1 ? "s" : "", rD, rN, imm32);
19301         goto decode_success;
19302      }
19303   }
19304
19305   /* ---------------- (T4) ADDW Rd, Rn, #uimm12 -------------- */
        /* rD := rN + zero-extended 12-bit immediate, guarded by condT.
           No flags are written. */
19306   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19307       && INSN0(9,4) == BITS6(1,0,0,0,0,0)
19308       && INSN1(15,15) == 0) {
19309      UInt rN = INSN0(3,0);
19310      UInt rD = INSN1(11,8);
19311      Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
19312      /* but allow "addw reg, sp, #uimm12" for reg != PC */
19313      if (!valid && rD <= 14 && rN == 13)
19314         valid = True;
19315      if (valid) {
19316         IRTemp argL = newTemp(Ity_I32);
19317         IRTemp argR = newTemp(Ity_I32);
19318         IRTemp res  = newTemp(Ity_I32);
              /* imm12 = insn0[10] : insn1[14:12] : insn1[7:0] */
19319         UInt imm12  = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
19320         assign(argL, getIRegT(rN));
19321         assign(argR, mkU32(imm12));
19322         assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
19323         putIRegT(rD, mkexpr(res), condT);
19324         DIP("addw r%u, r%u, #%u\n", rD, rN, imm12);
19325         goto decode_success;
19326      }
19327   }
19328
19329   /* ---------------- (T2) CMP.W Rn, #constT ---------------- */
19330   /* ---------------- (T2) CMN.W Rn, #constT ---------------- */
        /* Compares rN with the expanded immediate: only the flags thunk is
           written (SUB for CMP, ADD for CMN), guarded by condT.  Matches
           only when insn1[11:8] is 1111; rN == 15 is rejected. */
19331   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19332       && (   INSN0(9,4) == BITS6(0,1,1,0,1,1)  // CMP
19333           || INSN0(9,4) == BITS6(0,1,0,0,0,1)) // CMN
19334       && INSN1(15,15) == 0
19335       && INSN1(11,8) == BITS4(1,1,1,1)) {
19336      UInt rN = INSN0(3,0);
19337      if (rN != 15) {
19338         IRTemp argL  = newTemp(Ity_I32);
19339         IRTemp argR  = newTemp(Ity_I32);
19340         Bool   isCMN = INSN0(9,4) == BITS6(0,1,0,0,0,1);
19341         UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
19342         assign(argL, getIRegT(rN));
19343         assign(argR, mkU32(imm32));
19344         setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
19345                         argL, argR, condT );
19346         DIP("%s.w r%u, #%u\n", isCMN ? "cmn" : "cmp", rN, imm32);
19347         goto decode_success;
19348      }
19349   }
19350
19351   /* -------------- (T1) TST.W Rn, #constT -------------- */
19352   /* -------------- (T1) TEQ.W Rn, #constT -------------- */
        /* Computes rN AND imm (TST) or rN XOR imm (TEQ) and writes only the
           flags thunk, as a LOGIC op guarded by condT.  Matches only when
           insn1[11:8] is 1111. */
19353   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19354       && (   INSN0(9,4) == BITS6(0,0,0,0,0,1)  // TST
19355           || INSN0(9,4) == BITS6(0,0,1,0,0,1)) // TEQ
19356       && INSN1(15,15) == 0
19357       && INSN1(11,8) == BITS4(1,1,1,1)) {
19358      UInt rN = INSN0(3,0);
19359      if (!isBadRegT(rN)) { // yes, really, it's inconsistent with CMP.W
19360         Bool  isTST  = INSN0(9,4) == BITS6(0,0,0,0,0,1);
19361         IRTemp argL  = newTemp(Ity_I32);
19362         IRTemp argR  = newTemp(Ity_I32);
19363         IRTemp res   = newTemp(Ity_I32);
19364         IRTemp oldV  = newTemp(Ity_I32);
19365         IRTemp oldC  = newTemp(Ity_I32);
19366         Bool   updC  = False;
19367         UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
19368         assign(argL, getIRegT(rN));
19369         assign(argR, mkU32(imm32));
19370         assign(res,  binop(isTST ? Iop_And32 : Iop_Xor32,
19371                            mkexpr(argL), mkexpr(argR)));
19372         assign( oldV, mk_armg_calculate_flag_v() );
              /* If the immediate expansion reported updC, the C value
                 handed to the thunk is bit 31 of imm32; otherwise it is
                 the current C flag. */
19373         assign( oldC, updC
19374                       ? mkU32((imm32 >> 31) & 1)
19375                       : mk_armg_calculate_flag_c() );
19376         setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
19377         DIP("%s.w r%u, #%u\n", isTST ? "tst" : "teq", rN, imm32);
19378         goto decode_success;
19379      }
19380   }
19381
19382   /* -------------- (T3) SUB{S}.W Rd, Rn, #constT -------------- */
19383   /* -------------- (T3) RSB{S}.W Rd, Rn, #constT -------------- */
        /* SUB: rD := rN - imm.  RSB: rD := imm - rN.  The register write is
           guarded by condT; when the S bit (insn0 bit 4) is 1 the flags
           thunk is set as a SUB with the operands in the same order as
           the subtraction performed. */
19384   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19385       && (INSN0(9,5) == BITS5(0,1,1,0,1) // SUB
19386           || INSN0(9,5) == BITS5(0,1,1,1,0)) // RSB
19387       && INSN1(15,15) == 0) {
19388      Bool isRSB = INSN0(9,5) == BITS5(0,1,1,1,0);
19389      UInt bS    = INSN0(4,4);
19390      UInt rN    = INSN0(3,0);
19391      UInt rD    = INSN1(11,8);
19392      Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
19393      /* but allow "sub{s}.w reg, sp, #constT
19394         this is (T2) of "SUB (SP minus immediate)" */
19395      if (!valid && !isRSB && rN == 13 && rD != 15)
19396         valid = True;
19397      if (valid) {
19398         IRTemp argL  = newTemp(Ity_I32);
19399         IRTemp argR  = newTemp(Ity_I32);
19400         IRTemp res   = newTemp(Ity_I32);
19401         UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
19402         assign(argL, getIRegT(rN));
19403         assign(argR, mkU32(imm32));
19404         assign(res,  isRSB
19405                      ? binop(Iop_Sub32, mkexpr(argR), mkexpr(argL))
19406                      : binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
19407         putIRegT(rD, mkexpr(res), condT);
19408         if (bS == 1) {
19409            if (isRSB)
19410               setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
19411            else
19412               setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
19413         }
19414         DIP("%s%s.w r%u, r%u, #%u\n",
19415             isRSB ? "rsb" : "sub", bS == 1 ? "s" : "", rD, rN, imm32);
19416         goto decode_success;
19417      }
19418   }
19419
19420   /* -------------- (T4) SUBW Rd, Rn, #uimm12 ------------------- */
        /* rD := rN - zero-extended 12-bit immediate, guarded by condT.
           No flags are written. */
19421   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19422       && INSN0(9,4) == BITS6(1,0,1,0,1,0)
19423       && INSN1(15,15) == 0) {
19424      UInt rN = INSN0(3,0);
19425      UInt rD = INSN1(11,8);
19426      Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
19427      /* but allow "subw reg, sp, #uimm12" for reg != PC; this is the
              SP-minus-immediate form, accepted on the same terms as the
              SP case of ADDW (rD <= 14 && rN == 13). */
19428      if (!valid && rD <= 14 && rN == 13)
19429         valid = True;
19430      if (valid) {
19431         IRTemp argL  = newTemp(Ity_I32);
19432         IRTemp argR  = newTemp(Ity_I32);
19433         IRTemp res   = newTemp(Ity_I32);
              /* imm12 = insn0[10] : insn1[14:12] : insn1[7:0] */
19434         UInt imm12   = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
19435         assign(argL, getIRegT(rN));
19436         assign(argR, mkU32(imm12));
19437         assign(res,  binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
19438         putIRegT(rD, mkexpr(res), condT);
19439         DIP("subw r%u, r%u, #%u\n", rD, rN, imm12);
19440         goto decode_success;
19441      }
19442   }
19443
19444   /* -------------- (T1) ADC{S}.W Rd, Rn, #constT -------------- */
19445   /* -------------- (T1) SBC{S}.W Rd, Rn, #constT -------------- */
        /* Add/subtract with carry against an expanded immediate.  The
           current C flag is read once into oldC and used both in the
           result and, when the S bit (insn0 bit 4) is set, as the third
           operand of the ADC/SBB flags thunk.  Register and flag writes
           are guarded by condT. */
19446   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19447       && (   INSN0(9,5) == BITS5(0,1,0,1,0)  // ADC
19448           || INSN0(9,5) == BITS5(0,1,0,1,1)) // SBC
19449       && INSN1(15,15) == 0) {
19450      /* ADC:  Rd = Rn + constT + oldC */
19451      /* SBC:  Rd = Rn - constT - (oldC ^ 1) */
19452      UInt bS    = INSN0(4,4);
19453      UInt rN    = INSN0(3,0);
19454      UInt rD    = INSN1(11,8);
19455      if (!isBadRegT(rN) && !isBadRegT(rD)) {
19456         IRTemp argL  = newTemp(Ity_I32);
19457         IRTemp argR  = newTemp(Ity_I32);
19458         IRTemp res   = newTemp(Ity_I32);
19459         IRTemp oldC  = newTemp(Ity_I32);
19460         UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
19461         assign(argL, getIRegT(rN));
19462         assign(argR, mkU32(imm32));
19463         assign(oldC, mk_armg_calculate_flag_c() );
19464         const HChar* nm  = "???";
19465         switch (INSN0(9,5)) {
19466            case BITS5(0,1,0,1,0): // ADC
19467               nm = "adc";
19468               assign(res,
19469                      binop(Iop_Add32,
19470                            binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
19471                            mkexpr(oldC) ));
19472               putIRegT(rD, mkexpr(res), condT);
19473               if (bS)
19474                  setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
19475                                     argL, argR, oldC, condT );
19476               break;
19477            case BITS5(0,1,0,1,1): // SBC
19478               nm = "sbc";
19479               assign(res,
19480                      binop(Iop_Sub32,
19481                            binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
19482                            binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
19483               putIRegT(rD, mkexpr(res), condT);
19484               if (bS)
19485                  setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
19486                                     argL, argR, oldC, condT );
19487               break;
19488            default:
                    /* Unreachable: the enclosing 'if' admits only the two
                       opcodes handled above. */
19489              vassert(0);
19490         }
19491         DIP("%s%s.w r%u, r%u, #%u\n",
19492             nm, bS == 1 ? "s" : "", rD, rN, imm32);
19493         goto decode_success;
19494      }
19495   }
19496
19497   /* -------------- (T1) ORR{S}.W Rd, Rn, #constT -------------- */
19498   /* -------------- (T1) AND{S}.W Rd, Rn, #constT -------------- */
19499   /* -------------- (T1) BIC{S}.W Rd, Rn, #constT -------------- */
19500   /* -------------- (T1) EOR{S}.W Rd, Rn, #constT -------------- */
        /* -------------- (T1) ORN{S}.W Rd, Rn, #constT -------------- */
        /* Bitwise op between rN and an expanded immediate.  BIC and ORN
           are implemented as AND / OR with the complemented immediate.
           The register write is guarded by condT; when the S bit (insn0
           bit 4) is set the flags thunk is written as a LOGIC op. */
19501   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19502       && (   INSN0(9,5) == BITS5(0,0,0,1,0)  // ORR
19503           || INSN0(9,5) == BITS5(0,0,0,0,0)  // AND
19504           || INSN0(9,5) == BITS5(0,0,0,0,1)  // BIC
19505           || INSN0(9,5) == BITS5(0,0,1,0,0)  // EOR
19506           || INSN0(9,5) == BITS5(0,0,0,1,1)) // ORN
19507       && INSN1(15,15) == 0) {
19508      UInt bS = INSN0(4,4);
19509      UInt rN = INSN0(3,0);
19510      UInt rD = INSN1(11,8);
19511      if (!isBadRegT(rN) && !isBadRegT(rD)) {
19512         Bool   notArgR = False;
19513         IROp   op      = Iop_INVALID;
19514         const HChar* nm = "???";
19515         switch (INSN0(9,5)) {
19516            case BITS5(0,0,0,1,0): op = Iop_Or32;  nm = "orr"; break;
19517            case BITS5(0,0,0,0,0): op = Iop_And32; nm = "and"; break;
19518            case BITS5(0,0,0,0,1): op = Iop_And32; nm = "bic";
19519                                   notArgR = True; break;
19520            case BITS5(0,0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
19521            case BITS5(0,0,0,1,1): op = Iop_Or32;  nm = "orn";
19522                                   notArgR = True; break;
19523            default: vassert(0);
19524         }
19525         IRTemp argL  = newTemp(Ity_I32);
19526         IRTemp argR  = newTemp(Ity_I32);
19527         IRTemp res   = newTemp(Ity_I32);
19528         Bool   updC  = False;
19529         UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
19530         assign(argL, getIRegT(rN));
              /* The complement is applied to the constant at decode time. */
19531         assign(argR, mkU32(notArgR ? ~imm32 : imm32));
19532         assign(res,  binop(op, mkexpr(argL), mkexpr(argR)));
19533         putIRegT(rD, mkexpr(res), condT);
19534         if (bS) {
19535            IRTemp oldV = newTemp(Ity_I32);
19536            IRTemp oldC = newTemp(Ity_I32);
19537            assign( oldV, mk_armg_calculate_flag_v() );
                 /* If the immediate expansion reported updC, the C value
                    handed to the thunk is bit 31 of the uncomplemented
                    imm32; otherwise it is the current C flag. */
19538            assign( oldC, updC
19539                          ? mkU32((imm32 >> 31) & 1)
19540                          : mk_armg_calculate_flag_c() );
19541            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19542                               condT );
19543         }
19544         DIP("%s%s.w r%u, r%u, #%u\n",
19545             nm, bS == 1 ? "s" : "", rD, rN, imm32);
19546         goto decode_success;
19547      }
19548   }
19549
19550   /* ---------- (T3) ADD{S}.W Rd, Rn, Rm, {shift} ---------- */
19551   /* ---------- (T3) SUB{S}.W Rd, Rn, Rm, {shift} ---------- */
19552   /* ---------- (T3) RSB{S}.W Rd, Rn, Rm, {shift} ---------- */
19553   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
19554       && (   INSN0(8,5) == BITS4(1,0,0,0)  // add subopc
19555           || INSN0(8,5) == BITS4(1,1,0,1)  // sub subopc
19556           || INSN0(8,5) == BITS4(1,1,1,0)) // rsb subopc
19557       && INSN1(15,15) == 0) {
19558      UInt rN   = INSN0(3,0);
19559      UInt rD   = INSN1(11,8);
19560      UInt rM   = INSN1(3,0);
19561      UInt bS   = INSN0(4,4);
19562      UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
19563      UInt how  = INSN1(5,4);
19564
19565      Bool valid = !isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM);
19566      /* but allow "add.w reg, sp, reg, lsl #N for N=0,1,2 or 3
19567         (T3) "ADD (SP plus register) */
19568      if (!valid && INSN0(8,5) == BITS4(1,0,0,0) // add
19569          && rD != 15 && rN == 13 && imm5 <= 3 && how == 0) {
19570         valid = True;
19571      }
19572      /* also allow "sub.w reg, sp, reg   w/ no shift
19573         (T1) "SUB (SP minus register) */
19574      if (!valid && INSN0(8,5) == BITS4(1,1,0,1) // sub
19575          && rD != 15 && rN == 13 && imm5 == 0 && how == 0) {
19576         valid = True;
19577      }
19578      if (valid) {
19579         Bool   swap = False;
19580         IROp   op   = Iop_INVALID;
19581         const HChar* nm = "???";
19582         switch (INSN0(8,5)) {
19583            case BITS4(1,0,0,0): op = Iop_Add32; nm = "add"; break;
19584            case BITS4(1,1,0,1): op = Iop_Sub32; nm = "sub"; break;
19585            case BITS4(1,1,1,0): op = Iop_Sub32; nm = "rsb";
19586                                 swap = True; break;
19587            default: vassert(0);
19588         }
19589
19590         IRTemp argL = newTemp(Ity_I32);
19591         assign(argL, getIRegT(rN));
19592
19593         IRTemp rMt = newTemp(Ity_I32);
19594         assign(rMt, getIRegT(rM));
19595
19596         IRTemp argR = newTemp(Ity_I32);
19597         compute_result_and_C_after_shift_by_imm5(
19598            dis_buf, &argR, NULL, rMt, how, imm5, rM
19599         );
19600
19601         IRTemp res = newTemp(Ity_I32);
19602         assign(res, swap
19603                     ? binop(op, mkexpr(argR), mkexpr(argL))
19604                     : binop(op, mkexpr(argL), mkexpr(argR)));
19605
19606         putIRegT(rD, mkexpr(res), condT);
19607         if (bS) {
19608            switch (op) {
19609               case Iop_Add32:
19610                  setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
19611                  break;
19612               case Iop_Sub32:
19613                  if (swap)
19614                     setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
19615                  else
19616                     setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
19617                  break;
19618               default:
19619                  vassert(0);
19620            }
19621         }
19622
19623         DIP("%s%s.w r%u, r%u, %s\n",
19624             nm, bS ? "s" : "", rD, rN, dis_buf);
19625         goto decode_success;
19626      }
19627   }
19628
19629   /* ---------- (T3) ADC{S}.W Rd, Rn, Rm, {shift} ---------- */
19630   /* ---------- (T2) SBC{S}.W Rd, Rn, Rm, {shift} ---------- */
19631   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
19632       && (   INSN0(8,5) == BITS4(1,0,1,0)   // adc subopc
19633           || INSN0(8,5) == BITS4(1,0,1,1))  // sbc subopc
19634       && INSN1(15,15) == 0) {
19635      /* ADC:  Rd = Rn + shifter_operand + oldC */
19636      /* SBC:  Rd = Rn - shifter_operand - (oldC ^ 1) */
19637      UInt rN = INSN0(3,0);
19638      UInt rD = INSN1(11,8);
19639      UInt rM = INSN1(3,0);
19640      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
19641         UInt bS   = INSN0(4,4);
19642         UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
19643         UInt how  = INSN1(5,4);
19644
19645         IRTemp argL = newTemp(Ity_I32);
19646         assign(argL, getIRegT(rN));
19647
19648         IRTemp rMt = newTemp(Ity_I32);
19649         assign(rMt, getIRegT(rM));
19650
19651         IRTemp oldC = newTemp(Ity_I32);
19652         assign(oldC, mk_armg_calculate_flag_c());
19653
19654         IRTemp argR = newTemp(Ity_I32);
19655         compute_result_and_C_after_shift_by_imm5(
19656            dis_buf, &argR, NULL, rMt, how, imm5, rM
19657         );
19658
19659         const HChar* nm  = "???";
19660         IRTemp res = newTemp(Ity_I32);
19661         switch (INSN0(8,5)) {
19662            case BITS4(1,0,1,0): // ADC
19663               nm = "adc";
19664               assign(res,
19665                      binop(Iop_Add32,
19666                            binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
19667                            mkexpr(oldC) ));
19668               putIRegT(rD, mkexpr(res), condT);
19669               if (bS)
19670                  setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
19671                                     argL, argR, oldC, condT );
19672               break;
19673            case BITS4(1,0,1,1): // SBC
19674               nm = "sbc";
19675               assign(res,
19676                      binop(Iop_Sub32,
19677                            binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
19678                            binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
19679               putIRegT(rD, mkexpr(res), condT);
19680               if (bS)
19681                  setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
19682                                     argL, argR, oldC, condT );
19683               break;
19684            default:
19685               vassert(0);
19686         }
19687
19688         DIP("%s%s.w r%u, r%u, %s\n",
19689             nm, bS ? "s" : "", rD, rN, dis_buf);
19690         goto decode_success;
19691      }
19692   }
19693
19694   /* ---------- (T3) AND{S}.W Rd, Rn, Rm, {shift} ---------- */
19695   /* ---------- (T3) ORR{S}.W Rd, Rn, Rm, {shift} ---------- */
19696   /* ---------- (T3) EOR{S}.W Rd, Rn, Rm, {shift} ---------- */
19697   /* ---------- (T3) BIC{S}.W Rd, Rn, Rm, {shift} ---------- */
19698   /* ---------- (T1) ORN{S}.W Rd, Rn, Rm, {shift} ---------- */
19699   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
19700       && (   INSN0(8,5) == BITS4(0,0,0,0)  // and subopc
19701           || INSN0(8,5) == BITS4(0,0,1,0)  // orr subopc
19702           || INSN0(8,5) == BITS4(0,1,0,0)  // eor subopc
19703           || INSN0(8,5) == BITS4(0,0,0,1)  // bic subopc
19704           || INSN0(8,5) == BITS4(0,0,1,1)) // orn subopc
19705       && INSN1(15,15) == 0) {
19706      UInt rN = INSN0(3,0);
19707      UInt rD = INSN1(11,8);
19708      UInt rM = INSN1(3,0);
19709      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
19710         Bool notArgR = False;
19711         IROp op      = Iop_INVALID;
19712         const HChar* nm  = "???";
19713         switch (INSN0(8,5)) {
19714            case BITS4(0,0,0,0): op = Iop_And32; nm = "and"; break;
19715            case BITS4(0,0,1,0): op = Iop_Or32;  nm = "orr"; break;
19716            case BITS4(0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
19717            case BITS4(0,0,0,1): op = Iop_And32; nm = "bic";
19718                                 notArgR = True; break;
19719            case BITS4(0,0,1,1): op = Iop_Or32; nm = "orn";
19720                                 notArgR = True; break;
19721            default: vassert(0);
19722         }
19723         UInt bS   = INSN0(4,4);
19724         UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
19725         UInt how  = INSN1(5,4);
19726
19727         IRTemp rNt = newTemp(Ity_I32);
19728         assign(rNt, getIRegT(rN));
19729
19730         IRTemp rMt = newTemp(Ity_I32);
19731         assign(rMt, getIRegT(rM));
19732
19733         IRTemp argR = newTemp(Ity_I32);
19734         IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
19735
19736         compute_result_and_C_after_shift_by_imm5(
19737            dis_buf, &argR, bS ? &oldC : NULL, rMt, how, imm5, rM
19738         );
19739
19740         IRTemp res = newTemp(Ity_I32);
19741         if (notArgR) {
19742            vassert(op == Iop_And32 || op == Iop_Or32);
19743            assign(res, binop(op, mkexpr(rNt),
19744                                  unop(Iop_Not32, mkexpr(argR))));
19745         } else {
19746            assign(res, binop(op, mkexpr(rNt), mkexpr(argR)));
19747         }
19748
19749         putIRegT(rD, mkexpr(res), condT);
19750         if (bS) {
19751            IRTemp oldV = newTemp(Ity_I32);
19752            assign( oldV, mk_armg_calculate_flag_v() );
19753            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19754                               condT );
19755         }
19756
19757         DIP("%s%s.w r%u, r%u, %s\n",
19758             nm, bS ? "s" : "", rD, rN, dis_buf);
19759         goto decode_success;
19760      }
19761   }
19762
19763   /* -------------- (T?) LSL{S}.W Rd, Rn, Rm -------------- */
19764   /* -------------- (T?) LSR{S}.W Rd, Rn, Rm -------------- */
19765   /* -------------- (T?) ASR{S}.W Rd, Rn, Rm -------------- */
19766   /* -------------- (T?) ROR{S}.W Rd, Rn, Rm -------------- */
19767   if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,0,0)
19768       && INSN1(15,12) == BITS4(1,1,1,1)
19769       && INSN1(7,4) == BITS4(0,0,0,0)) {
19770      UInt how = INSN0(6,5); // standard encoding
19771      UInt rN  = INSN0(3,0);
19772      UInt rD  = INSN1(11,8);
19773      UInt rM  = INSN1(3,0);
19774      UInt bS  = INSN0(4,4);
19775      Bool valid = !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rD);
19776      if (valid) {
19777         IRTemp rNt    = newTemp(Ity_I32);
19778         IRTemp rMt    = newTemp(Ity_I32);
19779         IRTemp res    = newTemp(Ity_I32);
19780         IRTemp oldC   = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
19781         IRTemp oldV   = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
19782         const HChar* nms[4] = { "lsl", "lsr", "asr", "ror" };
19783         const HChar* nm     = nms[how];
19784         assign(rNt, getIRegT(rN));
19785         assign(rMt, getIRegT(rM));
19786         compute_result_and_C_after_shift_by_reg(
19787            dis_buf, &res, bS ? &oldC : NULL,
19788            rNt, how, rMt, rN, rM
19789         );
19790         if (bS)
19791            assign(oldV, mk_armg_calculate_flag_v());
19792         putIRegT(rD, mkexpr(res), condT);
19793         if (bS) {
19794            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19795                               condT );
19796         }
19797         DIP("%s%s.w r%u, r%u, r%u\n",
19798             nm, bS ? "s" : "", rD, rN, rM);
19799         goto decode_success;
19800      }
19801   }
19802
19803   /* ------------ (T?) MOV{S}.W Rd, Rn, {shift} ------------ */
19804   /* ------------ (T?) MVN{S}.W Rd, Rn, {shift} ------------ */
19805   if ((INSN0(15,0) & 0xFFCF) == 0xEA4F
19806       && INSN1(15,15) == 0) {
19807      UInt rD = INSN1(11,8);
19808      UInt rN = INSN1(3,0);
19809      if (!isBadRegT(rD) && !isBadRegT(rN)) {
19810         UInt bS    = INSN0(4,4);
19811         UInt isMVN = INSN0(5,5);
19812         UInt imm5  = (INSN1(14,12) << 2) | INSN1(7,6);
19813         UInt how   = INSN1(5,4);
19814
19815         IRTemp rNt = newTemp(Ity_I32);
19816         assign(rNt, getIRegT(rN));
19817
19818         IRTemp oldRn = newTemp(Ity_I32);
19819         IRTemp oldC  = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
19820         compute_result_and_C_after_shift_by_imm5(
19821            dis_buf, &oldRn, bS ? &oldC : NULL, rNt, how, imm5, rN
19822         );
19823
19824         IRTemp res = newTemp(Ity_I32);
19825         assign(res, isMVN ? unop(Iop_Not32, mkexpr(oldRn))
19826                           : mkexpr(oldRn));
19827
19828         putIRegT(rD, mkexpr(res), condT);
19829         if (bS) {
19830            IRTemp oldV = newTemp(Ity_I32);
19831            assign( oldV, mk_armg_calculate_flag_v() );
19832            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT);
19833         }
19834         DIP("%s%s.w r%u, %s\n",
19835             isMVN ? "mvn" : "mov", bS ? "s" : "", rD, dis_buf);
19836         goto decode_success;
19837      }
19838   }
19839
19840   /* -------------- (T?) TST.W Rn, Rm, {shift} -------------- */
19841   /* -------------- (T?) TEQ.W Rn, Rm, {shift} -------------- */
19842   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
19843       && (   INSN0(8,4) == BITS5(0,0,0,0,1)  // TST
19844           || INSN0(8,4) == BITS5(0,1,0,0,1)) // TEQ
19845       && INSN1(15,15) == 0
19846       && INSN1(11,8) == BITS4(1,1,1,1)) {
19847      UInt rN = INSN0(3,0);
19848      UInt rM = INSN1(3,0);
19849      if (!isBadRegT(rN) && !isBadRegT(rM)) {
19850         Bool isTST = INSN0(8,4) == BITS5(0,0,0,0,1);
19851
19852         UInt how  = INSN1(5,4);
19853         UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
19854
19855         IRTemp argL = newTemp(Ity_I32);
19856         assign(argL, getIRegT(rN));
19857
19858         IRTemp rMt = newTemp(Ity_I32);
19859         assign(rMt, getIRegT(rM));
19860
19861         IRTemp argR = newTemp(Ity_I32);
19862         IRTemp oldC = newTemp(Ity_I32);
19863         compute_result_and_C_after_shift_by_imm5(
19864            dis_buf, &argR, &oldC, rMt, how, imm5, rM
19865         );
19866
19867         IRTemp oldV = newTemp(Ity_I32);
19868         assign( oldV, mk_armg_calculate_flag_v() );
19869
19870         IRTemp res = newTemp(Ity_I32);
19871         assign(res, binop(isTST ? Iop_And32 : Iop_Xor32,
19872                           mkexpr(argL), mkexpr(argR)));
19873
19874         setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19875                            condT );
19876         DIP("%s.w r%u, %s\n", isTST ? "tst" : "teq", rN, dis_buf);
19877         goto decode_success;
19878      }
19879   }
19880
19881   /* -------------- (T3) CMP.W Rn, Rm, {shift} -------------- */
19882   /* -------------- (T2) CMN.W Rn, Rm, {shift} -------------- */
19883   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
19884       && (   INSN0(8,4) == BITS5(1,1,0,1,1)  // CMP
19885           || INSN0(8,4) == BITS5(1,0,0,0,1)) // CMN
19886       && INSN1(15,15) == 0
19887       && INSN1(11,8) == BITS4(1,1,1,1)) {
19888      UInt rN = INSN0(3,0);
19889      UInt rM = INSN1(3,0);
19890      if (!isBadRegT(rN) && !isBadRegT(rM)) {
19891         Bool isCMN = INSN0(8,4) == BITS5(1,0,0,0,1);
19892         UInt how   = INSN1(5,4);
19893         UInt imm5  = (INSN1(14,12) << 2) | INSN1(7,6);
19894
19895         IRTemp argL = newTemp(Ity_I32);
19896         assign(argL, getIRegT(rN));
19897
19898         IRTemp rMt = newTemp(Ity_I32);
19899         assign(rMt, getIRegT(rM));
19900
19901         IRTemp argR = newTemp(Ity_I32);
19902         compute_result_and_C_after_shift_by_imm5(
19903            dis_buf, &argR, NULL, rMt, how, imm5, rM
19904         );
19905
19906         setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
19907                         argL, argR, condT );
19908
19909         DIP("%s.w r%u, %s\n", isCMN ? "cmn" : "cmp", rN, dis_buf);
19910         goto decode_success;
19911      }
19912   }
19913
19914   /* -------------- (T2) MOV{S}.W Rd, #constT -------------- */
19915   /* -------------- (T2) MVN{S}.W Rd, #constT -------------- */
19916   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19917       && (   INSN0(9,5) == BITS5(0,0,0,1,0)  // MOV
19918           || INSN0(9,5) == BITS5(0,0,0,1,1)) // MVN
19919       && INSN0(3,0) == BITS4(1,1,1,1)
19920       && INSN1(15,15) == 0) {
19921      UInt rD = INSN1(11,8);
19922      if (!isBadRegT(rD)) {
19923         Bool   updC  = False;
19924         UInt   bS    = INSN0(4,4);
19925         Bool   isMVN = INSN0(5,5) == 1;
19926         UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
19927         IRTemp res   = newTemp(Ity_I32);
19928         assign(res, mkU32(isMVN ? ~imm32 : imm32));
19929         putIRegT(rD, mkexpr(res), condT);
19930         if (bS) {
19931            IRTemp oldV = newTemp(Ity_I32);
19932            IRTemp oldC = newTemp(Ity_I32);
19933            assign( oldV, mk_armg_calculate_flag_v() );
19934            assign( oldC, updC
19935                          ? mkU32((imm32 >> 31) & 1)
19936                          : mk_armg_calculate_flag_c() );
19937            setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19938                               condT );
19939         }
19940         DIP("%s%s.w r%u, #%u\n",
19941             isMVN ? "mvn" : "mov", bS ? "s" : "", rD, imm32);
19942         goto decode_success;
19943      }
19944   }
19945
19946   /* -------------- (T3) MOVW Rd, #imm16 -------------- */
19947   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19948       && INSN0(9,4) == BITS6(1,0,0,1,0,0)
19949       && INSN1(15,15) == 0) {
19950      UInt rD = INSN1(11,8);
19951      if (!isBadRegT(rD)) {
19952         UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
19953                      | (INSN1(14,12) << 8) | INSN1(7,0);
19954         putIRegT(rD, mkU32(imm16), condT);
19955         DIP("movw r%u, #%u\n", rD, imm16);
19956         goto decode_success;
19957      }
19958   }
19959
19960   /* ---------------- MOVT Rd, #imm16 ---------------- */
19961   if (INSN0(15,11) == BITS5(1,1,1,1,0)
19962       && INSN0(9,4) == BITS6(1,0,1,1,0,0)
19963       && INSN1(15,15) == 0) {
19964      UInt rD = INSN1(11,8);
19965      if (!isBadRegT(rD)) {
19966         UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
19967                      | (INSN1(14,12) << 8) | INSN1(7,0);
19968         IRTemp res = newTemp(Ity_I32);
19969         assign(res,
19970                binop(Iop_Or32,
19971                      binop(Iop_And32, getIRegT(rD), mkU32(0xFFFF)),
19972                      mkU32(imm16 << 16)));
19973         putIRegT(rD, mkexpr(res), condT);
19974         DIP("movt r%u, #%u\n", rD, imm16);
19975         goto decode_success;
19976      }
19977   }
19978
19979   /* ---------------- LD/ST reg+/-#imm8 ---------------- */
19980   /* Loads and stores of the form:
19981         op  Rt, [Rn, #-imm8]      or
19982         op  Rt, [Rn], #+/-imm8    or
19983         op  Rt, [Rn, #+/-imm8]!
19984      where op is one of
19985         ldrb ldrh ldr  ldrsb ldrsh
19986         strb strh str
19987   */
19988   if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0) && INSN1(11,11) == 1) {
19989      Bool   valid  = True;
19990      Bool   syned  = False;
19991      Bool   isST   = False;
19992      IRType ty     = Ity_I8;
19993      const HChar* nm = "???";
19994
19995      switch (INSN0(8,4)) {
19996         case BITS5(0,0,0,0,0):   // strb
19997            nm = "strb"; isST = True; break;
19998         case BITS5(0,0,0,0,1):   // ldrb
19999            nm = "ldrb"; break;
20000         case BITS5(1,0,0,0,1):   // ldrsb
20001            nm = "ldrsb"; syned = True; break;
20002         case BITS5(0,0,0,1,0):   // strh
20003            nm = "strh"; ty = Ity_I16; isST = True; break;
20004         case BITS5(0,0,0,1,1):   // ldrh
20005            nm = "ldrh"; ty = Ity_I16; break;
20006         case BITS5(1,0,0,1,1):   // ldrsh
20007            nm = "ldrsh"; ty = Ity_I16; syned = True; break;
20008         case BITS5(0,0,1,0,0):   // str
20009            nm = "str"; ty = Ity_I32; isST = True; break;
20010         case BITS5(0,0,1,0,1):
20011            nm = "ldr"; ty = Ity_I32; break;  // ldr
20012         default:
20013            valid = False; break;
20014      }
20015
20016      UInt rN      = INSN0(3,0);
20017      UInt rT      = INSN1(15,12);
20018      UInt bP      = INSN1(10,10);
20019      UInt bU      = INSN1(9,9);
20020      UInt bW      = INSN1(8,8);
20021      UInt imm8    = INSN1(7,0);
20022      Bool loadsPC = False;
20023
20024      if (valid) {
20025         if (bP == 1 && bU == 1 && bW == 0)
20026            valid = False;
20027         if (bP == 0 && bW == 0)
20028            valid = False;
20029         if (rN == 15)
20030            valid = False;
20031         if (bW == 1 && rN == rT)
20032            valid = False;
20033         if (ty == Ity_I8 || ty == Ity_I16) {
20034            if (isBadRegT(rT))
20035               valid = False;
20036         } else {
20037            /* ty == Ity_I32 */
20038            if (isST && rT == 15)
20039               valid = False;
20040            if (!isST && rT == 15)
20041               loadsPC = True;
20042         }
20043      }
20044
20045      if (valid) {
20046         // if it's a branch, it can't happen in the middle of an IT block
20047         // Also, if it is a branch, make it unconditional at this point.
20048         // Doing conditional branches in-line is too complex (for now)
20049         if (loadsPC) {
20050            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20051            // go uncond
20052            mk_skip_over_T32_if_cond_is_false(condT);
20053            condT = IRTemp_INVALID;
20054            // now uncond
20055         }
20056
20057         IRTemp preAddr = newTemp(Ity_I32);
20058         assign(preAddr, getIRegT(rN));
20059
20060         IRTemp postAddr = newTemp(Ity_I32);
20061         assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
20062                                mkexpr(preAddr), mkU32(imm8)));
20063
20064         IRTemp transAddr = bP == 1 ? postAddr : preAddr;
20065
20066         if (isST) {
20067
20068            /* Store.  If necessary, update the base register before
20069               the store itself, so that the common idiom of "str rX,
20070               [sp, #-4]!" (store rX at sp-4, then do new sp = sp-4,
20071               a.k.a "push rX") doesn't cause Memcheck to complain
20072               that the access is below the stack pointer.  Also, not
20073               updating sp before the store confuses Valgrind's
20074               dynamic stack-extending logic.  So do it before the
20075               store.  Hence we need to snarf the store data before
20076               doing the basereg update. */
20077
20078            /* get hold of the data to be stored */
20079            IRTemp oldRt = newTemp(Ity_I32);
20080            assign(oldRt, getIRegT(rT));
20081
20082            /* Update Rn if necessary. */
20083            if (bW == 1) {
20084               vassert(rN != rT); // assured by validity check above
20085               putIRegT(rN, mkexpr(postAddr), condT);
20086            }
20087
20088            /* generate the transfer */
20089            IRExpr* data = NULL;
20090            switch (ty) {
20091               case Ity_I8:
20092                  data = unop(Iop_32to8, mkexpr(oldRt));
20093                  break;
20094               case Ity_I16:
20095                  data = unop(Iop_32to16, mkexpr(oldRt));
20096                  break;
20097               case Ity_I32:
20098                  data = mkexpr(oldRt);
20099                  break;
20100               default:
20101                  vassert(0);
20102            }
20103            storeGuardedLE(mkexpr(transAddr), data, condT);
20104
20105         } else {
20106
20107            /* Load. */
20108            IRTemp llOldRt = newTemp(Ity_I32);
20109            assign(llOldRt, llGetIReg(rT));
20110
20111            /* generate the transfer */
20112            IRTemp    newRt = newTemp(Ity_I32);
20113            IRLoadGOp widen = ILGop_INVALID;
20114            switch (ty) {
20115               case Ity_I8:
20116                  widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
20117               case Ity_I16:
20118                  widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
20119               case Ity_I32:
20120                  widen = ILGop_Ident32; break;
20121               default:
20122                  vassert(0);
20123            }
20124            loadGuardedLE(newRt, widen,
20125                          mkexpr(transAddr), mkexpr(llOldRt), condT);
20126            if (rT == 15) {
20127               vassert(loadsPC);
20128               /* We'll do the write to the PC just below */
20129            } else {
20130               vassert(!loadsPC);
20131               /* IRTemp_INVALID is OK here because in the case where
20132                  condT is false at run time, we're just putting the
20133                  old rT value back. */
20134               putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
20135            }
20136
20137            /* Update Rn if necessary. */
20138            if (bW == 1) {
20139               vassert(rN != rT); // assured by validity check above
20140               putIRegT(rN, mkexpr(postAddr), condT);
20141            }
20142
20143            if (loadsPC) {
20144               /* Presumably this is an interworking branch. */
20145               vassert(rN != 15); // assured by validity check above
20146               vassert(rT == 15);
20147               vassert(condT == IRTemp_INVALID); /* due to check above */
20148               llPutIReg(15, mkexpr(newRt));
20149               dres.jk_StopHere = Ijk_Boring;  /* or _Ret ? */
20150               dres.whatNext    = Dis_StopHere;
20151            }
20152         }
20153
20154         if (bP == 1 && bW == 0) {
20155            DIP("%s.w r%u, [r%u, #%c%u]\n",
20156                nm, rT, rN, bU ? '+' : '-', imm8);
20157         }
20158         else if (bP == 1 && bW == 1) {
20159            DIP("%s.w r%u, [r%u, #%c%u]!\n",
20160                nm, rT, rN, bU ? '+' : '-', imm8);
20161         }
20162         else {
20163            vassert(bP == 0 && bW == 1);
20164            DIP("%s.w r%u, [r%u], #%c%u\n",
20165                nm, rT, rN, bU ? '+' : '-', imm8);
20166         }
20167
20168         goto decode_success;
20169      }
20170   }
20171
20172   /* ------------- LD/ST reg+(reg<<imm2) ------------- */
20173   /* Loads and stores of the form:
20174         op  Rt, [Rn, Rm, LSL #imm8]
20175      where op is one of
20176         ldrb ldrh ldr  ldrsb ldrsh
20177         strb strh str
20178   */
20179   if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)
20180       && INSN1(11,6) == BITS6(0,0,0,0,0,0)) {
20181      Bool   valid  = True;
20182      Bool   syned  = False;
20183      Bool   isST   = False;
20184      IRType ty     = Ity_I8;
20185      const HChar* nm = "???";
20186
20187      switch (INSN0(8,4)) {
20188         case BITS5(0,0,0,0,0):   // strb
20189            nm = "strb"; isST = True; break;
20190         case BITS5(0,0,0,0,1):   // ldrb
20191            nm = "ldrb"; break;
20192         case BITS5(1,0,0,0,1):   // ldrsb
20193            nm = "ldrsb"; syned = True; break;
20194         case BITS5(0,0,0,1,0):   // strh
20195            nm = "strh"; ty = Ity_I16; isST = True; break;
20196         case BITS5(0,0,0,1,1):   // ldrh
20197            nm = "ldrh"; ty = Ity_I16; break;
20198         case BITS5(1,0,0,1,1):   // ldrsh
20199            nm = "ldrsh"; ty = Ity_I16; syned = True; break;
20200         case BITS5(0,0,1,0,0):   // str
20201            nm = "str"; ty = Ity_I32; isST = True; break;
20202         case BITS5(0,0,1,0,1):
20203            nm = "ldr"; ty = Ity_I32; break;  // ldr
20204         default:
20205            valid = False; break;
20206      }
20207
20208      UInt rN      = INSN0(3,0);
20209      UInt rM      = INSN1(3,0);
20210      UInt rT      = INSN1(15,12);
20211      UInt imm2    = INSN1(5,4);
20212      Bool loadsPC = False;
20213
20214      if (ty == Ity_I8 || ty == Ity_I16) {
20215         /* all 8- and 16-bit load and store cases have the
20216            same exclusion set. */
20217         if (rN == 15 || isBadRegT(rT) || isBadRegT(rM))
20218            valid = False;
20219      } else {
20220         vassert(ty == Ity_I32);
20221         if (rN == 15 || isBadRegT(rM))
20222            valid = False;
20223         if (isST && rT == 15)
20224            valid = False;
20225         /* If it is a load and rT is 15, that's only allowable if we
20226            not in an IT block, or are the last in it.  Need to insert
20227            a dynamic check for that. */
20228         if (!isST && rT == 15)
20229            loadsPC = True;
20230      }
20231
20232      if (valid) {
20233         // if it's a branch, it can't happen in the middle of an IT block
20234         // Also, if it is a branch, make it unconditional at this point.
20235         // Doing conditional branches in-line is too complex (for now)
20236         if (loadsPC) {
20237            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20238            // go uncond
20239            mk_skip_over_T32_if_cond_is_false(condT);
20240            condT = IRTemp_INVALID;
20241            // now uncond
20242         }
20243
20244         IRTemp transAddr = newTemp(Ity_I32);
20245         assign(transAddr,
20246                binop( Iop_Add32,
20247                       getIRegT(rN),
20248                       binop(Iop_Shl32, getIRegT(rM), mkU8(imm2)) ));
20249
20250         if (isST) {
20251
20252            /* get hold of the data to be stored */
20253            IRTemp oldRt = newTemp(Ity_I32);
20254            assign(oldRt, getIRegT(rT));
20255
20256            /* generate the transfer */
20257            IRExpr* data = NULL;
20258            switch (ty) {
20259               case Ity_I8:
20260                  data = unop(Iop_32to8, mkexpr(oldRt));
20261                  break;
20262               case Ity_I16:
20263                  data = unop(Iop_32to16, mkexpr(oldRt));
20264                  break;
20265              case Ity_I32:
20266                  data = mkexpr(oldRt);
20267                  break;
20268              default:
20269                 vassert(0);
20270            }
20271            storeGuardedLE(mkexpr(transAddr), data, condT);
20272
20273         } else {
20274
20275            /* Load. */
20276            IRTemp llOldRt = newTemp(Ity_I32);
20277            assign(llOldRt, llGetIReg(rT));
20278
20279            /* generate the transfer */
20280            IRTemp    newRt = newTemp(Ity_I32);
20281            IRLoadGOp widen = ILGop_INVALID;
20282            switch (ty) {
20283               case Ity_I8:
20284                  widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
20285               case Ity_I16:
20286                  widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
20287               case Ity_I32:
20288                  widen = ILGop_Ident32; break;
20289               default:
20290                  vassert(0);
20291            }
20292            loadGuardedLE(newRt, widen,
20293                          mkexpr(transAddr), mkexpr(llOldRt), condT);
20294
20295            if (rT == 15) {
20296               vassert(loadsPC);
20297               /* We'll do the write to the PC just below */
20298            } else {
20299               vassert(!loadsPC);
20300               /* IRTemp_INVALID is OK here because in the case where
20301                  condT is false at run time, we're just putting the
20302                  old rT value back. */
20303               putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
20304            }
20305
20306            if (loadsPC) {
20307               /* Presumably this is an interworking branch. */
20308               vassert(rN != 15); // assured by validity check above
20309               vassert(rT == 15);
20310               vassert(condT == IRTemp_INVALID); /* due to check above */
20311               llPutIReg(15, mkexpr(newRt));
20312               dres.jk_StopHere = Ijk_Boring;  /* or _Ret ? */
20313               dres.whatNext    = Dis_StopHere;
20314            }
20315         }
20316
20317         DIP("%s.w r%u, [r%u, r%u, LSL #%u]\n",
20318             nm, rT, rN, rM, imm2);
20319
20320         goto decode_success;
20321      }
20322   }
20323
20324   /* --------------- LD/ST reg+imm12 --------------- */
20325   /* Loads and stores of the form:
20326         op  Rt, [Rn, +#imm12]
20327      where op is one of
20328         ldrb ldrh ldr  ldrsb ldrsh
20329         strb strh str
20330   */
20331   if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)) {
20332      Bool   valid  = True;
20333      Bool   syned  = False;
20334      Bool   isST   = False;
20335      IRType ty     = Ity_I8;
20336      const HChar* nm = "???";
20337
20338      switch (INSN0(8,4)) {
20339         case BITS5(0,1,0,0,0):   // strb
20340            nm = "strb"; isST = True; break;
20341         case BITS5(0,1,0,0,1):   // ldrb
20342            nm = "ldrb"; break;
20343         case BITS5(1,1,0,0,1):   // ldrsb
20344            nm = "ldrsb"; syned = True; break;
20345         case BITS5(0,1,0,1,0):   // strh
20346            nm = "strh"; ty = Ity_I16; isST = True; break;
20347         case BITS5(0,1,0,1,1):   // ldrh
20348            nm = "ldrh"; ty = Ity_I16; break;
20349         case BITS5(1,1,0,1,1):   // ldrsh
20350            nm = "ldrsh"; ty = Ity_I16; syned = True; break;
20351         case BITS5(0,1,1,0,0):   // str
20352            nm = "str"; ty = Ity_I32; isST = True; break;
20353         case BITS5(0,1,1,0,1):
20354            nm = "ldr"; ty = Ity_I32; break;  // ldr
20355         default:
20356            valid = False; break;
20357      }
20358
20359      UInt rN      = INSN0(3,0);
20360      UInt rT      = INSN1(15,12);
20361      UInt imm12   = INSN1(11,0);
20362      Bool loadsPC = False;
20363
20364      if (ty == Ity_I8 || ty == Ity_I16) {
20365         /* all 8- and 16-bit load and store cases have the
20366            same exclusion set. */
20367         if (rN == 15 || isBadRegT(rT))
20368            valid = False;
20369      } else {
20370         vassert(ty == Ity_I32);
20371         if (isST) {
20372            if (rN == 15 || rT == 15)
20373               valid = False;
20374         } else {
20375            /* For a 32-bit load, rT == 15 is only allowable if we not
20376               in an IT block, or are the last in it.  Need to insert
20377               a dynamic check for that.  Also, in this particular
20378               case, rN == 15 is allowable.  In this case however, the
20379               value obtained for rN is (apparently)
20380               "word-align(address of current insn + 4)". */
20381            if (rT == 15)
20382               loadsPC = True;
20383         }
20384      }
20385
20386      if (valid) {
20387         // if it's a branch, it can't happen in the middle of an IT block
20388         // Also, if it is a branch, make it unconditional at this point.
20389         // Doing conditional branches in-line is too complex (for now)
20390         if (loadsPC) {
20391            gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20392            // go uncond
20393            mk_skip_over_T32_if_cond_is_false(condT);
20394            condT = IRTemp_INVALID;
20395            // now uncond
20396         }
20397
20398         IRTemp rNt = newTemp(Ity_I32);
20399         if (rN == 15) {
20400            vassert(ty == Ity_I32 && !isST);
20401            assign(rNt, binop(Iop_And32, getIRegT(rN), mkU32(~3)));
20402         } else {
20403            assign(rNt, getIRegT(rN));
20404         }
20405
20406         IRTemp transAddr = newTemp(Ity_I32);
20407         assign(transAddr,
20408                binop( Iop_Add32, mkexpr(rNt), mkU32(imm12) ));
20409
20410         IRTemp oldRt = newTemp(Ity_I32);
20411         assign(oldRt, getIRegT(rT));
20412
20413         IRTemp llOldRt = newTemp(Ity_I32);
20414         assign(llOldRt, llGetIReg(rT));
20415
20416         if (isST) {
20417            IRExpr* data = NULL;
20418            switch (ty) {
20419               case Ity_I8:
20420                  data = unop(Iop_32to8, mkexpr(oldRt));
20421                  break;
20422               case Ity_I16:
20423                  data = unop(Iop_32to16, mkexpr(oldRt));
20424                  break;
20425              case Ity_I32:
20426                  data = mkexpr(oldRt);
20427                  break;
20428              default:
20429                 vassert(0);
20430            }
20431            storeGuardedLE(mkexpr(transAddr), data, condT);
20432         } else {
20433            IRTemp    newRt = newTemp(Ity_I32);
20434            IRLoadGOp widen = ILGop_INVALID;
20435            switch (ty) {
20436               case Ity_I8:
20437                  widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
20438               case Ity_I16:
20439                  widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
20440               case Ity_I32:
20441                  widen = ILGop_Ident32; break;
20442               default:
20443                  vassert(0);
20444            }
20445            loadGuardedLE(newRt, widen,
20446                          mkexpr(transAddr), mkexpr(llOldRt), condT);
20447            if (rT == 15) {
20448               vassert(loadsPC);
20449               /* We'll do the write to the PC just below */
20450            } else {
20451               vassert(!loadsPC);
20452               /* IRTemp_INVALID is OK here because in the case where
20453                  condT is false at run time, we're just putting the
20454                  old rT value back. */
20455               putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
20456            }
20457
20458            if (loadsPC) {
20459               /* Presumably this is an interworking branch. */
20460               vassert(rT == 15);
20461               vassert(condT == IRTemp_INVALID); /* due to check above */
20462               llPutIReg(15, mkexpr(newRt));
20463               irsb->next = mkexpr(newRt);
20464               irsb->jumpkind = Ijk_Boring;  /* or _Ret ? */
20465               dres.whatNext  = Dis_StopHere;
20466            }
20467         }
20468
20469         DIP("%s.w r%u, [r%u, +#%u]\n", nm, rT, rN, imm12);
20470
20471         goto decode_success;
20472      }
20473   }
20474
20475   /* -------------- LDRD/STRD reg+/-#imm8 -------------- */
20476   /* Doubleword loads and stores of the form:
20477         ldrd/strd  Rt, Rt2, [Rn, #+/-imm8]    or
20478         ldrd/strd  Rt, Rt2, [Rn], #+/-imm8    or
20479         ldrd/strd  Rt, Rt2, [Rn, #+/-imm8]!
20480   */
20481   if (INSN0(15,9) == BITS7(1,1,1,0,1,0,0) && INSN0(6,6) == 1) {
20482      UInt bP   = INSN0(8,8);
20483      UInt bU   = INSN0(7,7);
20484      UInt bW   = INSN0(5,5);
20485      UInt bL   = INSN0(4,4);  // 1: load  0: store
20486      UInt rN   = INSN0(3,0);
20487      UInt rT   = INSN1(15,12);
20488      UInt rT2  = INSN1(11,8);
20489      UInt imm8 = INSN1(7,0);
20490
20491      Bool valid = True;
20492      if (bP == 0 && bW == 0)                 valid = False;
20493      if (bW == 1 && (rN == rT || rN == rT2)) valid = False;
20494      if (isBadRegT(rT) || isBadRegT(rT2))    valid = False;
20495      if (bL == 1 && rT == rT2)               valid = False;
20496      /* It's OK to use PC as the base register only in the
20497         following case: ldrd Rt, Rt2, [PC, #+/-imm8] */
20498      if (rN == 15 && (bL == 0/*store*/
20499                       || bW == 1/*wb*/))     valid = False;
20500
20501      if (valid) {
20502         IRTemp preAddr = newTemp(Ity_I32);
20503         assign(preAddr, getIRegT(rN));
20504
20505         IRTemp postAddr = newTemp(Ity_I32);
20506         assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
20507                                mkexpr(preAddr), mkU32(imm8 << 2)));
20508
20509         IRTemp transAddr = bP == 1 ? postAddr : preAddr;
20510
20511         /* For almost all cases, we do the writeback after the transfers.
20512            However, that leaves the stack "uncovered" in this case:
20513               strd    rD, [sp, #-8]
20514            In which case, do the writeback to SP now, instead of later.
20515            This is bad in that it makes the insn non-restartable if the
20516            accesses fault, but at least keeps Memcheck happy. */
20517         Bool writeback_already_done = False;
20518         if (bL == 0/*store*/ && bW == 1/*wb*/
20519             && rN == 13 && rN != rT && rN != rT2
20520             && bU == 0/*minus*/ && (imm8 << 2) == 8) {
20521            putIRegT(rN, mkexpr(postAddr), condT);
20522            writeback_already_done = True;
20523         }
20524
20525         if (bL == 0) {
20526            IRTemp oldRt  = newTemp(Ity_I32);
20527            IRTemp oldRt2 = newTemp(Ity_I32);
20528            assign(oldRt,  getIRegT(rT));
20529            assign(oldRt2, getIRegT(rT2));
20530            storeGuardedLE( mkexpr(transAddr),
20531                            mkexpr(oldRt), condT );
20532            storeGuardedLE( binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
20533                            mkexpr(oldRt2), condT );
20534         } else {
20535            IRTemp oldRt  = newTemp(Ity_I32);
20536            IRTemp oldRt2 = newTemp(Ity_I32);
20537            IRTemp newRt  = newTemp(Ity_I32);
20538            IRTemp newRt2 = newTemp(Ity_I32);
20539            assign(oldRt,  llGetIReg(rT));
20540            assign(oldRt2, llGetIReg(rT2));
20541            loadGuardedLE( newRt, ILGop_Ident32,
20542                           mkexpr(transAddr),
20543                           mkexpr(oldRt), condT );
20544            loadGuardedLE( newRt2, ILGop_Ident32,
20545                           binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
20546                           mkexpr(oldRt2), condT );
20547            /* Put unconditionally, since we already switched on the condT
20548               in the guarded loads. */
20549            putIRegT(rT,  mkexpr(newRt),  IRTemp_INVALID);
20550            putIRegT(rT2, mkexpr(newRt2), IRTemp_INVALID);
20551         }
20552
20553         if (bW == 1 && !writeback_already_done) {
20554            putIRegT(rN, mkexpr(postAddr), condT);
20555         }
20556
20557         const HChar* nm = bL ? "ldrd" : "strd";
20558
20559         if (bP == 1 && bW == 0) {
20560            DIP("%s.w r%u, r%u, [r%u, #%c%u]\n",
20561                nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
20562         }
20563         else if (bP == 1 && bW == 1) {
20564            DIP("%s.w r%u, r%u, [r%u, #%c%u]!\n",
20565                nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
20566         }
20567         else {
20568            vassert(bP == 0 && bW == 1);
20569            DIP("%s.w r%u, r%u, [r%u], #%c%u\n",
20570                nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
20571         }
20572
20573         goto decode_success;
20574      }
20575   }
20576
20577   /* -------------- (T3) Bcond.W label -------------- */
20578   /* This variant carries its own condition, so can't be part of an
20579      IT block ... */
20580   if (INSN0(15,11) == BITS5(1,1,1,1,0)
20581       && INSN1(15,14) == BITS2(1,0)
20582       && INSN1(12,12) == 0) {
20583      UInt cond = INSN0(9,6);
20584      if (cond != ARMCondAL && cond != ARMCondNV) {
20585         Int simm21
20586            =   (INSN0(10,10) << (1 + 1 + 6 + 11 + 1))
20587              | (INSN1(11,11) << (1 + 6 + 11 + 1))
20588              | (INSN1(13,13) << (6 + 11 + 1))
20589              | (INSN0(5,0)   << (11 + 1))
20590              | (INSN1(10,0)  << 1);
20591         simm21 = (simm21 << 11) >> 11;
20592
20593         vassert(0 == (guest_R15_curr_instr_notENC & 1));
20594         UInt dst = simm21 + guest_R15_curr_instr_notENC + 4;
20595
20596         /* Not allowed in an IT block; SIGILL if so. */
20597         gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
20598
20599         IRTemp kondT = newTemp(Ity_I32);
20600         assign( kondT, mk_armg_calculate_condition(cond) );
20601         stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
20602                            Ijk_Boring,
20603                            IRConst_U32(dst | 1/*CPSR.T*/),
20604                            OFFB_R15T ));
20605         llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 4)
20606                              | 1 /*CPSR.T*/ ));
20607         dres.jk_StopHere = Ijk_Boring;
20608         dres.whatNext    = Dis_StopHere;
20609         DIP("b%s.w 0x%x\n", nCC(cond), dst);
20610         goto decode_success;
20611      }
20612   }
20613
20614   /* ---------------- (T4) B.W label ---------------- */
20615   /* ... whereas this variant doesn't carry its own condition, so it
20616      has to be either unconditional or the conditional by virtue of
20617      being the last in an IT block.  The upside is that there's 4
20618      more bits available for the jump offset, so it has a 16-times
20619      greater branch range than the T3 variant. */
20620   if (INSN0(15,11) == BITS5(1,1,1,1,0)
20621       && INSN1(15,14) == BITS2(1,0)
20622       && INSN1(12,12) == 1) {
20623      if (1) {
20624         UInt bS  = INSN0(10,10);
20625         UInt bJ1 = INSN1(13,13);
20626         UInt bJ2 = INSN1(11,11);
20627         UInt bI1 = 1 ^ (bJ1 ^ bS);
20628         UInt bI2 = 1 ^ (bJ2 ^ bS);
20629         Int simm25
20630            =   (bS          << (1 + 1 + 10 + 11 + 1))
20631              | (bI1         << (1 + 10 + 11 + 1))
20632              | (bI2         << (10 + 11 + 1))
20633              | (INSN0(9,0)  << (11 + 1))
20634              | (INSN1(10,0) << 1);
20635         simm25 = (simm25 << 7) >> 7;
20636
20637         vassert(0 == (guest_R15_curr_instr_notENC & 1));
20638         UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
20639
20640         /* If in an IT block, must be the last insn. */
20641         gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20642
20643         // go uncond
20644         mk_skip_over_T32_if_cond_is_false(condT);
20645         condT = IRTemp_INVALID;
20646         // now uncond
20647
20648         // branch to dst
20649         llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
20650         dres.jk_StopHere = Ijk_Boring;
20651         dres.whatNext    = Dis_StopHere;
20652         DIP("b.w 0x%x\n", dst);
20653         goto decode_success;
20654      }
20655   }
20656
20657   /* ------------------ TBB, TBH ------------------ */
20658   if (INSN0(15,4) == 0xE8D && INSN1(15,5) == 0x780) {
20659      UInt rN = INSN0(3,0);
20660      UInt rM = INSN1(3,0);
20661      UInt bH = INSN1(4,4);
20662      if (bH/*ATC*/ || (rN != 13 && !isBadRegT(rM))) {
20663         /* Must be last or not-in IT block */
20664         gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20665         /* Go uncond */
20666         mk_skip_over_T32_if_cond_is_false(condT);
20667         condT = IRTemp_INVALID;
20668
20669         IRExpr* ea
20670             = binop(Iop_Add32,
20671                     getIRegT(rN),
20672                     bH ? binop(Iop_Shl32, getIRegT(rM), mkU8(1))
20673                        : getIRegT(rM));
20674
20675         IRTemp delta = newTemp(Ity_I32);
20676         if (bH) {
20677            assign(delta, unop(Iop_16Uto32, loadLE(Ity_I16, ea)));
20678         } else {
20679            assign(delta, unop(Iop_8Uto32, loadLE(Ity_I8, ea)));
20680         }
20681
20682         llPutIReg(
20683            15,
20684            binop(Iop_Or32,
20685                  binop(Iop_Add32,
20686                        getIRegT(15),
20687                        binop(Iop_Shl32, mkexpr(delta), mkU8(1))
20688                  ),
20689                  mkU32(1)
20690         ));
20691         dres.jk_StopHere = Ijk_Boring;
20692         dres.whatNext    = Dis_StopHere;
20693         DIP("tb%c [r%u, r%u%s]\n",
20694             bH ? 'h' : 'b', rN, rM, bH ? ", LSL #1" : "");
20695         goto decode_success;
20696      }
20697   }
20698
20699   /* ------------------ UBFX ------------------ */
20700   /* ------------------ SBFX ------------------ */
20701   /* There's also ARM versions of same, but it doesn't seem worth the
20702      hassle to common up the handling (it's only a couple of C
20703      statements). */
20704   if ((INSN0(15,4) == 0xF3C // UBFX
20705        || INSN0(15,4) == 0xF34) // SBFX
20706       && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
20707      UInt rN  = INSN0(3,0);
20708      UInt rD  = INSN1(11,8);
20709      UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
20710      UInt wm1 = INSN1(4,0);
20711      UInt msb =  lsb + wm1;
20712      if (!isBadRegT(rD) && !isBadRegT(rN) && msb <= 31) {
20713         Bool   isU  = INSN0(15,4) == 0xF3C;
20714         IRTemp src  = newTemp(Ity_I32);
20715         IRTemp tmp  = newTemp(Ity_I32);
20716         IRTemp res  = newTemp(Ity_I32);
20717         UInt   mask = ((1 << wm1) - 1) + (1 << wm1);
20718         vassert(msb >= 0 && msb <= 31);
20719         vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
20720
20721         assign(src, getIRegT(rN));
20722         assign(tmp, binop(Iop_And32,
20723                           binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
20724                           mkU32(mask)));
20725         assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
20726                           binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
20727                           mkU8(31-wm1)));
20728
20729         putIRegT(rD, mkexpr(res), condT);
20730
20731         DIP("%s r%u, r%u, #%u, #%u\n",
20732             isU ? "ubfx" : "sbfx", rD, rN, lsb, wm1 + 1);
20733         goto decode_success;
20734      }
20735   }
20736
20737   /* ------------------ UXTB ------------------ */
20738   /* ------------------ UXTH ------------------ */
20739   /* ------------------ SXTB ------------------ */
20740   /* ------------------ SXTH ------------------ */
20741   /* ----------------- UXTB16 ----------------- */
20742   /* ----------------- SXTB16 ----------------- */
20743   /* FIXME: this is an exact duplicate of the ARM version.  They
20744      should be commoned up. */
20745   if ((INSN0(15,0) == 0xFA5F     // UXTB
20746        || INSN0(15,0) == 0xFA1F  // UXTH
20747        || INSN0(15,0) == 0xFA4F  // SXTB
20748        || INSN0(15,0) == 0xFA0F  // SXTH
20749        || INSN0(15,0) == 0xFA3F  // UXTB16
20750        || INSN0(15,0) == 0xFA2F) // SXTB16
20751       && INSN1(15,12) == BITS4(1,1,1,1)
20752       && INSN1(7,6) == BITS2(1,0)) {
20753      UInt rD = INSN1(11,8);
20754      UInt rM = INSN1(3,0);
20755      UInt rot = INSN1(5,4);
20756      if (!isBadRegT(rD) && !isBadRegT(rM)) {
20757         const HChar* nm = "???";
20758         IRTemp srcT = newTemp(Ity_I32);
20759         IRTemp rotT = newTemp(Ity_I32);
20760         IRTemp dstT = newTemp(Ity_I32);
20761         assign(srcT, getIRegT(rM));
20762         assign(rotT, genROR32(srcT, 8 * rot));
20763         switch (INSN0(15,0)) {
20764            case 0xFA5F: // UXTB
20765               nm = "uxtb";
20766               assign(dstT, unop(Iop_8Uto32,
20767                                 unop(Iop_32to8, mkexpr(rotT))));
20768               break;
20769            case 0xFA1F: // UXTH
20770               nm = "uxth";
20771               assign(dstT, unop(Iop_16Uto32,
20772                                 unop(Iop_32to16, mkexpr(rotT))));
20773               break;
20774            case 0xFA4F: // SXTB
20775               nm = "sxtb";
20776               assign(dstT, unop(Iop_8Sto32,
20777                                 unop(Iop_32to8, mkexpr(rotT))));
20778               break;
20779            case 0xFA0F: // SXTH
20780               nm = "sxth";
20781               assign(dstT, unop(Iop_16Sto32,
20782                                 unop(Iop_32to16, mkexpr(rotT))));
20783               break;
20784            case 0xFA3F: // UXTB16
20785               nm = "uxtb16";
20786               assign(dstT, binop(Iop_And32, mkexpr(rotT),
20787                                             mkU32(0x00FF00FF)));
20788               break;
20789            case 0xFA2F: { // SXTB16
20790               nm = "sxtb16";
20791               IRTemp lo32 = newTemp(Ity_I32);
20792               IRTemp hi32 = newTemp(Ity_I32);
20793               assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
20794               assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
20795               assign(
20796                  dstT,
20797                  binop(Iop_Or32,
20798                        binop(Iop_And32,
20799                              unop(Iop_8Sto32,
20800                                   unop(Iop_32to8, mkexpr(lo32))),
20801                              mkU32(0xFFFF)),
20802                        binop(Iop_Shl32,
20803                              unop(Iop_8Sto32,
20804                                   unop(Iop_32to8, mkexpr(hi32))),
20805                              mkU8(16))
20806               ));
20807               break;
20808            }
20809            default:
20810               vassert(0);
20811         }
20812         putIRegT(rD, mkexpr(dstT), condT);
20813         DIP("%s r%u, r%u, ror #%u\n", nm, rD, rM, 8 * rot);
20814         goto decode_success;
20815      }
20816   }
20817
20818   /* -------------- MUL.W Rd, Rn, Rm -------------- */
20819   if (INSN0(15,4) == 0xFB0
20820       && (INSN1(15,0) & 0xF0F0) == 0xF000) {
20821      UInt rN = INSN0(3,0);
20822      UInt rD = INSN1(11,8);
20823      UInt rM = INSN1(3,0);
20824      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
20825         IRTemp res = newTemp(Ity_I32);
20826         assign(res, binop(Iop_Mul32, getIRegT(rN), getIRegT(rM)));
20827         putIRegT(rD, mkexpr(res), condT);
20828         DIP("mul.w r%u, r%u, r%u\n", rD, rN, rM);
20829         goto decode_success;
20830      }
20831   }
20832
20833   /* -------------- SDIV.W Rd, Rn, Rm -------------- */
20834   if (INSN0(15,4) == 0xFB9
20835       && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
20836      UInt rN = INSN0(3,0);
20837      UInt rD = INSN1(11,8);
20838      UInt rM = INSN1(3,0);
20839      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
20840         IRTemp res  = newTemp(Ity_I32);
20841         IRTemp argL = newTemp(Ity_I32);
20842         IRTemp argR = newTemp(Ity_I32);
20843         assign(argL, getIRegT(rN));
20844         assign(argR, getIRegT(rM));
20845         assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
20846         putIRegT(rD, mkexpr(res), condT);
20847         DIP("sdiv.w r%u, r%u, r%u\n", rD, rN, rM);
20848         goto decode_success;
20849      }
20850   }
20851
20852   /* -------------- UDIV.W Rd, Rn, Rm -------------- */
20853   if (INSN0(15,4) == 0xFBB
20854       && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
20855      UInt rN = INSN0(3,0);
20856      UInt rD = INSN1(11,8);
20857      UInt rM = INSN1(3,0);
20858      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
20859         IRTemp res  = newTemp(Ity_I32);
20860         IRTemp argL = newTemp(Ity_I32);
20861         IRTemp argR = newTemp(Ity_I32);
20862         assign(argL, getIRegT(rN));
20863         assign(argR, getIRegT(rM));
20864         assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
20865         putIRegT(rD, mkexpr(res), condT);
20866         DIP("udiv.w r%u, r%u, r%u\n", rD, rN, rM);
20867         goto decode_success;
20868      }
20869   }
20870
20871   /* ------------------ {U,S}MULL ------------------ */
20872   if ((INSN0(15,4) == 0xFB8 || INSN0(15,4) == 0xFBA)
20873       && INSN1(7,4) == BITS4(0,0,0,0)) {
20874      UInt isU  = INSN0(5,5);
20875      UInt rN   = INSN0(3,0);
20876      UInt rDlo = INSN1(15,12);
20877      UInt rDhi = INSN1(11,8);
20878      UInt rM   = INSN1(3,0);
20879      if (!isBadRegT(rDhi) && !isBadRegT(rDlo)
20880          && !isBadRegT(rN) && !isBadRegT(rM) && rDlo != rDhi) {
20881         IRTemp res   = newTemp(Ity_I64);
20882         assign(res, binop(isU ? Iop_MullU32 : Iop_MullS32,
20883                           getIRegT(rN), getIRegT(rM)));
20884         putIRegT( rDhi, unop(Iop_64HIto32, mkexpr(res)), condT );
20885         putIRegT( rDlo, unop(Iop_64to32, mkexpr(res)), condT );
20886         DIP("%cmull r%u, r%u, r%u, r%u\n",
20887             isU ? 'u' : 's', rDlo, rDhi, rN, rM);
20888         goto decode_success;
20889      }
20890   }
20891
20892   /* ------------------ ML{A,S} ------------------ */
20893   if (INSN0(15,4) == 0xFB0
20894       && (   INSN1(7,4) == BITS4(0,0,0,0)    // MLA
20895           || INSN1(7,4) == BITS4(0,0,0,1))) { // MLS
20896      UInt rN = INSN0(3,0);
20897      UInt rA = INSN1(15,12);
20898      UInt rD = INSN1(11,8);
20899      UInt rM = INSN1(3,0);
20900      if (!isBadRegT(rD) && !isBadRegT(rN)
20901          && !isBadRegT(rM) && !isBadRegT(rA)) {
20902         Bool   isMLA = INSN1(7,4) == BITS4(0,0,0,0);
20903         IRTemp res   = newTemp(Ity_I32);
20904         assign(res,
20905                binop(isMLA ? Iop_Add32 : Iop_Sub32,
20906                      getIRegT(rA),
20907                      binop(Iop_Mul32, getIRegT(rN), getIRegT(rM))));
20908         putIRegT(rD, mkexpr(res), condT);
20909         DIP("%s r%u, r%u, r%u, r%u\n",
20910             isMLA ? "mla" : "mls", rD, rN, rM, rA);
20911         goto decode_success;
20912      }
20913   }
20914
20915   /* ------------------ (T3) ADR ------------------ */
20916   if ((INSN0(15,0) == 0xF20F || INSN0(15,0) == 0xF60F)
20917       && INSN1(15,15) == 0) {
20918      /* rD = align4(PC) + imm32 */
20919      UInt rD = INSN1(11,8);
20920      if (!isBadRegT(rD)) {
20921         UInt imm32 = (INSN0(10,10) << 11)
20922                      | (INSN1(14,12) << 8) | INSN1(7,0);
20923         putIRegT(rD, binop(Iop_Add32,
20924                            binop(Iop_And32, getIRegT(15), mkU32(~3U)),
20925                            mkU32(imm32)),
20926                      condT);
20927         DIP("add r%u, pc, #%u\n", rD, imm32);
20928         goto decode_success;
20929      }
20930   }
20931
20932   /* ----------------- (T1) UMLAL ----------------- */
20933   /* ----------------- (T1) SMLAL ----------------- */
20934   if ((INSN0(15,4) == 0xFBE // UMLAL
20935        || INSN0(15,4) == 0xFBC) // SMLAL
20936       && INSN1(7,4) == BITS4(0,0,0,0)) {
20937      UInt rN   = INSN0(3,0);
20938      UInt rDlo = INSN1(15,12);
20939      UInt rDhi = INSN1(11,8);
20940      UInt rM   = INSN1(3,0);
20941      if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
20942          && !isBadRegT(rM) && rDhi != rDlo) {
20943         Bool   isS   = INSN0(15,4) == 0xFBC;
20944         IRTemp argL  = newTemp(Ity_I32);
20945         IRTemp argR  = newTemp(Ity_I32);
20946         IRTemp old   = newTemp(Ity_I64);
20947         IRTemp res   = newTemp(Ity_I64);
20948         IRTemp resHi = newTemp(Ity_I32);
20949         IRTemp resLo = newTemp(Ity_I32);
20950         IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
20951         assign( argL, getIRegT(rM));
20952         assign( argR, getIRegT(rN));
20953         assign( old, binop(Iop_32HLto64, getIRegT(rDhi), getIRegT(rDlo)) );
20954         assign( res, binop(Iop_Add64,
20955                            mkexpr(old),
20956                            binop(mulOp, mkexpr(argL), mkexpr(argR))) );
20957         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
20958         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
20959         putIRegT( rDhi, mkexpr(resHi), condT );
20960         putIRegT( rDlo, mkexpr(resLo), condT );
20961         DIP("%cmlal r%u, r%u, r%u, r%u\n",
20962             isS ? 's' : 'u', rDlo, rDhi, rN, rM);
20963         goto decode_success;
20964      }
20965   }
20966
20967   /* ------------------ (T1) UMAAL ------------------ */
20968   if (INSN0(15,4) == 0xFBE && INSN1(7,4) == BITS4(0,1,1,0)) {
20969      UInt rN   = INSN0(3,0);
20970      UInt rDlo = INSN1(15,12);
20971      UInt rDhi = INSN1(11,8);
20972      UInt rM   = INSN1(3,0);
20973      if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
20974          && !isBadRegT(rM) && rDhi != rDlo) {
20975         IRTemp argN   = newTemp(Ity_I32);
20976         IRTemp argM   = newTemp(Ity_I32);
20977         IRTemp argDhi = newTemp(Ity_I32);
20978         IRTemp argDlo = newTemp(Ity_I32);
20979         IRTemp res    = newTemp(Ity_I64);
20980         IRTemp resHi  = newTemp(Ity_I32);
20981         IRTemp resLo  = newTemp(Ity_I32);
20982         assign( argN,   getIRegT(rN) );
20983         assign( argM,   getIRegT(rM) );
20984         assign( argDhi, getIRegT(rDhi) );
20985         assign( argDlo, getIRegT(rDlo) );
20986         assign( res,
20987                 binop(Iop_Add64,
20988                       binop(Iop_Add64,
20989                             binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
20990                             unop(Iop_32Uto64, mkexpr(argDhi))),
20991                       unop(Iop_32Uto64, mkexpr(argDlo))) );
20992         assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
20993         assign( resLo, unop(Iop_64to32, mkexpr(res)) );
20994         putIRegT( rDhi, mkexpr(resHi), condT );
20995         putIRegT( rDlo, mkexpr(resLo), condT );
20996         DIP("umaal r%u, r%u, r%u, r%u\n", rDlo, rDhi, rN, rM);
20997         goto decode_success;
20998      }
20999   }
21000
21001   /* ------------------- (T1) SMMUL{R} ------------------ */
21002   if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
21003       && INSN0(6,4) == BITS3(1,0,1)
21004       && INSN1(15,12) == BITS4(1,1,1,1)
21005       && INSN1(7,5) == BITS3(0,0,0)) {
21006      UInt bitR = INSN1(4,4);
21007      UInt rD = INSN1(11,8);
21008      UInt rM = INSN1(3,0);
21009      UInt rN = INSN0(3,0);
21010      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
21011         IRExpr* res
21012         = unop(Iop_64HIto32,
21013                binop(Iop_Add64,
21014                      binop(Iop_MullS32, getIRegT(rN), getIRegT(rM)),
21015                      mkU64(bitR ? 0x80000000ULL : 0ULL)));
21016         putIRegT(rD, res, condT);
21017         DIP("smmul%s r%u, r%u, r%u\n",
21018             bitR ? "r" : "", rD, rN, rM);
21019         goto decode_success;
21020      }
21021   }
21022
21023   /* ------------------- (T1) SMMLA{R} ------------------ */
21024   if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
21025       && INSN0(6,4) == BITS3(1,0,1)
21026       && INSN1(7,5) == BITS3(0,0,0)) {
21027      UInt bitR = INSN1(4,4);
21028      UInt rA = INSN1(15,12);
21029      UInt rD = INSN1(11,8);
21030      UInt rM = INSN1(3,0);
21031      UInt rN = INSN0(3,0);
21032      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && (rA != 13)) {
21033         IRExpr* res
21034         = unop(Iop_64HIto32,
21035                binop(Iop_Add64,
21036                      binop(Iop_Add64,
21037                            binop(Iop_32HLto64, getIRegT(rA), mkU32(0)),
21038                            binop(Iop_MullS32, getIRegT(rN), getIRegT(rM))),
21039                      mkU64(bitR ? 0x80000000ULL : 0ULL)));
21040         putIRegT(rD, res, condT);
21041         DIP("smmla%s r%u, r%u, r%u, r%u\n",
21042             bitR ? "r" : "", rD, rN, rM, rA);
21043         goto decode_success;
21044      }
21045   }
21046
21047   /* ------------------ (T2) ADR ------------------ */
21048   if ((INSN0(15,0) == 0xF2AF || INSN0(15,0) == 0xF6AF)
21049       && INSN1(15,15) == 0) {
21050      /* rD = align4(PC) - imm32 */
21051      UInt rD = INSN1(11,8);
21052      if (!isBadRegT(rD)) {
21053         UInt imm32 = (INSN0(10,10) << 11)
21054                      | (INSN1(14,12) << 8) | INSN1(7,0);
21055         putIRegT(rD, binop(Iop_Sub32,
21056                            binop(Iop_And32, getIRegT(15), mkU32(~3U)),
21057                            mkU32(imm32)),
21058                      condT);
21059         DIP("sub r%u, pc, #%u\n", rD, imm32);
21060         goto decode_success;
21061      }
21062   }
21063
21064   /* ------------------- (T1) BFI ------------------- */
21065   /* ------------------- (T1) BFC ------------------- */
21066   if (INSN0(15,4) == 0xF36 && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
21067      UInt rD  = INSN1(11,8);
21068      UInt rN  = INSN0(3,0);
21069      UInt msb = INSN1(4,0);
21070      UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
21071      if (isBadRegT(rD) || rN == 13 || msb < lsb) {
21072         /* undecodable; fall through */
21073      } else {
21074         IRTemp src    = newTemp(Ity_I32);
21075         IRTemp olddst = newTemp(Ity_I32);
21076         IRTemp newdst = newTemp(Ity_I32);
21077         UInt   mask = 1 << (msb - lsb);
21078         mask = (mask - 1) + mask;
21079         vassert(mask != 0); // guaranteed by "msb < lsb" check above
21080         mask <<= lsb;
21081
21082         assign(src, rN == 15 ? mkU32(0) : getIRegT(rN));
21083         assign(olddst, getIRegT(rD));
21084         assign(newdst,
21085                binop(Iop_Or32,
21086                   binop(Iop_And32,
21087                         binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
21088                         mkU32(mask)),
21089                   binop(Iop_And32,
21090                         mkexpr(olddst),
21091                         mkU32(~mask)))
21092               );
21093
21094         putIRegT(rD, mkexpr(newdst), condT);
21095
21096         if (rN == 15) {
21097            DIP("bfc r%u, #%u, #%u\n",
21098                rD, lsb, msb-lsb+1);
21099         } else {
21100            DIP("bfi r%u, r%u, #%u, #%u\n",
21101                rD, rN, lsb, msb-lsb+1);
21102         }
21103         goto decode_success;
21104      }
21105   }
21106
21107   /* ------------------- (T1) SXTAH ------------------- */
21108   /* ------------------- (T1) UXTAH ------------------- */
21109   if ((INSN0(15,4) == 0xFA1      // UXTAH
21110        || INSN0(15,4) == 0xFA0)  // SXTAH
21111       && INSN1(15,12) == BITS4(1,1,1,1)
21112       && INSN1(7,6) == BITS2(1,0)) {
21113      Bool isU = INSN0(15,4) == 0xFA1;
21114      UInt rN  = INSN0(3,0);
21115      UInt rD  = INSN1(11,8);
21116      UInt rM  = INSN1(3,0);
21117      UInt rot = INSN1(5,4);
21118      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
21119         IRTemp srcL = newTemp(Ity_I32);
21120         IRTemp srcR = newTemp(Ity_I32);
21121         IRTemp res  = newTemp(Ity_I32);
21122         assign(srcR, getIRegT(rM));
21123         assign(srcL, getIRegT(rN));
21124         assign(res,  binop(Iop_Add32,
21125                            mkexpr(srcL),
21126                            unop(isU ? Iop_16Uto32 : Iop_16Sto32,
21127                                 unop(Iop_32to16,
21128                                      genROR32(srcR, 8 * rot)))));
21129         putIRegT(rD, mkexpr(res), condT);
21130         DIP("%cxtah r%u, r%u, r%u, ror #%u\n",
21131             isU ? 'u' : 's', rD, rN, rM, rot);
21132         goto decode_success;
21133      }
21134   }
21135
21136   /* ------------------- (T1) SXTAB ------------------- */
21137   /* ------------------- (T1) UXTAB ------------------- */
21138   if ((INSN0(15,4) == 0xFA5      // UXTAB
21139        || INSN0(15,4) == 0xFA4)  // SXTAB
21140       && INSN1(15,12) == BITS4(1,1,1,1)
21141       && INSN1(7,6) == BITS2(1,0)) {
21142      Bool isU = INSN0(15,4) == 0xFA5;
21143      UInt rN  = INSN0(3,0);
21144      UInt rD  = INSN1(11,8);
21145      UInt rM  = INSN1(3,0);
21146      UInt rot = INSN1(5,4);
21147      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
21148         IRTemp srcL = newTemp(Ity_I32);
21149         IRTemp srcR = newTemp(Ity_I32);
21150         IRTemp res  = newTemp(Ity_I32);
21151         assign(srcR, getIRegT(rM));
21152         assign(srcL, getIRegT(rN));
21153         assign(res,  binop(Iop_Add32,
21154                            mkexpr(srcL),
21155                            unop(isU ? Iop_8Uto32 : Iop_8Sto32,
21156                                 unop(Iop_32to8,
21157                                      genROR32(srcR, 8 * rot)))));
21158         putIRegT(rD, mkexpr(res), condT);
21159         DIP("%cxtab r%u, r%u, r%u, ror #%u\n",
21160             isU ? 'u' : 's', rD, rN, rM, rot);
21161         goto decode_success;
21162      }
21163   }
21164
21165   /* ------------------- (T1) CLZ ------------------- */
21166   if (INSN0(15,4) == 0xFAB
21167       && INSN1(15,12) == BITS4(1,1,1,1)
21168       && INSN1(7,4) == BITS4(1,0,0,0)) {
21169      UInt rM1 = INSN0(3,0);
21170      UInt rD  = INSN1(11,8);
21171      UInt rM2 = INSN1(3,0);
21172      if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
21173         IRTemp arg = newTemp(Ity_I32);
21174         IRTemp res = newTemp(Ity_I32);
21175         assign(arg, getIRegT(rM1));
21176         assign(res, IRExpr_ITE(
21177                        binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
21178                        mkU32(32),
21179                        unop(Iop_Clz32, mkexpr(arg))
21180         ));
21181         putIRegT(rD, mkexpr(res), condT);
21182         DIP("clz r%u, r%u\n", rD, rM1);
21183         goto decode_success;
21184      }
21185   }
21186
21187   /* ------------------- (T1) RBIT ------------------- */
21188   if (INSN0(15,4) == 0xFA9
21189       && INSN1(15,12) == BITS4(1,1,1,1)
21190       && INSN1(7,4) == BITS4(1,0,1,0)) {
21191      UInt rM1 = INSN0(3,0);
21192      UInt rD  = INSN1(11,8);
21193      UInt rM2 = INSN1(3,0);
21194      if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
21195         IRTemp arg = newTemp(Ity_I32);
21196         assign(arg, getIRegT(rM1));
21197         IRTemp res = gen_BITREV(arg);
21198         putIRegT(rD, mkexpr(res), condT);
21199         DIP("rbit r%u, r%u\n", rD, rM1);
21200         goto decode_success;
21201      }
21202   }
21203
21204   /* ------------------- (T2) REV   ------------------- */
21205   /* ------------------- (T2) REV16 ------------------- */
21206   if (INSN0(15,4) == 0xFA9
21207       && INSN1(15,12) == BITS4(1,1,1,1)
21208       && (   INSN1(7,4) == BITS4(1,0,0,0)     // REV
21209           || INSN1(7,4) == BITS4(1,0,0,1))) { // REV16
21210      UInt rM1   = INSN0(3,0);
21211      UInt rD    = INSN1(11,8);
21212      UInt rM2   = INSN1(3,0);
21213      Bool isREV = INSN1(7,4) == BITS4(1,0,0,0);
21214      if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
21215         IRTemp arg = newTemp(Ity_I32);
21216         assign(arg, getIRegT(rM1));
21217         IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
21218         putIRegT(rD, mkexpr(res), condT);
21219         DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM1);
21220         goto decode_success;
21221      }
21222   }
21223
21224   /* ------------------- (T2) REVSH ------------------ */
21225   if (INSN0(15,4) == 0xFA9
21226       && INSN1(15,12) == BITS4(1,1,1,1)
21227       && INSN1(7,4) == BITS4(1,0,1,1)) {
21228      UInt rM1 = INSN0(3,0);
21229      UInt rM2 = INSN1(3,0);
21230      UInt rD  = INSN1(11,8);
21231      if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
21232         IRTemp irt_rM  = newTemp(Ity_I32);
21233         IRTemp irt_hi  = newTemp(Ity_I32);
21234         IRTemp irt_low = newTemp(Ity_I32);
21235         IRTemp irt_res = newTemp(Ity_I32);
21236         assign(irt_rM, getIRegT(rM1));
21237         assign(irt_hi,
21238                binop(Iop_Sar32,
21239                      binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
21240                      mkU8(16)
21241                )
21242         );
21243         assign(irt_low,
21244                binop(Iop_And32,
21245                      binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
21246                      mkU32(0xFF)
21247                )
21248         );
21249         assign(irt_res,
21250                binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
21251         );
21252         putIRegT(rD, mkexpr(irt_res), condT);
21253         DIP("revsh r%u, r%u\n", rD, rM1);
21254         goto decode_success;
21255      }
21256   }
21257
21258   /* -------------- (T1) MSR apsr, reg -------------- */
21259   if (INSN0(15,4) == 0xF38
21260       && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(9,0) == 0x000) {
21261      UInt rN          = INSN0(3,0);
21262      UInt write_ge    = INSN1(10,10);
21263      UInt write_nzcvq = INSN1(11,11);
21264      if (!isBadRegT(rN) && (write_nzcvq || write_ge)) {
21265         IRTemp rNt = newTemp(Ity_I32);
21266         assign(rNt, getIRegT(rN));
21267         desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
21268         DIP("msr cpsr_%s%s, r%u\n",
21269             write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
21270         goto decode_success;
21271      }
21272   }
21273
21274   /* -------------- (T1) MRS reg, apsr -------------- */
21275   if (INSN0(15,0) == 0xF3EF
21276       && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(7,0) == 0x00) {
21277      UInt rD = INSN1(11,8);
21278      if (!isBadRegT(rD)) {
21279         IRTemp apsr = synthesise_APSR();
21280         putIRegT( rD, mkexpr(apsr), condT );
21281         DIP("mrs r%u, cpsr\n", rD);
21282         goto decode_success;
21283      }
21284   }
21285
21286   /* ----------------- (T1) LDREX ----------------- */
21287   if (INSN0(15,4) == 0xE85 && INSN1(11,8) == BITS4(1,1,1,1)) {
21288      UInt rN   = INSN0(3,0);
21289      UInt rT   = INSN1(15,12);
21290      UInt imm8 = INSN1(7,0);
21291      if (!isBadRegT(rT) && rN != 15) {
21292         IRTemp res;
21293         // go uncond
21294         mk_skip_over_T32_if_cond_is_false( condT );
21295         // now uncond
21296         res = newTemp(Ity_I32);
21297         stmt( IRStmt_LLSC(Iend_LE,
21298                           res,
21299                           binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
21300                           NULL/*this is a load*/ ));
21301         putIRegT(rT, mkexpr(res), IRTemp_INVALID);
21302         DIP("ldrex r%u, [r%u, #+%u]\n", rT, rN, imm8 * 4);
21303         goto decode_success;
21304      }
21305   }
21306
21307   /* --------------- (T1) LDREX{B,H} --------------- */
21308   if (INSN0(15,4) == 0xE8D
21309       && (INSN1(11,0) == 0xF4F || INSN1(11,0) == 0xF5F)) {
21310      UInt rN  = INSN0(3,0);
21311      UInt rT  = INSN1(15,12);
21312      Bool isH = INSN1(11,0) == 0xF5F;
21313      if (!isBadRegT(rT) && rN != 15) {
21314         IRTemp res;
21315         // go uncond
21316         mk_skip_over_T32_if_cond_is_false( condT );
21317         // now uncond
21318         res = newTemp(isH ? Ity_I16 : Ity_I8);
21319         stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
21320                           NULL/*this is a load*/ ));
21321         putIRegT(rT, unop(isH ? Iop_16Uto32 : Iop_8Uto32, mkexpr(res)),
21322                      IRTemp_INVALID);
21323         DIP("ldrex%c r%u, [r%u]\n", isH ? 'h' : 'b', rT, rN);
21324         goto decode_success;
21325      }
21326   }
21327
21328   /* --------------- (T1) LDREXD --------------- */
21329   if (INSN0(15,4) == 0xE8D && INSN1(7,0) == 0x7F) {
21330      UInt rN  = INSN0(3,0);
21331      UInt rT  = INSN1(15,12);
21332      UInt rT2 = INSN1(11,8);
21333      if (!isBadRegT(rT) && !isBadRegT(rT2) && rT != rT2 && rN != 15) {
21334         IRTemp res;
21335         // go uncond
21336         mk_skip_over_T32_if_cond_is_false( condT );
21337         // now uncond
21338         res = newTemp(Ity_I64);
21339         // FIXME: assumes little-endian guest
21340         stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
21341                           NULL/*this is a load*/ ));
21342         // FIXME: assumes little-endian guest
21343         putIRegT(rT,  unop(Iop_64to32,   mkexpr(res)), IRTemp_INVALID);
21344         putIRegT(rT2, unop(Iop_64HIto32, mkexpr(res)), IRTemp_INVALID);
21345         DIP("ldrexd r%u, r%u, [r%u]\n", rT, rT2, rN);
21346         goto decode_success;
21347      }
21348   }
21349
21350   /* ----------------- (T1) STREX ----------------- */
21351   if (INSN0(15,4) == 0xE84) {
21352      UInt rN   = INSN0(3,0);
21353      UInt rT   = INSN1(15,12);
21354      UInt rD   = INSN1(11,8);
21355      UInt imm8 = INSN1(7,0);
21356      if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
21357          && rD != rN && rD != rT) {
21358         IRTemp resSC1, resSC32;
21359         // go uncond
21360         mk_skip_over_T32_if_cond_is_false( condT );
21361         // now uncond
21362         /* Ok, now we're unconditional.  Do the store. */
21363         resSC1 = newTemp(Ity_I1);
21364         stmt( IRStmt_LLSC(Iend_LE,
21365                           resSC1,
21366                           binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
21367                           getIRegT(rT)) );
21368         /* Set rD to 1 on failure, 0 on success.  Currently we have
21369            resSC1 == 0 on failure, 1 on success. */
21370         resSC32 = newTemp(Ity_I32);
21371         assign(resSC32,
21372                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
21373         putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
21374         DIP("strex r%u, r%u, [r%u, #+%u]\n", rD, rT, rN, imm8 * 4);
21375         goto decode_success;
21376      }
21377   }
21378
21379   /* --------------- (T1) STREX{B,H} --------------- */
21380   if (INSN0(15,4) == 0xE8C
21381       && (INSN1(11,4) == 0xF4 || INSN1(11,4) == 0xF5)) {
21382      UInt rN  = INSN0(3,0);
21383      UInt rT  = INSN1(15,12);
21384      UInt rD  = INSN1(3,0);
21385      Bool isH = INSN1(11,4) == 0xF5;
21386      if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
21387          && rD != rN && rD != rT) {
21388         IRTemp resSC1, resSC32;
21389         // go uncond
21390         mk_skip_over_T32_if_cond_is_false( condT );
21391         // now uncond
21392         /* Ok, now we're unconditional.  Do the store. */
21393         resSC1 = newTemp(Ity_I1);
21394         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN),
21395                           unop(isH ? Iop_32to16 : Iop_32to8,
21396                                getIRegT(rT))) );
21397         /* Set rD to 1 on failure, 0 on success.  Currently we have
21398            resSC1 == 0 on failure, 1 on success. */
21399         resSC32 = newTemp(Ity_I32);
21400         assign(resSC32,
21401                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
21402         putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
21403         DIP("strex%c r%u, r%u, [r%u]\n", isH ? 'h' : 'b', rD, rT, rN);
21404         goto decode_success;
21405      }
21406   }
21407
21408   /* ---------------- (T1) STREXD ---------------- */
21409   if (INSN0(15,4) == 0xE8C && INSN1(7,4) == BITS4(0,1,1,1)) {
21410      UInt rN  = INSN0(3,0);
21411      UInt rT  = INSN1(15,12);
21412      UInt rT2 = INSN1(11,8);
21413      UInt rD  = INSN1(3,0);
21414      if (!isBadRegT(rD) && !isBadRegT(rT) && !isBadRegT(rT2)
21415          && rN != 15 && rD != rN && rD != rT && rD != rT) {
21416         IRTemp resSC1, resSC32, data;
21417         // go uncond
21418         mk_skip_over_T32_if_cond_is_false( condT );
21419         // now uncond
21420         /* Ok, now we're unconditional.  Do the store. */
21421         resSC1 = newTemp(Ity_I1);
21422         data = newTemp(Ity_I64);
21423         // FIXME: assumes little-endian guest
21424         assign(data, binop(Iop_32HLto64, getIRegT(rT2), getIRegT(rT)));
21425         // FIXME: assumes little-endian guest
21426         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN), mkexpr(data)));
21427         /* Set rD to 1 on failure, 0 on success.  Currently we have
21428            resSC1 == 0 on failure, 1 on success. */
21429         resSC32 = newTemp(Ity_I32);
21430         assign(resSC32,
21431                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
21432         putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
21433         DIP("strexd r%u, r%u, r%u, [r%u]\n", rD, rT, rT2, rN);
21434         goto decode_success;
21435      }
21436   }
21437
21438   /* -------------- v7 barrier insns -------------- */
21439   if (INSN0(15,0) == 0xF3BF && (INSN1(15,0) & 0xFF00) == 0x8F00) {
21440      /* FIXME: should this be unconditional? */
21441      /* XXX this isn't really right, is it?  The generated IR does
21442         them unconditionally.  I guess it doesn't matter since it
21443         doesn't do any harm to do them even when the guarding
21444         condition is false -- it's just a performance loss. */
21445      switch (INSN1(7,0)) {
21446         case 0x4F: /* DSB sy */
21447         case 0x4E: /* DSB st */
21448         case 0x4B: /* DSB ish */
21449         case 0x4A: /* DSB ishst */
21450         case 0x47: /* DSB nsh */
21451         case 0x46: /* DSB nshst */
21452         case 0x43: /* DSB osh */
21453         case 0x42: /* DSB oshst */
21454            stmt( IRStmt_MBE(Imbe_Fence) );
21455            DIP("DSB\n");
21456            goto decode_success;
21457         case 0x5F: /* DMB sy */
21458         case 0x5E: /* DMB st */
21459         case 0x5B: /* DMB ish */
21460         case 0x5A: /* DMB ishst */
21461         case 0x57: /* DMB nsh */
21462         case 0x56: /* DMB nshst */
21463         case 0x53: /* DMB osh */
21464         case 0x52: /* DMB oshst */
21465            stmt( IRStmt_MBE(Imbe_Fence) );
21466            DIP("DMB\n");
21467            goto decode_success;
21468         case 0x6F: /* ISB */
21469            stmt( IRStmt_MBE(Imbe_Fence) );
21470            DIP("ISB\n");
21471            goto decode_success;
21472         default:
21473            break;
21474      }
21475   }
21476
21477   /* ---------------------- PLD{,W} ---------------------- */
21478   if ((INSN0(15,4) & 0xFFD) == 0xF89 && INSN1(15,12) == 0xF) {
21479      /* FIXME: should this be unconditional? */
21480      /* PLD/PLDW immediate, encoding T1 */
21481      UInt rN    = INSN0(3,0);
21482      UInt bW    = INSN0(5,5);
21483      UInt imm12 = INSN1(11,0);
21484      DIP("pld%s [r%u, #%u]\n", bW ? "w" : "",  rN, imm12);
21485      goto decode_success;
21486   }
21487
21488   if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,8) == 0xFC) {
21489      /* FIXME: should this be unconditional? */
21490      /* PLD/PLDW immediate, encoding T2 */
21491      UInt rN    = INSN0(3,0);
21492      UInt bW    = INSN0(5,5);
21493      UInt imm8  = INSN1(7,0);
21494      DIP("pld%s [r%u, #-%u]\n", bW ? "w" : "",  rN, imm8);
21495      goto decode_success;
21496   }
21497
21498   if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,6) == 0x3C0) {
21499      /* FIXME: should this be unconditional? */
21500      /* PLD/PLDW register, encoding T1 */
21501      UInt rN   = INSN0(3,0);
21502      UInt rM   = INSN1(3,0);
21503      UInt bW   = INSN0(5,5);
21504      UInt imm2 = INSN1(5,4);
21505      if (!isBadRegT(rM)) {
21506         DIP("pld%s [r%u, r%u, lsl %d]\n", bW ? "w" : "", rN, rM, imm2);
21507         goto decode_success;
21508      }
21509      /* fall through */
21510   }
21511
21512   /* -------------- read CP15 TPIDRURO register ------------- */
21513   /* mrc     p15, 0,  r0, c13, c0, 3  up to
21514      mrc     p15, 0, r14, c13, c0, 3
21515   */
21516   /* I don't know whether this is really v7-only.  But anyway, we
21517      have to support it since arm-linux uses TPIDRURO as a thread
21518      state register. */
21519   if ((INSN0(15,0) == 0xEE1D) && (INSN1(11,0) == 0x0F70)) {
21520      /* FIXME: should this be unconditional? */
21521      UInt rD = INSN1(15,12);
21522      if (!isBadRegT(rD)) {
21523         putIRegT(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32), IRTemp_INVALID);
21524         DIP("mrc p15,0, r%u, c13, c0, 3\n", rD);
21525         goto decode_success;
21526      }
21527      /* fall through */
21528   }
21529
21530   /* ------------------- CLREX ------------------ */
21531   if (INSN0(15,0) == 0xF3BF && INSN1(15,0) == 0x8F2F) {
21532      /* AFAICS, this simply cancels a (all?) reservations made by a
21533         (any?) preceding LDREX(es).  Arrange to hand it through to
21534         the back end. */
21535      mk_skip_over_T32_if_cond_is_false( condT );
21536      stmt( IRStmt_MBE(Imbe_CancelReservation) );
21537      DIP("clrex\n");
21538      goto decode_success;
21539   }
21540
21541   /* ------------------- NOP ------------------ */
21542   if (INSN0(15,0) == 0xF3AF && INSN1(15,0) == 0x8000) {
21543      DIP("nop\n");
21544      goto decode_success;
21545   }
21546
21547   /* -------------- (T1) LDRT reg+#imm8 -------------- */
21548   /* Load Register Unprivileged:
21549      ldrt Rt, [Rn, #imm8]
21550   */
21551   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,1)
21552       && INSN1(11,8) == BITS4(1,1,1,0)) {
21553      UInt rT    = INSN1(15,12);
21554      UInt rN    = INSN0(3,0);
21555      UInt imm8  = INSN1(7,0);
21556      Bool valid = True;
21557      if (rN == 15 || isBadRegT(rT)) valid = False;
21558      if (valid) {
21559         put_ITSTATE(old_itstate);
21560         IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21561         IRTemp newRt = newTemp(Ity_I32);
21562         loadGuardedLE( newRt, ILGop_Ident32, ea, llGetIReg(rT), condT );
21563         putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21564         put_ITSTATE(new_itstate);
21565         DIP("ldrt r%u, [r%u, #%u]\n", rT, rN, imm8);
21566         goto decode_success;
21567      }
21568   }
21569
21570   /* -------------- (T1) STRT reg+#imm8 -------------- */
21571   /* Store Register Unprivileged:
21572      strt Rt, [Rn, #imm8]
21573   */
21574   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,0)
21575       && INSN1(11,8) == BITS4(1,1,1,0)) {
21576      UInt rT    = INSN1(15,12);
21577      UInt rN    = INSN0(3,0);
21578      UInt imm8  = INSN1(7,0);
21579      Bool valid = True;
21580      if (rN == 15 || isBadRegT(rT)) valid = False;
21581      if (valid) {
21582         put_ITSTATE(old_itstate);
21583         IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21584         storeGuardedLE( address, llGetIReg(rT), condT );
21585         put_ITSTATE(new_itstate);
21586         DIP("strt r%u, [r%u, #%u]\n", rT, rN, imm8);
21587         goto decode_success;
21588      }
21589   }
21590
21591   /* -------------- (T1) STRBT reg+#imm8 -------------- */
21592   /* Store Register Byte Unprivileged:
21593      strbt Rt, [Rn, #imm8]
21594   */
21595   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,0)
21596       && INSN1(11,8) == BITS4(1,1,1,0)) {
21597      UInt rT    = INSN1(15,12);
21598      UInt rN    = INSN0(3,0);
21599      UInt imm8  = INSN1(7,0);
21600      Bool valid = True;
21601      if (rN == 15 || isBadRegT(rT)) valid = False;
21602      if (valid) {
21603         put_ITSTATE(old_itstate);
21604         IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21605         IRExpr* data = unop(Iop_32to8, llGetIReg(rT));
21606         storeGuardedLE( address, data, condT );
21607         put_ITSTATE(new_itstate);
21608         DIP("strbt r%u, [r%u, #%u]\n", rT, rN, imm8);
21609         goto decode_success;
21610      }
21611   }
21612
21613   /* -------------- (T1) LDRHT reg+#imm8 -------------- */
21614   /* Load Register Halfword Unprivileged:
21615      ldrht Rt, [Rn, #imm8]
21616   */
21617   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,1)
21618       && INSN1(11,8) == BITS4(1,1,1,0)) {
21619      UInt rN    = INSN0(3,0);
21620      Bool valid = True;
21621      if (rN == 15) {
         /* With rN == 15 this encoding is really LDRH (literal), which
            has already been handled earlier, so don't decode it a
            second time here. */
21625         valid = False;
21626      }
21627      UInt rT    = INSN1(15,12);
21628      UInt imm8  = INSN1(7,0);
21629      if (isBadRegT(rT)) valid = False;
21630      if (valid) {
21631         put_ITSTATE(old_itstate);
21632         IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21633         IRTemp newRt = newTemp(Ity_I32);
21634         loadGuardedLE( newRt, ILGop_16Uto32, ea, llGetIReg(rT), condT );
21635         putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21636         put_ITSTATE(new_itstate);
21637         DIP("ldrht r%u, [r%u, #%u]\n", rT, rN, imm8);
21638         goto decode_success;
21639      }
21640   }
21641
21642   /* -------------- (T1) LDRSHT reg+#imm8 -------------- */
21643   /* Load Register Signed Halfword Unprivileged:
21644      ldrsht Rt, [Rn, #imm8]
21645   */
21646   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(1,1)
21647       && INSN1(11,8) == BITS4(1,1,1,0)) {
21648      UInt rN    = INSN0(3,0);
21649      Bool valid = True;
21650      if (rN == 15) {
         /* With rN == 15 this encoding is really LDRSH (literal), which
            has already been handled earlier, so don't decode it a
            second time here. */
21654         valid = False;
21655      }
21656      UInt rT    = INSN1(15,12);
21657      UInt imm8  = INSN1(7,0);
21658      if (isBadRegT(rT)) valid = False;
21659      if (valid) {
21660         put_ITSTATE(old_itstate);
21661         IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21662         IRTemp newRt = newTemp(Ity_I32);
21663         loadGuardedLE( newRt, ILGop_16Sto32, ea, llGetIReg(rT), condT );
21664         putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21665         put_ITSTATE(new_itstate);
21666         DIP("ldrsht r%u, [r%u, #%u]\n", rT, rN, imm8);
21667         goto decode_success;
21668      }
21669   }
21670
21671   /* -------------- (T1) STRHT reg+#imm8 -------------- */
21672   /* Store Register Halfword Unprivileged:
21673      strht Rt, [Rn, #imm8]
21674   */
21675   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,0)
21676       && INSN1(11,8) == BITS4(1,1,1,0)) {
21677      UInt rT    = INSN1(15,12);
21678      UInt rN    = INSN0(3,0);
21679      UInt imm8  = INSN1(7,0);
21680      Bool valid = True;
21681      if (rN == 15 || isBadRegT(rT)) valid = False;
21682      if (valid) {
21683         put_ITSTATE(old_itstate);
21684         IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21685         IRExpr* data = unop(Iop_32to16, llGetIReg(rT));
21686         storeGuardedLE( address, data, condT );
21687         put_ITSTATE(new_itstate);
21688         DIP("strht r%u, [r%u, #%u]\n", rT, rN, imm8);
21689         goto decode_success;
21690      }
21691   }
21692
21693   /* -------------- (T1) LDRBT reg+#imm8 -------------- */
21694   /* Load Register Byte Unprivileged:
21695      ldrbt Rt, [Rn, #imm8]
21696   */
21697   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,1)
21698       && INSN1(11,8) == BITS4(1,1,1,0)) {
21699      UInt rN    = INSN0(3,0);
21700      UInt rT    = INSN1(15,12);
21701      UInt imm8  = INSN1(7,0);
21702      Bool valid = True;
21703      if (rN == 15 /* insn is LDRB (literal) */) valid = False;
21704      if (isBadRegT(rT)) valid = False;
21705      if (valid) {
21706         put_ITSTATE(old_itstate);
21707         IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21708         IRTemp newRt = newTemp(Ity_I32);
21709         loadGuardedLE( newRt, ILGop_8Uto32, ea, llGetIReg(rT), condT );
21710         putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21711         put_ITSTATE(new_itstate);
21712         DIP("ldrbt r%u, [r%u, #%u]\n", rT, rN, imm8);
21713         goto decode_success;
21714      }
21715   }
21716
21717   /* -------------- (T1) LDRSBT reg+#imm8 -------------- */
21718   /* Load Register Signed Byte Unprivileged:
21719      ldrsbt Rt, [Rn, #imm8]
21720   */
21721   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
21722       && INSN1(11,8) == BITS4(1,1,1,0)) {
21723      UInt rN    = INSN0(3,0);
21724      Bool valid = True;
21725      UInt rT    = INSN1(15,12);
21726      UInt imm8  = INSN1(7,0);
21727      if (rN == 15 /* insn is LDRSB (literal) */) valid = False;
21728      if (isBadRegT(rT)) valid = False;
21729      if (valid) {
21730         put_ITSTATE(old_itstate);
21731         IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
21732         IRTemp newRt = newTemp(Ity_I32);
21733         loadGuardedLE( newRt, ILGop_8Sto32, ea, llGetIReg(rT), condT );
21734         putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21735         put_ITSTATE(new_itstate);
21736         DIP("ldrsbt r%u, [r%u, #%u]\n", rT, rN, imm8);
21737         goto decode_success;
21738      }
21739   }
21740
21741   /* -------------- (T1) PLI reg+#imm12 -------------- */
21742   /* Preload Instruction:
21743      pli [Rn, #imm12]
21744   */
21745   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,1,0) && INSN0(5,4) == BITS2(0,1)
21746       && INSN1(15,12) == BITS4(1,1,1,1)) {
21747      UInt rN    = INSN0(3,0);
21748      UInt imm12 = INSN1(11,0);
21749      if (rN != 15) {
21750         DIP("pli [r%u, #%u]\n", rN, imm12);
21751         goto decode_success;
21752      }
21753   }
21754
21755   /* -------------- (T2) PLI reg-#imm8 -------------- */
21756   /* Preload Instruction:
21757      pli [Rn, #-imm8]
21758   */
21759   if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
21760       && INSN1(15,8) == BITS8(1,1,1,1,1,1,0,0)) {
21761      UInt rN   = INSN0(3,0);
21762      UInt imm8 = INSN1(7,0);
21763      if (rN != 15) {
21764         DIP("pli [r%u, #-%u]\n", rN, imm8);
21765         goto decode_success;
21766      }
21767   }
21768
21769   /* -------------- (T3) PLI PC+/-#imm12 -------------- */
21770   /* Preload Instruction:
21771      pli [PC, #+/-imm12]
21772   */
21773   if (INSN0(15,8) == BITS8(1,1,1,1,1,0,0,1)
21774       && INSN0(6,0) == BITS7(0,0,1,1,1,1,1)
21775       && INSN1(15,12) == BITS4(1,1,1,1)) {
21776      UInt imm12 = INSN1(11,0);
21777      UInt bU    = INSN0(7,7);
21778      DIP("pli [pc, #%c%u]\n", bU == 1 ? '+' : '-', imm12);
21779      goto decode_success;
21780   }
21781
21782   /* ------------------- (T1) SMMLA{R} ------------------ */
21783   if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
21784       && INSN0(6,4) == BITS3(1,0,1)
21785       && INSN1(7,5) == BITS3(0,0,0)) {
21786      UInt bitR = INSN1(4,4);
21787      UInt rA = INSN1(15,12);
21788      UInt rD = INSN1(11,8);
21789      UInt rM = INSN1(3,0);
21790      UInt rN = INSN0(3,0);
21791      if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && (rA != 13)) {
21792         IRExpr* res
21793         = unop(Iop_64HIto32,
21794                binop(Iop_Add64,
21795                      binop(Iop_Add64,
21796                            binop(Iop_32HLto64, getIRegT(rA), mkU32(0)),
21797                            binop(Iop_MullS32, getIRegT(rN), getIRegT(rM))),
21798                      mkU64(bitR ? 0x80000000ULL : 0ULL)));
21799         putIRegT(rD, res, condT);
21800         DIP("smmla%s r%u, r%u, r%u, r%u\n",
21801             bitR ? "r" : "", rD, rN, rM, rA);
21802         goto decode_success;
21803      }
21804   }
21805
21806   /* ----------------------------------------------------------- */
21807   /* -- VFP (CP 10, CP 11) instructions (in Thumb mode)       -- */
21808   /* ----------------------------------------------------------- */
21809
21810   if (INSN0(15,12) == BITS4(1,1,1,0)) {
21811      UInt insn28 = (INSN0(11,0) << 16) | INSN1(15,0);
21812      Bool ok_vfp = decode_CP10_CP11_instruction (
21813                       &dres, insn28, condT, ARMCondAL/*bogus*/,
21814                       True/*isT*/
21815                    );
21816      if (ok_vfp)
21817         goto decode_success;
21818   }
21819
21820   /* ----------------------------------------------------------- */
21821   /* -- NEON instructions (in Thumb mode)                     -- */
21822   /* ----------------------------------------------------------- */
21823
21824   if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
21825      UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
21826      Bool ok_neon = decode_NEON_instruction(
21827                        &dres, insn32, condT, True/*isT*/
21828                     );
21829      if (ok_neon)
21830         goto decode_success;
21831   }
21832
21833   /* ----------------------------------------------------------- */
21834   /* -- v6 media instructions (in Thumb mode)                 -- */
21835   /* ----------------------------------------------------------- */
21836
21837   { UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
21838     Bool ok_v6m = decode_V6MEDIA_instruction(
21839                      &dres, insn32, condT, ARMCondAL/*bogus*/,
21840                      True/*isT*/
21841                   );
21842     if (ok_v6m)
21843        goto decode_success;
21844   }
21845
21846   /* ----------------------------------------------------------- */
21847   /* -- Undecodable                                           -- */
21848   /* ----------------------------------------------------------- */
21849
21850   goto decode_failure;
21851   /*NOTREACHED*/
21852
21853  decode_failure:
21854   /* All decode failures end up here. */
21855   if (sigill_diag)
21856      vex_printf("disInstr(thumb): unhandled instruction: "
21857                 "0x%04x 0x%04x\n", (UInt)insn0, (UInt)insn1);
21858
21859   /* Back up ITSTATE to the initial value for this instruction.
21860      If we don't do that, any subsequent restart of the instruction
21861      will restart with the wrong value. */
21862   if (old_itstate != IRTemp_INVALID)
21863      put_ITSTATE(old_itstate);
21864
   /* Tell the dispatcher that this insn cannot be decoded, and so has
      not been executed, and (is currently) the next to be executed.
      R15 should be up-to-date since it is made so at the start of each
      insn, but nevertheless be paranoid and update it again right
      now. */
21870   vassert(0 == (guest_R15_curr_instr_notENC & 1));
21871   llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) );
21872   dres.whatNext    = Dis_StopHere;
21873   dres.jk_StopHere = Ijk_NoDecode;
21874   dres.len         = 0;
21875   return dres;
21876
21877  decode_success:
21878   /* All decode successes end up here. */
21879   vassert(dres.len == 4 || dres.len == 2 || dres.len == 20);
21880   switch (dres.whatNext) {
21881      case Dis_Continue:
21882         llPutIReg(15, mkU32(dres.len + (guest_R15_curr_instr_notENC | 1)));
21883         break;
21884      case Dis_ResteerU:
21885      case Dis_ResteerC:
21886         llPutIReg(15, mkU32(dres.continueAt));
21887         break;
21888      case Dis_StopHere:
21889         break;
21890      default:
21891         vassert(0);
21892   }
21893
21894   DIP("\n");
21895
21896   return dres;
21897
21898#  undef INSN0
21899#  undef INSN1
21900}
21901
21902#undef DIP
21903#undef DIS
21904
21905
21906/* Helper table for figuring out how many insns an IT insn
21907   conditionalises.
21908
21909   An ITxyz instruction of the format "1011 1111 firstcond mask"
21910   conditionalises some number of instructions, as indicated by the
21911   following table.  A value of zero indicates the instruction is
21912   invalid in some way.
21913
21914   mask = 0 means this isn't an IT instruction
21915   fc = 15 (NV) means unpredictable
21916
21917   The line fc = 14 (AL) is different from the others; there are
21918   additional constraints in this case.
21919
21920          mask(0 ..                   15)
21921        +--------------------------------
21922   fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21923   ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21924        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21925        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21926        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21927        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21928        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21929        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21930        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21931        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21932        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21933        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21934        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21935        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21936        | 0 4 3 0 2 0 0 0 1 0 0 0 0 0 0 0
21937   15)  | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
21938
21939   To be conservative with the analysis, let's rule out the mask = 0
21940   case, since that isn't an IT insn at all.  But for all the other
21941   cases where the table contains zero, that means unpredictable, so
21942   let's say 4 to be conservative.  Hence we have a safe value for any
21943   IT (mask,fc) pair that the CPU would actually identify as an IT
21944   instruction.  The final table is
21945
21946          mask(0 ..                   15)
21947        +--------------------------------
21948   fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21949   ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21950        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21951        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21952        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21953        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21954        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21955        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21956        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21957        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21958        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21959        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21960        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21961        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
21962        | 0 4 3 4 2 4 4 4 1 4 4 4 4 4 4 4
21963   15)  | 0 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
21964*/
21965static const UChar it_length_table[256]
21966   = { 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21967       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21968       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21969       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21970       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21971       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21972       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21973       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21974       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21975       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21976       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21977       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21978       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21979       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
21980       0, 4, 3, 4, 2, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4,
21981       0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
21982     };
21983
21984
21985/*------------------------------------------------------------*/
21986/*--- Top-level fn                                         ---*/
21987/*------------------------------------------------------------*/
21988
21989/* Disassemble a single instruction into IR.  The instruction
21990   is located in host memory at &guest_code[delta]. */
21991
21992DisResult disInstr_ARM ( IRSB*        irsb_IN,
21993                         Bool         (*resteerOkFn) ( void*, Addr64 ),
21994                         Bool         resteerCisOk,
21995                         void*        callback_opaque,
21996                         UChar*       guest_code_IN,
21997                         Long         delta_ENCODED,
21998                         Addr64       guest_IP_ENCODED,
21999                         VexArch      guest_arch,
22000                         VexArchInfo* archinfo,
22001                         VexAbiInfo*  abiinfo,
22002                         Bool         host_bigendian_IN,
22003                         Bool         sigill_diag_IN )
22004{
22005   DisResult dres;
22006   Bool isThumb = (Bool)(guest_IP_ENCODED & 1);
22007
22008   /* Set globals (see top of this file) */
22009   vassert(guest_arch == VexArchARM);
22010
22011   irsb              = irsb_IN;
22012   host_is_bigendian = host_bigendian_IN;
22013   __curr_is_Thumb   = isThumb;
22014
22015   if (isThumb) {
22016      guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED - 1;
22017   } else {
22018      guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED;
22019   }
22020
22021   if (isThumb) {
22022      dres = disInstr_THUMB_WRK ( resteerOkFn,
22023                                  resteerCisOk, callback_opaque,
22024                                  &guest_code_IN[delta_ENCODED - 1],
22025                                  archinfo, abiinfo, sigill_diag_IN );
22026   } else {
22027      dres = disInstr_ARM_WRK ( resteerOkFn,
22028                                resteerCisOk, callback_opaque,
22029                                &guest_code_IN[delta_ENCODED],
22030                                archinfo, abiinfo, sigill_diag_IN );
22031   }
22032
22033   return dres;
22034}
22035
22036/* Test program for the conversion of IRCmpF64Result values to VFP
22037   nzcv values.  See handling of FCMPD et al above. */
22038/*
22039UInt foo ( UInt x )
22040{
22041   UInt ix    = ((x >> 5) & 3) | (x & 1);
22042   UInt termL = (((((ix ^ 1) << 30) - 1) >> 29) + 1);
22043   UInt termR = (ix & (ix >> 1) & 1);
22044   return termL  -  termR;
22045}
22046
22047void try ( char* s, UInt ir, UInt req )
22048{
22049   UInt act = foo(ir);
22050   printf("%s 0x%02x -> req %d%d%d%d act %d%d%d%d (0x%x)\n",
22051          s, ir, (req >> 3) & 1, (req >> 2) & 1,
22052                 (req >> 1) & 1, (req >> 0) & 1,
22053                 (act >> 3) & 1, (act >> 2) & 1,
22054                 (act >> 1) & 1, (act >> 0) & 1, act);
22055
22056}
22057
22058int main ( void )
22059{
22060   printf("\n");
22061   try("UN", 0x45, 0b0011);
22062   try("LT", 0x01, 0b1000);
22063   try("GT", 0x00, 0b0010);
22064   try("EQ", 0x40, 0b0110);
22065   printf("\n");
22066   return 0;
22067}
22068*/
22069
22070/* Spare code for doing reference implementations of various 64-bit
22071   SIMD interleaves/deinterleaves/concatenation ops. */
22072/*
22073// Split a 64 bit value into 4 16 bit ones, in 32-bit IRTemps with
22074// the top halves guaranteed to be zero.
22075static void break64to16s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
22076                           IRTemp* out0, IRTemp v64 )
22077{
22078  if (out3) *out3 = newTemp(Ity_I32);
22079  if (out2) *out2 = newTemp(Ity_I32);
22080  if (out1) *out1 = newTemp(Ity_I32);
22081  if (out0) *out0 = newTemp(Ity_I32);
22082  IRTemp hi32 = newTemp(Ity_I32);
22083  IRTemp lo32 = newTemp(Ity_I32);
22084  assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
22085  assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
22086  if (out3) assign(*out3, binop(Iop_Shr32, mkexpr(hi32), mkU8(16)));
22087  if (out2) assign(*out2, binop(Iop_And32, mkexpr(hi32), mkU32(0xFFFF)));
22088  if (out1) assign(*out1, binop(Iop_Shr32, mkexpr(lo32), mkU8(16)));
22089  if (out0) assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFFFF)));
22090}
22091
22092// Make a 64 bit value from 4 16 bit ones, each of which is in a 32 bit
22093// IRTemp.
22094static IRTemp mk64from16s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
22095{
22096  IRTemp hi32 = newTemp(Ity_I32);
22097  IRTemp lo32 = newTemp(Ity_I32);
22098  assign(hi32,
22099         binop(Iop_Or32,
22100               binop(Iop_Shl32, mkexpr(in3), mkU8(16)),
22101               binop(Iop_And32, mkexpr(in2), mkU32(0xFFFF))));
22102  assign(lo32,
22103         binop(Iop_Or32,
22104               binop(Iop_Shl32, mkexpr(in1), mkU8(16)),
22105               binop(Iop_And32, mkexpr(in0), mkU32(0xFFFF))));
22106  IRTemp res = newTemp(Ity_I64);
22107  assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
22108  return res;
22109}
22110
22111static IRExpr* mk_InterleaveLO16x4 ( IRTemp a3210, IRTemp b3210 )
22112{
22113  // returns a1 b1 a0 b0
22114  IRTemp a1, a0, b1, b0;
22115  break64to16s(NULL, NULL, &a1, &a0, a3210);
22116  break64to16s(NULL, NULL, &b1, &b0, b3210);
22117  return mkexpr(mk64from16s(a1, b1, a0, b0));
22118}
22119
22120static IRExpr* mk_InterleaveHI16x4 ( IRTemp a3210, IRTemp b3210 )
22121{
22122  // returns a3 b3 a2 b2
22123  IRTemp a3, a2, b3, b2;
22124  break64to16s(&a3, &a2, NULL, NULL, a3210);
22125  break64to16s(&b3, &b2, NULL, NULL, b3210);
22126  return mkexpr(mk64from16s(a3, b3, a2, b2));
22127}
22128
22129static IRExpr* mk_CatEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
22130{
22131  // returns a2 a0 b2 b0
22132  IRTemp a2, a0, b2, b0;
22133  break64to16s(NULL, &a2, NULL, &a0, a3210);
22134  break64to16s(NULL, &b2, NULL, &b0, b3210);
22135  return mkexpr(mk64from16s(a2, a0, b2, b0));
22136}
22137
22138static IRExpr* mk_CatOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
22139{
22140  // returns a3 a1 b3 b1
22141  IRTemp a3, a1, b3, b1;
22142  break64to16s(&a3, NULL, &a1, NULL, a3210);
22143  break64to16s(&b3, NULL, &b1, NULL, b3210);
22144  return mkexpr(mk64from16s(a3, a1, b3, b1));
22145}
22146
22147static IRExpr* mk_InterleaveOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
22148{
22149  // returns a3 b3 a1 b1
22150  IRTemp a3, b3, a1, b1;
22151  break64to16s(&a3, NULL, &a1, NULL, a3210);
22152  break64to16s(&b3, NULL, &b1, NULL, b3210);
22153  return mkexpr(mk64from16s(a3, b3, a1, b1));
22154}
22155
22156static IRExpr* mk_InterleaveEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
22157{
22158  // returns a2 b2 a0 b0
22159  IRTemp a2, b2, a0, b0;
22160  break64to16s(NULL, &a2, NULL, &a0, a3210);
22161  break64to16s(NULL, &b2, NULL, &b0, b3210);
22162  return mkexpr(mk64from16s(a2, b2, a0, b0));
22163}
22164
22165static void break64to8s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
22166                          IRTemp* out4, IRTemp* out3, IRTemp* out2,
22167                          IRTemp* out1,IRTemp* out0, IRTemp v64 )
22168{
22169  if (out7) *out7 = newTemp(Ity_I32);
22170  if (out6) *out6 = newTemp(Ity_I32);
22171  if (out5) *out5 = newTemp(Ity_I32);
22172  if (out4) *out4 = newTemp(Ity_I32);
22173  if (out3) *out3 = newTemp(Ity_I32);
22174  if (out2) *out2 = newTemp(Ity_I32);
22175  if (out1) *out1 = newTemp(Ity_I32);
22176  if (out0) *out0 = newTemp(Ity_I32);
22177  IRTemp hi32 = newTemp(Ity_I32);
22178  IRTemp lo32 = newTemp(Ity_I32);
22179  assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
22180  assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
22181  if (out7)
22182    assign(*out7, binop(Iop_And32,
22183                        binop(Iop_Shr32, mkexpr(hi32), mkU8(24)),
22184                        mkU32(0xFF)));
22185  if (out6)
22186    assign(*out6, binop(Iop_And32,
22187                        binop(Iop_Shr32, mkexpr(hi32), mkU8(16)),
22188                        mkU32(0xFF)));
22189  if (out5)
22190    assign(*out5, binop(Iop_And32,
22191                        binop(Iop_Shr32, mkexpr(hi32), mkU8(8)),
22192                        mkU32(0xFF)));
22193  if (out4)
22194    assign(*out4, binop(Iop_And32, mkexpr(hi32), mkU32(0xFF)));
22195  if (out3)
22196    assign(*out3, binop(Iop_And32,
22197                        binop(Iop_Shr32, mkexpr(lo32), mkU8(24)),
22198                        mkU32(0xFF)));
22199  if (out2)
22200    assign(*out2, binop(Iop_And32,
22201                        binop(Iop_Shr32, mkexpr(lo32), mkU8(16)),
22202                        mkU32(0xFF)));
22203  if (out1)
22204    assign(*out1, binop(Iop_And32,
22205                        binop(Iop_Shr32, mkexpr(lo32), mkU8(8)),
22206                        mkU32(0xFF)));
22207  if (out0)
22208    assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFF)));
22209}
22210
22211static IRTemp mk64from8s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
22212                           IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
22213{
22214  IRTemp hi32 = newTemp(Ity_I32);
22215  IRTemp lo32 = newTemp(Ity_I32);
22216  assign(hi32,
22217         binop(Iop_Or32,
22218               binop(Iop_Or32,
22219                     binop(Iop_Shl32,
22220                           binop(Iop_And32, mkexpr(in7), mkU32(0xFF)),
22221                           mkU8(24)),
22222                     binop(Iop_Shl32,
22223                           binop(Iop_And32, mkexpr(in6), mkU32(0xFF)),
22224                           mkU8(16))),
22225               binop(Iop_Or32,
22226                     binop(Iop_Shl32,
22227                           binop(Iop_And32, mkexpr(in5), mkU32(0xFF)), mkU8(8)),
22228                     binop(Iop_And32,
22229                           mkexpr(in4), mkU32(0xFF)))));
22230  assign(lo32,
22231         binop(Iop_Or32,
22232               binop(Iop_Or32,
22233                     binop(Iop_Shl32,
22234                           binop(Iop_And32, mkexpr(in3), mkU32(0xFF)),
22235                           mkU8(24)),
22236                     binop(Iop_Shl32,
22237                           binop(Iop_And32, mkexpr(in2), mkU32(0xFF)),
22238                           mkU8(16))),
22239               binop(Iop_Or32,
22240                     binop(Iop_Shl32,
22241                           binop(Iop_And32, mkexpr(in1), mkU32(0xFF)), mkU8(8)),
22242                     binop(Iop_And32,
22243                           mkexpr(in0), mkU32(0xFF)))));
22244  IRTemp res = newTemp(Ity_I64);
22245  assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
22246  return res;
22247}
22248
22249static IRExpr* mk_InterleaveLO8x8 ( IRTemp a76543210, IRTemp b76543210 )
22250{
22251  // returns a3 b3 a2 b2 a1 b1 a0 b0
22252  IRTemp a3, b3, a2, b2, a1, a0, b1, b0;
22253  break64to8s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210);
22254  break64to8s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210);
22255  return mkexpr(mk64from8s(a3, b3, a2, b2, a1, b1, a0, b0));
22256}
22257
22258static IRExpr* mk_InterleaveHI8x8 ( IRTemp a76543210, IRTemp b76543210 )
22259{
22260  // returns a7 b7 a6 b6 a5 b5 a4 b4
22261  IRTemp a7, b7, a6, b6, a5, b5, a4, b4;
22262  break64to8s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210);
22263  break64to8s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210);
22264  return mkexpr(mk64from8s(a7, b7, a6, b6, a5, b5, a4, b4));
22265}
22266
22267static IRExpr* mk_CatEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
22268{
22269  // returns a6 a4 a2 a0 b6 b4 b2 b0
22270  IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
22271  break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
22272  break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
22273  return mkexpr(mk64from8s(a6, a4, a2, a0, b6, b4, b2, b0));
22274}
22275
22276static IRExpr* mk_CatOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
22277{
22278  // returns a7 a5 a3 a1 b7 b5 b3 b1
22279  IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
22280  break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
22281  break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
22282  return mkexpr(mk64from8s(a7, a5, a3, a1, b7, b5, b3, b1));
22283}
22284
22285static IRExpr* mk_InterleaveEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
22286{
22287  // returns a6 b6 a4 b4 a2 b2 a0 b0
22288  IRTemp a6, b6, a4, b4, a2, b2, a0, b0;
22289  break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
22290  break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
22291  return mkexpr(mk64from8s(a6, b6, a4, b4, a2, b2, a0, b0));
22292}
22293
22294static IRExpr* mk_InterleaveOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
22295{
22296  // returns a7 b7 a5 b5 a3 b3 a1 b1
22297  IRTemp a7, b7, a5, b5, a3, b3, a1, b1;
22298  break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
22299  break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
22300  return mkexpr(mk64from8s(a7, b7, a5, b5, a3, b3, a1, b1));
22301}
22302
22303static IRExpr* mk_InterleaveLO32x2 ( IRTemp a10, IRTemp b10 )
22304{
22305  // returns a0 b0
22306  return binop(Iop_32HLto64, unop(Iop_64to32, mkexpr(a10)),
22307                             unop(Iop_64to32, mkexpr(b10)));
22308}
22309
22310static IRExpr* mk_InterleaveHI32x2 ( IRTemp a10, IRTemp b10 )
22311{
22312  // returns a1 b1
22313  return binop(Iop_32HLto64, unop(Iop_64HIto32, mkexpr(a10)),
22314                             unop(Iop_64HIto32, mkexpr(b10)));
22315}
22316*/
22317
22318/*--------------------------------------------------------------------*/
22319/*--- end                                         guest_arm_toIR.c ---*/
22320/*--------------------------------------------------------------------*/
22321