1
2/*--------------------------------------------------------------------*/
3/*--- begin                                       guest_arm_toIR.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2013 OpenWorks LLP
11      info@open-works.net
12
13   NEON support is
14   Copyright (C) 2010-2013 Samsung Electronics
15   contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16              and Kirill Batuzov <batuzovk@ispras.ru>
17
18   This program is free software; you can redistribute it and/or
19   modify it under the terms of the GNU General Public License as
20   published by the Free Software Foundation; either version 2 of the
21   License, or (at your option) any later version.
22
23   This program is distributed in the hope that it will be useful, but
24   WITHOUT ANY WARRANTY; without even the implied warranty of
25   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
26   General Public License for more details.
27
28   You should have received a copy of the GNU General Public License
29   along with this program; if not, write to the Free Software
30   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31   02110-1301, USA.
32
33   The GNU General Public License is contained in the file COPYING.
34*/
35
36/* XXXX thumb to check:
37   that all cases where putIRegT writes r15, we generate a jump.
38
39   All uses of newTemp assign to an IRTemp and not a UInt
40
41   For all thumb loads and stores, including VFP ones, new-ITSTATE is
42   backed out before the memory op, and restored afterwards.  This
43   needs to happen even after we go uncond.  (and for sure it doesn't
44   happen for VFP loads/stores right now).
45
46   VFP on thumb: check that we exclude all r13/r15 cases that we
47   should.
48
49   XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
50   taking into account the number of insns guarded by an IT.
51
52   remove the nasty hack, in the spechelper, of looking for Or32(...,
53   0xE0) in as the first arg to armg_calculate_condition, and instead
54   use Slice44 as specified in comments in the spechelper.
55
56   add specialisations for armg_calculate_flag_c and _v, as they
57   are moderately often needed in Thumb code.
58
59   Correctness: ITSTATE handling in Thumb SVCs is wrong.
60
61   Correctness (obscure): in m_transtab, when invalidating code
62   address ranges, invalidate up to 18 bytes after the end of the
63   range.  This is because the ITSTATE optimisation at the top of
64   _THUMB_WRK below analyses up to 18 bytes before the start of any
65   given instruction, and so might depend on the invalidated area.
66*/
67
68/* Limitations, etc
69
70   - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
71     These instructions are non-restartable in the case where the
72     transfer(s) fault.
73
74   - SWP: the restart jump back is Ijk_Boring; it should be
75     Ijk_NoRedir but that's expensive.  See comments on casLE() in
76     guest_x86_toIR.c.
77*/
78
79/* "Special" instructions.
80
81   This instruction decoder can decode four special instructions
82   which mean nothing natively (are no-ops as far as regs/mem are
83   concerned) but have meaning for supporting Valgrind.  A special
84   instruction is flagged by a 16-byte preamble:
85
86      E1A0C1EC E1A0C6EC E1A0CEEC E1A0C9EC
87      (mov r12, r12, ROR #3;   mov r12, r12, ROR #13;
88       mov r12, r12, ROR #29;  mov r12, r12, ROR #19)
89
90   Following that, one of the following 3 are allowed
91   (standard interpretation in parentheses):
92
93      E18AA00A (orr r10,r10,r10)   R3 = client_request ( R4 )
94      E18BB00B (orr r11,r11,r11)   R3 = guest_NRADDR
95      E18CC00C (orr r12,r12,r12)   branch-and-link-to-noredir R4
96      E1899009 (orr r9,r9,r9)      IR injection
97
98   Any other bytes following the 16-byte preamble are illegal and
99   constitute a failure in instruction decoding.  This all assumes
100   that the preamble will never occur except in specific code
101   fragments designed for Valgrind to catch.
102*/
103
104/* Translates ARM(v5) code to IR. */
105
106#include "libvex_basictypes.h"
107#include "libvex_ir.h"
108#include "libvex.h"
109#include "libvex_guest_arm.h"
110
111#include "main_util.h"
112#include "main_globals.h"
113#include "guest_generic_bb_to_IR.h"
114#include "guest_arm_defs.h"
115
116
117/*------------------------------------------------------------*/
118/*--- Globals                                              ---*/
119/*------------------------------------------------------------*/
120
121/* These are set at the start of the translation of a instruction, so
122   that we don't have to pass them around endlessly.  CONST means does
123   not change during translation of the instruction.
124*/
125
126/* CONST: is the host bigendian?  This has to do with float vs double
127   register accesses on VFP, but it's complex and not properly thought
128   out. */
129static Bool host_is_bigendian;
130
131/* CONST: The guest address for the instruction currently being
132   translated.  This is the real, "decoded" address (not subject
133   to the CPSR.T kludge). */
134static Addr32 guest_R15_curr_instr_notENC;
135
136/* CONST, FOR ASSERTIONS ONLY.  Indicates whether currently processed
137   insn is Thumb (True) or ARM (False). */
138static Bool __curr_is_Thumb;
139
140/* MOD: The IRSB* into which we're generating code. */
141static IRSB* irsb;
142
143/* These are to do with handling writes to r15.  They are initially
144   set at the start of disInstr_ARM_WRK to indicate no update,
145   possibly updated during the routine, and examined again at the end.
146   If they have been set to indicate a r15 update then a jump is
147   generated.  Note, "explicit" jumps (b, bx, etc) are generated
148   directly, not using this mechanism -- this is intended to handle
149   the implicit-style jumps resulting from (eg) assigning to r15 as
150   the result of insns we wouldn't normally consider branchy. */
151
152/* MOD.  Initially False; set to True iff abovementioned handling is
153   required. */
154static Bool r15written;
155
156/* MOD.  Initially IRTemp_INVALID.  If the r15 branch to be generated
157   is conditional, this holds the gating IRTemp :: Ity_I32.  If the
158   branch to be generated is unconditional, this remains
159   IRTemp_INVALID. */
160static IRTemp r15guard; /* :: Ity_I32, 0 or 1 */
161
162/* MOD.  Initially Ijk_Boring.  If an r15 branch is to be generated,
163   this holds the jump kind. */
164static IRTemp r15kind;
165
166
167/*------------------------------------------------------------*/
168/*--- Debugging output                                     ---*/
169/*------------------------------------------------------------*/
170
/* Emit decoded-instruction trace output, but only when front-end
   tracing (VEX_TRACE_FE) has been requested. */
#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

/* As DIP, but sprintf into 'buf' instead of printing directly. */
#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)

/* Sanity checks that the decoder's notion of the current instruction
   set (see __curr_is_Thumb) matches the code path being executed. */
#define ASSERT_IS_THUMB \
   do { vassert(__curr_is_Thumb); } while (0)

#define ASSERT_IS_ARM \
   do { vassert(! __curr_is_Thumb); } while (0)
184
185
186/*------------------------------------------------------------*/
187/*--- Helper bits and pieces for deconstructing the        ---*/
188/*--- arm insn stream.                                     ---*/
189/*------------------------------------------------------------*/
190
191/* Do a little-endian load of a 32-bit word, regardless of the
192   endianness of the underlying host. */
193static inline UInt getUIntLittleEndianly ( UChar* p )
194{
195   UInt w = 0;
196   w = (w << 8) | p[3];
197   w = (w << 8) | p[2];
198   w = (w << 8) | p[1];
199   w = (w << 8) | p[0];
200   return w;
201}
202
203/* Do a little-endian load of a 16-bit word, regardless of the
204   endianness of the underlying host. */
205static inline UShort getUShortLittleEndianly ( UChar* p )
206{
207   UShort w = 0;
208   w = (w << 8) | p[1];
209   w = (w << 8) | p[0];
210   return w;
211}
212
213static UInt ROR32 ( UInt x, UInt sh ) {
214   vassert(sh >= 0 && sh < 32);
215   if (sh == 0)
216      return x;
217   else
218      return (x << (32-sh)) | (x >> sh);
219}
220
221static Int popcount32 ( UInt x )
222{
223   Int res = 0, i;
224   for (i = 0; i < 32; i++) {
225      res += (x & 1);
226      x >>= 1;
227   }
228   return res;
229}
230
231static UInt setbit32 ( UInt x, Int ix, UInt b )
232{
233   UInt mask = 1 << ix;
234   x &= ~mask;
235   x |= ((b << ix) & mask);
236   return x;
237}
238
/* Concatenate individual bit values (each expected to be 0 or 1)
   into a small integer; used to write instruction-encoding bit
   patterns readably.  BITSn(b_{n-1},...,b_0) yields the n-bit value
   with b_0 as the least significant bit. */
#define BITS2(_b1,_b0) \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0)                      \
  (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0) \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

/* BITS5..BITS7 are defined in terms of BITS8 with zero top bits. */
#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)      \
   (((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8)                                \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

/* produces _uint[_bMax:_bMin], i.e. the inclusive bitfield of _uint
   running from bit _bMax down to bit _bMin, right-justified.  The
   mask is built in 64 bits so that a 32-bit-wide slice is safe. */
#define SLICE_UInt(_uint,_bMax,_bMin) \
   (( ((UInt)(_uint)) >> (_bMin)) \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
271
272
273/*------------------------------------------------------------*/
274/*--- Helper bits and pieces for creating IR fragments.    ---*/
275/*------------------------------------------------------------*/
276
277static IRExpr* mkU64 ( ULong i )
278{
279   return IRExpr_Const(IRConst_U64(i));
280}
281
282static IRExpr* mkU32 ( UInt i )
283{
284   return IRExpr_Const(IRConst_U32(i));
285}
286
287static IRExpr* mkU8 ( UInt i )
288{
289   vassert(i < 256);
290   return IRExpr_Const(IRConst_U8( (UChar)i ));
291}
292
293static IRExpr* mkexpr ( IRTemp tmp )
294{
295   return IRExpr_RdTmp(tmp);
296}
297
298static IRExpr* unop ( IROp op, IRExpr* a )
299{
300   return IRExpr_Unop(op, a);
301}
302
303static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
304{
305   return IRExpr_Binop(op, a1, a2);
306}
307
308static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
309{
310   return IRExpr_Triop(op, a1, a2, a3);
311}
312
313static IRExpr* loadLE ( IRType ty, IRExpr* addr )
314{
315   return IRExpr_Load(Iend_LE, ty, addr);
316}
317
318/* Add a statement to the list held by "irbb". */
319static void stmt ( IRStmt* st )
320{
321   addStmtToIRSB( irsb, st );
322}
323
324static void assign ( IRTemp dst, IRExpr* e )
325{
326   stmt( IRStmt_WrTmp(dst, e) );
327}
328
329static void storeLE ( IRExpr* addr, IRExpr* data )
330{
331   stmt( IRStmt_Store(Iend_LE, addr, data) );
332}
333
334static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
335{
336   if (guardT == IRTemp_INVALID) {
337      /* unconditional */
338      storeLE(addr, data);
339   } else {
340      stmt( IRStmt_StoreG(Iend_LE, addr, data,
341                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
342   }
343}
344
345static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
346                            IRExpr* addr, IRExpr* alt,
347                            IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
348{
349   if (guardT == IRTemp_INVALID) {
350      /* unconditional */
351      IRExpr* loaded = NULL;
352      switch (cvt) {
353         case ILGop_Ident32:
354            loaded = loadLE(Ity_I32, addr); break;
355         case ILGop_8Uto32:
356            loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
357         case ILGop_8Sto32:
358            loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
359         case ILGop_16Uto32:
360            loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
361         case ILGop_16Sto32:
362            loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
363         default:
364            vassert(0);
365      }
366      vassert(loaded != NULL);
367      assign(dst, loaded);
368   } else {
369      /* Generate a guarded load into 'dst', but apply 'cvt' to the
370         loaded data before putting the data in 'dst'.  If the load
371         does not take place, 'alt' is placed directly in 'dst'. */
372      stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
373                         binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
374   }
375}
376
377/* Generate a new temporary of the given type. */
378static IRTemp newTemp ( IRType ty )
379{
380   vassert(isPlausibleIRType(ty));
381   return newIRTemp( irsb->tyenv, ty );
382}
383
384/* Produces a value in 0 .. 3, which is encoded as per the type
385   IRRoundingMode. */
386static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
387{
388   return mkU32(Irrm_NEAREST);
389}
390
391/* Generate an expression for SRC rotated right by ROT. */
392static IRExpr* genROR32( IRTemp src, Int rot )
393{
394   vassert(rot >= 0 && rot < 32);
395   if (rot == 0)
396      return mkexpr(src);
397   return
398      binop(Iop_Or32,
399            binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
400            binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
401}
402
403static IRExpr* mkU128 ( ULong i )
404{
405   return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
406}
407
408/* Generate a 4-aligned version of the given expression if
409   the given condition is true.  Else return it unchanged. */
410static IRExpr* align4if ( IRExpr* e, Bool b )
411{
412   if (b)
413      return binop(Iop_And32, e, mkU32(~3));
414   else
415      return e;
416}
417
418
419/*------------------------------------------------------------*/
420/*--- Helpers for accessing guest registers.               ---*/
421/*------------------------------------------------------------*/
422
/* Byte offsets of the guest-state fields, for use with
   IRExpr_Get / IRStmt_Put. */

/* Integer registers r0..r14, plus r15 (stored with the Thumb "T"
   bit encoded, hence R15T). */
#define OFFB_R0       offsetof(VexGuestARMState,guest_R0)
#define OFFB_R1       offsetof(VexGuestARMState,guest_R1)
#define OFFB_R2       offsetof(VexGuestARMState,guest_R2)
#define OFFB_R3       offsetof(VexGuestARMState,guest_R3)
#define OFFB_R4       offsetof(VexGuestARMState,guest_R4)
#define OFFB_R5       offsetof(VexGuestARMState,guest_R5)
#define OFFB_R6       offsetof(VexGuestARMState,guest_R6)
#define OFFB_R7       offsetof(VexGuestARMState,guest_R7)
#define OFFB_R8       offsetof(VexGuestARMState,guest_R8)
#define OFFB_R9       offsetof(VexGuestARMState,guest_R9)
#define OFFB_R10      offsetof(VexGuestARMState,guest_R10)
#define OFFB_R11      offsetof(VexGuestARMState,guest_R11)
#define OFFB_R12      offsetof(VexGuestARMState,guest_R12)
#define OFFB_R13      offsetof(VexGuestARMState,guest_R13)
#define OFFB_R14      offsetof(VexGuestARMState,guest_R14)
#define OFFB_R15T     offsetof(VexGuestARMState,guest_R15T)

/* Lazily-evaluated condition-code "thunk" fields. */
#define OFFB_CC_OP    offsetof(VexGuestARMState,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARMState,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARMState,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARMState,guest_CC_NDEP)
#define OFFB_NRADDR   offsetof(VexGuestARMState,guest_NRADDR)

/* VFP/Neon double registers D0..D31. */
#define OFFB_D0       offsetof(VexGuestARMState,guest_D0)
#define OFFB_D1       offsetof(VexGuestARMState,guest_D1)
#define OFFB_D2       offsetof(VexGuestARMState,guest_D2)
#define OFFB_D3       offsetof(VexGuestARMState,guest_D3)
#define OFFB_D4       offsetof(VexGuestARMState,guest_D4)
#define OFFB_D5       offsetof(VexGuestARMState,guest_D5)
#define OFFB_D6       offsetof(VexGuestARMState,guest_D6)
#define OFFB_D7       offsetof(VexGuestARMState,guest_D7)
#define OFFB_D8       offsetof(VexGuestARMState,guest_D8)
#define OFFB_D9       offsetof(VexGuestARMState,guest_D9)
#define OFFB_D10      offsetof(VexGuestARMState,guest_D10)
#define OFFB_D11      offsetof(VexGuestARMState,guest_D11)
#define OFFB_D12      offsetof(VexGuestARMState,guest_D12)
#define OFFB_D13      offsetof(VexGuestARMState,guest_D13)
#define OFFB_D14      offsetof(VexGuestARMState,guest_D14)
#define OFFB_D15      offsetof(VexGuestARMState,guest_D15)
#define OFFB_D16      offsetof(VexGuestARMState,guest_D16)
#define OFFB_D17      offsetof(VexGuestARMState,guest_D17)
#define OFFB_D18      offsetof(VexGuestARMState,guest_D18)
#define OFFB_D19      offsetof(VexGuestARMState,guest_D19)
#define OFFB_D20      offsetof(VexGuestARMState,guest_D20)
#define OFFB_D21      offsetof(VexGuestARMState,guest_D21)
#define OFFB_D22      offsetof(VexGuestARMState,guest_D22)
#define OFFB_D23      offsetof(VexGuestARMState,guest_D23)
#define OFFB_D24      offsetof(VexGuestARMState,guest_D24)
#define OFFB_D25      offsetof(VexGuestARMState,guest_D25)
#define OFFB_D26      offsetof(VexGuestARMState,guest_D26)
#define OFFB_D27      offsetof(VexGuestARMState,guest_D27)
#define OFFB_D28      offsetof(VexGuestARMState,guest_D28)
#define OFFB_D29      offsetof(VexGuestARMState,guest_D29)
#define OFFB_D30      offsetof(VexGuestARMState,guest_D30)
#define OFFB_D31      offsetof(VexGuestARMState,guest_D31)

/* Miscellaneous status/state fields. */
#define OFFB_FPSCR    offsetof(VexGuestARMState,guest_FPSCR)
#define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
#define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
#define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
#define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
#define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
#define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
#define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)

#define OFFB_CMSTART  offsetof(VexGuestARMState,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARMState,guest_CMLEN)
490
491
492/* ---------------- Integer registers ---------------- */
493
494static Int integerGuestRegOffset ( UInt iregNo )
495{
496   /* Do we care about endianness here?  We do if sub-parts of integer
497      registers are accessed, but I don't think that ever happens on
498      ARM. */
499   switch (iregNo) {
500      case 0:  return OFFB_R0;
501      case 1:  return OFFB_R1;
502      case 2:  return OFFB_R2;
503      case 3:  return OFFB_R3;
504      case 4:  return OFFB_R4;
505      case 5:  return OFFB_R5;
506      case 6:  return OFFB_R6;
507      case 7:  return OFFB_R7;
508      case 8:  return OFFB_R8;
509      case 9:  return OFFB_R9;
510      case 10: return OFFB_R10;
511      case 11: return OFFB_R11;
512      case 12: return OFFB_R12;
513      case 13: return OFFB_R13;
514      case 14: return OFFB_R14;
515      case 15: return OFFB_R15T;
516      default: vassert(0);
517   }
518}
519
520/* Plain ("low level") read from a reg; no +8 offset magic for r15. */
521static IRExpr* llGetIReg ( UInt iregNo )
522{
523   vassert(iregNo < 16);
524   return IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
525}
526
527/* Architected read from a reg in ARM mode.  This automagically adds 8
528   to all reads of r15. */
529static IRExpr* getIRegA ( UInt iregNo )
530{
531   IRExpr* e;
532   ASSERT_IS_ARM;
533   vassert(iregNo < 16);
534   if (iregNo == 15) {
535      /* If asked for r15, don't read the guest state value, as that
536         may not be up to date in the case where loop unrolling has
537         happened, because the first insn's write to the block is
538         omitted; hence in the 2nd and subsequent unrollings we don't
539         have a correct value in guest r15.  Instead produce the
540         constant that we know would be produced at this point. */
541      vassert(0 == (guest_R15_curr_instr_notENC & 3));
542      e = mkU32(guest_R15_curr_instr_notENC + 8);
543   } else {
544      e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
545   }
546   return e;
547}
548
549/* Architected read from a reg in Thumb mode.  This automagically adds
550   4 to all reads of r15. */
551static IRExpr* getIRegT ( UInt iregNo )
552{
553   IRExpr* e;
554   ASSERT_IS_THUMB;
555   vassert(iregNo < 16);
556   if (iregNo == 15) {
557      /* Ditto comment in getIReg. */
558      vassert(0 == (guest_R15_curr_instr_notENC & 1));
559      e = mkU32(guest_R15_curr_instr_notENC + 4);
560   } else {
561      e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
562   }
563   return e;
564}
565
566/* Plain ("low level") write to a reg; no jump or alignment magic for
567   r15. */
568static void llPutIReg ( UInt iregNo, IRExpr* e )
569{
570   vassert(iregNo < 16);
571   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
572   stmt( IRStmt_Put(integerGuestRegOffset(iregNo), e) );
573}
574
/* Architected write to an integer register in ARM mode.  If it is to
   r15, record info (in r15written/r15guard/r15kind) so that at the
   end of this insn's translation, a branch to it can be made.  Also
   handles conditional writes to the register: if guardT ==
   IRTemp_INVALID then the write is unconditional.  If writing r15,
   also 4-align it. */
static void putIRegA ( UInt       iregNo,
                       IRExpr*    e,
                       IRTemp     guardT /* :: Ity_I32, 0 or 1 */,
                       IRJumpKind jk /* if a jump is generated */ )
{
   /* if writing r15, force e to be 4-aligned. */
   // INTERWORKING FIXME.  this needs to be relaxed so that
   // puts caused by LDMxx which load r15 interwork right.
   // but is no aligned too relaxed?
   //if (iregNo == 15)
   //   e = binop(Iop_And32, e, mkU32(~3));
   ASSERT_IS_ARM;
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutIReg( iregNo, e );
   } else {
      /* conditional write: keep the old value when the guard is 0 */
      llPutIReg( iregNo,
                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
                             e, llGetIReg(iregNo) ));
   }
   if (iregNo == 15) {
      // Record the r15 write so the top-level driver emits a branch
      // when this insn's translation finishes.  Assert against
      // competing r15 updates: shouldn't happen; should be ruled out
      // by the instr matching logic.
      vassert(r15written == False);
      vassert(r15guard   == IRTemp_INVALID);
      vassert(r15kind    == Ijk_Boring);
      r15written = True;
      r15guard   = guardT;
      r15kind    = jk;
   }
}
614
615
616/* Architected write to an integer register in Thumb mode.  Writes to
617   r15 are not allowed.  Handles conditional writes to the register:
618   if guardT == IRTemp_INVALID then the write is unconditional. */
619static void putIRegT ( UInt       iregNo,
620                       IRExpr*    e,
621                       IRTemp     guardT /* :: Ity_I32, 0 or 1 */ )
622{
623   /* So, generate either an unconditional or a conditional write to
624      the reg. */
625   ASSERT_IS_THUMB;
626   vassert(iregNo >= 0 && iregNo <= 14);
627   if (guardT == IRTemp_INVALID) {
628      /* unconditional write */
629      llPutIReg( iregNo, e );
630   } else {
631      llPutIReg( iregNo,
632                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
633                             e, llGetIReg(iregNo) ));
634   }
635}
636
637
638/* Thumb16 and Thumb32 only.
639   Returns true if reg is 13 or 15.  Implements the BadReg
640   predicate in the ARM ARM. */
641static Bool isBadRegT ( UInt r )
642{
643   vassert(r <= 15);
644   ASSERT_IS_THUMB;
645   return r == 13 || r == 15;
646}
647
648
649/* ---------------- Double registers ---------------- */
650
651static Int doubleGuestRegOffset ( UInt dregNo )
652{
653   /* Do we care about endianness here?  Probably do if we ever get
654      into the situation of dealing with the single-precision VFP
655      registers. */
656   switch (dregNo) {
657      case 0:  return OFFB_D0;
658      case 1:  return OFFB_D1;
659      case 2:  return OFFB_D2;
660      case 3:  return OFFB_D3;
661      case 4:  return OFFB_D4;
662      case 5:  return OFFB_D5;
663      case 6:  return OFFB_D6;
664      case 7:  return OFFB_D7;
665      case 8:  return OFFB_D8;
666      case 9:  return OFFB_D9;
667      case 10: return OFFB_D10;
668      case 11: return OFFB_D11;
669      case 12: return OFFB_D12;
670      case 13: return OFFB_D13;
671      case 14: return OFFB_D14;
672      case 15: return OFFB_D15;
673      case 16: return OFFB_D16;
674      case 17: return OFFB_D17;
675      case 18: return OFFB_D18;
676      case 19: return OFFB_D19;
677      case 20: return OFFB_D20;
678      case 21: return OFFB_D21;
679      case 22: return OFFB_D22;
680      case 23: return OFFB_D23;
681      case 24: return OFFB_D24;
682      case 25: return OFFB_D25;
683      case 26: return OFFB_D26;
684      case 27: return OFFB_D27;
685      case 28: return OFFB_D28;
686      case 29: return OFFB_D29;
687      case 30: return OFFB_D30;
688      case 31: return OFFB_D31;
689      default: vassert(0);
690   }
691}
692
693/* Plain ("low level") read from a VFP Dreg. */
694static IRExpr* llGetDReg ( UInt dregNo )
695{
696   vassert(dregNo < 32);
697   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_F64 );
698}
699
700/* Architected read from a VFP Dreg. */
701static IRExpr* getDReg ( UInt dregNo ) {
702   return llGetDReg( dregNo );
703}
704
705/* Plain ("low level") write to a VFP Dreg. */
706static void llPutDReg ( UInt dregNo, IRExpr* e )
707{
708   vassert(dregNo < 32);
709   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
710   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
711}
712
713/* Architected write to a VFP Dreg.  Handles conditional writes to the
714   register: if guardT == IRTemp_INVALID then the write is
715   unconditional. */
716static void putDReg ( UInt    dregNo,
717                      IRExpr* e,
718                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
719{
720   /* So, generate either an unconditional or a conditional write to
721      the reg. */
722   if (guardT == IRTemp_INVALID) {
723      /* unconditional write */
724      llPutDReg( dregNo, e );
725   } else {
726      llPutDReg( dregNo,
727                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
728                             e, llGetDReg(dregNo) ));
729   }
730}
731
732/* And now exactly the same stuff all over again, but this time
733   taking/returning I64 rather than F64, to support 64-bit Neon
734   ops. */
735
736/* Plain ("low level") read from a Neon Integer Dreg. */
737static IRExpr* llGetDRegI64 ( UInt dregNo )
738{
739   vassert(dregNo < 32);
740   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_I64 );
741}
742
743/* Architected read from a Neon Integer Dreg. */
744static IRExpr* getDRegI64 ( UInt dregNo ) {
745   return llGetDRegI64( dregNo );
746}
747
748/* Plain ("low level") write to a Neon Integer Dreg. */
749static void llPutDRegI64 ( UInt dregNo, IRExpr* e )
750{
751   vassert(dregNo < 32);
752   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
753   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
754}
755
756/* Architected write to a Neon Integer Dreg.  Handles conditional
757   writes to the register: if guardT == IRTemp_INVALID then the write
758   is unconditional. */
759static void putDRegI64 ( UInt    dregNo,
760                         IRExpr* e,
761                         IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
762{
763   /* So, generate either an unconditional or a conditional write to
764      the reg. */
765   if (guardT == IRTemp_INVALID) {
766      /* unconditional write */
767      llPutDRegI64( dregNo, e );
768   } else {
769      llPutDRegI64( dregNo,
770                    IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
771                                e, llGetDRegI64(dregNo) ));
772   }
773}
774
775/* ---------------- Quad registers ---------------- */
776
777static Int quadGuestRegOffset ( UInt qregNo )
778{
779   /* Do we care about endianness here?  Probably do if we ever get
780      into the situation of dealing with the 64 bit Neon registers. */
781   switch (qregNo) {
782      case 0:  return OFFB_D0;
783      case 1:  return OFFB_D2;
784      case 2:  return OFFB_D4;
785      case 3:  return OFFB_D6;
786      case 4:  return OFFB_D8;
787      case 5:  return OFFB_D10;
788      case 6:  return OFFB_D12;
789      case 7:  return OFFB_D14;
790      case 8:  return OFFB_D16;
791      case 9:  return OFFB_D18;
792      case 10: return OFFB_D20;
793      case 11: return OFFB_D22;
794      case 12: return OFFB_D24;
795      case 13: return OFFB_D26;
796      case 14: return OFFB_D28;
797      case 15: return OFFB_D30;
798      default: vassert(0);
799   }
800}
801
802/* Plain ("low level") read from a Neon Qreg. */
803static IRExpr* llGetQReg ( UInt qregNo )
804{
805   vassert(qregNo < 16);
806   return IRExpr_Get( quadGuestRegOffset(qregNo), Ity_V128 );
807}
808
809/* Architected read from a Neon Qreg. */
810static IRExpr* getQReg ( UInt qregNo ) {
811   return llGetQReg( qregNo );
812}
813
814/* Plain ("low level") write to a Neon Qreg. */
815static void llPutQReg ( UInt qregNo, IRExpr* e )
816{
817   vassert(qregNo < 16);
818   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
819   stmt( IRStmt_Put(quadGuestRegOffset(qregNo), e) );
820}
821
822/* Architected write to a Neon Qreg.  Handles conditional writes to the
823   register: if guardT == IRTemp_INVALID then the write is
824   unconditional. */
825static void putQReg ( UInt    qregNo,
826                      IRExpr* e,
827                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
828{
829   /* So, generate either an unconditional or a conditional write to
830      the reg. */
831   if (guardT == IRTemp_INVALID) {
832      /* unconditional write */
833      llPutQReg( qregNo, e );
834   } else {
835      llPutQReg( qregNo,
836                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
837                             e, llGetQReg(qregNo) ));
838   }
839}
840
841
842/* ---------------- Float registers ---------------- */
843
844static Int floatGuestRegOffset ( UInt fregNo )
845{
846   /* Start with the offset of the containing double, and then correct
847      for endianness.  Actually this is completely bogus and needs
848      careful thought. */
849   Int off;
850   vassert(fregNo < 32);
851   off = doubleGuestRegOffset(fregNo >> 1);
852   if (host_is_bigendian) {
853      vassert(0);
854   } else {
855      if (fregNo & 1)
856         off += 4;
857   }
858   return off;
859}
860
861/* Plain ("low level") read from a VFP Freg. */
862static IRExpr* llGetFReg ( UInt fregNo )
863{
864   vassert(fregNo < 32);
865   return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
866}
867
868/* Architected read from a VFP Freg. */
869static IRExpr* getFReg ( UInt fregNo ) {
870   return llGetFReg( fregNo );
871}
872
873/* Plain ("low level") write to a VFP Freg. */
874static void llPutFReg ( UInt fregNo, IRExpr* e )
875{
876   vassert(fregNo < 32);
877   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
878   stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
879}
880
881/* Architected write to a VFP Freg.  Handles conditional writes to the
882   register: if guardT == IRTemp_INVALID then the write is
883   unconditional. */
884static void putFReg ( UInt    fregNo,
885                      IRExpr* e,
886                      IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
887{
888   /* So, generate either an unconditional or a conditional write to
889      the reg. */
890   if (guardT == IRTemp_INVALID) {
891      /* unconditional write */
892      llPutFReg( fregNo, e );
893   } else {
894      llPutFReg( fregNo,
895                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
896                             e, llGetFReg(fregNo) ));
897   }
898}
899
900
901/* ---------------- Misc registers ---------------- */
902
903static void putMiscReg32 ( UInt    gsoffset,
904                           IRExpr* e, /* :: Ity_I32 */
905                           IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
906{
907   switch (gsoffset) {
908      case OFFB_FPSCR:   break;
909      case OFFB_QFLAG32: break;
910      case OFFB_GEFLAG0: break;
911      case OFFB_GEFLAG1: break;
912      case OFFB_GEFLAG2: break;
913      case OFFB_GEFLAG3: break;
914      default: vassert(0); /* awaiting more cases */
915   }
916   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
917
918   if (guardT == IRTemp_INVALID) {
919      /* unconditional write */
920      stmt(IRStmt_Put(gsoffset, e));
921   } else {
922      stmt(IRStmt_Put(
923         gsoffset,
924         IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
925                     e, IRExpr_Get(gsoffset, Ity_I32) )
926      ));
927   }
928}
929
930static IRTemp get_ITSTATE ( void )
931{
932   ASSERT_IS_THUMB;
933   IRTemp t = newTemp(Ity_I32);
934   assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
935   return t;
936}
937
938static void put_ITSTATE ( IRTemp t )
939{
940   ASSERT_IS_THUMB;
941   stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
942}
943
944static IRTemp get_QFLAG32 ( void )
945{
946   IRTemp t = newTemp(Ity_I32);
947   assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
948   return t;
949}
950
951static void put_QFLAG32 ( IRTemp t, IRTemp condT )
952{
953   putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
954}
955
956/* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
957   Status Register) to indicate that overflow or saturation occurred.
   Nb: 'e' must be zero to denote no saturation, and any nonzero
   value to indicate saturation. */
960static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
961{
962   IRTemp old = get_QFLAG32();
963   IRTemp nyu = newTemp(Ity_I32);
964   assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
965   put_QFLAG32(nyu, condT);
966}
967
968/* Generate code to set APSR.GE[flagNo]. Each fn call sets 1 bit.
969   flagNo: which flag bit to set [3...0]
970   lowbits_to_ignore:  0 = look at all 32 bits
971                       8 = look at top 24 bits only
972                      16 = look at top 16 bits only
973                      31 = look at the top bit only
974   e: input value to be evaluated.
975   The new value is taken from 'e' with the lowest 'lowbits_to_ignore'
976   masked out.  If the resulting value is zero then the GE flag is
977   set to 0; any other value sets the flag to 1. */
978static void put_GEFLAG32 ( Int flagNo,            /* 0, 1, 2 or 3 */
979                           Int lowbits_to_ignore, /* 0, 8, 16 or 31   */
980                           IRExpr* e,             /* Ity_I32 */
981                           IRTemp condT )
982{
983   vassert( flagNo >= 0 && flagNo <= 3 );
984   vassert( lowbits_to_ignore == 0  ||
985            lowbits_to_ignore == 8  ||
986            lowbits_to_ignore == 16 ||
987            lowbits_to_ignore == 31 );
988   IRTemp masked = newTemp(Ity_I32);
989   assign(masked, binop(Iop_Shr32, e, mkU8(lowbits_to_ignore)));
990
991   switch (flagNo) {
992      case 0: putMiscReg32(OFFB_GEFLAG0, mkexpr(masked), condT); break;
993      case 1: putMiscReg32(OFFB_GEFLAG1, mkexpr(masked), condT); break;
994      case 2: putMiscReg32(OFFB_GEFLAG2, mkexpr(masked), condT); break;
995      case 3: putMiscReg32(OFFB_GEFLAG3, mkexpr(masked), condT); break;
996      default: vassert(0);
997   }
998}
999
1000/* Return the (32-bit, zero-or-nonzero representation scheme) of
1001   the specified GE flag. */
1002static IRExpr* get_GEFLAG32( Int flagNo /* 0, 1, 2, 3 */ )
1003{
1004   switch (flagNo) {
1005      case 0: return IRExpr_Get( OFFB_GEFLAG0, Ity_I32 );
1006      case 1: return IRExpr_Get( OFFB_GEFLAG1, Ity_I32 );
1007      case 2: return IRExpr_Get( OFFB_GEFLAG2, Ity_I32 );
1008      case 3: return IRExpr_Get( OFFB_GEFLAG3, Ity_I32 );
1009      default: vassert(0);
1010   }
1011}
1012
1013/* Set all 4 GE flags from the given 32-bit value as follows: GE 3 and
1014   2 are set from bit 31 of the value, and GE 1 and 0 are set from bit
1015   15 of the value.  All other bits are ignored. */
1016static void set_GE_32_10_from_bits_31_15 ( IRTemp t32, IRTemp condT )
1017{
1018   IRTemp ge10 = newTemp(Ity_I32);
1019   IRTemp ge32 = newTemp(Ity_I32);
1020   assign(ge10, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
1021   assign(ge32, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
1022   put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
1023   put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
1024   put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
1025   put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
1026}
1027
1028
1029/* Set all 4 GE flags from the given 32-bit value as follows: GE 3
1030   from bit 31, GE 2 from bit 23, GE 1 from bit 15, and GE0 from
1031   bit 7.  All other bits are ignored. */
1032static void set_GE_3_2_1_0_from_bits_31_23_15_7 ( IRTemp t32, IRTemp condT )
1033{
1034   IRTemp ge0 = newTemp(Ity_I32);
1035   IRTemp ge1 = newTemp(Ity_I32);
1036   IRTemp ge2 = newTemp(Ity_I32);
1037   IRTemp ge3 = newTemp(Ity_I32);
1038   assign(ge0, binop(Iop_And32, mkexpr(t32), mkU32(0x00000080)));
1039   assign(ge1, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
1040   assign(ge2, binop(Iop_And32, mkexpr(t32), mkU32(0x00800000)));
1041   assign(ge3, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
1042   put_GEFLAG32( 0, 0, mkexpr(ge0), condT );
1043   put_GEFLAG32( 1, 0, mkexpr(ge1), condT );
1044   put_GEFLAG32( 2, 0, mkexpr(ge2), condT );
1045   put_GEFLAG32( 3, 0, mkexpr(ge3), condT );
1046}
1047
1048
1049/* ---------------- FPSCR stuff ---------------- */
1050
1051/* Generate IR to get hold of the rounding mode bits in FPSCR, and
1052   convert them to IR format.  Bind the final result to the
1053   returned temp. */
1054static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1055{
1056   /* The ARMvfp encoding for rounding mode bits is:
1057         00  to nearest
1058         01  to +infinity
1059         10  to -infinity
1060         11  to zero
1061      We need to convert that to the IR encoding:
1062         00  to nearest (the default)
1063         10  to +infinity
1064         01  to -infinity
1065         11  to zero
1066      Which can be done by swapping bits 0 and 1.
1067      The rmode bits are at 23:22 in FPSCR.
1068   */
1069   IRTemp armEncd = newTemp(Ity_I32);
1070   IRTemp swapped = newTemp(Ity_I32);
1071   /* Fish FPSCR[23:22] out, and slide to bottom.  Doesn't matter that
1072      we don't zero out bits 24 and above, since the assignment to
1073      'swapped' will mask them out anyway. */
1074   assign(armEncd,
1075          binop(Iop_Shr32, IRExpr_Get(OFFB_FPSCR, Ity_I32), mkU8(22)));
1076   /* Now swap them. */
1077   assign(swapped,
1078          binop(Iop_Or32,
1079                binop(Iop_And32,
1080                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1081                      mkU32(2)),
1082                binop(Iop_And32,
1083                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1084                      mkU32(1))
1085         ));
1086   return swapped;
1087}
1088
1089
1090/*------------------------------------------------------------*/
1091/*--- Helpers for flag handling and conditional insns      ---*/
1092/*------------------------------------------------------------*/
1093
/* Return the standard assembly mnemonic for 'cond', wrapped in
   braces for disassembly printing, e.g. "{eq}".  ARMCondAL yields ""
   since it is the implied default. */
static const HChar* name_ARMCondcode ( ARMCondcode cond )
{
   switch (cond) {
      case ARMCondEQ:  return "{eq}";
      case ARMCondNE:  return "{ne}";
      case ARMCondHS:  return "{hs}";  // or 'cs'
      case ARMCondLO:  return "{lo}";  // or 'cc'
      case ARMCondMI:  return "{mi}";
      case ARMCondPL:  return "{pl}";
      case ARMCondVS:  return "{vs}";
      case ARMCondVC:  return "{vc}";
      case ARMCondHI:  return "{hi}";
      case ARMCondLS:  return "{ls}";
      case ARMCondGE:  return "{ge}";
      case ARMCondLT:  return "{lt}";
      case ARMCondGT:  return "{gt}";
      case ARMCondLE:  return "{le}";
      case ARMCondAL:  return ""; // {al}: is the default
      case ARMCondNV:  return "{nv}";
      default: vpanic("name_ARMCondcode");
   }
}
1116/* and a handy shorthand for it */
1117static const HChar* nCC ( ARMCondcode cond ) {
1118   return name_ARMCondcode(cond);
1119}
1120
1121
1122/* Build IR to calculate some particular condition from stored
1123   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1124   Ity_I32, suitable for narrowing.  Although the return type is
1125   Ity_I32, the returned value is either 0 or 1.  'cond' must be
1126   :: Ity_I32 and must denote the condition to compute in
1127   bits 7:4, and be zero everywhere else.
1128*/
static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
   /* And 'cond' had better produce a value in which only bits 7:4 are
      nonzero.  However, obviously we can't assert for that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation".
      However, as per comments above, 'cond' must be supplied
      pre-shifted to this function.

      This pairing scheme requires that the ARM_CC_OP_ values all fit
      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
      8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or32, IRExpr_Get(OFFB_CC_OP, Ity_I32), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I32)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I32,
           0/*regparm*/,
           "armg_calculate_condition", &armg_calculate_condition,
           args
        );

   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2.  (Mask bits
      0 and 3 correspond to args[0] and args[3] respectively.) */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
1163
1164
1165/* Build IR to calculate some particular condition from stored
1166   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1167   Ity_I32, suitable for narrowing.  Although the return type is
1168   Ity_I32, the returned value is either 0 or 1.
1169*/
static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
{
   /* First arg is "(cond << 4) | stored-operation".  This requires
      that the ARM_CC_OP_ values all fit in 4 bits.  Hence we are
      passing a (COND, OP) pair in the lowest 8 bits of the first
      argument.  (The OP half is OR-ed in by the _dyn variant.) */
   vassert(cond >= 0 && cond <= 15);
   return mk_armg_calculate_condition_dyn( mkU32(cond << 4) );
}
1178
1179
1180/* Build IR to calculate just the carry flag from stored
1181   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
1182   Ity_I32. */
1183static IRExpr* mk_armg_calculate_flag_c ( void )
1184{
1185   IRExpr** args
1186      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1187                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1188                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1189                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1190   IRExpr* call
1191      = mkIRExprCCall(
1192           Ity_I32,
1193           0/*regparm*/,
1194           "armg_calculate_flag_c", &armg_calculate_flag_c,
1195           args
1196        );
1197   /* Exclude OP and NDEP from definedness checking.  We're only
1198      interested in DEP1 and DEP2. */
1199   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1200   return call;
1201}
1202
1203
1204/* Build IR to calculate just the overflow flag from stored
1205   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
1206   Ity_I32. */
1207static IRExpr* mk_armg_calculate_flag_v ( void )
1208{
1209   IRExpr** args
1210      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1211                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1212                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1213                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1214   IRExpr* call
1215      = mkIRExprCCall(
1216           Ity_I32,
1217           0/*regparm*/,
1218           "armg_calculate_flag_v", &armg_calculate_flag_v,
1219           args
1220        );
1221   /* Exclude OP and NDEP from definedness checking.  We're only
1222      interested in DEP1 and DEP2. */
1223   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1224   return call;
1225}
1226
1227
1228/* Build IR to calculate N Z C V in bits 31:28 of the
1229   returned word. */
1230static IRExpr* mk_armg_calculate_flags_nzcv ( void )
1231{
1232   IRExpr** args
1233      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1234                       IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1235                       IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1236                       IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1237   IRExpr* call
1238      = mkIRExprCCall(
1239           Ity_I32,
1240           0/*regparm*/,
1241           "armg_calculate_flags_nzcv", &armg_calculate_flags_nzcv,
1242           args
1243        );
1244   /* Exclude OP and NDEP from definedness checking.  We're only
1245      interested in DEP1 and DEP2. */
1246   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1247   return call;
1248}
1249
1250static IRExpr* mk_armg_calculate_flag_qc ( IRExpr* resL, IRExpr* resR, Bool Q )
1251{
1252   IRExpr** args1;
1253   IRExpr** args2;
1254   IRExpr *call1, *call2, *res;
1255
1256   if (Q) {
1257      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(0)),
1258                              binop(Iop_GetElem32x4, resL, mkU8(1)),
1259                              binop(Iop_GetElem32x4, resR, mkU8(0)),
1260                              binop(Iop_GetElem32x4, resR, mkU8(1)) );
1261      args2 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(2)),
1262                              binop(Iop_GetElem32x4, resL, mkU8(3)),
1263                              binop(Iop_GetElem32x4, resR, mkU8(2)),
1264                              binop(Iop_GetElem32x4, resR, mkU8(3)) );
1265   } else {
1266      args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x2, resL, mkU8(0)),
1267                              binop(Iop_GetElem32x2, resL, mkU8(1)),
1268                              binop(Iop_GetElem32x2, resR, mkU8(0)),
1269                              binop(Iop_GetElem32x2, resR, mkU8(1)) );
1270   }
1271
1272   call1 = mkIRExprCCall(
1273             Ity_I32,
1274             0/*regparm*/,
1275             "armg_calculate_flag_qc", &armg_calculate_flag_qc,
1276             args1
1277          );
1278   if (Q) {
1279      call2 = mkIRExprCCall(
1280                Ity_I32,
1281                0/*regparm*/,
1282                "armg_calculate_flag_qc", &armg_calculate_flag_qc,
1283                args2
1284             );
1285   }
1286   if (Q) {
1287      res = binop(Iop_Or32, call1, call2);
1288   } else {
1289      res = call1;
1290   }
1291   return res;
1292}
1293
1294// FIXME: this is named wrongly .. looks like a sticky set of
1295// QC, not a write to it.
1296static void setFlag_QC ( IRExpr* resL, IRExpr* resR, Bool Q,
1297                         IRTemp condT )
1298{
1299   putMiscReg32 (OFFB_FPSCR,
1300                 binop(Iop_Or32,
1301                       IRExpr_Get(OFFB_FPSCR, Ity_I32),
1302                       binop(Iop_Shl32,
1303                             mk_armg_calculate_flag_qc(resL, resR, Q),
1304                             mkU8(27))),
1305                 condT);
1306}
1307
1308/* Build IR to conditionally set the flags thunk.  As with putIReg, if
1309   guard is IRTemp_INVALID then it's unconditional, else it holds a
1310   condition :: Ity_I32. */
1311static
1312void setFlags_D1_D2_ND ( UInt cc_op, IRTemp t_dep1,
1313                         IRTemp t_dep2, IRTemp t_ndep,
1314                         IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1315{
1316   vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I32));
1317   vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I32));
1318   vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I32));
1319   vassert(cc_op >= ARMG_CC_OP_COPY && cc_op < ARMG_CC_OP_NUMBER);
1320   if (guardT == IRTemp_INVALID) {
1321      /* unconditional */
1322      stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(cc_op) ));
1323      stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1324      stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1325      stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1326   } else {
1327      /* conditional */
1328      IRTemp c1 = newTemp(Ity_I1);
1329      assign( c1, binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)) );
1330      stmt( IRStmt_Put(
1331               OFFB_CC_OP,
1332               IRExpr_ITE( mkexpr(c1),
1333                           mkU32(cc_op),
1334                           IRExpr_Get(OFFB_CC_OP, Ity_I32) ) ));
1335      stmt( IRStmt_Put(
1336               OFFB_CC_DEP1,
1337               IRExpr_ITE( mkexpr(c1),
1338                           mkexpr(t_dep1),
1339                           IRExpr_Get(OFFB_CC_DEP1, Ity_I32) ) ));
1340      stmt( IRStmt_Put(
1341               OFFB_CC_DEP2,
1342               IRExpr_ITE( mkexpr(c1),
1343                           mkexpr(t_dep2),
1344                           IRExpr_Get(OFFB_CC_DEP2, Ity_I32) ) ));
1345      stmt( IRStmt_Put(
1346               OFFB_CC_NDEP,
1347               IRExpr_ITE( mkexpr(c1),
1348                           mkexpr(t_ndep),
1349                           IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ) ));
1350   }
1351}
1352
1353
1354/* Minor variant of the above that sets NDEP to zero (if it
1355   sets it at all) */
1356static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
1357                             IRTemp t_dep2,
1358                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1359{
1360   IRTemp z32 = newTemp(Ity_I32);
1361   assign( z32, mkU32(0) );
1362   setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
1363}
1364
1365
1366/* Minor variant of the above that sets DEP2 to zero (if it
1367   sets it at all) */
1368static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
1369                             IRTemp t_ndep,
1370                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1371{
1372   IRTemp z32 = newTemp(Ity_I32);
1373   assign( z32, mkU32(0) );
1374   setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
1375}
1376
1377
1378/* Minor variant of the above that sets DEP2 and NDEP to zero (if it
1379   sets them at all) */
1380static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
1381                          IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1382{
1383   IRTemp z32 = newTemp(Ity_I32);
1384   assign( z32, mkU32(0) );
1385   setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
1386}
1387
1388
1389/* ARM only */
1390/* Generate a side-exit to the next instruction, if the given guard
1391   expression :: Ity_I32 is 0 (note!  the side exit is taken if the
1392   condition is false!)  This is used to skip over conditional
1393   instructions which we can't generate straight-line code for, either
1394   because they are too complex or (more likely) they potentially
1395   generate exceptions.
1396*/
1397static void mk_skip_over_A32_if_cond_is_false (
1398               IRTemp guardT /* :: Ity_I32, 0 or 1 */
1399            )
1400{
1401   ASSERT_IS_ARM;
1402   vassert(guardT != IRTemp_INVALID);
1403   vassert(0 == (guest_R15_curr_instr_notENC & 3));
1404   stmt( IRStmt_Exit(
1405            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1406            Ijk_Boring,
1407            IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)),
1408            OFFB_R15T
1409       ));
1410}
1411
1412/* Thumb16 only */
1413/* ditto, but jump over a 16-bit thumb insn */
1414static void mk_skip_over_T16_if_cond_is_false (
1415               IRTemp guardT /* :: Ity_I32, 0 or 1 */
1416            )
1417{
1418   ASSERT_IS_THUMB;
1419   vassert(guardT != IRTemp_INVALID);
1420   vassert(0 == (guest_R15_curr_instr_notENC & 1));
1421   stmt( IRStmt_Exit(
1422            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1423            Ijk_Boring,
1424            IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)),
1425            OFFB_R15T
1426       ));
1427}
1428
1429
1430/* Thumb32 only */
1431/* ditto, but jump over a 32-bit thumb insn */
1432static void mk_skip_over_T32_if_cond_is_false (
1433               IRTemp guardT /* :: Ity_I32, 0 or 1 */
1434            )
1435{
1436   ASSERT_IS_THUMB;
1437   vassert(guardT != IRTemp_INVALID);
1438   vassert(0 == (guest_R15_curr_instr_notENC & 1));
1439   stmt( IRStmt_Exit(
1440            unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1441            Ijk_Boring,
1442            IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)),
1443            OFFB_R15T
1444       ));
1445}
1446
1447
1448/* Thumb16 and Thumb32 only
1449   Generate a SIGILL followed by a restart of the current instruction
1450   if the given temp is nonzero. */
1451static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ )
1452{
1453   ASSERT_IS_THUMB;
1454   vassert(t != IRTemp_INVALID);
1455   vassert(0 == (guest_R15_curr_instr_notENC & 1));
1456   stmt(
1457      IRStmt_Exit(
1458         binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
1459         Ijk_NoDecode,
1460         IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1)),
1461         OFFB_R15T
1462      )
1463   );
1464}
1465
1466
1467/* Inspect the old_itstate, and generate a SIGILL if it indicates that
1468   we are currently in an IT block and are not the last in the block.
1469   This also rolls back guest_ITSTATE to its old value before the exit
1470   and restores it to its new value afterwards.  This is so that if
1471   the exit is taken, we have an up to date version of ITSTATE
1472   available.  Without doing that, we have no hope of making precise
1473   exceptions work. */
1474static void gen_SIGILL_T_if_in_but_NLI_ITBlock (
1475               IRTemp old_itstate /* :: Ity_I32 */,
1476               IRTemp new_itstate /* :: Ity_I32 */
1477            )
1478{
1479   ASSERT_IS_THUMB;
1480   put_ITSTATE(old_itstate); // backout
1481   IRTemp guards_for_next3 = newTemp(Ity_I32);
1482   assign(guards_for_next3,
1483          binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
1484   gen_SIGILL_T_if_nonzero(guards_for_next3);
1485   put_ITSTATE(new_itstate); //restore
1486}
1487
1488
1489/* Simpler version of the above, which generates a SIGILL if
1490   we're anywhere within an IT block. */
1491static void gen_SIGILL_T_if_in_ITBlock (
1492               IRTemp old_itstate /* :: Ity_I32 */,
1493               IRTemp new_itstate /* :: Ity_I32 */
1494            )
1495{
1496   put_ITSTATE(old_itstate); // backout
1497   gen_SIGILL_T_if_nonzero(old_itstate);
1498   put_ITSTATE(new_itstate); //restore
1499}
1500
1501
1502/* Generate an APSR value, from the NZCV thunk, and
1503   from QFLAG32 and GEFLAG0 .. GEFLAG3. */
1504static IRTemp synthesise_APSR ( void )
1505{
1506   IRTemp res1 = newTemp(Ity_I32);
1507   // Get NZCV
1508   assign( res1, mk_armg_calculate_flags_nzcv() );
1509   // OR in the Q value
1510   IRTemp res2 = newTemp(Ity_I32);
1511   assign(
1512      res2,
1513      binop(Iop_Or32,
1514            mkexpr(res1),
1515            binop(Iop_Shl32,
1516                  unop(Iop_1Uto32,
1517                       binop(Iop_CmpNE32,
1518                             mkexpr(get_QFLAG32()),
1519                             mkU32(0))),
1520                  mkU8(ARMG_CC_SHIFT_Q)))
1521   );
1522   // OR in GE0 .. GE3
1523   IRExpr* ge0
1524      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(0), mkU32(0)));
1525   IRExpr* ge1
1526      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(1), mkU32(0)));
1527   IRExpr* ge2
1528      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(2), mkU32(0)));
1529   IRExpr* ge3
1530      = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(3), mkU32(0)));
1531   IRTemp res3 = newTemp(Ity_I32);
1532   assign(res3,
1533          binop(Iop_Or32,
1534                mkexpr(res2),
1535                binop(Iop_Or32,
1536                      binop(Iop_Or32,
1537                            binop(Iop_Shl32, ge0, mkU8(16)),
1538                            binop(Iop_Shl32, ge1, mkU8(17))),
1539                      binop(Iop_Or32,
1540                            binop(Iop_Shl32, ge2, mkU8(18)),
1541                            binop(Iop_Shl32, ge3, mkU8(19))) )));
1542   return res3;
1543}
1544
1545
1546/* and the inverse transformation: given an APSR value,
1547   set the NZCV thunk, the Q flag, and the GE flags. */
1548static void desynthesise_APSR ( Bool write_nzcvq, Bool write_ge,
1549                                IRTemp apsrT, IRTemp condT )
1550{
1551   vassert(write_nzcvq || write_ge);
1552   if (write_nzcvq) {
1553      // Do NZCV
1554      IRTemp immT = newTemp(Ity_I32);
1555      assign(immT, binop(Iop_And32, mkexpr(apsrT), mkU32(0xF0000000)) );
1556      setFlags_D1(ARMG_CC_OP_COPY, immT, condT);
1557      // Do Q
1558      IRTemp qnewT = newTemp(Ity_I32);
1559      assign(qnewT, binop(Iop_And32, mkexpr(apsrT), mkU32(ARMG_CC_MASK_Q)));
1560      put_QFLAG32(qnewT, condT);
1561   }
1562   if (write_ge) {
1563      // Do GE3..0
1564      put_GEFLAG32(0, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<16)),
1565                   condT);
1566      put_GEFLAG32(1, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<17)),
1567                   condT);
1568      put_GEFLAG32(2, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<18)),
1569                   condT);
1570      put_GEFLAG32(3, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<19)),
1571                   condT);
1572   }
1573}
1574
1575
1576/*------------------------------------------------------------*/
1577/*--- Helpers for saturation                               ---*/
1578/*------------------------------------------------------------*/
1579
1580/* FIXME: absolutely the only diff. between (a) armUnsignedSatQ and
1581   (b) armSignedSatQ is that in (a) the floor is set to 0, whereas in
1582   (b) the floor is computed from the value of imm5.  these two fnsn
1583   should be commoned up. */
1584
1585/* UnsignedSatQ(): 'clamp' each value so it lies between 0 <= x <= (2^N)-1
1586   Optionally return flag resQ saying whether saturation occurred.
1587   See definition in manual, section A2.2.1, page 41
1588   (bits(N), boolean) UnsignedSatQ( integer i, integer N )
1589   {
1590     if ( i > (2^N)-1 ) { result = (2^N)-1; saturated = TRUE; }
1591     elsif ( i < 0 )    { result = 0; saturated = TRUE; }
1592     else               { result = i; saturated = FALSE; }
1593     return ( result<N-1:0>, saturated );
1594   }
1595*/
1596static void armUnsignedSatQ( IRTemp* res,  /* OUT - Ity_I32 */
1597                             IRTemp* resQ, /* OUT - Ity_I32  */
1598                             IRTemp regT,  /* value to clamp - Ity_I32 */
1599                             UInt imm5 )   /* saturation ceiling */
1600{
1601   UInt ceil  = (1 << imm5) - 1;    // (2^imm5)-1
1602   UInt floor = 0;
1603
1604   IRTemp nd0 = newTemp(Ity_I32);
1605   IRTemp nd1 = newTemp(Ity_I32);
1606   IRTemp nd2 = newTemp(Ity_I1);
1607   IRTemp nd3 = newTemp(Ity_I32);
1608   IRTemp nd4 = newTemp(Ity_I32);
1609   IRTemp nd5 = newTemp(Ity_I1);
1610   IRTemp nd6 = newTemp(Ity_I32);
1611
1612   assign( nd0, mkexpr(regT) );
1613   assign( nd1, mkU32(ceil) );
1614   assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
1615   assign( nd3, IRExpr_ITE(mkexpr(nd2), mkexpr(nd1), mkexpr(nd0)) );
1616   assign( nd4, mkU32(floor) );
1617   assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
1618   assign( nd6, IRExpr_ITE(mkexpr(nd5), mkexpr(nd4), mkexpr(nd3)) );
1619   assign( *res, mkexpr(nd6) );
1620
1621   /* if saturation occurred, then resQ is set to some nonzero value
1622      if sat did not occur, resQ is guaranteed to be zero. */
1623   if (resQ) {
1624      assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
1625   }
1626}
1627
1628
1629/* SignedSatQ(): 'clamp' each value so it lies between  -2^N <= x <= (2^N) - 1
1630   Optionally return flag resQ saying whether saturation occurred.
1631   - see definition in manual, section A2.2.1, page 41
1632   (bits(N), boolean ) SignedSatQ( integer i, integer N )
1633   {
1634     if ( i > 2^(N-1) - 1 )    { result = 2^(N-1) - 1; saturated = TRUE; }
     elsif ( i < -(2^(N-1)) )  { result = -(2^(N-1));  saturated = TRUE; }
1636     else                      { result = i;           saturated = FALSE; }
1637     return ( result[N-1:0], saturated );
1638   }
1639*/
1640static void armSignedSatQ( IRTemp regT,    /* value to clamp - Ity_I32 */
1641                           UInt imm5,      /* saturation ceiling */
1642                           IRTemp* res,    /* OUT - Ity_I32 */
1643                           IRTemp* resQ )  /* OUT - Ity_I32  */
1644{
1645   Int ceil  =  (1 << (imm5-1)) - 1;  //  (2^(imm5-1))-1
1646   Int floor = -(1 << (imm5-1));      // -(2^(imm5-1))
1647
1648   IRTemp nd0 = newTemp(Ity_I32);
1649   IRTemp nd1 = newTemp(Ity_I32);
1650   IRTemp nd2 = newTemp(Ity_I1);
1651   IRTemp nd3 = newTemp(Ity_I32);
1652   IRTemp nd4 = newTemp(Ity_I32);
1653   IRTemp nd5 = newTemp(Ity_I1);
1654   IRTemp nd6 = newTemp(Ity_I32);
1655
1656   assign( nd0, mkexpr(regT) );
1657   assign( nd1, mkU32(ceil) );
1658   assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
1659   assign( nd3, IRExpr_ITE( mkexpr(nd2), mkexpr(nd1), mkexpr(nd0) ) );
1660   assign( nd4, mkU32(floor) );
1661   assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
1662   assign( nd6, IRExpr_ITE( mkexpr(nd5), mkexpr(nd4), mkexpr(nd3) ) );
1663   assign( *res, mkexpr(nd6) );
1664
1665   /* if saturation occurred, then resQ is set to some nonzero value
1666      if sat did not occur, resQ is guaranteed to be zero. */
1667   if (resQ) {
1668     assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
1669   }
1670}
1671
1672
1673/* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
1674   overflow occurred for 32-bit addition.  Needs both args and the
1675   result.  HD p27. */
1676static
1677IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
1678                                      IRTemp argL, IRTemp argR )
1679{
1680   IRTemp res = newTemp(Ity_I32);
1681   assign(res, resE);
1682   return
1683      binop( Iop_Shr32,
1684             binop( Iop_And32,
1685                    binop( Iop_Xor32, mkexpr(res), mkexpr(argL) ),
1686                    binop( Iop_Xor32, mkexpr(res), mkexpr(argR) )),
1687             mkU8(31) );
1688}
1689
1690/* Similarly .. also from HD p27 .. */
1691static
1692IRExpr* signed_overflow_after_Sub32 ( IRExpr* resE,
1693                                      IRTemp argL, IRTemp argR )
1694{
1695   IRTemp res = newTemp(Ity_I32);
1696   assign(res, resE);
1697   return
1698      binop( Iop_Shr32,
1699             binop( Iop_And32,
1700                    binop( Iop_Xor32, mkexpr(argL), mkexpr(argR) ),
1701                    binop( Iop_Xor32, mkexpr(res),  mkexpr(argL) )),
1702             mkU8(31) );
1703}
1704
1705
1706/*------------------------------------------------------------*/
1707/*--- Larger helpers                                       ---*/
1708/*------------------------------------------------------------*/
1709
1710/* Compute both the result and new C flag value for a LSL by an imm5
1711   or by a register operand.  May generate reads of the old C value
1712   (hence only safe to use before any writes to guest state happen).
1713   Are factored out so can be used by both ARM and Thumb.
1714
1715   Note that in compute_result_and_C_after_{LSL,LSR,ASR}_by{imm5,reg},
1716   "res" (the result)  is a.k.a. "shop", shifter operand
1717   "newC" (the new C)  is a.k.a. "shco", shifter carry out
1718
1719   The calling convention for res and newC is a bit funny.  They could
1720   be passed by value, but instead are passed by ref.
1721
1722   The C (shco) value computed must be zero in bits 31:1, as the IR
1723   optimisations for flag handling (guest_arm_spechelper) rely on
1724   that, and the slow-path handlers (armg_calculate_flags_nzcv) assert
1725   for it.  Same applies to all these functions that compute shco
1726   after a shift or rotate, not just this one.
1727*/
1728
1729static void compute_result_and_C_after_LSL_by_imm5 (
1730               /*OUT*/HChar* buf,
1731               IRTemp* res,
1732               IRTemp* newC,
1733               IRTemp rMt, UInt shift_amt, /* operands */
1734               UInt rM      /* only for debug printing */
1735            )
1736{
1737   if (shift_amt == 0) {
1738      if (newC) {
1739         assign( *newC, mk_armg_calculate_flag_c() );
1740      }
1741      assign( *res, mkexpr(rMt) );
1742      DIS(buf, "r%u", rM);
1743   } else {
1744      vassert(shift_amt >= 1 && shift_amt <= 31);
1745      if (newC) {
1746         assign( *newC,
1747                 binop(Iop_And32,
1748                       binop(Iop_Shr32, mkexpr(rMt),
1749                                        mkU8(32 - shift_amt)),
1750                       mkU32(1)));
1751      }
1752      assign( *res,
1753              binop(Iop_Shl32, mkexpr(rMt), mkU8(shift_amt)) );
1754      DIS(buf, "r%u, LSL #%u", rM, shift_amt);
1755   }
1756}
1757
1758
/* Compute the shifter operand (*res) and, if newC is non-NULL, the
   shifter carry out (*newC) for LSL by a register amount.  May read
   the old C flag, so only safe before any guest-state writes. */
static void compute_result_and_C_after_LSL_by_reg (
               /*OUT*/HChar* buf,
               IRTemp* res,
               IRTemp* newC,
               IRTemp rMt, IRTemp rSt,  /* operands */
               UInt rM,    UInt rS      /* only for debug printing */
            )
{
   // shift left in range 0 .. 255
   // amt  = rS & 255
   // res  = amt < 32 ?  Rm << amt  : 0
   // newC = amt == 0     ? oldC  :
   //        amt in 1..32 ?  Rm[32-amt]  : 0
   IRTemp amtT = newTemp(Ity_I32);
   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   if (newC) {
      /* mux0X(amt == 0,
               mux0X(amt < 32,
                     0,
                     Rm[(32-amt) & 31]),
               oldC)
      */
      /* About the best you can do is pray that iropt is able
         to nuke most or all of the following junk. */
      IRTemp oldC = newTemp(Ity_I32);
      assign(oldC, mk_armg_calculate_flag_c() );
      /* The inner shift amount (32-amt) is masked with 31 so the
         Iop_Shr32 argument always stays in the IR-defined 0..31
         range; the amt > 32 case is handled by the outer ITE. */
      assign(
         *newC,
         IRExpr_ITE(
            binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
            mkexpr(oldC),
            IRExpr_ITE(
               binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
               binop(Iop_And32,
                     binop(Iop_Shr32,
                           mkexpr(rMt),
                           unop(Iop_32to8,
                                binop(Iop_And32,
                                      binop(Iop_Sub32,
                                            mkU32(32),
                                            mkexpr(amtT)),
                                      mkU32(31)
                                )
                           )
                     ),
                     mkU32(1)
                     ),
               mkU32(0)
            )
         )
      );
   }
   // (Rm << (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
   // Lhs of the & limits the shift to 31 bits, so as to
   // give known IR semantics.  Rhs of the & is all 1s for
   // Rs <= 31 and all 0s for Rs >= 32.
   assign(
      *res,
      binop(
         Iop_And32,
         binop(Iop_Shl32,
               mkexpr(rMt),
               unop(Iop_32to8,
                    binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
         binop(Iop_Sar32,
               binop(Iop_Sub32,
                     mkexpr(amtT),
                     mkU32(32)),
               mkU8(31))));
    DIS(buf, "r%u, LSL r%u", rM, rS);
}
1830
1831
1832static void compute_result_and_C_after_LSR_by_imm5 (
1833               /*OUT*/HChar* buf,
1834               IRTemp* res,
1835               IRTemp* newC,
1836               IRTemp rMt, UInt shift_amt, /* operands */
1837               UInt rM      /* only for debug printing */
1838            )
1839{
1840   if (shift_amt == 0) {
1841      // conceptually a 32-bit shift, however:
1842      // res  = 0
1843      // newC = Rm[31]
1844      if (newC) {
1845         assign( *newC,
1846                 binop(Iop_And32,
1847                       binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
1848                       mkU32(1)));
1849      }
1850      assign( *res, mkU32(0) );
1851      DIS(buf, "r%u, LSR #0(a.k.a. 32)", rM);
1852   } else {
1853      // shift in range 1..31
1854      // res  = Rm >>u shift_amt
1855      // newC = Rm[shift_amt - 1]
1856      vassert(shift_amt >= 1 && shift_amt <= 31);
1857      if (newC) {
1858         assign( *newC,
1859                 binop(Iop_And32,
1860                       binop(Iop_Shr32, mkexpr(rMt),
1861                                        mkU8(shift_amt - 1)),
1862                       mkU32(1)));
1863      }
1864      assign( *res,
1865              binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)) );
1866      DIS(buf, "r%u, LSR #%u", rM, shift_amt);
1867   }
1868}
1869
1870
/* Compute the shifter operand (*res) and, if newC is non-NULL, the
   shifter carry out (*newC) for LSR by a register amount.  May read
   the old C flag, so only safe before any guest-state writes. */
static void compute_result_and_C_after_LSR_by_reg (
               /*OUT*/HChar* buf,
               IRTemp* res,
               IRTemp* newC,
               IRTemp rMt, IRTemp rSt,  /* operands */
               UInt rM,    UInt rS      /* only for debug printing */
            )
{
   // shift right in range 0 .. 255
   // amt = rS & 255
   // res  = amt < 32 ?  Rm >>u amt  : 0
   // newC = amt == 0     ? oldC  :
   //        amt in 1..32 ?  Rm[amt-1]  : 0
   IRTemp amtT = newTemp(Ity_I32);
   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   if (newC) {
      /* mux0X(amt == 0,
               mux0X(amt < 32,
                     0,
                     Rm[(amt-1) & 31]),
               oldC)
      */
      IRTemp oldC = newTemp(Ity_I32);
      assign(oldC, mk_armg_calculate_flag_c() );
      /* The (amt-1) index is masked with 31 to keep the Iop_Shr32
         argument in the IR-defined 0..31 range; the amt > 32 case
         is handled by the outer ITE. */
      assign(
         *newC,
         IRExpr_ITE(
            binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
            mkexpr(oldC),
            IRExpr_ITE(
               binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
               binop(Iop_And32,
                     binop(Iop_Shr32,
                           mkexpr(rMt),
                           unop(Iop_32to8,
                                binop(Iop_And32,
                                      binop(Iop_Sub32,
                                            mkexpr(amtT),
                                            mkU32(1)),
                                      mkU32(31)
                                )
                           )
                     ),
                     mkU32(1)
                     ),
               mkU32(0)
            )
         )
      );
   }
   // (Rm >>u (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
   // Lhs of the & limits the shift to 31 bits, so as to
   // give known IR semantics.  Rhs of the & is all 1s for
   // Rs <= 31 and all 0s for Rs >= 32.
   assign(
      *res,
      binop(
         Iop_And32,
         binop(Iop_Shr32,
               mkexpr(rMt),
               unop(Iop_32to8,
                    binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
         binop(Iop_Sar32,
               binop(Iop_Sub32,
                     mkexpr(amtT),
                     mkU32(32)),
               mkU8(31))));
    DIS(buf, "r%u, LSR r%u", rM, rS);
}
1940
1941
1942static void compute_result_and_C_after_ASR_by_imm5 (
1943               /*OUT*/HChar* buf,
1944               IRTemp* res,
1945               IRTemp* newC,
1946               IRTemp rMt, UInt shift_amt, /* operands */
1947               UInt rM      /* only for debug printing */
1948            )
1949{
1950   if (shift_amt == 0) {
1951      // conceptually a 32-bit shift, however:
1952      // res  = Rm >>s 31
1953      // newC = Rm[31]
1954      if (newC) {
1955         assign( *newC,
1956                 binop(Iop_And32,
1957                       binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
1958                       mkU32(1)));
1959      }
1960      assign( *res, binop(Iop_Sar32, mkexpr(rMt), mkU8(31)) );
1961      DIS(buf, "r%u, ASR #0(a.k.a. 32)", rM);
1962   } else {
1963      // shift in range 1..31
1964      // res = Rm >>s shift_amt
1965      // newC = Rm[shift_amt - 1]
1966      vassert(shift_amt >= 1 && shift_amt <= 31);
1967      if (newC) {
1968         assign( *newC,
1969                 binop(Iop_And32,
1970                       binop(Iop_Shr32, mkexpr(rMt),
1971                                        mkU8(shift_amt - 1)),
1972                       mkU32(1)));
1973      }
1974      assign( *res,
1975              binop(Iop_Sar32, mkexpr(rMt), mkU8(shift_amt)) );
1976      DIS(buf, "r%u, ASR #%u", rM, shift_amt);
1977   }
1978}
1979
1980
/* Compute the shifter operand (*res) and, if newC is non-NULL, the
   shifter carry out (*newC) for ASR by a register amount.  May read
   the old C flag, so only safe before any guest-state writes. */
static void compute_result_and_C_after_ASR_by_reg (
               /*OUT*/HChar* buf,
               IRTemp* res,
               IRTemp* newC,
               IRTemp rMt, IRTemp rSt,  /* operands */
               UInt rM,    UInt rS      /* only for debug printing */
            )
{
   // arithmetic shift right in range 0 .. 255
   // amt = rS & 255
   // res  = amt < 32 ?  Rm >>s amt  : Rm >>s 31
   // newC = amt == 0     ? oldC  :
   //        amt in 1..32 ?  Rm[amt-1]  : Rm[31]
   IRTemp amtT = newTemp(Ity_I32);
   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   if (newC) {
      /* mux0X(amt == 0,
               mux0X(amt < 32,
                     Rm[31],
                     Rm[(amt-1) & 31])
               oldC)
      */
      IRTemp oldC = newTemp(Ity_I32);
      assign(oldC, mk_armg_calculate_flag_c() );
      /* Unlike LSL/LSR, amounts > 32 produce Rm[31] (the sign bit)
         rather than 0, hence the different innermost ITE arm. */
      assign(
         *newC,
         IRExpr_ITE(
            binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
            mkexpr(oldC),
            IRExpr_ITE(
               binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
               binop(Iop_And32,
                     binop(Iop_Shr32,
                           mkexpr(rMt),
                           unop(Iop_32to8,
                                binop(Iop_And32,
                                      binop(Iop_Sub32,
                                            mkexpr(amtT),
                                            mkU32(1)),
                                      mkU32(31)
                                )
                           )
                     ),
                     mkU32(1)
                     ),
               binop(Iop_And32,
                     binop(Iop_Shr32,
                           mkexpr(rMt),
                           mkU8(31)
                     ),
                     mkU32(1)
               )
            )
         )
      );
   }
   // (Rm >>s (amt <u 32 ? amt : 31))
   assign(
      *res,
      binop(
         Iop_Sar32,
         mkexpr(rMt),
         unop(
            Iop_32to8,
            IRExpr_ITE(
               binop(Iop_CmpLT32U, mkexpr(amtT), mkU32(32)),
               mkexpr(amtT),
               mkU32(31)))));
    DIS(buf, "r%u, ASR r%u", rM, rS);
}
2051
2052
2053static void compute_result_and_C_after_ROR_by_reg (
2054               /*OUT*/HChar* buf,
2055               IRTemp* res,
2056               IRTemp* newC,
2057               IRTemp rMt, IRTemp rSt,  /* operands */
2058               UInt rM,    UInt rS      /* only for debug printing */
2059            )
2060{
2061   // rotate right in range 0 .. 255
2062   // amt = rS & 255
2063   // shop =  Rm `ror` (amt & 31)
2064   // shco =  amt == 0 ? oldC : Rm[(amt-1) & 31]
2065   IRTemp amtT = newTemp(Ity_I32);
2066   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
2067   IRTemp amt5T = newTemp(Ity_I32);
2068   assign( amt5T, binop(Iop_And32, mkexpr(rSt), mkU32(31)) );
2069   IRTemp oldC = newTemp(Ity_I32);
2070   assign(oldC, mk_armg_calculate_flag_c() );
2071   if (newC) {
2072      assign(
2073         *newC,
2074         IRExpr_ITE(
2075            binop(Iop_CmpNE32, mkexpr(amtT), mkU32(0)),
2076            binop(Iop_And32,
2077                  binop(Iop_Shr32,
2078                        mkexpr(rMt),
2079                        unop(Iop_32to8,
2080                             binop(Iop_And32,
2081                                   binop(Iop_Sub32,
2082                                         mkexpr(amtT),
2083                                         mkU32(1)
2084                                   ),
2085                                   mkU32(31)
2086                             )
2087                        )
2088                  ),
2089                  mkU32(1)
2090            ),
2091            mkexpr(oldC)
2092         )
2093      );
2094   }
2095   assign(
2096      *res,
2097      IRExpr_ITE(
2098         binop(Iop_CmpNE32, mkexpr(amt5T), mkU32(0)),
2099         binop(Iop_Or32,
2100               binop(Iop_Shr32,
2101                     mkexpr(rMt),
2102                     unop(Iop_32to8, mkexpr(amt5T))
2103               ),
2104               binop(Iop_Shl32,
2105                     mkexpr(rMt),
2106                     unop(Iop_32to8,
2107                          binop(Iop_Sub32, mkU32(32), mkexpr(amt5T))
2108                     )
2109               )
2110               ),
2111         mkexpr(rMt)
2112      )
2113   );
2114   DIS(buf, "r%u, ROR r#%u", rM, rS);
2115}
2116
2117
2118/* Generate an expression corresponding to the immediate-shift case of
2119   a shifter operand.  This is used both for ARM and Thumb2.
2120
2121   Bind it to a temporary, and return that via *res.  If newC is
2122   non-NULL, also compute a value for the shifter's carry out (in the
   LSB of a word), bind it to a temporary, and return that via *newC.
2124
2125   Generates GETs from the guest state and is therefore not safe to
2126   use once we start doing PUTs to it, for any given instruction.
2127
2128   'how' is encoded thusly:
2129      00b LSL,  01b LSR,  10b ASR,  11b ROR
2130   Most but not all ARM and Thumb integer insns use this encoding.
2131   Be careful to ensure the right value is passed here.
2132*/
static void compute_result_and_C_after_shift_by_imm5 (
               /*OUT*/HChar* buf,
               /*OUT*/IRTemp* res,
               /*OUT*/IRTemp* newC,
               IRTemp  rMt,       /* reg to shift */
               UInt    how,       /* what kind of shift */
               UInt    shift_amt, /* shift amount (0..31) */
               UInt    rM         /* only for debug printing */
            )
{
   vassert(shift_amt < 32);
   vassert(how < 4);

   /* Dispatch on the standard 2-bit shift-kind encoding:
      00b LSL, 01b LSR, 10b ASR, 11b ROR (or RRX when the
      immediate is zero). */
   switch (how) {

      case 0:
         compute_result_and_C_after_LSL_by_imm5(
            buf, res, newC, rMt, shift_amt, rM
         );
         break;

      case 1:
         compute_result_and_C_after_LSR_by_imm5(
            buf, res, newC, rMt, shift_amt, rM
         );
         break;

      case 2:
         compute_result_and_C_after_ASR_by_imm5(
            buf, res, newC, rMt, shift_amt, rM
         );
         break;

      case 3:
         if (shift_amt == 0) {
            IRTemp oldcT = newTemp(Ity_I32);
            // rotate right 1 bit through carry (?)
            // RRX -- described at ARM ARM A5-17
            // res  = (oldC << 31) | (Rm >>u 1)
            // newC = Rm[0]
            if (newC) {
               assign( *newC,
                       binop(Iop_And32, mkexpr(rMt), mkU32(1)));
            }
            assign( oldcT, mk_armg_calculate_flag_c() );
            assign( *res,
                    binop(Iop_Or32,
                          binop(Iop_Shl32, mkexpr(oldcT), mkU8(31)),
                          binop(Iop_Shr32, mkexpr(rMt), mkU8(1))) );
            DIS(buf, "r%u, RRX", rM);
         } else {
            // rotate right in range 1..31
            // res  = Rm `ror` shift_amt
            // newC = Rm[shift_amt - 1]
            vassert(shift_amt >= 1 && shift_amt <= 31);
            if (newC) {
               assign( *newC,
                       binop(Iop_And32,
                             binop(Iop_Shr32, mkexpr(rMt),
                                              mkU8(shift_amt - 1)),
                             mkU32(1)));
            }
            /* ror x, n == (x >>u n) | (x << (32-n)); n is 1..31 here,
               so both shift amounts are in the IR-defined range. */
            assign( *res,
                    binop(Iop_Or32,
                          binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)),
                          binop(Iop_Shl32, mkexpr(rMt),
                                           mkU8(32-shift_amt))));
            DIS(buf, "r%u, ROR #%u", rM, shift_amt);
         }
         break;

      default:
         /*NOTREACHED*/
         vassert(0);
   }
}
2209
2210
2211/* Generate an expression corresponding to the register-shift case of
2212   a shifter operand.  This is used both for ARM and Thumb2.
2213
2214   Bind it to a temporary, and return that via *res.  If newC is
2215   non-NULL, also compute a value for the shifter's carry out (in the
   LSB of a word), bind it to a temporary, and return that via *newC.
2217
2218   Generates GETs from the guest state and is therefore not safe to
2219   use once we start doing PUTs to it, for any given instruction.
2220
2221   'how' is encoded thusly:
2222      00b LSL,  01b LSR,  10b ASR,  11b ROR
2223   Most but not all ARM and Thumb integer insns use this encoding.
2224   Be careful to ensure the right value is passed here.
2225*/
2226static void compute_result_and_C_after_shift_by_reg (
2227               /*OUT*/HChar*  buf,
2228               /*OUT*/IRTemp* res,
2229               /*OUT*/IRTemp* newC,
2230               IRTemp  rMt,       /* reg to shift */
2231               UInt    how,       /* what kind of shift */
2232               IRTemp  rSt,       /* shift amount */
2233               UInt    rM,        /* only for debug printing */
2234               UInt    rS         /* only for debug printing */
2235            )
2236{
2237   vassert(how < 4);
2238   switch (how) {
2239      case 0: { /* LSL */
2240         compute_result_and_C_after_LSL_by_reg(
2241            buf, res, newC, rMt, rSt, rM, rS
2242         );
2243         break;
2244      }
2245      case 1: { /* LSR */
2246         compute_result_and_C_after_LSR_by_reg(
2247            buf, res, newC, rMt, rSt, rM, rS
2248         );
2249         break;
2250      }
2251      case 2: { /* ASR */
2252         compute_result_and_C_after_ASR_by_reg(
2253            buf, res, newC, rMt, rSt, rM, rS
2254         );
2255         break;
2256      }
2257      case 3: { /* ROR */
2258         compute_result_and_C_after_ROR_by_reg(
2259             buf, res, newC, rMt, rSt, rM, rS
2260         );
2261         break;
2262      }
2263      default:
2264         /*NOTREACHED*/
2265         vassert(0);
2266   }
2267}
2268
2269
2270/* Generate an expression corresponding to a shifter_operand, bind it
2271   to a temporary, and return that via *shop.  If shco is non-NULL,
2272   also compute a value for the shifter's carry out (in the LSB of a
2273   word), bind it to a temporary, and return that via *shco.
2274
2275   If for some reason we can't come up with a shifter operand (missing
2276   case?  not really a shifter operand?) return False.
2277
2278   Generates GETs from the guest state and is therefore not safe to
2279   use once we start doing PUTs to it, for any given instruction.
2280
2281   For ARM insns only; not for Thumb.
2282*/
static Bool mk_shifter_operand ( UInt insn_25, UInt insn_11_0,
                                 /*OUT*/IRTemp* shop,
                                 /*OUT*/IRTemp* shco,
                                 /*OUT*/HChar* buf )
{
   UInt insn_4 = (insn_11_0 >> 4) & 1;
   UInt insn_7 = (insn_11_0 >> 7) & 1;
   vassert(insn_25 <= 0x1);
   vassert(insn_11_0 <= 0xFFF);

   /* Caller must hand in invalid temps; we allocate them here so the
      temp types are consistent for all three operand forms below. */
   vassert(shop && *shop == IRTemp_INVALID);
   *shop = newTemp(Ity_I32);

   if (shco) {
      vassert(*shco == IRTemp_INVALID);
      *shco = newTemp(Ity_I32);
   }

   /* 32-bit immediate */

   if (insn_25 == 1) {
      /* immediate: (7:0) rotated right by 2 * (11:8) */
      UInt imm = (insn_11_0 >> 0) & 0xFF;
      UInt rot = 2 * ((insn_11_0 >> 8) & 0xF);
      vassert(rot <= 30);
      imm = ROR32(imm, rot);
      if (shco) {
         /* Carry out: unchanged for a zero rotation, otherwise the
            top bit of the rotated immediate. */
         if (rot == 0) {
            assign( *shco, mk_armg_calculate_flag_c() );
         } else {
            assign( *shco, mkU32( (imm >> 31) & 1 ) );
         }
      }
      DIS(buf, "#0x%x", imm);
      assign( *shop, mkU32(imm) );
      return True;
   }

   /* Shift/rotate by immediate */

   if (insn_25 == 0 && insn_4 == 0) {
      /* Rm (3:0) shifted (6:5) by immediate (11:7) */
      UInt shift_amt = (insn_11_0 >> 7) & 0x1F;
      UInt rM        = (insn_11_0 >> 0) & 0xF;
      UInt how       = (insn_11_0 >> 5) & 3;
      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
      IRTemp rMt = newTemp(Ity_I32);
      assign(rMt, getIRegA(rM));

      vassert(shift_amt <= 31);

      compute_result_and_C_after_shift_by_imm5(
         buf, shop, shco, rMt, how, shift_amt, rM
      );
      return True;
   }

   /* Shift/rotate by register */
   if (insn_25 == 0 && insn_4 == 1) {
      /* Rm (3:0) shifted (6:5) by Rs (11:8) */
      UInt rM  = (insn_11_0 >> 0) & 0xF;
      UInt rS  = (insn_11_0 >> 8) & 0xF;
      UInt how = (insn_11_0 >> 5) & 3;
      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
      IRTemp rMt = newTemp(Ity_I32);
      IRTemp rSt = newTemp(Ity_I32);

      /* NOTE(review): bit 7 set together with bit 4 presumably
         denotes a different instruction class entirely (not a
         shifter operand) -- confirm against the ARM ARM encoding
         tables. */
      if (insn_7 == 1)
         return False; /* not really a shifter operand */

      assign(rMt, getIRegA(rM));
      assign(rSt, getIRegA(rS));

      compute_result_and_C_after_shift_by_reg(
         buf, shop, shco, rMt, how, rSt, rM, rS
      );
      return True;
   }

   vex_printf("mk_shifter_operand(0x%x,0x%x)\n", insn_25, insn_11_0 );
   return False;
}
2365
2366
2367/* ARM only */
2368static
2369IRExpr* mk_EA_reg_plusminus_imm12 ( UInt rN, UInt bU, UInt imm12,
2370                                    /*OUT*/HChar* buf )
2371{
2372   vassert(rN < 16);
2373   vassert(bU < 2);
2374   vassert(imm12 < 0x1000);
2375   HChar opChar = bU == 1 ? '+' : '-';
2376   DIS(buf, "[r%u, #%c%u]", rN, opChar, imm12);
2377   return
2378      binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2379             getIRegA(rN),
2380             mkU32(imm12) );
2381}
2382
2383
2384/* ARM only.
   NB: This is "DecodeImmShift" in newer versions of the ARM ARM.
2386*/
static
IRExpr* mk_EA_reg_plusminus_shifted_reg ( UInt rN, UInt bU, UInt rM,
                                          UInt sh2, UInt imm5,
                                          /*OUT*/HChar* buf )
{
   vassert(rN < 16);
   vassert(bU < 2);
   vassert(rM < 16);
   vassert(sh2 < 4);
   vassert(imm5 < 32);
   HChar   opChar = bU == 1 ? '+' : '-';
   IRExpr* index  = NULL;
   /* sh2 uses the standard 2-bit shift-kind encoding:
      00b LSL, 01b LSR, 10b ASR, 11b ROR (RRX when imm5 == 0). */
   switch (sh2) {
      case 0: /* LSL */
         /* imm5 can be in the range 0 .. 31 inclusive. */
         index = binop(Iop_Shl32, getIRegA(rM), mkU8(imm5));
         DIS(buf, "[r%u, %c r%u LSL #%u]", rN, opChar, rM, imm5);
         break;
      case 1: /* LSR */
         /* imm5 == 0 encodes LSR #32, whose result is zero.  The
            vassert(0) is a trip-wire: presumably "ATC" marks a path
            never yet seen in practice -- it will fire if reached. */
         if (imm5 == 0) {
            index = mkU32(0);
            vassert(0); // ATC
         } else {
            index = binop(Iop_Shr32, getIRegA(rM), mkU8(imm5));
         }
         DIS(buf, "[r%u, %cr%u, LSR #%u]",
                  rN, opChar, rM, imm5 == 0 ? 32 : imm5);
         break;
      case 2: /* ASR */
         /* Doesn't this just mean that the behaviour with imm5 == 0
            is the same as if it had been 31 ? */
         if (imm5 == 0) {
            index = binop(Iop_Sar32, getIRegA(rM), mkU8(31));
            vassert(0); // ATC
         } else {
            index = binop(Iop_Sar32, getIRegA(rM), mkU8(imm5));
         }
         DIS(buf, "[r%u, %cr%u, ASR #%u]",
                  rN, opChar, rM, imm5 == 0 ? 32 : imm5);
         break;
      case 3: /* ROR or RRX */
         if (imm5 == 0) {
            /* RRX: rotate right one bit through the carry flag,
               i.e. (C << 31) | (Rm >>u 1). */
            IRTemp rmT    = newTemp(Ity_I32);
            IRTemp cflagT = newTemp(Ity_I32);
            assign(rmT, getIRegA(rM));
            assign(cflagT, mk_armg_calculate_flag_c());
            index = binop(Iop_Or32,
                          binop(Iop_Shl32, mkexpr(cflagT), mkU8(31)),
                          binop(Iop_Shr32, mkexpr(rmT), mkU8(1)));
            DIS(buf, "[r%u, %cr%u, RRX]", rN, opChar, rM);
         } else {
            /* ror x, n == (x << (32-n)) | (x >>u n) for n in 1..31 */
            IRTemp rmT = newTemp(Ity_I32);
            assign(rmT, getIRegA(rM));
            vassert(imm5 >= 1 && imm5 <= 31);
            index = binop(Iop_Or32,
                          binop(Iop_Shl32, mkexpr(rmT), mkU8(32-imm5)),
                          binop(Iop_Shr32, mkexpr(rmT), mkU8(imm5)));
            DIS(buf, "[r%u, %cr%u, ROR #%u]", rN, opChar, rM, imm5);
         }
         break;
      default:
         vassert(0);
   }
   vassert(index);
   return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
                getIRegA(rN), index);
}
2454
2455
2456/* ARM only */
2457static
2458IRExpr* mk_EA_reg_plusminus_imm8 ( UInt rN, UInt bU, UInt imm8,
2459                                   /*OUT*/HChar* buf )
2460{
2461   vassert(rN < 16);
2462   vassert(bU < 2);
2463   vassert(imm8 < 0x100);
2464   HChar opChar = bU == 1 ? '+' : '-';
2465   DIS(buf, "[r%u, #%c%u]", rN, opChar, imm8);
2466   return
2467      binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2468             getIRegA(rN),
2469             mkU32(imm8) );
2470}
2471
2472
2473/* ARM only */
2474static
2475IRExpr* mk_EA_reg_plusminus_reg ( UInt rN, UInt bU, UInt rM,
2476                                  /*OUT*/HChar* buf )
2477{
2478   vassert(rN < 16);
2479   vassert(bU < 2);
2480   vassert(rM < 16);
2481   HChar   opChar = bU == 1 ? '+' : '-';
2482   IRExpr* index  = getIRegA(rM);
2483   DIS(buf, "[r%u, %c r%u]", rN, opChar, rM);
2484   return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
2485                getIRegA(rN), index);
2486}
2487
2488
2489/* irRes :: Ity_I32 holds a floating point comparison result encoded
2490   as an IRCmpF64Result.  Generate code to convert it to an
2491   ARM-encoded (N,Z,C,V) group in the lowest 4 bits of an I32 value.
2492   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes )
{
   IRTemp ix       = newTemp(Ity_I32);
   IRTemp termL    = newTemp(Ity_I32);
   IRTemp termR    = newTemp(Ity_I32);
   IRTemp nzcv     = newTemp(Ity_I32);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt.  It converts ix
         into an almost correct value NZCV value (incredibly), except
         for the case of UN, where it produces 0100 instead of the
         required 0011.

      termR is therefore a correction term, also computed from ix.  It
         is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
         the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of this file, to try this out with.
   */
   assign(
      ix,
      binop(Iop_Or32,
            binop(Iop_And32,
                  binop(Iop_Shr32, mkexpr(irRes), mkU8(5)),
                  mkU32(3)),
            binop(Iop_And32, mkexpr(irRes), mkU32(1))));

   assign(
      termL,
      binop(Iop_Add32,
            binop(Iop_Shr32,
                  binop(Iop_Sub32,
                        binop(Iop_Shl32,
                              binop(Iop_Xor32, mkexpr(ix), mkU32(1)),
                              mkU8(30)),
                        mkU32(1)),
                  mkU8(29)),
            mkU32(1)));

   assign(
      termR,
      binop(Iop_And32,
            binop(Iop_And32,
                  mkexpr(ix),
                  binop(Iop_Shr32, mkexpr(ix), mkU8(1))),
            mkU32(1)));

   assign(nzcv, binop(Iop_Sub32, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}
2562
2563
2564/* Thumb32 only.  This is "ThumbExpandImm" in the ARM ARM.  If
2565   updatesC is non-NULL, a boolean is written to it indicating whether
2566   or not the C flag is updated, as per ARM ARM "ThumbExpandImm_C".
2567*/
2568static UInt thumbExpandImm ( Bool* updatesC,
2569                             UInt imm1, UInt imm3, UInt imm8 )
2570{
2571   vassert(imm1 < (1<<1));
2572   vassert(imm3 < (1<<3));
2573   vassert(imm8 < (1<<8));
2574   UInt i_imm3_a = (imm1 << 4) | (imm3 << 1) | ((imm8 >> 7) & 1);
2575   UInt abcdefgh = imm8;
2576   UInt lbcdefgh = imm8 | 0x80;
2577   if (updatesC) {
2578      *updatesC = i_imm3_a >= 8;
2579   }
2580   switch (i_imm3_a) {
2581      case 0: case 1:
2582         return abcdefgh;
2583      case 2: case 3:
2584         return (abcdefgh << 16) | abcdefgh;
2585      case 4: case 5:
2586         return (abcdefgh << 24) | (abcdefgh << 8);
2587      case 6: case 7:
2588         return (abcdefgh << 24) | (abcdefgh << 16)
2589                | (abcdefgh << 8) | abcdefgh;
2590      case 8 ... 31:
2591         return lbcdefgh << (32 - i_imm3_a);
2592      default:
2593         break;
2594   }
2595   /*NOTREACHED*/vassert(0);
2596}
2597
2598
2599/* Version of thumbExpandImm where we simply feed it the
2600   instruction halfwords (the lowest addressed one is I0). */
2601static UInt thumbExpandImm_from_I0_I1 ( Bool* updatesC,
2602                                        UShort i0s, UShort i1s )
2603{
2604   UInt i0    = (UInt)i0s;
2605   UInt i1    = (UInt)i1s;
2606   UInt imm1  = SLICE_UInt(i0,10,10);
2607   UInt imm3  = SLICE_UInt(i1,14,12);
2608   UInt imm8  = SLICE_UInt(i1,7,0);
2609   return thumbExpandImm(updatesC, imm1, imm3, imm8);
2610}
2611
2612
2613/* Thumb16 only.  Given the firstcond and mask fields from an IT
2614   instruction, compute the 32-bit ITSTATE value implied, as described
2615   in libvex_guest_arm.h.  This is not the ARM ARM representation.
2616   Also produce the t/e chars for the 2nd, 3rd, 4th insns, for
2617   disassembly printing.  Returns False if firstcond or mask
2618   denote something invalid.
2619
2620   The number and conditions for the instructions to be
2621   conditionalised depend on firstcond and mask:
2622
2623   mask      cond 1    cond 2      cond 3      cond 4
2624
2625   1000      fc[3:0]
2626   x100      fc[3:0]   fc[3:1]:x
2627   xy10      fc[3:0]   fc[3:1]:x   fc[3:1]:y
2628   xyz1      fc[3:0]   fc[3:1]:x   fc[3:1]:y   fc[3:1]:z
2629
2630   The condition fields are assembled in *itstate backwards (cond 4 at
2631   the top, cond 1 at the bottom).  Conditions are << 4'd and then
2632   ^0xE'd, and those fields that correspond to instructions in the IT
2633   block are tagged with a 1 bit.
2634*/
static Bool compute_ITSTATE ( /*OUT*/UInt*  itstate,
                              /*OUT*/HChar* ch1,
                              /*OUT*/HChar* ch2,
                              /*OUT*/HChar* ch3,
                              UInt firstcond, UInt mask )
{
   vassert(firstcond <= 0xF);
   vassert(mask <= 0xF);
   /* Default outputs: empty ITSTATE, '.' for all trailing t/e chars. */
   *itstate = 0;
   *ch1 = *ch2 = *ch3 = '.';
   if (mask == 0)
      return False; /* the logic below actually ensures this anyway,
                       but clearer to make it explicit. */
   if (firstcond == 0xF)
      return False; /* NV is not allowed */
   if (firstcond == 0xE && popcount32(mask) != 1)
      return False; /* if firstcond is AL then all the rest must be too */

   /* Split the mask into its four bits.  The position of the lowest
      set bit determines how many insns the IT block covers (see the
      table in the comment above this function). */
   UInt m3 = (mask >> 3) & 1;
   UInt m2 = (mask >> 2) & 1;
   UInt m1 = (mask >> 1) & 1;
   UInt m0 = (mask >> 0) & 1;

   /* fc: condition byte for the first insn (bit 0 = in-IT-block).
      ni: filler byte for slots beyond the block: AL condition,
          not-in-IT-block. */
   UInt fc = (firstcond << 4) | 1/*in-IT-block*/;
   UInt ni = (0xE/*AL*/ << 4) | 0/*not-in-IT-block*/;

   /* mask == 1000: one conditionalised insn. */
   if (m3 == 1 && (m2|m1|m0) == 0) {
      *itstate = (ni << 24) | (ni << 16) | (ni << 8) | fc;
      *itstate ^= 0xE0E0E0E0;
      return True;
   }

   /* mask == x100: two insns.  Insn 2's condition is fc[3:1]:x,
      formed by overwriting bit 4 of fc with m3. */
   if (m2 == 1 && (m1|m0) == 0) {
      *itstate = (ni << 24) | (ni << 16) | (setbit32(fc, 4, m3) << 8) | fc;
      *itstate ^= 0xE0E0E0E0;
      /* 't' if insn 2's condition matches firstcond, else 'e'. */
      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
      return True;
   }

   /* mask == xy10: three insns. */
   if (m1 == 1 && m0 == 0) {
      *itstate = (ni << 24)
                 | (setbit32(fc, 4, m2) << 16)
                 | (setbit32(fc, 4, m3) << 8) | fc;
      *itstate ^= 0xE0E0E0E0;
      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
      *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
      return True;
   }

   /* mask == xyz1: four insns. */
   if (m0 == 1) {
      *itstate = (setbit32(fc, 4, m1) << 24)
                 | (setbit32(fc, 4, m2) << 16)
                 | (setbit32(fc, 4, m3) << 8) | fc;
      *itstate ^= 0xE0E0E0E0;
      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
      *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
      *ch3 = m1 == (firstcond & 1) ? 't' : 'e';
      return True;
   }

   return False;
}
2697
2698
2699/* Generate IR to do 32-bit bit reversal, a la Hacker's Delight
2700   Chapter 7 Section 1. */
2701static IRTemp gen_BITREV ( IRTemp x0 )
2702{
2703   IRTemp x1 = newTemp(Ity_I32);
2704   IRTemp x2 = newTemp(Ity_I32);
2705   IRTemp x3 = newTemp(Ity_I32);
2706   IRTemp x4 = newTemp(Ity_I32);
2707   IRTemp x5 = newTemp(Ity_I32);
2708   UInt   c1 = 0x55555555;
2709   UInt   c2 = 0x33333333;
2710   UInt   c3 = 0x0F0F0F0F;
2711   UInt   c4 = 0x00FF00FF;
2712   UInt   c5 = 0x0000FFFF;
2713   assign(x1,
2714          binop(Iop_Or32,
2715                binop(Iop_Shl32,
2716                      binop(Iop_And32, mkexpr(x0), mkU32(c1)),
2717                      mkU8(1)),
2718                binop(Iop_Shr32,
2719                      binop(Iop_And32, mkexpr(x0), mkU32(~c1)),
2720                      mkU8(1))
2721   ));
2722   assign(x2,
2723          binop(Iop_Or32,
2724                binop(Iop_Shl32,
2725                      binop(Iop_And32, mkexpr(x1), mkU32(c2)),
2726                      mkU8(2)),
2727                binop(Iop_Shr32,
2728                      binop(Iop_And32, mkexpr(x1), mkU32(~c2)),
2729                      mkU8(2))
2730   ));
2731   assign(x3,
2732          binop(Iop_Or32,
2733                binop(Iop_Shl32,
2734                      binop(Iop_And32, mkexpr(x2), mkU32(c3)),
2735                      mkU8(4)),
2736                binop(Iop_Shr32,
2737                      binop(Iop_And32, mkexpr(x2), mkU32(~c3)),
2738                      mkU8(4))
2739   ));
2740   assign(x4,
2741          binop(Iop_Or32,
2742                binop(Iop_Shl32,
2743                      binop(Iop_And32, mkexpr(x3), mkU32(c4)),
2744                      mkU8(8)),
2745                binop(Iop_Shr32,
2746                      binop(Iop_And32, mkexpr(x3), mkU32(~c4)),
2747                      mkU8(8))
2748   ));
2749   assign(x5,
2750          binop(Iop_Or32,
2751                binop(Iop_Shl32,
2752                      binop(Iop_And32, mkexpr(x4), mkU32(c5)),
2753                      mkU8(16)),
2754                binop(Iop_Shr32,
2755                      binop(Iop_And32, mkexpr(x4), mkU32(~c5)),
2756                      mkU8(16))
2757   ));
2758   return x5;
2759}
2760
2761
/* Generate IR to rearrange the bytes 3:2:1:0 in a word into the
   order 0:1:2:3 (i.e. a byte-swap). */
2764static IRTemp gen_REV ( IRTemp arg )
2765{
2766   IRTemp res = newTemp(Ity_I32);
2767   assign(res,
2768          binop(Iop_Or32,
2769                binop(Iop_Shl32, mkexpr(arg), mkU8(24)),
2770          binop(Iop_Or32,
2771                binop(Iop_And32, binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
2772                                 mkU32(0x00FF0000)),
2773          binop(Iop_Or32,
2774                binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
2775                                       mkU32(0x0000FF00)),
2776                binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(24)),
2777                                       mkU32(0x000000FF) )
2778   ))));
2779   return res;
2780}
2781
2782
/* Generate IR to rearrange the bytes 3:2:1:0 in a word into the
   order 2:3:0:1 (byte-swap within the lo and hi halves). */
2785static IRTemp gen_REV16 ( IRTemp arg )
2786{
2787   IRTemp res = newTemp(Ity_I32);
2788   assign(res,
2789          binop(Iop_Or32,
2790                binop(Iop_And32,
2791                      binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
2792                      mkU32(0xFF00FF00)),
2793                binop(Iop_And32,
2794                      binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
2795                      mkU32(0x00FF00FF))));
2796   return res;
2797}
2798
2799
2800/*------------------------------------------------------------*/
2801/*--- Advanced SIMD (NEON) instructions                    ---*/
2802/*------------------------------------------------------------*/
2803
2804/*------------------------------------------------------------*/
2805/*--- NEON data processing                                 ---*/
2806/*------------------------------------------------------------*/
2807
2808/* For all NEON DP ops, we use the normal scheme to handle conditional
2809   writes to registers -- pass in condT and hand that on to the
2810   put*Reg functions.  In ARM mode condT is always IRTemp_INVALID
2811   since NEON is unconditional for ARM.  In Thumb mode condT is
2812   derived from the ITSTATE shift register in the normal way. */
2813
2814static
2815UInt get_neon_d_regno(UInt theInstr)
2816{
2817   UInt x = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
2818   if (theInstr & 0x40) {
2819      if (x & 1) {
2820         x = x + 0x100;
2821      } else {
2822         x = x >> 1;
2823      }
2824   }
2825   return x;
2826}
2827
2828static
2829UInt get_neon_n_regno(UInt theInstr)
2830{
2831   UInt x = ((theInstr >> 3) & 0x10) | ((theInstr >> 16) & 0xF);
2832   if (theInstr & 0x40) {
2833      if (x & 1) {
2834         x = x + 0x100;
2835      } else {
2836         x = x >> 1;
2837      }
2838   }
2839   return x;
2840}
2841
2842static
2843UInt get_neon_m_regno(UInt theInstr)
2844{
2845   UInt x = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
2846   if (theInstr & 0x40) {
2847      if (x & 1) {
2848         x = x + 0x100;
2849      } else {
2850         x = x >> 1;
2851      }
2852   }
2853   return x;
2854}
2855
2856static
2857Bool dis_neon_vext ( UInt theInstr, IRTemp condT )
2858{
2859   UInt dreg = get_neon_d_regno(theInstr);
2860   UInt mreg = get_neon_m_regno(theInstr);
2861   UInt nreg = get_neon_n_regno(theInstr);
2862   UInt imm4 = (theInstr >> 8) & 0xf;
2863   UInt Q = (theInstr >> 6) & 1;
2864   HChar reg_t = Q ? 'q' : 'd';
2865
2866   if (Q) {
2867      putQReg(dreg, triop(Iop_ExtractV128, getQReg(nreg),
2868               getQReg(mreg), mkU8(imm4)), condT);
2869   } else {
2870      putDRegI64(dreg, triop(Iop_Extract64, getDRegI64(nreg),
2871                 getDRegI64(mreg), mkU8(imm4)), condT);
2872   }
2873   DIP("vext.8 %c%d, %c%d, %c%d, #%d\n", reg_t, dreg, reg_t, nreg,
2874                                         reg_t, mreg, imm4);
2875   return True;
2876}
2877
2878/* Generate specific vector FP binary ops, possibly with a fake
2879   rounding mode as required by the primop. */
2880static
2881IRExpr* binop_w_fake_RM ( IROp op, IRExpr* argL, IRExpr* argR )
2882{
2883   switch (op) {
2884      case Iop_Add32Fx4:
2885      case Iop_Sub32Fx4:
2886      case Iop_Mul32Fx4:
2887         return triop(op, get_FAKE_roundingmode(), argL, argR );
2888      case Iop_Add32x4: case Iop_Add16x8:
2889      case Iop_Sub32x4: case Iop_Sub16x8:
2890      case Iop_Mul32x4: case Iop_Mul16x8:
2891      case Iop_Mul32x2: case Iop_Mul16x4:
2892      case Iop_Add32Fx2:
2893      case Iop_Sub32Fx2:
2894      case Iop_Mul32Fx2:
2895      case Iop_PwAdd32Fx2:
2896         return binop(op, argL, argR);
2897      default:
2898        ppIROp(op);
2899        vassert(0);
2900   }
2901}
2902
2903/* VTBL, VTBX */
static
Bool dis_neon_vtb ( UInt theInstr, IRTemp condT )
{
   /* VTBL (op==0) / VTBX (op==1): table lookup.  Each byte of Dm is
      an index into the byte table formed by D registers
      nreg..nreg+len.  An out-of-range index produces 0 (VTBL) or
      leaves the corresponding destination byte unchanged (VTBX). */
   UInt op = (theInstr >> 6) & 1;
   /* Bit 6 is the op bit in this encoding, not Q; clear it before
      calling the shared regno decoders, which treat bit 6 as Q. */
   UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
   UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
   UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
   UInt len = (theInstr >> 8) & 3;  /* table spans len+1 D registers */
   Int i;
   IROp cmp;
   ULong imm;
   IRTemp arg_l;
   IRTemp old_mask, new_mask, cur_mask;
   IRTemp old_res, new_res;
   IRTemp old_arg, new_arg;

   /* Reject Q-register encodings (decoders return >= 0x100 for an
      odd index with bit 6 set) and tables running past d31. */
   if (dreg >= 0x100 || mreg >= 0x100 || nreg >= 0x100)
      return False;
   if (nreg + len > 31)
      return False;

   cmp = Iop_CmpGT8Ux8;

   old_mask = newTemp(Ity_I64);
   old_res = newTemp(Ity_I64);
   old_arg = newTemp(Ity_I64);
   assign(old_mask, mkU64(0));
   assign(old_res, mkU64(0));
   assign(old_arg, getDRegI64(mreg));
   /* Build imm = 0x0808080808080808: the constant 8 in every byte
      lane. */
   imm = 8;
   imm = (imm <<  8) | imm;
   imm = (imm << 16) | imm;
   imm = (imm << 32) | imm;

   /* Walk the table one D register at a time.  In round i, cur_mask
      flags the index bytes lying in 0..7 (i.e. selecting from this
      table register); those bytes are looked up via Perm8x8 and OR'd
      into the accumulated result, the hit-mask is accumulated, and
      every index byte is then reduced by 8 for the next round. */
   for (i = 0; i <= len; i++) {
      arg_l = newTemp(Ity_I64);
      new_mask = newTemp(Ity_I64);
      cur_mask = newTemp(Ity_I64);
      new_res = newTemp(Ity_I64);
      new_arg = newTemp(Ity_I64);
      assign(arg_l, getDRegI64(nreg+i));
      assign(new_arg, binop(Iop_Sub8x8, mkexpr(old_arg), mkU64(imm)));
      /* cur_mask byte = 0xFF where 8 > index (unsigned), else 0. */
      assign(cur_mask, binop(cmp, mkU64(imm), mkexpr(old_arg)));
      assign(new_mask, binop(Iop_Or64, mkexpr(old_mask), mkexpr(cur_mask)));
      assign(new_res, binop(Iop_Or64,
                            mkexpr(old_res),
                            binop(Iop_And64,
                                  binop(Iop_Perm8x8,
                                        mkexpr(arg_l),
                                        binop(Iop_And64,
                                              mkexpr(old_arg),
                                              mkexpr(cur_mask))),
                                  mkexpr(cur_mask))));

      old_arg = new_arg;
      old_mask = new_mask;
      old_res = new_res;
   }
   /* VTBX only: destination bytes never hit by any table register
      keep their previous value. */
   if (op) {
      new_res = newTemp(Ity_I64);
      assign(new_res, binop(Iop_Or64,
                            binop(Iop_And64,
                                  getDRegI64(dreg),
                                  unop(Iop_Not64, mkexpr(old_mask))),
                            mkexpr(old_res)));
      old_res = new_res;
   }

   putDRegI64(dreg, mkexpr(old_res), condT);
   DIP("vtb%c.8 d%u, {", op ? 'x' : 'l', dreg);
   if (len > 0) {
      DIP("d%u-d%u", nreg, nreg + len);
   } else {
      DIP("d%u", nreg);
   }
   DIP("}, d%u\n", mreg);
   return True;
}
2982
2983/* VDUP (scalar)  */
2984static
2985Bool dis_neon_vdup ( UInt theInstr, IRTemp condT )
2986{
2987   UInt Q = (theInstr >> 6) & 1;
2988   UInt dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
2989   UInt mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
2990   UInt imm4 = (theInstr >> 16) & 0xF;
2991   UInt index;
2992   UInt size;
2993   IRTemp arg_m;
2994   IRTemp res;
2995   IROp op, op2;
2996
2997   if ((imm4 == 0) || (imm4 == 8))
2998      return False;
2999   if ((Q == 1) && ((dreg & 1) == 1))
3000      return False;
3001   if (Q)
3002      dreg >>= 1;
3003   arg_m = newTemp(Ity_I64);
3004   assign(arg_m, getDRegI64(mreg));
3005   if (Q)
3006      res = newTemp(Ity_V128);
3007   else
3008      res = newTemp(Ity_I64);
3009   if ((imm4 & 1) == 1) {
3010      op = Q ? Iop_Dup8x16 : Iop_Dup8x8;
3011      op2 = Iop_GetElem8x8;
3012      index = imm4 >> 1;
3013      size = 8;
3014   } else if ((imm4 & 3) == 2) {
3015      op = Q ? Iop_Dup16x8 : Iop_Dup16x4;
3016      op2 = Iop_GetElem16x4;
3017      index = imm4 >> 2;
3018      size = 16;
3019   } else if ((imm4 & 7) == 4) {
3020      op = Q ? Iop_Dup32x4 : Iop_Dup32x2;
3021      op2 = Iop_GetElem32x2;
3022      index = imm4 >> 3;
3023      size = 32;
3024   } else {
3025      return False; // can this ever happen?
3026   }
3027   assign(res, unop(op, binop(op2, mkexpr(arg_m), mkU8(index))));
3028   if (Q) {
3029      putQReg(dreg, mkexpr(res), condT);
3030   } else {
3031      putDRegI64(dreg, mkexpr(res), condT);
3032   }
3033   DIP("vdup.%d %c%d, d%d[%d]\n", size, Q ? 'q' : 'd', dreg, mreg, index);
3034   return True;
3035}
3036
3037/* A7.4.1 Three registers of the same length */
3038static
3039Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT )
3040{
3041   UInt Q = (theInstr >> 6) & 1;
3042   UInt dreg = get_neon_d_regno(theInstr);
3043   UInt nreg = get_neon_n_regno(theInstr);
3044   UInt mreg = get_neon_m_regno(theInstr);
3045   UInt A = (theInstr >> 8) & 0xF;
3046   UInt B = (theInstr >> 4) & 1;
3047   UInt C = (theInstr >> 20) & 0x3;
3048   UInt U = (theInstr >> 24) & 1;
3049   UInt size = C;
3050
3051   IRTemp arg_n;
3052   IRTemp arg_m;
3053   IRTemp res;
3054
3055   if (Q) {
3056      arg_n = newTemp(Ity_V128);
3057      arg_m = newTemp(Ity_V128);
3058      res = newTemp(Ity_V128);
3059      assign(arg_n, getQReg(nreg));
3060      assign(arg_m, getQReg(mreg));
3061   } else {
3062      arg_n = newTemp(Ity_I64);
3063      arg_m = newTemp(Ity_I64);
3064      res = newTemp(Ity_I64);
3065      assign(arg_n, getDRegI64(nreg));
3066      assign(arg_m, getDRegI64(mreg));
3067   }
3068
3069   switch(A) {
3070      case 0:
3071         if (B == 0) {
3072            /* VHADD */
3073            ULong imm = 0;
3074            IRExpr *imm_val;
3075            IROp addOp;
3076            IROp andOp;
3077            IROp shOp;
3078            HChar regType = Q ? 'q' : 'd';
3079
3080            if (size == 3)
3081               return False;
3082            switch(size) {
3083               case 0: imm = 0x101010101010101LL; break;
3084               case 1: imm = 0x1000100010001LL; break;
3085               case 2: imm = 0x100000001LL; break;
3086               default: vassert(0);
3087            }
3088            if (Q) {
3089               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
3090               andOp = Iop_AndV128;
3091            } else {
3092               imm_val = mkU64(imm);
3093               andOp = Iop_And64;
3094            }
3095            if (U) {
3096               switch(size) {
3097                  case 0:
3098                     addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
3099                     shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3100                     break;
3101                  case 1:
3102                     addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
3103                     shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3104                     break;
3105                  case 2:
3106                     addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
3107                     shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3108                     break;
3109                  default:
3110                     vassert(0);
3111               }
3112            } else {
3113               switch(size) {
3114                  case 0:
3115                     addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
3116                     shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
3117                     break;
3118                  case 1:
3119                     addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
3120                     shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
3121                     break;
3122                  case 2:
3123                     addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
3124                     shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
3125                     break;
3126                  default:
3127                     vassert(0);
3128               }
3129            }
3130            assign(res,
3131                   binop(addOp,
3132                         binop(addOp,
3133                               binop(shOp, mkexpr(arg_m), mkU8(1)),
3134                               binop(shOp, mkexpr(arg_n), mkU8(1))),
3135                         binop(shOp,
3136                               binop(addOp,
3137                                     binop(andOp, mkexpr(arg_m), imm_val),
3138                                     binop(andOp, mkexpr(arg_n), imm_val)),
3139                               mkU8(1))));
3140            DIP("vhadd.%c%d %c%d, %c%d, %c%d\n",
3141                U ? 'u' : 's', 8 << size, regType,
3142                dreg, regType, nreg, regType, mreg);
3143         } else {
3144            /* VQADD */
3145            IROp op, op2;
3146            IRTemp tmp;
3147            HChar reg_t = Q ? 'q' : 'd';
3148            if (Q) {
3149               switch (size) {
3150                  case 0:
3151                     op = U ? Iop_QAdd8Ux16 : Iop_QAdd8Sx16;
3152                     op2 = Iop_Add8x16;
3153                     break;
3154                  case 1:
3155                     op = U ? Iop_QAdd16Ux8 : Iop_QAdd16Sx8;
3156                     op2 = Iop_Add16x8;
3157                     break;
3158                  case 2:
3159                     op = U ? Iop_QAdd32Ux4 : Iop_QAdd32Sx4;
3160                     op2 = Iop_Add32x4;
3161                     break;
3162                  case 3:
3163                     op = U ? Iop_QAdd64Ux2 : Iop_QAdd64Sx2;
3164                     op2 = Iop_Add64x2;
3165                     break;
3166                  default:
3167                     vassert(0);
3168               }
3169            } else {
3170               switch (size) {
3171                  case 0:
3172                     op = U ? Iop_QAdd8Ux8 : Iop_QAdd8Sx8;
3173                     op2 = Iop_Add8x8;
3174                     break;
3175                  case 1:
3176                     op = U ? Iop_QAdd16Ux4 : Iop_QAdd16Sx4;
3177                     op2 = Iop_Add16x4;
3178                     break;
3179                  case 2:
3180                     op = U ? Iop_QAdd32Ux2 : Iop_QAdd32Sx2;
3181                     op2 = Iop_Add32x2;
3182                     break;
3183                  case 3:
3184                     op = U ? Iop_QAdd64Ux1 : Iop_QAdd64Sx1;
3185                     op2 = Iop_Add64;
3186                     break;
3187                  default:
3188                     vassert(0);
3189               }
3190            }
3191            if (Q) {
3192               tmp = newTemp(Ity_V128);
3193            } else {
3194               tmp = newTemp(Ity_I64);
3195            }
3196            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3197            assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
3198            setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
3199            DIP("vqadd.%c%d %c%d, %c%d, %c%d\n",
3200                U ? 'u' : 's',
3201                8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
3202         }
3203         break;
3204      case 1:
3205         if (B == 0) {
3206            /* VRHADD */
3207            /* VRHADD C, A, B ::=
3208                 C = (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) */
3209            IROp shift_op, add_op;
3210            IRTemp cc;
3211            ULong one = 1;
3212            HChar reg_t = Q ? 'q' : 'd';
3213            switch (size) {
3214               case 0: one = (one <<  8) | one; /* fall through */
3215               case 1: one = (one << 16) | one; /* fall through */
3216               case 2: one = (one << 32) | one; break;
3217               case 3: return False;
3218               default: vassert(0);
3219            }
3220            if (Q) {
3221               switch (size) {
3222                  case 0:
3223                     shift_op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
3224                     add_op = Iop_Add8x16;
3225                     break;
3226                  case 1:
3227                     shift_op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
3228                     add_op = Iop_Add16x8;
3229                     break;
3230                  case 2:
3231                     shift_op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
3232                     add_op = Iop_Add32x4;
3233                     break;
3234                  case 3:
3235                     return False;
3236                  default:
3237                     vassert(0);
3238               }
3239            } else {
3240               switch (size) {
3241                  case 0:
3242                     shift_op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
3243                     add_op = Iop_Add8x8;
3244                     break;
3245                  case 1:
3246                     shift_op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
3247                     add_op = Iop_Add16x4;
3248                     break;
3249                  case 2:
3250                     shift_op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
3251                     add_op = Iop_Add32x2;
3252                     break;
3253                  case 3:
3254                     return False;
3255                  default:
3256                     vassert(0);
3257               }
3258            }
3259            if (Q) {
3260               cc = newTemp(Ity_V128);
3261               assign(cc, binop(shift_op,
3262                                binop(add_op,
3263                                      binop(add_op,
3264                                            binop(Iop_AndV128,
3265                                                  mkexpr(arg_n),
3266                                                  binop(Iop_64HLtoV128,
3267                                                        mkU64(one),
3268                                                        mkU64(one))),
3269                                            binop(Iop_AndV128,
3270                                                  mkexpr(arg_m),
3271                                                  binop(Iop_64HLtoV128,
3272                                                        mkU64(one),
3273                                                        mkU64(one)))),
3274                                      binop(Iop_64HLtoV128,
3275                                            mkU64(one),
3276                                            mkU64(one))),
3277                                mkU8(1)));
3278               assign(res, binop(add_op,
3279                                 binop(add_op,
3280                                       binop(shift_op,
3281                                             mkexpr(arg_n),
3282                                             mkU8(1)),
3283                                       binop(shift_op,
3284                                             mkexpr(arg_m),
3285                                             mkU8(1))),
3286                                 mkexpr(cc)));
3287            } else {
3288               cc = newTemp(Ity_I64);
3289               assign(cc, binop(shift_op,
3290                                binop(add_op,
3291                                      binop(add_op,
3292                                            binop(Iop_And64,
3293                                                  mkexpr(arg_n),
3294                                                  mkU64(one)),
3295                                            binop(Iop_And64,
3296                                                  mkexpr(arg_m),
3297                                                  mkU64(one))),
3298                                      mkU64(one)),
3299                                mkU8(1)));
3300               assign(res, binop(add_op,
3301                                 binop(add_op,
3302                                       binop(shift_op,
3303                                             mkexpr(arg_n),
3304                                             mkU8(1)),
3305                                       binop(shift_op,
3306                                             mkexpr(arg_m),
3307                                             mkU8(1))),
3308                                 mkexpr(cc)));
3309            }
3310            DIP("vrhadd.%c%d %c%d, %c%d, %c%d\n",
3311                U ? 'u' : 's',
3312                8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
3313         } else {
3314            if (U == 0)  {
3315               switch(C) {
3316                  case 0: {
3317                     /* VAND  */
3318                     HChar reg_t = Q ? 'q' : 'd';
3319                     if (Q) {
3320                        assign(res, binop(Iop_AndV128, mkexpr(arg_n),
3321                                                       mkexpr(arg_m)));
3322                     } else {
3323                        assign(res, binop(Iop_And64, mkexpr(arg_n),
3324                                                     mkexpr(arg_m)));
3325                     }
3326                     DIP("vand %c%d, %c%d, %c%d\n",
3327                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
3328                     break;
3329                  }
3330                  case 1: {
3331                     /* VBIC  */
3332                     HChar reg_t = Q ? 'q' : 'd';
3333                     if (Q) {
3334                        assign(res, binop(Iop_AndV128,mkexpr(arg_n),
3335                               unop(Iop_NotV128, mkexpr(arg_m))));
3336                     } else {
3337                        assign(res, binop(Iop_And64, mkexpr(arg_n),
3338                               unop(Iop_Not64, mkexpr(arg_m))));
3339                     }
3340                     DIP("vbic %c%d, %c%d, %c%d\n",
3341                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
3342                     break;
3343                  }
3344                  case 2:
3345                     if ( nreg != mreg) {
3346                        /* VORR  */
3347                        HChar reg_t = Q ? 'q' : 'd';
3348                        if (Q) {
3349                           assign(res, binop(Iop_OrV128, mkexpr(arg_n),
3350                                                         mkexpr(arg_m)));
3351                        } else {
3352                           assign(res, binop(Iop_Or64, mkexpr(arg_n),
3353                                                       mkexpr(arg_m)));
3354                        }
3355                        DIP("vorr %c%d, %c%d, %c%d\n",
3356                            reg_t, dreg, reg_t, nreg, reg_t, mreg);
3357                     } else {
3358                        /* VMOV  */
3359                        HChar reg_t = Q ? 'q' : 'd';
3360                        assign(res, mkexpr(arg_m));
3361                        DIP("vmov %c%d, %c%d\n", reg_t, dreg, reg_t, mreg);
3362                     }
3363                     break;
3364                  case 3:{
3365                     /* VORN  */
3366                     HChar reg_t = Q ? 'q' : 'd';
3367                     if (Q) {
3368                        assign(res, binop(Iop_OrV128,mkexpr(arg_n),
3369                               unop(Iop_NotV128, mkexpr(arg_m))));
3370                     } else {
3371                        assign(res, binop(Iop_Or64, mkexpr(arg_n),
3372                               unop(Iop_Not64, mkexpr(arg_m))));
3373                     }
3374                     DIP("vorn %c%d, %c%d, %c%d\n",
3375                         reg_t, dreg, reg_t, nreg, reg_t, mreg);
3376                     break;
3377                  }
3378               }
3379            } else {
3380               switch(C) {
3381                  case 0:
3382                     /* VEOR (XOR)  */
3383                     if (Q) {
3384                        assign(res, binop(Iop_XorV128, mkexpr(arg_n),
3385                                                       mkexpr(arg_m)));
3386                     } else {
3387                        assign(res, binop(Iop_Xor64, mkexpr(arg_n),
3388                                                     mkexpr(arg_m)));
3389                     }
3390                     DIP("veor %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
3391                           Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3392                     break;
3393                  case 1:
3394                     /* VBSL  */
3395                     if (Q) {
3396                        IRTemp reg_d = newTemp(Ity_V128);
3397                        assign(reg_d, getQReg(dreg));
3398                        assign(res,
3399                               binop(Iop_OrV128,
3400                                     binop(Iop_AndV128, mkexpr(arg_n),
3401                                                        mkexpr(reg_d)),
3402                                     binop(Iop_AndV128,
3403                                           mkexpr(arg_m),
3404                                           unop(Iop_NotV128,
3405                                                 mkexpr(reg_d)) ) ) );
3406                     } else {
3407                        IRTemp reg_d = newTemp(Ity_I64);
3408                        assign(reg_d, getDRegI64(dreg));
3409                        assign(res,
3410                               binop(Iop_Or64,
3411                                     binop(Iop_And64, mkexpr(arg_n),
3412                                                      mkexpr(reg_d)),
3413                                     binop(Iop_And64,
3414                                           mkexpr(arg_m),
3415                                           unop(Iop_Not64, mkexpr(reg_d)))));
3416                     }
3417                     DIP("vbsl %c%u, %c%u, %c%u\n",
3418                         Q ? 'q' : 'd', dreg,
3419                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3420                     break;
3421                  case 2:
3422                     /* VBIT  */
3423                     if (Q) {
3424                        IRTemp reg_d = newTemp(Ity_V128);
3425                        assign(reg_d, getQReg(dreg));
3426                        assign(res,
3427                               binop(Iop_OrV128,
3428                                     binop(Iop_AndV128, mkexpr(arg_n),
3429                                                        mkexpr(arg_m)),
3430                                     binop(Iop_AndV128,
3431                                           mkexpr(reg_d),
3432                                           unop(Iop_NotV128, mkexpr(arg_m)))));
3433                     } else {
3434                        IRTemp reg_d = newTemp(Ity_I64);
3435                        assign(reg_d, getDRegI64(dreg));
3436                        assign(res,
3437                               binop(Iop_Or64,
3438                                     binop(Iop_And64, mkexpr(arg_n),
3439                                                      mkexpr(arg_m)),
3440                                     binop(Iop_And64,
3441                                           mkexpr(reg_d),
3442                                           unop(Iop_Not64, mkexpr(arg_m)))));
3443                     }
3444                     DIP("vbit %c%u, %c%u, %c%u\n",
3445                         Q ? 'q' : 'd', dreg,
3446                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3447                     break;
3448                  case 3:
3449                     /* VBIF  */
3450                     if (Q) {
3451                        IRTemp reg_d = newTemp(Ity_V128);
3452                        assign(reg_d, getQReg(dreg));
3453                        assign(res,
3454                               binop(Iop_OrV128,
3455                                     binop(Iop_AndV128, mkexpr(reg_d),
3456                                                        mkexpr(arg_m)),
3457                                     binop(Iop_AndV128,
3458                                           mkexpr(arg_n),
3459                                           unop(Iop_NotV128, mkexpr(arg_m)))));
3460                     } else {
3461                        IRTemp reg_d = newTemp(Ity_I64);
3462                        assign(reg_d, getDRegI64(dreg));
3463                        assign(res,
3464                               binop(Iop_Or64,
3465                                     binop(Iop_And64, mkexpr(reg_d),
3466                                                      mkexpr(arg_m)),
3467                                     binop(Iop_And64,
3468                                           mkexpr(arg_n),
3469                                           unop(Iop_Not64, mkexpr(arg_m)))));
3470                     }
3471                     DIP("vbif %c%u, %c%u, %c%u\n",
3472                         Q ? 'q' : 'd', dreg,
3473                         Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3474                     break;
3475               }
3476            }
3477         }
3478         break;
3479      case 2:
3480         if (B == 0) {
3481            /* VHSUB */
3482            /* (A >> 1) - (B >> 1) - (NOT (A) & B & 1)   */
3483            ULong imm = 0;
3484            IRExpr *imm_val;
3485            IROp subOp;
3486            IROp notOp;
3487            IROp andOp;
3488            IROp shOp;
3489            if (size == 3)
3490               return False;
3491            switch(size) {
3492               case 0: imm = 0x101010101010101LL; break;
3493               case 1: imm = 0x1000100010001LL; break;
3494               case 2: imm = 0x100000001LL; break;
3495               default: vassert(0);
3496            }
3497            if (Q) {
3498               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
3499               andOp = Iop_AndV128;
3500               notOp = Iop_NotV128;
3501            } else {
3502               imm_val = mkU64(imm);
3503               andOp = Iop_And64;
3504               notOp = Iop_Not64;
3505            }
3506            if (U) {
3507               switch(size) {
3508                  case 0:
3509                     subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3510                     shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3511                     break;
3512                  case 1:
3513                     subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3514                     shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3515                     break;
3516                  case 2:
3517                     subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3518                     shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3519                     break;
3520                  default:
3521                     vassert(0);
3522               }
3523            } else {
3524               switch(size) {
3525                  case 0:
3526                     subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3527                     shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
3528                     break;
3529                  case 1:
3530                     subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3531                     shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
3532                     break;
3533                  case 2:
3534                     subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3535                     shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
3536                     break;
3537                  default:
3538                     vassert(0);
3539               }
3540            }
3541            assign(res,
3542                   binop(subOp,
3543                         binop(subOp,
3544                               binop(shOp, mkexpr(arg_n), mkU8(1)),
3545                               binop(shOp, mkexpr(arg_m), mkU8(1))),
3546                         binop(andOp,
3547                               binop(andOp,
3548                                     unop(notOp, mkexpr(arg_n)),
3549                                     mkexpr(arg_m)),
3550                               imm_val)));
3551            DIP("vhsub.%c%u %c%u, %c%u, %c%u\n",
3552                U ? 'u' : 's', 8 << size,
3553                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3554                mreg);
3555         } else {
3556            /* VQSUB */
3557            IROp op, op2;
3558            IRTemp tmp;
3559            if (Q) {
3560               switch (size) {
3561                  case 0:
3562                     op = U ? Iop_QSub8Ux16 : Iop_QSub8Sx16;
3563                     op2 = Iop_Sub8x16;
3564                     break;
3565                  case 1:
3566                     op = U ? Iop_QSub16Ux8 : Iop_QSub16Sx8;
3567                     op2 = Iop_Sub16x8;
3568                     break;
3569                  case 2:
3570                     op = U ? Iop_QSub32Ux4 : Iop_QSub32Sx4;
3571                     op2 = Iop_Sub32x4;
3572                     break;
3573                  case 3:
3574                     op = U ? Iop_QSub64Ux2 : Iop_QSub64Sx2;
3575                     op2 = Iop_Sub64x2;
3576                     break;
3577                  default:
3578                     vassert(0);
3579               }
3580            } else {
3581               switch (size) {
3582                  case 0:
3583                     op = U ? Iop_QSub8Ux8 : Iop_QSub8Sx8;
3584                     op2 = Iop_Sub8x8;
3585                     break;
3586                  case 1:
3587                     op = U ? Iop_QSub16Ux4 : Iop_QSub16Sx4;
3588                     op2 = Iop_Sub16x4;
3589                     break;
3590                  case 2:
3591                     op = U ? Iop_QSub32Ux2 : Iop_QSub32Sx2;
3592                     op2 = Iop_Sub32x2;
3593                     break;
3594                  case 3:
3595                     op = U ? Iop_QSub64Ux1 : Iop_QSub64Sx1;
3596                     op2 = Iop_Sub64;
3597                     break;
3598                  default:
3599                     vassert(0);
3600               }
3601            }
3602            if (Q)
3603               tmp = newTemp(Ity_V128);
3604            else
3605               tmp = newTemp(Ity_I64);
3606            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3607            assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
3608            setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
3609            DIP("vqsub.%c%u %c%u, %c%u, %c%u\n",
3610                U ? 'u' : 's', 8 << size,
3611                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3612                mreg);
3613         }
3614         break;
3615      case 3: {
3616            IROp op;
3617            if (Q) {
3618               switch (size) {
3619                  case 0: op = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; break;
3620                  case 1: op = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; break;
3621                  case 2: op = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; break;
3622                  case 3: return False;
3623                  default: vassert(0);
3624               }
3625            } else {
3626               switch (size) {
3627                  case 0: op = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; break;
3628                  case 1: op = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; break;
3629                  case 2: op = U ? Iop_CmpGT32Ux2: Iop_CmpGT32Sx2; break;
3630                  case 3: return False;
3631                  default: vassert(0);
3632               }
3633            }
3634            if (B == 0) {
3635               /* VCGT  */
3636               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3637               DIP("vcgt.%c%u %c%u, %c%u, %c%u\n",
3638                   U ? 'u' : 's', 8 << size,
3639                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3640                   mreg);
3641            } else {
3642               /* VCGE  */
3643               /* VCGE res, argn, argm
3644                    is equal to
3645                  VCGT tmp, argm, argn
3646                  VNOT res, tmp */
3647               assign(res,
3648                      unop(Q ? Iop_NotV128 : Iop_Not64,
3649                           binop(op, mkexpr(arg_m), mkexpr(arg_n))));
3650               DIP("vcge.%c%u %c%u, %c%u, %c%u\n",
3651                   U ? 'u' : 's', 8 << size,
3652                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3653                   mreg);
3654            }
3655         }
3656         break;
3657      case 4:
3658         if (B == 0) {
3659            /* VSHL */
3660            IROp op, sub_op;
3661            IRTemp tmp;
3662            if (U) {
3663               switch (size) {
3664                  case 0: op = Q ? Iop_Shl8x16 : Iop_Shl8x8; break;
3665                  case 1: op = Q ? Iop_Shl16x8 : Iop_Shl16x4; break;
3666                  case 2: op = Q ? Iop_Shl32x4 : Iop_Shl32x2; break;
3667                  case 3: op = Q ? Iop_Shl64x2 : Iop_Shl64; break;
3668                  default: vassert(0);
3669               }
3670            } else {
3671               tmp = newTemp(Q ? Ity_V128 : Ity_I64);
3672               switch (size) {
3673                  case 0:
3674                     op = Q ? Iop_Sar8x16 : Iop_Sar8x8;
3675                     sub_op = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3676                     break;
3677                  case 1:
3678                     op = Q ? Iop_Sar16x8 : Iop_Sar16x4;
3679                     sub_op = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3680                     break;
3681                  case 2:
3682                     op = Q ? Iop_Sar32x4 : Iop_Sar32x2;
3683                     sub_op = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3684                     break;
3685                  case 3:
3686                     op = Q ? Iop_Sar64x2 : Iop_Sar64;
3687                     sub_op = Q ? Iop_Sub64x2 : Iop_Sub64;
3688                     break;
3689                  default:
3690                     vassert(0);
3691               }
3692            }
3693            if (U) {
3694               if (!Q && (size == 3))
3695                  assign(res, binop(op, mkexpr(arg_m),
3696                                        unop(Iop_64to8, mkexpr(arg_n))));
3697               else
3698                  assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3699            } else {
3700               if (Q)
3701                  assign(tmp, binop(sub_op,
3702                                    binop(Iop_64HLtoV128, mkU64(0), mkU64(0)),
3703                                    mkexpr(arg_n)));
3704               else
3705                  assign(tmp, binop(sub_op, mkU64(0), mkexpr(arg_n)));
3706               if (!Q && (size == 3))
3707                  assign(res, binop(op, mkexpr(arg_m),
3708                                        unop(Iop_64to8, mkexpr(tmp))));
3709               else
3710                  assign(res, binop(op, mkexpr(arg_m), mkexpr(tmp)));
3711            }
3712            DIP("vshl.%c%u %c%u, %c%u, %c%u\n",
3713                U ? 'u' : 's', 8 << size,
3714                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3715                nreg);
3716         } else {
3717            /* VQSHL */
3718            IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt;
3719            IRTemp tmp, shval, mask, old_shval;
3720            UInt i;
3721            ULong esize;
3722            cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
3723            cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3724            if (U) {
3725               switch (size) {
3726                  case 0:
3727                     op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
3728                     op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
3729                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3730                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3731                     break;
3732                  case 1:
3733                     op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
3734                     op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
3735                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3736                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3737                     break;
3738                  case 2:
3739                     op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
3740                     op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
3741                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3742                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3743                     break;
3744                  case 3:
3745                     op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
3746                     op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
3747                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3748                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3749                     break;
3750                  default:
3751                     vassert(0);
3752               }
3753            } else {
3754               switch (size) {
3755                  case 0:
3756                     op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
3757                     op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
3758                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3759                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3760                     break;
3761                  case 1:
3762                     op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
3763                     op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
3764                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3765                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3766                     break;
3767                  case 2:
3768                     op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
3769                     op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
3770                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3771                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3772                     break;
3773                  case 3:
3774                     op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
3775                     op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
3776                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3777                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3778                     break;
3779                  default:
3780                     vassert(0);
3781               }
3782            }
3783            if (Q) {
3784               tmp = newTemp(Ity_V128);
3785               shval = newTemp(Ity_V128);
3786               mask = newTemp(Ity_V128);
3787            } else {
3788               tmp = newTemp(Ity_I64);
3789               shval = newTemp(Ity_I64);
3790               mask = newTemp(Ity_I64);
3791            }
3792            assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3793            /* Only least significant byte from second argument is used.
3794               Copy this byte to the whole vector element. */
3795            assign(shval, binop(op_shrn,
3796                                binop(op_shln,
3797                                       mkexpr(arg_n),
3798                                       mkU8((8 << size) - 8)),
3799                                mkU8((8 << size) - 8)));
3800            for(i = 0; i < size; i++) {
3801               old_shval = shval;
3802               shval = newTemp(Q ? Ity_V128 : Ity_I64);
3803               assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3804                                   mkexpr(old_shval),
3805                                   binop(op_shln,
3806                                         mkexpr(old_shval),
3807                                         mkU8(8 << i))));
3808            }
3809            /* If shift is greater or equal to the element size and
3810               element is non-zero, then QC flag should be set. */
3811            esize = (8 << size) - 1;
3812            esize = (esize <<  8) | esize;
3813            esize = (esize << 16) | esize;
3814            esize = (esize << 32) | esize;
3815            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3816                             binop(cmp_gt, mkexpr(shval),
3817                                           Q ? mkU128(esize) : mkU64(esize)),
3818                             unop(cmp_neq, mkexpr(arg_m))),
3819                       Q ? mkU128(0) : mkU64(0),
3820                       Q, condT);
            /* Otherwise, the QC flag should be set if the shift value is
               positive and the result, right-shifted by the same value, is
               not equal to the left argument. */
3824            assign(mask, binop(cmp_gt, mkexpr(shval),
3825                                       Q ? mkU128(0) : mkU64(0)));
3826            if (!Q && size == 3)
3827               assign(tmp, binop(op_rev, mkexpr(res),
3828                                         unop(Iop_64to8, mkexpr(arg_n))));
3829            else
3830               assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
3831            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3832                             mkexpr(tmp), mkexpr(mask)),
3833                       binop(Q ? Iop_AndV128 : Iop_And64,
3834                             mkexpr(arg_m), mkexpr(mask)),
3835                       Q, condT);
3836            DIP("vqshl.%c%u %c%u, %c%u, %c%u\n",
3837                U ? 'u' : 's', 8 << size,
3838                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3839                nreg);
3840         }
3841         break;
3842      case 5:
3843         if (B == 0) {
3844            /* VRSHL */
3845            IROp op, op_shrn, op_shln, cmp_gt, op_add;
3846            IRTemp shval, old_shval, imm_val, round;
3847            UInt i;
3848            ULong imm;
3849            cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3850            imm = 1L;
3851            switch (size) {
3852               case 0: imm = (imm <<  8) | imm; /* fall through */
3853               case 1: imm = (imm << 16) | imm; /* fall through */
3854               case 2: imm = (imm << 32) | imm; /* fall through */
3855               case 3: break;
3856               default: vassert(0);
3857            }
3858            imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
3859            round = newTemp(Q ? Ity_V128 : Ity_I64);
3860            assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
3861            if (U) {
3862               switch (size) {
3863                  case 0:
3864                     op = Q ? Iop_Shl8x16 : Iop_Shl8x8;
3865                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3866                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3867                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3868                     break;
3869                  case 1:
3870                     op = Q ? Iop_Shl16x8 : Iop_Shl16x4;
3871                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3872                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3873                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3874                     break;
3875                  case 2:
3876                     op = Q ? Iop_Shl32x4 : Iop_Shl32x2;
3877                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3878                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3879                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3880                     break;
3881                  case 3:
3882                     op = Q ? Iop_Shl64x2 : Iop_Shl64;
3883                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
3884                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3885                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3886                     break;
3887                  default:
3888                     vassert(0);
3889               }
3890            } else {
3891               switch (size) {
3892                  case 0:
3893                     op = Q ? Iop_Sal8x16 : Iop_Sal8x8;
3894                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3895                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3896                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3897                     break;
3898                  case 1:
3899                     op = Q ? Iop_Sal16x8 : Iop_Sal16x4;
3900                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3901                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3902                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3903                     break;
3904                  case 2:
3905                     op = Q ? Iop_Sal32x4 : Iop_Sal32x2;
3906                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3907                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3908                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3909                     break;
3910                  case 3:
3911                     op = Q ? Iop_Sal64x2 : Iop_Sal64x1;
3912                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
3913                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3914                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3915                     break;
3916                  default:
3917                     vassert(0);
3918               }
3919            }
3920            if (Q) {
3921               shval = newTemp(Ity_V128);
3922            } else {
3923               shval = newTemp(Ity_I64);
3924            }
3925            /* Only least significant byte from second argument is used.
3926               Copy this byte to the whole vector element. */
3927            assign(shval, binop(op_shrn,
3928                                binop(op_shln,
3929                                       mkexpr(arg_n),
3930                                       mkU8((8 << size) - 8)),
3931                                mkU8((8 << size) - 8)));
3932            for (i = 0; i < size; i++) {
3933               old_shval = shval;
3934               shval = newTemp(Q ? Ity_V128 : Ity_I64);
3935               assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3936                                   mkexpr(old_shval),
3937                                   binop(op_shln,
3938                                         mkexpr(old_shval),
3939                                         mkU8(8 << i))));
3940            }
3941            /* Compute the result */
3942            if (!Q && size == 3 && U) {
3943               assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
3944                                   binop(op,
3945                                         mkexpr(arg_m),
3946                                         unop(Iop_64to8,
3947                                              binop(op_add,
3948                                                    mkexpr(arg_n),
3949                                                    mkexpr(imm_val)))),
3950                                   binop(Q ? Iop_AndV128 : Iop_And64,
3951                                         mkexpr(imm_val),
3952                                         binop(cmp_gt,
3953                                               Q ? mkU128(0) : mkU64(0),
3954                                               mkexpr(arg_n)))));
3955               assign(res, binop(op_add,
3956                                 binop(op,
3957                                       mkexpr(arg_m),
3958                                       unop(Iop_64to8, mkexpr(arg_n))),
3959                                 mkexpr(round)));
3960            } else {
3961               assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
3962                                   binop(op,
3963                                         mkexpr(arg_m),
3964                                         binop(op_add,
3965                                               mkexpr(arg_n),
3966                                               mkexpr(imm_val))),
3967                                   binop(Q ? Iop_AndV128 : Iop_And64,
3968                                         mkexpr(imm_val),
3969                                         binop(cmp_gt,
3970                                               Q ? mkU128(0) : mkU64(0),
3971                                               mkexpr(arg_n)))));
3972               assign(res, binop(op_add,
3973                                 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
3974                                 mkexpr(round)));
3975            }
3976            DIP("vrshl.%c%u %c%u, %c%u, %c%u\n",
3977                U ? 'u' : 's', 8 << size,
3978                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3979                nreg);
3980         } else {
3981            /* VQRSHL */
3982            IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_add;
3983            IRTemp tmp, shval, mask, old_shval, imm_val, round;
3984            UInt i;
3985            ULong esize, imm;
3986            cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
3987            cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3988            imm = 1L;
3989            switch (size) {
3990               case 0: imm = (imm <<  8) | imm; /* fall through */
3991               case 1: imm = (imm << 16) | imm; /* fall through */
3992               case 2: imm = (imm << 32) | imm; /* fall through */
3993               case 3: break;
3994               default: vassert(0);
3995            }
3996            imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
3997            round = newTemp(Q ? Ity_V128 : Ity_I64);
3998            assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
3999            if (U) {
4000               switch (size) {
4001                  case 0:
4002                     op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
4003                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
4004                     op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
4005                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
4006                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
4007                     break;
4008                  case 1:
4009                     op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
4010                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
4011                     op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
4012                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
4013                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
4014                     break;
4015                  case 2:
4016                     op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
4017                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
4018                     op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
4019                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
4020                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
4021                     break;
4022                  case 3:
4023                     op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
4024                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
4025                     op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
4026                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
4027                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
4028                     break;
4029                  default:
4030                     vassert(0);
4031               }
4032            } else {
4033               switch (size) {
4034                  case 0:
4035                     op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
4036                     op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
4037                     op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
4038                     op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
4039                     op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
4040                     break;
4041                  case 1:
4042                     op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
4043                     op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
4044                     op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
4045                     op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
4046                     op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
4047                     break;
4048                  case 2:
4049                     op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
4050                     op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
4051                     op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
4052                     op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
4053                     op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
4054                     break;
4055                  case 3:
4056                     op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
4057                     op_add = Q ? Iop_Add64x2 : Iop_Add64;
4058                     op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
4059                     op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
4060                     op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
4061                     break;
4062                  default:
4063                     vassert(0);
4064               }
4065            }
4066            if (Q) {
4067               tmp = newTemp(Ity_V128);
4068               shval = newTemp(Ity_V128);
4069               mask = newTemp(Ity_V128);
4070            } else {
4071               tmp = newTemp(Ity_I64);
4072               shval = newTemp(Ity_I64);
4073               mask = newTemp(Ity_I64);
4074            }
4075            /* Only least significant byte from second argument is used.
4076               Copy this byte to the whole vector element. */
4077            assign(shval, binop(op_shrn,
4078                                binop(op_shln,
4079                                       mkexpr(arg_n),
4080                                       mkU8((8 << size) - 8)),
4081                                mkU8((8 << size) - 8)));
4082            for (i = 0; i < size; i++) {
4083               old_shval = shval;
4084               shval = newTemp(Q ? Ity_V128 : Ity_I64);
4085               assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
4086                                   mkexpr(old_shval),
4087                                   binop(op_shln,
4088                                         mkexpr(old_shval),
4089                                         mkU8(8 << i))));
4090            }
4091            /* Compute the result */
4092            assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
4093                                binop(op,
4094                                      mkexpr(arg_m),
4095                                      binop(op_add,
4096                                            mkexpr(arg_n),
4097                                            mkexpr(imm_val))),
4098                                binop(Q ? Iop_AndV128 : Iop_And64,
4099                                      mkexpr(imm_val),
4100                                      binop(cmp_gt,
4101                                            Q ? mkU128(0) : mkU64(0),
4102                                            mkexpr(arg_n)))));
4103            assign(res, binop(op_add,
4104                              binop(op, mkexpr(arg_m), mkexpr(arg_n)),
4105                              mkexpr(round)));
4106            /* If the shift is greater than or equal to the element size and the
4107               element is non-zero, then the QC flag should be set. */
4108            esize = (8 << size) - 1;
4109            esize = (esize <<  8) | esize;
4110            esize = (esize << 16) | esize;
4111            esize = (esize << 32) | esize;
4112            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4113                             binop(cmp_gt, mkexpr(shval),
4114                                           Q ? mkU128(esize) : mkU64(esize)),
4115                             unop(cmp_neq, mkexpr(arg_m))),
4116                       Q ? mkU128(0) : mkU64(0),
4117                       Q, condT);
4118            /* Otherwise the QC flag should be set if the shift value is positive
4119               and the result, right-shifted by the same value, is not equal to
4120               the left argument. */
4121            assign(mask, binop(cmp_gt, mkexpr(shval),
4122                               Q ? mkU128(0) : mkU64(0)));
4123            if (!Q && size == 3)
4124               assign(tmp, binop(op_rev, mkexpr(res),
4125                                         unop(Iop_64to8, mkexpr(arg_n))));
4126            else
4127               assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
4128            setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4129                             mkexpr(tmp), mkexpr(mask)),
4130                       binop(Q ? Iop_AndV128 : Iop_And64,
4131                             mkexpr(arg_m), mkexpr(mask)),
4132                       Q, condT);
4133            DIP("vqrshl.%c%u %c%u, %c%u, %c%u\n",
4134                U ? 'u' : 's', 8 << size,
4135                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
4136                nreg);
4137         }
4138         break;
4139      case 6:
4140         /* VMAX, VMIN  */
4141         if (B == 0) {
4142            /* VMAX */
4143            IROp op;
4144            if (U == 0) {
4145               switch (size) {
4146                  case 0: op = Q ? Iop_Max8Sx16 : Iop_Max8Sx8; break;
4147                  case 1: op = Q ? Iop_Max16Sx8 : Iop_Max16Sx4; break;
4148                  case 2: op = Q ? Iop_Max32Sx4 : Iop_Max32Sx2; break;
4149                  case 3: return False;
4150                  default: vassert(0);
4151               }
4152            } else {
4153               switch (size) {
4154                  case 0: op = Q ? Iop_Max8Ux16 : Iop_Max8Ux8; break;
4155                  case 1: op = Q ? Iop_Max16Ux8 : Iop_Max16Ux4; break;
4156                  case 2: op = Q ? Iop_Max32Ux4 : Iop_Max32Ux2; break;
4157                  case 3: return False;
4158                  default: vassert(0);
4159               }
4160            }
4161            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4162            DIP("vmax.%c%u %c%u, %c%u, %c%u\n",
4163                U ? 'u' : 's', 8 << size,
4164                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4165                mreg);
4166         } else {
4167            /* VMIN */
4168            IROp op;
4169            if (U == 0) {
4170               switch (size) {
4171                  case 0: op = Q ? Iop_Min8Sx16 : Iop_Min8Sx8; break;
4172                  case 1: op = Q ? Iop_Min16Sx8 : Iop_Min16Sx4; break;
4173                  case 2: op = Q ? Iop_Min32Sx4 : Iop_Min32Sx2; break;
4174                  case 3: return False;
4175                  default: vassert(0);
4176               }
4177            } else {
4178               switch (size) {
4179                  case 0: op = Q ? Iop_Min8Ux16 : Iop_Min8Ux8; break;
4180                  case 1: op = Q ? Iop_Min16Ux8 : Iop_Min16Ux4; break;
4181                  case 2: op = Q ? Iop_Min32Ux4 : Iop_Min32Ux2; break;
4182                  case 3: return False;
4183                  default: vassert(0);
4184               }
4185            }
4186            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4187            DIP("vmin.%c%u %c%u, %c%u, %c%u\n",
4188                U ? 'u' : 's', 8 << size,
4189                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4190                mreg);
4191         }
4192         break;
4193      case 7:
4194         if (B == 0) {
4195            /* VABD */
4196            IROp op_cmp, op_sub;
4197            IRTemp cond;
4198            if ((theInstr >> 23) & 1) {
4199               vpanic("VABDL should not be in dis_neon_data_3same\n");
4200            }
4201            if (Q) {
4202               switch (size) {
4203                  case 0:
4204                     op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
4205                     op_sub = Iop_Sub8x16;
4206                     break;
4207                  case 1:
4208                     op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
4209                     op_sub = Iop_Sub16x8;
4210                     break;
4211                  case 2:
4212                     op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
4213                     op_sub = Iop_Sub32x4;
4214                     break;
4215                  case 3:
4216                     return False;
4217                  default:
4218                     vassert(0);
4219               }
4220            } else {
4221               switch (size) {
4222                  case 0:
4223                     op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4224                     op_sub = Iop_Sub8x8;
4225                     break;
4226                  case 1:
4227                     op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4228                     op_sub = Iop_Sub16x4;
4229                     break;
4230                  case 2:
4231                     op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4232                     op_sub = Iop_Sub32x2;
4233                     break;
4234                  case 3:
4235                     return False;
4236                  default:
4237                     vassert(0);
4238               }
4239            }
4240            if (Q) {
4241               cond = newTemp(Ity_V128);
4242            } else {
4243               cond = newTemp(Ity_I64);
4244            }
4245            assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
4246            assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
4247                              binop(Q ? Iop_AndV128 : Iop_And64,
4248                                    binop(op_sub, mkexpr(arg_n),
4249                                                  mkexpr(arg_m)),
4250                                    mkexpr(cond)),
4251                              binop(Q ? Iop_AndV128 : Iop_And64,
4252                                    binop(op_sub, mkexpr(arg_m),
4253                                                  mkexpr(arg_n)),
4254                                    unop(Q ? Iop_NotV128 : Iop_Not64,
4255                                         mkexpr(cond)))));
4256            DIP("vabd.%c%u %c%u, %c%u, %c%u\n",
4257                U ? 'u' : 's', 8 << size,
4258                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4259                mreg);
4260         } else {
4261            /* VABA */
4262            IROp op_cmp, op_sub, op_add;
4263            IRTemp cond, acc, tmp;
4264            if ((theInstr >> 23) & 1) {
4265               vpanic("VABAL should not be in dis_neon_data_3same");
4266            }
4267            if (Q) {
4268               switch (size) {
4269                  case 0:
4270                     op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
4271                     op_sub = Iop_Sub8x16;
4272                     op_add = Iop_Add8x16;
4273                     break;
4274                  case 1:
4275                     op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
4276                     op_sub = Iop_Sub16x8;
4277                     op_add = Iop_Add16x8;
4278                     break;
4279                  case 2:
4280                     op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
4281                     op_sub = Iop_Sub32x4;
4282                     op_add = Iop_Add32x4;
4283                     break;
4284                  case 3:
4285                     return False;
4286                  default:
4287                     vassert(0);
4288               }
4289            } else {
4290               switch (size) {
4291                  case 0:
4292                     op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4293                     op_sub = Iop_Sub8x8;
4294                     op_add = Iop_Add8x8;
4295                     break;
4296                  case 1:
4297                     op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4298                     op_sub = Iop_Sub16x4;
4299                     op_add = Iop_Add16x4;
4300                     break;
4301                  case 2:
4302                     op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4303                     op_sub = Iop_Sub32x2;
4304                     op_add = Iop_Add32x2;
4305                     break;
4306                  case 3:
4307                     return False;
4308                  default:
4309                     vassert(0);
4310               }
4311            }
4312            if (Q) {
4313               cond = newTemp(Ity_V128);
4314               acc = newTemp(Ity_V128);
4315               tmp = newTemp(Ity_V128);
4316               assign(acc, getQReg(dreg));
4317            } else {
4318               cond = newTemp(Ity_I64);
4319               acc = newTemp(Ity_I64);
4320               tmp = newTemp(Ity_I64);
4321               assign(acc, getDRegI64(dreg));
4322            }
4323            assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
4324            assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
4325                              binop(Q ? Iop_AndV128 : Iop_And64,
4326                                    binop(op_sub, mkexpr(arg_n),
4327                                                  mkexpr(arg_m)),
4328                                    mkexpr(cond)),
4329                              binop(Q ? Iop_AndV128 : Iop_And64,
4330                                    binop(op_sub, mkexpr(arg_m),
4331                                                  mkexpr(arg_n)),
4332                                    unop(Q ? Iop_NotV128 : Iop_Not64,
4333                                         mkexpr(cond)))));
4334            assign(res, binop(op_add, mkexpr(acc), mkexpr(tmp)));
4335            DIP("vaba.%c%u %c%u, %c%u, %c%u\n",
4336                U ? 'u' : 's', 8 << size,
4337                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4338                mreg);
4339         }
4340         break;
4341      case 8:
4342         if (B == 0) {
4343            IROp op;
4344            if (U == 0) {
4345               /* VADD  */
4346               switch (size) {
4347                  case 0: op = Q ? Iop_Add8x16 : Iop_Add8x8; break;
4348                  case 1: op = Q ? Iop_Add16x8 : Iop_Add16x4; break;
4349                  case 2: op = Q ? Iop_Add32x4 : Iop_Add32x2; break;
4350                  case 3: op = Q ? Iop_Add64x2 : Iop_Add64; break;
4351                  default: vassert(0);
4352               }
4353               DIP("vadd.i%u %c%u, %c%u, %c%u\n",
4354                   8 << size, Q ? 'q' : 'd',
4355                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4356            } else {
4357               /* VSUB  */
4358               switch (size) {
4359                  case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
4360                  case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
4361                  case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
4362                  case 3: op = Q ? Iop_Sub64x2 : Iop_Sub64; break;
4363                  default: vassert(0);
4364               }
4365               DIP("vsub.i%u %c%u, %c%u, %c%u\n",
4366                   8 << size, Q ? 'q' : 'd',
4367                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4368            }
4369            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4370         } else {
4371            IROp op;
4372            switch (size) {
4373               case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
4374               case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
4375               case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
4376               case 3: op = Q ? Iop_CmpNEZ64x2 : Iop_CmpwNEZ64; break;
4377               default: vassert(0);
4378            }
4379            if (U == 0) {
4380               /* VTST  */
4381               assign(res, unop(op, binop(Q ? Iop_AndV128 : Iop_And64,
4382                                          mkexpr(arg_n),
4383                                          mkexpr(arg_m))));
4384               DIP("vtst.%u %c%u, %c%u, %c%u\n",
4385                   8 << size, Q ? 'q' : 'd',
4386                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4387            } else {
4388               /* VCEQ  */
4389               assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
4390                                unop(op,
4391                                     binop(Q ? Iop_XorV128 : Iop_Xor64,
4392                                           mkexpr(arg_n),
4393                                           mkexpr(arg_m)))));
4394               DIP("vceq.i%u %c%u, %c%u, %c%u\n",
4395                   8 << size, Q ? 'q' : 'd',
4396                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4397            }
4398         }
4399         break;
4400      case 9:
4401         if (B == 0) {
4402            /* VMLA, VMLS (integer) */
4403            IROp op, op2;
4404            UInt P = (theInstr >> 24) & 1;
4405            if (P) {
4406               switch (size) {
4407                  case 0:
4408                     op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
4409                     op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
4410                     break;
4411                  case 1:
4412                     op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
4413                     op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
4414                     break;
4415                  case 2:
4416                     op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
4417                     op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
4418                     break;
4419                  case 3:
4420                     return False;
4421                  default:
4422                     vassert(0);
4423               }
4424            } else {
4425               switch (size) {
4426                  case 0:
4427                     op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
4428                     op2 = Q ? Iop_Add8x16 : Iop_Add8x8;
4429                     break;
4430                  case 1:
4431                     op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
4432                     op2 = Q ? Iop_Add16x8 : Iop_Add16x4;
4433                     break;
4434                  case 2:
4435                     op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
4436                     op2 = Q ? Iop_Add32x4 : Iop_Add32x2;
4437                     break;
4438                  case 3:
4439                     return False;
4440                  default:
4441                     vassert(0);
4442               }
4443            }
4444            assign(res, binop(op2,
4445                              Q ? getQReg(dreg) : getDRegI64(dreg),
4446                              binop(op, mkexpr(arg_n), mkexpr(arg_m))));
4447            DIP("vml%c.i%u %c%u, %c%u, %c%u\n",
4448                P ? 's' : 'a', 8 << size,
4449                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4450                mreg);
4451         } else {
4452            /* VMUL */
4453            IROp op;
4454            UInt P = (theInstr >> 24) & 1;
4455            if (P) {
4456               switch (size) {
4457                  case 0:
4458                     op = Q ? Iop_PolynomialMul8x16 : Iop_PolynomialMul8x8;
4459                     break;
4460                  case 1: case 2: case 3: return False;
4461                  default: vassert(0);
4462               }
4463            } else {
4464               switch (size) {
4465                  case 0: op = Q ? Iop_Mul8x16 : Iop_Mul8x8; break;
4466                  case 1: op = Q ? Iop_Mul16x8 : Iop_Mul16x4; break;
4467                  case 2: op = Q ? Iop_Mul32x4 : Iop_Mul32x2; break;
4468                  case 3: return False;
4469                  default: vassert(0);
4470               }
4471            }
4472            assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4473            DIP("vmul.%c%u %c%u, %c%u, %c%u\n",
4474                P ? 'p' : 'i', 8 << size,
4475                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4476                mreg);
4477         }
4478         break;
4479      case 10: {
4480         /* VPMAX, VPMIN  */
4481         UInt P = (theInstr >> 4) & 1;
4482         IROp op;
4483         if (Q)
4484            return False;
4485         if (P) {
4486            switch (size) {
4487               case 0: op = U ? Iop_PwMin8Ux8  : Iop_PwMin8Sx8; break;
4488               case 1: op = U ? Iop_PwMin16Ux4 : Iop_PwMin16Sx4; break;
4489               case 2: op = U ? Iop_PwMin32Ux2 : Iop_PwMin32Sx2; break;
4490               case 3: return False;
4491               default: vassert(0);
4492            }
4493         } else {
4494            switch (size) {
4495               case 0: op = U ? Iop_PwMax8Ux8  : Iop_PwMax8Sx8; break;
4496               case 1: op = U ? Iop_PwMax16Ux4 : Iop_PwMax16Sx4; break;
4497               case 2: op = U ? Iop_PwMax32Ux2 : Iop_PwMax32Sx2; break;
4498               case 3: return False;
4499               default: vassert(0);
4500            }
4501         }
4502         assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4503         DIP("vp%s.%c%u %c%u, %c%u, %c%u\n",
4504             P ? "min" : "max", U ? 'u' : 's',
4505             8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
4506             Q ? 'q' : 'd', mreg);
4507         break;
4508      }
4509      case 11:
4510         if (B == 0) {
4511            if (U == 0) {
4512               /* VQDMULH  */
4513               IROp op ,op2;
4514               ULong imm;
4515               switch (size) {
4516                  case 0: case 3:
4517                     return False;
4518                  case 1:
4519                     op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
4520                     op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
4521                     imm = 1LL << 15;
4522                     imm = (imm << 16) | imm;
4523                     imm = (imm << 32) | imm;
4524                     break;
4525                  case 2:
4526                     op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
4527                     op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
4528                     imm = 1LL << 31;
4529                     imm = (imm << 32) | imm;
4530                     break;
4531                  default:
4532                     vassert(0);
4533               }
4534               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4535               setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4536                                binop(op2, mkexpr(arg_n),
4537                                           Q ? mkU128(imm) : mkU64(imm)),
4538                                binop(op2, mkexpr(arg_m),
4539                                           Q ? mkU128(imm) : mkU64(imm))),
4540                          Q ? mkU128(0) : mkU64(0),
4541                          Q, condT);
4542               DIP("vqdmulh.s%u %c%u, %c%u, %c%u\n",
4543                   8 << size, Q ? 'q' : 'd',
4544                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4545            } else {
4546               /* VQRDMULH */
4547               IROp op ,op2;
4548               ULong imm;
4549               switch(size) {
4550                  case 0: case 3:
4551                     return False;
4552                  case 1:
4553                     imm = 1LL << 15;
4554                     imm = (imm << 16) | imm;
4555                     imm = (imm << 32) | imm;
4556                     op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
4557                     op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
4558                     break;
4559                  case 2:
4560                     imm = 1LL << 31;
4561                     imm = (imm << 32) | imm;
4562                     op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
4563                     op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
4564                     break;
4565                  default:
4566                     vassert(0);
4567               }
4568               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4569               setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4570                                binop(op2, mkexpr(arg_n),
4571                                           Q ? mkU128(imm) : mkU64(imm)),
4572                                binop(op2, mkexpr(arg_m),
4573                                           Q ? mkU128(imm) : mkU64(imm))),
4574                          Q ? mkU128(0) : mkU64(0),
4575                          Q, condT);
4576               DIP("vqrdmulh.s%u %c%u, %c%u, %c%u\n",
4577                   8 << size, Q ? 'q' : 'd',
4578                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4579            }
4580         } else {
4581            if (U == 0) {
4582               /* VPADD */
4583               IROp op;
4584               if (Q)
4585                  return False;
4586               switch (size) {
4587                  case 0: op = Q ? Iop_PwAdd8x16 : Iop_PwAdd8x8;  break;
4588                  case 1: op = Q ? Iop_PwAdd16x8 : Iop_PwAdd16x4; break;
4589                  case 2: op = Q ? Iop_PwAdd32x4 : Iop_PwAdd32x2; break;
4590                  case 3: return False;
4591                  default: vassert(0);
4592               }
4593               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4594               DIP("vpadd.i%d %c%u, %c%u, %c%u\n",
4595                   8 << size, Q ? 'q' : 'd',
4596                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4597            }
4598         }
4599         break;
4600      /* Starting from here these are FP SIMD cases */
4601      case 13:
4602         if (B == 0) {
4603            IROp op;
4604            if (U == 0) {
4605               if ((C >> 1) == 0) {
4606                  /* VADD  */
4607                  op = Q ? Iop_Add32Fx4 : Iop_Add32Fx2 ;
4608                  DIP("vadd.f32 %c%u, %c%u, %c%u\n",
4609                      Q ? 'q' : 'd', dreg,
4610                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4611               } else {
4612                  /* VSUB  */
4613                  op = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2 ;
4614                  DIP("vsub.f32 %c%u, %c%u, %c%u\n",
4615                      Q ? 'q' : 'd', dreg,
4616                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4617               }
4618            } else {
4619               if ((C >> 1) == 0) {
4620                  /* VPADD */
4621                  if (Q)
4622                     return False;
4623                  op = Iop_PwAdd32Fx2;
4624                  DIP("vpadd.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4625               } else {
4626                  /* VABD  */
4627                  if (Q) {
4628                     assign(res, unop(Iop_Abs32Fx4,
4629                                      triop(Iop_Sub32Fx4,
4630                                            get_FAKE_roundingmode(),
4631                                            mkexpr(arg_n),
4632                                            mkexpr(arg_m))));
4633                  } else {
4634                     assign(res, unop(Iop_Abs32Fx2,
4635                                      binop(Iop_Sub32Fx2,
4636                                            mkexpr(arg_n),
4637                                            mkexpr(arg_m))));
4638                  }
4639                  DIP("vabd.f32 %c%u, %c%u, %c%u\n",
4640                      Q ? 'q' : 'd', dreg,
4641                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4642                  break;
4643               }
4644            }
4645            assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
4646         } else {
4647            if (U == 0) {
4648               /* VMLA, VMLS  */
4649               IROp op, op2;
4650               UInt P = (theInstr >> 21) & 1;
4651               if (P) {
4652                  switch (size & 1) {
4653                     case 0:
4654                        op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
4655                        op2 = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
4656                        break;
4657                     case 1: return False;
4658                     default: vassert(0);
4659                  }
4660               } else {
4661                  switch (size & 1) {
4662                     case 0:
4663                        op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
4664                        op2 = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
4665                        break;
4666                     case 1: return False;
4667                     default: vassert(0);
4668                  }
4669               }
4670               assign(res, binop_w_fake_RM(
4671                              op2,
4672                              Q ? getQReg(dreg) : getDRegI64(dreg),
4673                              binop_w_fake_RM(op, mkexpr(arg_n),
4674                                                  mkexpr(arg_m))));
4675
4676               DIP("vml%c.f32 %c%u, %c%u, %c%u\n",
4677                   P ? 's' : 'a', Q ? 'q' : 'd',
4678                   dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4679            } else {
4680               /* VMUL  */
4681               IROp op;
4682               if ((C >> 1) != 0)
4683                  return False;
4684               op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2 ;
4685               assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
4686               DIP("vmul.f32 %c%u, %c%u, %c%u\n",
4687                   Q ? 'q' : 'd', dreg,
4688                   Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4689            }
4690         }
4691         break;
4692      case 14:
4693         if (B == 0) {
4694            if (U == 0) {
4695               if ((C >> 1) == 0) {
4696                  /* VCEQ  */
4697                  IROp op;
4698                  if ((theInstr >> 20) & 1)
4699                     return False;
4700                  op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2;
4701                  assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4702                  DIP("vceq.f32 %c%u, %c%u, %c%u\n",
4703                      Q ? 'q' : 'd', dreg,
4704                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4705               } else {
4706                  return False;
4707               }
4708            } else {
4709               if ((C >> 1) == 0) {
4710                  /* VCGE  */
4711                  IROp op;
4712                  if ((theInstr >> 20) & 1)
4713                     return False;
4714                  op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
4715                  assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4716                  DIP("vcge.f32 %c%u, %c%u, %c%u\n",
4717                      Q ? 'q' : 'd', dreg,
4718                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4719               } else {
4720                  /* VCGT  */
4721                  IROp op;
4722                  if ((theInstr >> 20) & 1)
4723                     return False;
4724                  op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
4725                  assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4726                  DIP("vcgt.f32 %c%u, %c%u, %c%u\n",
4727                      Q ? 'q' : 'd', dreg,
4728                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4729               }
4730            }
4731         } else {
4732            if (U == 1) {
4733               /* VACGE, VACGT */
4734               UInt op_bit = (theInstr >> 21) & 1;
4735               IROp op, op2;
4736               op2 = Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2;
4737               if (op_bit) {
4738                  op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
4739                  assign(res, binop(op,
4740                                    unop(op2, mkexpr(arg_n)),
4741                                    unop(op2, mkexpr(arg_m))));
4742               } else {
4743                  op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
4744                  assign(res, binop(op,
4745                                    unop(op2, mkexpr(arg_n)),
4746                                    unop(op2, mkexpr(arg_m))));
4747               }
4748               DIP("vacg%c.f32 %c%u, %c%u, %c%u\n", op_bit ? 't' : 'e',
4749                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
4750                   Q ? 'q' : 'd', mreg);
4751            }
4752         }
4753         break;
4754      case 15:
4755         if (B == 0) {
4756            if (U == 0) {
4757               /* VMAX, VMIN  */
4758               IROp op;
4759               if ((theInstr >> 20) & 1)
4760                  return False;
4761               if ((theInstr >> 21) & 1) {
4762                  op = Q ? Iop_Min32Fx4 : Iop_Min32Fx2;
4763                  DIP("vmin.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4764                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4765               } else {
4766                  op = Q ? Iop_Max32Fx4 : Iop_Max32Fx2;
4767                  DIP("vmax.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4768                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4769               }
4770               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4771            } else {
4772               /* VPMAX, VPMIN   */
4773               IROp op;
4774               if (Q)
4775                  return False;
4776               if ((theInstr >> 20) & 1)
4777                  return False;
4778               if ((theInstr >> 21) & 1) {
4779                  op = Iop_PwMin32Fx2;
4780                  DIP("vpmin.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4781               } else {
4782                  op = Iop_PwMax32Fx2;
4783                  DIP("vpmax.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4784               }
4785               assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4786            }
4787         } else {
4788            if (U == 0) {
4789               if ((C >> 1) == 0) {
4790                  /* VRECPS */
4791                  if ((theInstr >> 20) & 1)
4792                     return False;
4793                  assign(res, binop(Q ? Iop_Recps32Fx4 : Iop_Recps32Fx2,
4794                                    mkexpr(arg_n),
4795                                    mkexpr(arg_m)));
4796                  DIP("vrecps.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4797                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4798               } else {
4799                  /* VRSQRTS  */
4800                  if ((theInstr >> 20) & 1)
4801                     return False;
4802                  assign(res, binop(Q ? Iop_Rsqrts32Fx4 : Iop_Rsqrts32Fx2,
4803                                    mkexpr(arg_n),
4804                                    mkexpr(arg_m)));
4805                  DIP("vrsqrts.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4806                      Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4807               }
4808            }
4809         }
4810         break;
4811   }
4812
4813   if (Q) {
4814      putQReg(dreg, mkexpr(res), condT);
4815   } else {
4816      putDRegI64(dreg, mkexpr(res), condT);
4817   }
4818
4819   return True;
4820}
4821
/* A7.4.2 Three registers of different length

   Decode one NEON "three registers of different length" instruction
   (ARM ARM section A7.4.2) and emit IR for it.  The group covers:
   VADDL/VADDW, VSUBL/VSUBW, V(R)ADDHN, VABAL, V(R)SUBHN, VABDL,
   VMLAL/VMLSL, VQDMLAL/VQDMLSL, VMULL (integer/polynomial) and
   VQDMULL.

   theInstr  the 32-bit instruction word.
   condT     guard temp passed through to putQReg/putDRegI64/setFlag_QC
             -- NOTE(review): presumably IRTemp_INVALID means
             unconditional; confirm against the callers of this
             function.

   Returns True if the instruction was decoded and IR was emitted,
   False if it must be treated as undefined/unallocated. */
static
Bool dis_neon_data_3diff ( UInt theInstr, IRTemp condT )
{
   UInt A = (theInstr >> 8) & 0xf;    /* opcode field, insn bits [11:8] */
   UInt B = (theInstr >> 20) & 3;     /* size field, insn bits [21:20] */
   UInt U = (theInstr >> 24) & 1;     /* unsigned / "round" variant bit */
   UInt P = (theInstr >> 9) & 1;      /* sub-opcode bit (e.g. MLS vs MLA) */
   UInt mreg = get_neon_m_regno(theInstr);
   UInt nreg = get_neon_n_regno(theInstr);
   UInt dreg = get_neon_d_regno(theInstr);
   UInt size = B;
   ULong imm;
   IRTemp res, arg_m, arg_n, cond, tmp;
   IROp cvt, cvt2, cmp, op, op2, sh, add;
   switch (A) {
      case 0: case 1: case 2: case 3:
         /* VADDL, VADDW, VSUBL, VSUBW
            A bit 1 selects subtract vs add; A bit 0 selects the "wide"
            form (Qn operand already wide) vs the "long" form (both D
            operands widened).  Destination is always a Q register, so
            dreg must be even. */
         if (dreg & 1)
            return False;
         dreg >>= 1;
         size = B;
         switch (size) {
            case 0:
               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
               op = (A & 2) ? Iop_Sub16x8 : Iop_Add16x8;
               break;
            case 1:
               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
               op = (A & 2) ? Iop_Sub32x4 : Iop_Add32x4;
               break;
            case 2:
               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
               op = (A & 2) ? Iop_Sub64x2 : Iop_Add64x2;
               break;
            case 3:
               /* size == 3 is unallocated in this group */
               return False;
            default:
               vassert(0);
         }
         arg_n = newTemp(Ity_V128);
         arg_m = newTemp(Ity_V128);
         if (A & 1) {
            /* "W" (wide) form: first operand is a full Q register */
            if (nreg & 1)
               return False;
            nreg >>= 1;
            assign(arg_n, getQReg(nreg));
         } else {
            /* "L" (long) form: widen the D-register operand */
            assign(arg_n, unop(cvt, getDRegI64(nreg)));
         }
         assign(arg_m, unop(cvt, getDRegI64(mreg)));
         putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
                       condT);
         DIP("v%s%c.%c%u q%u, %c%u, d%u\n", (A & 2) ? "sub" : "add",
             (A & 1) ? 'w' : 'l', U ? 'u' : 's', 8 << size, dreg,
             (A & 1) ? 'q' : 'd', nreg, mreg);
         return True;
      case 4:
         /* VADDHN, VRADDHN
            Add two Q registers, optionally add a rounding constant
            (U selects the rounding variant), then take the high half
            of each lane by shifting right and narrowing.  Both source
            registers are Q registers, so mreg/nreg must be even. */
         if (mreg & 1)
            return False;
         mreg >>= 1;
         if (nreg & 1)
            return False;
         nreg >>= 1;
         size = B;
         switch (size) {
            case 0:
               op = Iop_Add16x8;
               cvt = Iop_NarrowUn16to8x8;
               sh = Iop_ShrN16x8;
               /* rounding constant: 1 << 7 replicated into each
                  16-bit lane of a 64-bit half */
               imm = 1U << 7;
               imm = (imm << 16) | imm;
               imm = (imm << 32) | imm;
               break;
            case 1:
               op = Iop_Add32x4;
               cvt = Iop_NarrowUn32to16x4;
               sh = Iop_ShrN32x4;
               /* 1 << 15 replicated into each 32-bit lane */
               imm = 1U << 15;
               imm = (imm << 32) | imm;
               break;
            case 2:
               op = Iop_Add64x2;
               cvt = Iop_NarrowUn64to32x2;
               sh = Iop_ShrN64x2;
               /* 1 << 31 in each 64-bit lane */
               imm = 1U << 31;
               break;
            case 3:
               return False;
            default:
               vassert(0);
         }
         tmp = newTemp(Ity_V128);
         res = newTemp(Ity_V128);
         assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
         if (U) {
            /* VRADDHN: add the rounding constant before narrowing */
            assign(res, binop(op, mkexpr(tmp),
                     binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
         } else {
            assign(res, mkexpr(tmp));
         }
         putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
                    condT);
         DIP("v%saddhn.i%u d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
             nreg, mreg);
         return True;
      case 5:
         /* VABAL: absolute difference and accumulate, long.
            |n - m| is computed by widening both operands, forming both
            n-m and m-n, and selecting per lane via a widened n>m
            compare mask; the result is then added into Qd. */
         if (!((theInstr >> 23) & 1)) {
            /* bit 23 clear would be the same-length VABA encoding,
               which is handled elsewhere */
            vpanic("VABA should not be in dis_neon_data_3diff\n");
         }
         if (dreg & 1)
            return False;
         dreg >>= 1;
         switch (size) {
            case 0:
               cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
               /* mask widening is always signed: compare lanes are
                  all-ones/all-zeroes, so sign-extension keeps them so */
               cvt2 = Iop_Widen8Sto16x8;
               op = Iop_Sub16x8;
               op2 = Iop_Add16x8;
               break;
            case 1:
               cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
               cvt2 = Iop_Widen16Sto32x4;
               op = Iop_Sub32x4;
               op2 = Iop_Add32x4;
               break;
            case 2:
               cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
               cvt2 = Iop_Widen32Sto64x2;
               op = Iop_Sub64x2;
               op2 = Iop_Add64x2;
               break;
            case 3:
               return False;
            default:
               vassert(0);
         }
         arg_n = newTemp(Ity_V128);
         arg_m = newTemp(Ity_V128);
         cond = newTemp(Ity_V128);
         res = newTemp(Ity_V128);
         assign(arg_n, unop(cvt, getDRegI64(nreg)));
         assign(arg_m, unop(cvt, getDRegI64(mreg)));
         assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
                                            getDRegI64(mreg))));
         /* res = ((n-m) & mask) | ((m-n) & ~mask), accumulated into Qd */
         assign(res, binop(op2,
                           binop(Iop_OrV128,
                                 binop(Iop_AndV128,
                                       binop(op, mkexpr(arg_n), mkexpr(arg_m)),
                                       mkexpr(cond)),
                                 binop(Iop_AndV128,
                                       binop(op, mkexpr(arg_m), mkexpr(arg_n)),
                                       unop(Iop_NotV128, mkexpr(cond)))),
                           getQReg(dreg)));
         putQReg(dreg, mkexpr(res), condT);
         DIP("vabal.%c%u q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
             nreg, mreg);
         return True;
      case 6:
         /* VSUBHN, VRSUBHN: as V(R)ADDHN above but with a subtract;
            op2 (add) is only used to add the rounding constant. */
         if (mreg & 1)
            return False;
         mreg >>= 1;
         if (nreg & 1)
            return False;
         nreg >>= 1;
         size = B;
         switch (size) {
            case 0:
               op = Iop_Sub16x8;
               op2 = Iop_Add16x8;
               cvt = Iop_NarrowUn16to8x8;
               sh = Iop_ShrN16x8;
               imm = 1U << 7;
               imm = (imm << 16) | imm;
               imm = (imm << 32) | imm;
               break;
            case 1:
               op = Iop_Sub32x4;
               op2 = Iop_Add32x4;
               cvt = Iop_NarrowUn32to16x4;
               sh = Iop_ShrN32x4;
               imm = 1U << 15;
               imm = (imm << 32) | imm;
               break;
            case 2:
               op = Iop_Sub64x2;
               op2 = Iop_Add64x2;
               cvt = Iop_NarrowUn64to32x2;
               sh = Iop_ShrN64x2;
               imm = 1U << 31;
               break;
            case 3:
               return False;
            default:
               vassert(0);
         }
         tmp = newTemp(Ity_V128);
         res = newTemp(Ity_V128);
         assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
         if (U) {
            /* VRSUBHN: add the rounding constant before narrowing */
            assign(res, binop(op2, mkexpr(tmp),
                     binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
         } else {
            assign(res, mkexpr(tmp));
         }
         putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
                    condT);
         DIP("v%ssubhn.i%u d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
             nreg, mreg);
         return True;
      case 7:
         /* VABDL: absolute difference, long -- same select-by-mask
            scheme as VABAL above, but without the accumulate. */
         if (!((theInstr >> 23) & 1)) {
            /* NOTE(review): message text probably means "VABD";
               left as-is since it is runtime output */
            vpanic("VABL should not be in dis_neon_data_3diff\n");
         }
         if (dreg & 1)
            return False;
         dreg >>= 1;
         switch (size) {
            case 0:
               cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
               cvt2 = Iop_Widen8Sto16x8;
               op = Iop_Sub16x8;
               break;
            case 1:
               cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
               cvt2 = Iop_Widen16Sto32x4;
               op = Iop_Sub32x4;
               break;
            case 2:
               cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
               cvt2 = Iop_Widen32Sto64x2;
               op = Iop_Sub64x2;
               break;
            case 3:
               return False;
            default:
               vassert(0);
         }
         arg_n = newTemp(Ity_V128);
         arg_m = newTemp(Ity_V128);
         cond = newTemp(Ity_V128);
         res = newTemp(Ity_V128);
         assign(arg_n, unop(cvt, getDRegI64(nreg)));
         assign(arg_m, unop(cvt, getDRegI64(mreg)));
         assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
                                            getDRegI64(mreg))));
         /* res = ((n-m) & mask) | ((m-n) & ~mask) */
         assign(res, binop(Iop_OrV128,
                           binop(Iop_AndV128,
                                 binop(op, mkexpr(arg_n), mkexpr(arg_m)),
                                 mkexpr(cond)),
                           binop(Iop_AndV128,
                                 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
                                 unop(Iop_NotV128, mkexpr(cond)))));
         putQReg(dreg, mkexpr(res), condT);
         DIP("vabdl.%c%u q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
             nreg, mreg);
         return True;
      case 8:
      case 10:
         /* VMLAL, VMLSL (integer): widening multiply, then accumulate
            into (P=0, add) or subtract from (P=1) the Q destination. */
         if (dreg & 1)
            return False;
         dreg >>= 1;
         size = B;
         switch (size) {
            case 0:
               op = U ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
               op2 = P ? Iop_Sub16x8 : Iop_Add16x8;
               break;
            case 1:
               op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
               op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
               break;
            case 2:
               op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
               op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
               break;
            case 3:
               return False;
            default:
               vassert(0);
         }
         res = newTemp(Ity_V128);
         assign(res, binop(op, getDRegI64(nreg),getDRegI64(mreg)));
         putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
         DIP("vml%cl.%c%u q%u, d%u, d%u\n", P ? 's' : 'a', U ? 'u' : 's',
             8 << size, dreg, nreg, mreg);
         return True;
      case 9:
      case 11:
         /* VQDMLAL, VQDMLSL: saturating doubling multiply, then
            saturating accumulate (P=0) / subtract (P=1).  Signed only,
            so U must be clear.  QC is set both when the accumulate
            saturates and for the 0x8000*0x8000 (resp. 0x80000000
            squared) special case, detected by comparing both sources
            against 'imm' (the minimum-negative pattern per lane). */
         if (U)
            return False;
         if (dreg & 1)
            return False;
         dreg >>= 1;
         size = B;
         switch (size) {
            case 0: case 3:
               return False;
            case 1:
               op = Iop_QDMulLong16Sx4;
               cmp = Iop_CmpEQ16x4;
               add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
               op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
               /* 0x8000 replicated into each 16-bit lane */
               imm = 1LL << 15;
               imm = (imm << 16) | imm;
               imm = (imm << 32) | imm;
               break;
            case 2:
               op = Iop_QDMulLong32Sx2;
               cmp = Iop_CmpEQ32x2;
               add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
               op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
               /* 0x80000000 replicated into each 32-bit lane */
               imm = 1LL << 31;
               imm = (imm << 32) | imm;
               break;
            default:
               vassert(0);
         }
         res = newTemp(Ity_V128);
         tmp = newTemp(Ity_V128);
         assign(res, binop(op, getDRegI64(nreg), getDRegI64(mreg)));
         /* tmp = non-saturating result; compare against the saturating
            result to detect accumulate saturation */
         assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
         setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
                    True, condT);
         setFlag_QC(binop(Iop_And64,
                          binop(cmp, getDRegI64(nreg), mkU64(imm)),
                          binop(cmp, getDRegI64(mreg), mkU64(imm))),
                    mkU64(0),
                    False, condT);
         putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
         DIP("vqdml%cl.s%u q%u, d%u, d%u\n", P ? 's' : 'a', 8 << size, dreg,
             nreg, mreg);
         return True;
      case 12:
      case 14:
         /* VMULL (integer or polynomial): widening multiply with no
            accumulate; P selects the polynomial (carry-less) variant,
            which only exists for 8-bit lanes. */
         if (dreg & 1)
            return False;
         dreg >>= 1;
         size = B;
         switch (size) {
            case 0:
               op = (U) ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
               if (P)
                  op = Iop_PolynomialMull8x8;
               break;
            case 1:
               op = (U) ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
               break;
            case 2:
               op = (U) ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
               break;
            default:
               vassert(0);
         }
         putQReg(dreg, binop(op, getDRegI64(nreg),
                                 getDRegI64(mreg)), condT);
         DIP("vmull.%c%u q%u, d%u, d%u\n", P ? 'p' : (U ? 'u' : 's'),
               8 << size, dreg, nreg, mreg);
         return True;
      case 13:
         /* VQDMULL: saturating doubling multiply, long; signed only.
            QC is set for the minimum-negative * minimum-negative case,
            detected as in VQDMLAL above. */
         if (U)
            return False;
         if (dreg & 1)
            return False;
         dreg >>= 1;
         size = B;
         switch (size) {
            case 0:
            case 3:
               return False;
            case 1:
               op = Iop_QDMulLong16Sx4;
               op2 = Iop_CmpEQ16x4;
               imm = 1LL << 15;
               imm = (imm << 16) | imm;
               imm = (imm << 32) | imm;
               break;
            case 2:
               op = Iop_QDMulLong32Sx2;
               op2 = Iop_CmpEQ32x2;
               imm = 1LL << 31;
               imm = (imm << 32) | imm;
               break;
            default:
               vassert(0);
         }
         putQReg(dreg, binop(op, getDRegI64(nreg), getDRegI64(mreg)),
               condT);
         setFlag_QC(binop(Iop_And64,
                          binop(op2, getDRegI64(nreg), mkU64(imm)),
                          binop(op2, getDRegI64(mreg), mkU64(imm))),
                    mkU64(0),
                    False, condT);
         DIP("vqdmull.s%u q%u, d%u, d%u\n", 8 << size, dreg, nreg, mreg);
         return True;
      default:
         return False;
   }
   /* not reached */
   return False;
}
5238
5239/* A7.4.3 Two registers and a scalar */
5240static
5241Bool dis_neon_data_2reg_and_scalar ( UInt theInstr, IRTemp condT )
5242{
5243#  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
5244   UInt U = INSN(24,24);
5245   UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
5246   UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
5247   UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
5248   UInt size = INSN(21,20);
5249   UInt index;
5250   UInt Q = INSN(24,24);
5251
5252   if (INSN(27,25) != 1 || INSN(23,23) != 1
5253       || INSN(6,6) != 1 || INSN(4,4) != 0)
5254      return False;
5255
5256   /* VMLA, VMLS (scalar)  */
5257   if ((INSN(11,8) & BITS4(1,0,1,0)) == BITS4(0,0,0,0)) {
5258      IRTemp res, arg_m, arg_n;
5259      IROp dup, get, op, op2, add, sub;
5260      if (Q) {
5261         if ((dreg & 1) || (nreg & 1))
5262            return False;
5263         dreg >>= 1;
5264         nreg >>= 1;
5265         res = newTemp(Ity_V128);
5266         arg_m = newTemp(Ity_V128);
5267         arg_n = newTemp(Ity_V128);
5268         assign(arg_n, getQReg(nreg));
5269         switch(size) {
5270            case 1:
5271               dup = Iop_Dup16x8;
5272               get = Iop_GetElem16x4;
5273               index = mreg >> 3;
5274               mreg &= 7;
5275               break;
5276            case 2:
5277               dup = Iop_Dup32x4;
5278               get = Iop_GetElem32x2;
5279               index = mreg >> 4;
5280               mreg &= 0xf;
5281               break;
5282            case 0:
5283            case 3:
5284               return False;
5285            default:
5286               vassert(0);
5287         }
5288         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5289      } else {
5290         res = newTemp(Ity_I64);
5291         arg_m = newTemp(Ity_I64);
5292         arg_n = newTemp(Ity_I64);
5293         assign(arg_n, getDRegI64(nreg));
5294         switch(size) {
5295            case 1:
5296               dup = Iop_Dup16x4;
5297               get = Iop_GetElem16x4;
5298               index = mreg >> 3;
5299               mreg &= 7;
5300               break;
5301            case 2:
5302               dup = Iop_Dup32x2;
5303               get = Iop_GetElem32x2;
5304               index = mreg >> 4;
5305               mreg &= 0xf;
5306               break;
5307            case 0:
5308            case 3:
5309               return False;
5310            default:
5311               vassert(0);
5312         }
5313         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5314      }
5315      if (INSN(8,8)) {
5316         switch (size) {
5317            case 2:
5318               op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
5319               add = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
5320               sub = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
5321               break;
5322            case 0:
5323            case 1:
5324            case 3:
5325               return False;
5326            default:
5327               vassert(0);
5328         }
5329      } else {
5330         switch (size) {
5331            case 1:
5332               op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
5333               add = Q ? Iop_Add16x8 : Iop_Add16x4;
5334               sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
5335               break;
5336            case 2:
5337               op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
5338               add = Q ? Iop_Add32x4 : Iop_Add32x2;
5339               sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
5340               break;
5341            case 0:
5342            case 3:
5343               return False;
5344            default:
5345               vassert(0);
5346         }
5347      }
5348      op2 = INSN(10,10) ? sub : add;
5349      assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
5350      if (Q)
5351         putQReg(dreg, binop_w_fake_RM(op2, getQReg(dreg), mkexpr(res)),
5352                 condT);
5353      else
5354         putDRegI64(dreg, binop(op2, getDRegI64(dreg), mkexpr(res)),
5355                    condT);
5356      DIP("vml%c.%c%u %c%u, %c%u, d%u[%u]\n", INSN(10,10) ? 's' : 'a',
5357            INSN(8,8) ? 'f' : 'i', 8 << size,
5358            Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, mreg, index);
5359      return True;
5360   }
5361
5362   /* VMLAL, VMLSL (scalar)   */
5363   if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,0)) {
5364      IRTemp res, arg_m, arg_n;
5365      IROp dup, get, op, op2, add, sub;
5366      if (dreg & 1)
5367         return False;
5368      dreg >>= 1;
5369      res = newTemp(Ity_V128);
5370      arg_m = newTemp(Ity_I64);
5371      arg_n = newTemp(Ity_I64);
5372      assign(arg_n, getDRegI64(nreg));
5373      switch(size) {
5374         case 1:
5375            dup = Iop_Dup16x4;
5376            get = Iop_GetElem16x4;
5377            index = mreg >> 3;
5378            mreg &= 7;
5379            break;
5380         case 2:
5381            dup = Iop_Dup32x2;
5382            get = Iop_GetElem32x2;
5383            index = mreg >> 4;
5384            mreg &= 0xf;
5385            break;
5386         case 0:
5387         case 3:
5388            return False;
5389         default:
5390            vassert(0);
5391      }
5392      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5393      switch (size) {
5394         case 1:
5395            op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5396            add = Iop_Add32x4;
5397            sub = Iop_Sub32x4;
5398            break;
5399         case 2:
5400            op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5401            add = Iop_Add64x2;
5402            sub = Iop_Sub64x2;
5403            break;
5404         case 0:
5405         case 3:
5406            return False;
5407         default:
5408            vassert(0);
5409      }
5410      op2 = INSN(10,10) ? sub : add;
5411      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5412      putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
5413      DIP("vml%cl.%c%u q%u, d%u, d%u[%u]\n",
5414          INSN(10,10) ? 's' : 'a', U ? 'u' : 's',
5415          8 << size, dreg, nreg, mreg, index);
5416      return True;
5417   }
5418
5419   /* VQDMLAL, VQDMLSL (scalar)  */
5420   if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,1) && !U) {
5421      IRTemp res, arg_m, arg_n, tmp;
5422      IROp dup, get, op, op2, add, cmp;
5423      UInt P = INSN(10,10);
5424      ULong imm;
5425      if (dreg & 1)
5426         return False;
5427      dreg >>= 1;
5428      res = newTemp(Ity_V128);
5429      arg_m = newTemp(Ity_I64);
5430      arg_n = newTemp(Ity_I64);
5431      assign(arg_n, getDRegI64(nreg));
5432      switch(size) {
5433         case 1:
5434            dup = Iop_Dup16x4;
5435            get = Iop_GetElem16x4;
5436            index = mreg >> 3;
5437            mreg &= 7;
5438            break;
5439         case 2:
5440            dup = Iop_Dup32x2;
5441            get = Iop_GetElem32x2;
5442            index = mreg >> 4;
5443            mreg &= 0xf;
5444            break;
5445         case 0:
5446         case 3:
5447            return False;
5448         default:
5449            vassert(0);
5450      }
5451      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5452      switch (size) {
5453         case 0:
5454         case 3:
5455            return False;
5456         case 1:
5457            op = Iop_QDMulLong16Sx4;
5458            cmp = Iop_CmpEQ16x4;
5459            add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
5460            op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5461            imm = 1LL << 15;
5462            imm = (imm << 16) | imm;
5463            imm = (imm << 32) | imm;
5464            break;
5465         case 2:
5466            op = Iop_QDMulLong32Sx2;
5467            cmp = Iop_CmpEQ32x2;
5468            add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
5469            op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5470            imm = 1LL << 31;
5471            imm = (imm << 32) | imm;
5472            break;
5473         default:
5474            vassert(0);
5475      }
5476      res = newTemp(Ity_V128);
5477      tmp = newTemp(Ity_V128);
5478      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5479      assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
5480      setFlag_QC(binop(Iop_And64,
5481                       binop(cmp, mkexpr(arg_n), mkU64(imm)),
5482                       binop(cmp, mkexpr(arg_m), mkU64(imm))),
5483                 mkU64(0),
5484                 False, condT);
5485      setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
5486                 True, condT);
5487      putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
5488      DIP("vqdml%cl.s%u q%u, d%u, d%u[%u]\n", P ? 's' : 'a', 8 << size,
5489          dreg, nreg, mreg, index);
5490      return True;
5491   }
5492
5493   /* VMUL (by scalar)  */
5494   if ((INSN(11,8) & BITS4(1,1,1,0)) == BITS4(1,0,0,0)) {
5495      IRTemp res, arg_m, arg_n;
5496      IROp dup, get, op;
5497      if (Q) {
5498         if ((dreg & 1) || (nreg & 1))
5499            return False;
5500         dreg >>= 1;
5501         nreg >>= 1;
5502         res = newTemp(Ity_V128);
5503         arg_m = newTemp(Ity_V128);
5504         arg_n = newTemp(Ity_V128);
5505         assign(arg_n, getQReg(nreg));
5506         switch(size) {
5507            case 1:
5508               dup = Iop_Dup16x8;
5509               get = Iop_GetElem16x4;
5510               index = mreg >> 3;
5511               mreg &= 7;
5512               break;
5513            case 2:
5514               dup = Iop_Dup32x4;
5515               get = Iop_GetElem32x2;
5516               index = mreg >> 4;
5517               mreg &= 0xf;
5518               break;
5519            case 0:
5520            case 3:
5521               return False;
5522            default:
5523               vassert(0);
5524         }
5525         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5526      } else {
5527         res = newTemp(Ity_I64);
5528         arg_m = newTemp(Ity_I64);
5529         arg_n = newTemp(Ity_I64);
5530         assign(arg_n, getDRegI64(nreg));
5531         switch(size) {
5532            case 1:
5533               dup = Iop_Dup16x4;
5534               get = Iop_GetElem16x4;
5535               index = mreg >> 3;
5536               mreg &= 7;
5537               break;
5538            case 2:
5539               dup = Iop_Dup32x2;
5540               get = Iop_GetElem32x2;
5541               index = mreg >> 4;
5542               mreg &= 0xf;
5543               break;
5544            case 0:
5545            case 3:
5546               return False;
5547            default:
5548               vassert(0);
5549         }
5550         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5551      }
5552      if (INSN(8,8)) {
5553         switch (size) {
5554            case 2:
5555               op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
5556               break;
5557            case 0:
5558            case 1:
5559            case 3:
5560               return False;
5561            default:
5562               vassert(0);
5563         }
5564      } else {
5565         switch (size) {
5566            case 1:
5567               op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
5568               break;
5569            case 2:
5570               op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
5571               break;
5572            case 0:
5573            case 3:
5574               return False;
5575            default:
5576               vassert(0);
5577         }
5578      }
5579      assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
5580      if (Q)
5581         putQReg(dreg, mkexpr(res), condT);
5582      else
5583         putDRegI64(dreg, mkexpr(res), condT);
5584      DIP("vmul.%c%u %c%u, %c%u, d%u[%u]\n", INSN(8,8) ? 'f' : 'i',
5585          8 << size, Q ? 'q' : 'd', dreg,
5586          Q ? 'q' : 'd', nreg, mreg, index);
5587      return True;
5588   }
5589
5590   /* VMULL (scalar) */
5591   if (INSN(11,8) == BITS4(1,0,1,0)) {
5592      IRTemp res, arg_m, arg_n;
5593      IROp dup, get, op;
5594      if (dreg & 1)
5595         return False;
5596      dreg >>= 1;
5597      res = newTemp(Ity_V128);
5598      arg_m = newTemp(Ity_I64);
5599      arg_n = newTemp(Ity_I64);
5600      assign(arg_n, getDRegI64(nreg));
5601      switch(size) {
5602         case 1:
5603            dup = Iop_Dup16x4;
5604            get = Iop_GetElem16x4;
5605            index = mreg >> 3;
5606            mreg &= 7;
5607            break;
5608         case 2:
5609            dup = Iop_Dup32x2;
5610            get = Iop_GetElem32x2;
5611            index = mreg >> 4;
5612            mreg &= 0xf;
5613            break;
5614         case 0:
5615         case 3:
5616            return False;
5617         default:
5618            vassert(0);
5619      }
5620      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5621      switch (size) {
5622         case 1: op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4; break;
5623         case 2: op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2; break;
5624         case 0: case 3: return False;
5625         default: vassert(0);
5626      }
5627      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5628      putQReg(dreg, mkexpr(res), condT);
5629      DIP("vmull.%c%u q%u, d%u, d%u[%u]\n", U ? 'u' : 's', 8 << size, dreg,
5630          nreg, mreg, index);
5631      return True;
5632   }
5633
   /* VQDMULL (by scalar)
      Vector Saturating Doubling Multiply Long: each signed element of
      the Dn source vector is multiplied by one scalar lane of Dm, the
      product is doubled and saturated, and the widened results are
      written to a Q destination.  Only a signed form exists, hence
      the !U requirement. */
   if (INSN(11,8) == BITS4(1,0,1,1) && !U) {
      IROp op ,op2, dup, get;
      ULong imm;
      IRTemp arg_m, arg_n;
      /* The destination is a Q register; 'dreg' arrives in D-register
         numbering, so it must be even, and is halved to get the
         Q-register number. */
      if (dreg & 1)
         return False;
      dreg >>= 1;
      arg_m = newTemp(Ity_I64);
      arg_n = newTemp(Ity_I64);
      assign(arg_n, getDRegI64(nreg));
      /* Split the 5-bit M:Vm field into scalar register number and
         lane index; where the split falls depends on element size. */
      switch(size) {
         case 1:
            /* 16-bit lanes: 2-bit index, scalar register is d0-d7. */
            dup = Iop_Dup16x4;
            get = Iop_GetElem16x4;
            index = mreg >> 3;
            mreg &= 7;
            break;
         case 2:
            /* 32-bit lanes: 1-bit index, scalar register is d0-d15. */
            dup = Iop_Dup32x2;
            get = Iop_GetElem32x2;
            index = mreg >> 4;
            mreg &= 0xf;
            break;
         case 0:
         case 3:
            /* 8-bit and 64-bit element sizes are not allowed. */
            return False;
         default:
            vassert(0);
      }
      /* Broadcast the selected scalar lane across every lane of arg_m,
         so the vector multiply op can be reused unchanged. */
      assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
      switch (size) {
         case 0:
         case 3:
            /* Unreachable: already rejected by the switch above. */
            return False;
         case 1:
            op = Iop_QDMulLong16Sx4;
            op2 = Iop_CmpEQ16x4;
            /* imm = 0x8000 replicated into all four 16-bit lanes: the
               most negative value, the only operand pair for which the
               doubling multiply saturates. */
            imm = 1LL << 15;
            imm = (imm << 16) | imm;
            imm = (imm << 32) | imm;
            break;
         case 2:
            op = Iop_QDMulLong32Sx2;
            op2 = Iop_CmpEQ32x2;
            /* 0x80000000 replicated into both 32-bit lanes. */
            imm = 1LL << 31;
            imm = (imm << 32) | imm;
            break;
         default:
            vassert(0);
      }
      putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
            condT);
      /* Set QC (cumulative saturation) if any lane had BOTH operands
         equal to the most negative value -- the only case in which
         this operation saturates. */
      setFlag_QC(binop(Iop_And64,
                       binop(op2, mkexpr(arg_n), mkU64(imm)),
                       binop(op2, mkexpr(arg_m), mkU64(imm))),
                 mkU64(0),
                 False, condT);
      DIP("vqdmull.s%u q%u, d%u, d%u[%u]\n", 8 << size, dreg, nreg, mreg,
          index);
      return True;
   }
5696
   /* VQDMULH (by scalar)
      Vector Saturating Doubling Multiply Returning High Half: each
      signed element of the Dn/Qn source vector is multiplied by one
      scalar lane of Dm; the doubled, saturated high half of each
      product is written back at the same element width.  Handles both
      the D-register (Q==0) and Q-register (Q==1) forms. */
   if (INSN(11,8) == BITS4(1,1,0,0)) {
      IROp op ,op2, dup, get;
      ULong imm;
      IRTemp res, arg_m, arg_n;
      if (Q) {
         /* Q form: dreg/nreg arrive in D-register numbering and must
            name Q registers, so both must be even. */
         if ((dreg & 1) || (nreg & 1))
            return False;
         dreg >>= 1;
         nreg >>= 1;
         res = newTemp(Ity_V128);
         arg_m = newTemp(Ity_V128);
         arg_n = newTemp(Ity_V128);
         assign(arg_n, getQReg(nreg));
         /* Split the 5-bit M:Vm field into scalar register number and
            lane index.  The scalar source is always a D register, so
            GetElem is on a 64-bit value, while Dup widens the chosen
            lane to fill the whole 128-bit operand. */
         switch(size) {
            case 1:
               dup = Iop_Dup16x8;
               get = Iop_GetElem16x4;
               index = mreg >> 3;
               mreg &= 7;
               break;
            case 2:
               dup = Iop_Dup32x4;
               get = Iop_GetElem32x2;
               index = mreg >> 4;
               mreg &= 0xf;
               break;
            case 0:
            case 3:
               /* 8-bit and 64-bit element sizes are not allowed. */
               return False;
            default:
               vassert(0);
         }
         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
      } else {
         /* D form: 64-bit operands throughout. */
         res = newTemp(Ity_I64);
         arg_m = newTemp(Ity_I64);
         arg_n = newTemp(Ity_I64);
         assign(arg_n, getDRegI64(nreg));
         switch(size) {
            case 1:
               dup = Iop_Dup16x4;
               get = Iop_GetElem16x4;
               index = mreg >> 3;
               mreg &= 7;
               break;
            case 2:
               dup = Iop_Dup32x2;
               get = Iop_GetElem32x2;
               index = mreg >> 4;
               mreg &= 0xf;
               break;
            case 0:
            case 3:
               return False;
            default:
               vassert(0);
         }
         assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
      }
      switch (size) {
         case 0:
         case 3:
            /* Unreachable: already rejected above in both arms. */
            return False;
         case 1:
            op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
            op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
            /* imm = 0x8000 replicated into every 16-bit lane: the most
               negative value, the only operand pair for which the
               doubling multiply saturates.  Built as 64 bits; mkU128
               replicates/extends it for the Q form below. */
            imm = 1LL << 15;
            imm = (imm << 16) | imm;
            imm = (imm << 32) | imm;
            break;
         case 2:
            op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
            op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
            /* 0x80000000 replicated into both 32-bit lanes. */
            imm = 1LL << 31;
            imm = (imm << 32) | imm;
            break;
         default:
            vassert(0);
      }
      assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
      /* Set QC (cumulative saturation) if any lane had BOTH operands
         equal to the most negative value -- the only case in which
         this operation saturates. */
      setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
                       binop(op2, mkexpr(arg_n),
                                  Q ? mkU128(imm) : mkU64(imm)),
                       binop(op2, mkexpr(arg_m),
                             Q ? mkU128(imm) : mkU64(imm))),
                 Q ? mkU128(0) : mkU64(0),
                 Q, condT);
      if (Q)
         putQReg(dreg, mkexpr(res), condT);
      else
         putDRegI64(dreg, mkexpr(res), condT);
      DIP("vqdmulh.s%u %c%u, %c%u, d%u[%u]\n",
          8 << size, Q ? 'q' : 'd', dreg,
          Q ? 'q' : 'd', nreg, mreg, index);
      return True;
   }
5794
5795   /* VQRDMULH (scalar) */
5796   if (INSN(11,