1
2/*---------------------------------------------------------------*/
3/*--- begin                                   host_arm_isel.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2013 OpenWorks LLP
11      info@open-works.net
12
13   NEON support is
14   Copyright (C) 2010-2013 Samsung Electronics
15   contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16              and Kirill Batuzov <batuzovk@ispras.ru>
17
18   This program is free software; you can redistribute it and/or
19   modify it under the terms of the GNU General Public License as
20   published by the Free Software Foundation; either version 2 of the
21   License, or (at your option) any later version.
22
23   This program is distributed in the hope that it will be useful, but
24   WITHOUT ANY WARRANTY; without even the implied warranty of
25   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
26   General Public License for more details.
27
28   You should have received a copy of the GNU General Public License
29   along with this program; if not, write to the Free Software
30   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31   02110-1301, USA.
32
33   The GNU General Public License is contained in the file COPYING.
34*/
35
36#include "libvex_basictypes.h"
37#include "libvex_ir.h"
38#include "libvex.h"
39#include "ir_match.h"
40
41#include "main_util.h"
42#include "main_globals.h"
43#include "host_generic_regs.h"
44#include "host_generic_simd64.h"  // for 32-bit SIMD helpers
45#include "host_arm_defs.h"
46
47
48/*---------------------------------------------------------*/
49/*--- ARMvfp control word stuff                         ---*/
50/*---------------------------------------------------------*/
51
52/* Vex-generated code expects to run with the FPU set as follows: all
53   exceptions masked, round-to-nearest, non-vector mode, with the NZCV
54   flags cleared, and FZ (flush to zero) disabled.  Curiously enough,
55   this corresponds to a FPSCR value of zero.
56
57   fpscr should therefore be zero on entry to Vex-generated code, and
58   should be unchanged at exit.  (Or at least the bottom 28 bits
59   should be zero).
60*/
61
/* The FPSCR value that Vex-generated code expects to run with. */
#define DEFAULT_FPSCR 0
63
64
65/*---------------------------------------------------------*/
66/*--- ISelEnv                                           ---*/
67/*---------------------------------------------------------*/
68
69/* This carries around:
70
71   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
72     might encounter.  This is computed before insn selection starts,
73     and does not change.
74
75   - A mapping from IRTemp to HReg.  This tells the insn selector
76     which virtual register(s) are associated with each IRTemp
77     temporary.  This is computed before insn selection starts, and
78     does not change.  We expect this mapping to map precisely the
79     same set of IRTemps as the type mapping does.
80
81        - vregmap   holds the primary register for the IRTemp.
82        - vregmapHI is only used for 64-bit integer-typed
83             IRTemps.  It holds the identity of a second
84             32-bit virtual HReg, which holds the high half
85             of the value.
86
87   - The code array, that is, the insns selected so far.
88
89   - A counter, for generating new virtual registers.
90
91   - The host hardware capabilities word.  This is set at the start
92     and does not change.
93
94   - A Bool for indicating whether we may generate chain-me
95     instructions for control flow transfers, or whether we must use
96     XAssisted.
97
98   - The maximum guest address of any guest insn in this block.
99     Actually, the address of the highest-addressed byte from any insn
100     in this block.  Is set at the start and does not change.  This is
101     used for detecting jumps which are definitely forward-edges from
102     this block, and therefore can be made (chained) to the fast entry
103     point of the destination, thereby avoiding the destination's
104     event check.
105
106   Note, this is all (well, mostly) host-independent.
107*/
108
109typedef
110   struct {
111      /* Constant -- are set at the start and do not change. */
112      IRTypeEnv*   type_env;
113
114      HReg*        vregmap;
115      HReg*        vregmapHI;
116      Int          n_vregmap;
117
118      UInt         hwcaps;
119
120      Bool         chainingAllowed;
121      Addr64       max_ga;
122
123      /* These are modified as we go along. */
124      HInstrArray* code;
125      Int          vreg_ctr;
126   }
127   ISelEnv;
128
129static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
130{
131   vassert(tmp >= 0);
132   vassert(tmp < env->n_vregmap);
133   return env->vregmap[tmp];
134}
135
136static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
137{
138   vassert(tmp >= 0);
139   vassert(tmp < env->n_vregmap);
140   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
141   *vrLO = env->vregmap[tmp];
142   *vrHI = env->vregmapHI[tmp];
143}
144
145static void addInstr ( ISelEnv* env, ARMInstr* instr )
146{
147   addHInstr(env->code, instr);
148   if (vex_traceflags & VEX_TRACE_VCODE) {
149      ppARMInstr(instr);
150      vex_printf("\n");
151   }
152#if 0
153   if (instr->tag == ARMin_NUnary || instr->tag == ARMin_NBinary
154         || instr->tag == ARMin_NUnaryS || instr->tag == ARMin_NBinaryS
155         || instr->tag == ARMin_NDual || instr->tag == ARMin_NShift) {
156      ppARMInstr(instr);
157      vex_printf("\n");
158   }
159#endif
160}
161
162static HReg newVRegI ( ISelEnv* env )
163{
164   HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
165   env->vreg_ctr++;
166   return reg;
167}
168
169static HReg newVRegD ( ISelEnv* env )
170{
171   HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
172   env->vreg_ctr++;
173   return reg;
174}
175
176static HReg newVRegF ( ISelEnv* env )
177{
178   HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
179   env->vreg_ctr++;
180   return reg;
181}
182
183static HReg newVRegV ( ISelEnv* env )
184{
185   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
186   env->vreg_ctr++;
187   return reg;
188}
189
190/* These are duplicated in guest_arm_toIR.c */
/* Shorthand for IRExpr_Unop: build the IR expression op(a). */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}
195
/* Shorthand for IRExpr_Binop: build the IR expression op(a1, a2). */
static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}
200
/* Shorthand for IRExpr_Binder: build a binder expression numbered
   |binder|. */
static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}
205
206
207/*---------------------------------------------------------*/
208/*--- ISEL: Forward declarations                        ---*/
209/*---------------------------------------------------------*/
210
/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk functions do the real work, but must never be called
   directly.  For each XXX, iselXXX calls its iselXXX_wrk
   counterpart, then checks that all returned registers are virtual.
*/
217static ARMAMode1*  iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
218static ARMAMode1*  iselIntExpr_AMode1     ( ISelEnv* env, IRExpr* e );
219
220static ARMAMode2*  iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
221static ARMAMode2*  iselIntExpr_AMode2     ( ISelEnv* env, IRExpr* e );
222
223static ARMAModeV*  iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
224static ARMAModeV*  iselIntExpr_AModeV     ( ISelEnv* env, IRExpr* e );
225
226static ARMAModeN*  iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
227static ARMAModeN*  iselIntExpr_AModeN     ( ISelEnv* env, IRExpr* e );
228
229static ARMRI84*    iselIntExpr_RI84_wrk
230        ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
231static ARMRI84*    iselIntExpr_RI84
232        ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
233
234static ARMRI5*     iselIntExpr_RI5_wrk    ( ISelEnv* env, IRExpr* e );
235static ARMRI5*     iselIntExpr_RI5        ( ISelEnv* env, IRExpr* e );
236
237static ARMCondCode iselCondCode_wrk       ( ISelEnv* env, IRExpr* e );
238static ARMCondCode iselCondCode           ( ISelEnv* env, IRExpr* e );
239
240static HReg        iselIntExpr_R_wrk      ( ISelEnv* env, IRExpr* e );
241static HReg        iselIntExpr_R          ( ISelEnv* env, IRExpr* e );
242
243static void        iselInt64Expr_wrk      ( HReg* rHi, HReg* rLo,
244                                            ISelEnv* env, IRExpr* e );
245static void        iselInt64Expr          ( HReg* rHi, HReg* rLo,
246                                            ISelEnv* env, IRExpr* e );
247
248static HReg        iselDblExpr_wrk        ( ISelEnv* env, IRExpr* e );
249static HReg        iselDblExpr            ( ISelEnv* env, IRExpr* e );
250
251static HReg        iselFltExpr_wrk        ( ISelEnv* env, IRExpr* e );
252static HReg        iselFltExpr            ( ISelEnv* env, IRExpr* e );
253
254static HReg        iselNeon64Expr_wrk     ( ISelEnv* env, IRExpr* e );
255static HReg        iselNeon64Expr         ( ISelEnv* env, IRExpr* e );
256
257static HReg        iselNeonExpr_wrk       ( ISelEnv* env, IRExpr* e );
258static HReg        iselNeonExpr           ( ISelEnv* env, IRExpr* e );
259
260/*---------------------------------------------------------*/
261/*--- ISEL: Misc helpers                                ---*/
262/*---------------------------------------------------------*/
263
264static UInt ROR32 ( UInt x, UInt sh ) {
265   vassert(sh >= 0 && sh < 32);
266   if (sh == 0)
267      return x;
268   else
269      return (x << (32-sh)) | (x >> sh);
270}
271
272/* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
273   form, and if so return the components. */
274static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
275{
276   UInt i;
277   for (i = 0; i < 16; i++) {
278      if (0 == (u & 0xFFFFFF00)) {
279         *u8 = u;
280         *u4 = i;
281         return True;
282      }
283      u = ROR32(u, 30);
284   }
285   vassert(i == 16);
286   return False;
287}
288
289/* Make a int reg-reg move. */
290static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
291{
292   vassert(hregClass(src) == HRcInt32);
293   vassert(hregClass(dst) == HRcInt32);
294   return ARMInstr_Mov(dst, ARMRI84_R(src));
295}
296
297/* Set the VFP unit's rounding mode to default (round to nearest). */
298static void set_VFP_rounding_default ( ISelEnv* env )
299{
300   /* mov rTmp, #DEFAULT_FPSCR
301      fmxr fpscr, rTmp
302   */
303   HReg rTmp = newVRegI(env);
304   addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
305   addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
306}
307
308/* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
309   expression denoting a value in the range 0 .. 3, indicating a round
310   mode encoded as per type IRRoundingMode.  Set FPSCR to have the
311   same rounding.
312*/
313static
314void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
315{
316   /* This isn't simple, because 'mode' carries an IR rounding
317      encoding, and we need to translate that to an ARMvfp one:
318      The IR encoding:
319         00  to nearest (the default)
320         10  to +infinity
321         01  to -infinity
322         11  to zero
323      The ARMvfp encoding:
324         00  to nearest
325         01  to +infinity
326         10  to -infinity
327         11  to zero
328      Easy enough to do; just swap the two bits.
329   */
330   HReg irrm = iselIntExpr_R(env, mode);
331   HReg tL   = newVRegI(env);
332   HReg tR   = newVRegI(env);
333   HReg t3   = newVRegI(env);
334   /* tL = irrm << 1;
335      tR = irrm >> 1;  if we're lucky, these will issue together
336      tL &= 2;
337      tR &= 1;         ditto
338      t3 = tL | tR;
339      t3 <<= 22;
340      fmxr fpscr, t3
341   */
342   addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
343   addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
344   addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
345   addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
346   addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
347   addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
348   addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
349}
350
351
352/*---------------------------------------------------------*/
353/*--- ISEL: Function call helpers                       ---*/
354/*---------------------------------------------------------*/
355
356/* Used only in doHelperCall.  See big comment in doHelperCall re
357   handling of register-parameter args.  This function figures out
358   whether evaluation of an expression might require use of a fixed
359   register.  If in doubt return True (safe but suboptimal).
360*/
361static
362Bool mightRequireFixedRegs ( IRExpr* e )
363{
364   if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) {
365      // These are always "safe" -- either a copy of r13(sp) in some
366      // arbitrary vreg, or a copy of r8, respectively.
367      return False;
368   }
369   /* Else it's a "normal" expression. */
370   switch (e->tag) {
371   case Iex_RdTmp: case Iex_Const: case Iex_Get:
372      return False;
373   default:
374      return True;
375   }
376}
377
378
379/* Do a complete function call.  |guard| is a Ity_Bit expression
380   indicating whether or not the call happens.  If guard==NULL, the
381   call is unconditional.  |retloc| is set to indicate where the
382   return value is after the call.  The caller (of this fn) must
383   generate code to add |stackAdjustAfterCall| to the stack pointer
384   after the call is done.  Returns True iff it managed to handle this
385   combination of arg/return types, else returns False. */
386
387static
388Bool doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
389                    /*OUT*/RetLoc* retloc,
390                    ISelEnv* env,
391                    IRExpr* guard,
392                    IRCallee* cee, IRType retTy, IRExpr** args )
393{
394   ARMCondCode cc;
395   HReg        argregs[ARM_N_ARGREGS];
396   HReg        tmpregs[ARM_N_ARGREGS];
397   Bool        go_fast;
398   Int         n_args, i, nextArgReg;
399   ULong       target;
400
401   vassert(ARM_N_ARGREGS == 4);
402
403   /* Set default returns.  We'll update them later if needed. */
404   *stackAdjustAfterCall = 0;
405   *retloc               = mk_RetLoc_INVALID();
406
407   /* These are used for cross-checking that IR-level constraints on
408      the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
409   UInt nVECRETs = 0;
410   UInt nBBPTRs  = 0;
411
412   /* Marshal args for a call and do the call.
413
414      This function only deals with a tiny set of possibilities, which
415      cover all helpers in practice.  The restrictions are that only
416      arguments in registers are supported, hence only ARM_N_REGPARMS
417      x 32 integer bits in total can be passed.  In fact the only
418      supported arg types are I32 and I64.
419
420      The return type can be I{64,32} or V128.  In the V128 case, it
421      is expected that |args| will contain the special node
422      IRExpr_VECRET(), in which case this routine generates code to
423      allocate space on the stack for the vector return value.  Since
424      we are not passing any scalars on the stack, it is enough to
425      preallocate the return space before marshalling any arguments,
426      in this case.
427
428      |args| may also contain IRExpr_BBPTR(), in which case the
429      value in r8 is passed as the corresponding argument.
430
431      Generating code which is both efficient and correct when
432      parameters are to be passed in registers is difficult, for the
433      reasons elaborated in detail in comments attached to
434      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
435      of the method described in those comments.
436
437      The problem is split into two cases: the fast scheme and the
438      slow scheme.  In the fast scheme, arguments are computed
439      directly into the target (real) registers.  This is only safe
440      when we can be sure that computation of each argument will not
441      trash any real registers set by computation of any other
442      argument.
443
444      In the slow scheme, all args are first computed into vregs, and
445      once they are all done, they are moved to the relevant real
446      regs.  This always gives correct code, but it also gives a bunch
447      of vreg-to-rreg moves which are usually redundant but are hard
448      for the register allocator to get rid of.
449
450      To decide which scheme to use, all argument expressions are
451      first examined.  If they are all so simple that it is clear they
452      will be evaluated without use of any fixed registers, use the
453      fast scheme, else use the slow scheme.  Note also that only
454      unconditional calls may use the fast scheme, since having to
455      compute a condition expression could itself trash real
456      registers.
457
458      Note this requires being able to examine an expression and
459      determine whether or not evaluation of it might use a fixed
460      register.  That requires knowledge of how the rest of this insn
461      selector works.  Currently just the following 3 are regarded as
462      safe -- hopefully they cover the majority of arguments in
463      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
464   */
465
466   /* Note that the cee->regparms field is meaningless on ARM hosts
467      (since there is only one calling convention) and so we always
468      ignore it. */
469
470   n_args = 0;
471   for (i = 0; args[i]; i++) {
472      IRExpr* arg = args[i];
473      if (UNLIKELY(arg->tag == Iex_VECRET)) {
474         nVECRETs++;
475      } else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
476         nBBPTRs++;
477      }
478      n_args++;
479   }
480
481   argregs[0] = hregARM_R0();
482   argregs[1] = hregARM_R1();
483   argregs[2] = hregARM_R2();
484   argregs[3] = hregARM_R3();
485
486   tmpregs[0] = tmpregs[1] = tmpregs[2] =
487   tmpregs[3] = INVALID_HREG;
488
489   /* First decide which scheme (slow or fast) is to be used.  First
490      assume the fast scheme, and select slow if any contraindications
491      (wow) appear. */
492
493   go_fast = True;
494
495   if (guard) {
496      if (guard->tag == Iex_Const
497          && guard->Iex.Const.con->tag == Ico_U1
498          && guard->Iex.Const.con->Ico.U1 == True) {
499         /* unconditional */
500      } else {
501         /* Not manifestly unconditional -- be conservative. */
502         go_fast = False;
503      }
504   }
505
506   if (go_fast) {
507      for (i = 0; i < n_args; i++) {
508         if (mightRequireFixedRegs(args[i])) {
509            go_fast = False;
510            break;
511         }
512      }
513   }
514
515   if (go_fast) {
516      if (retTy == Ity_V128 || retTy == Ity_V256)
517         go_fast = False;
518   }
519
520   /* At this point the scheme to use has been established.  Generate
521      code to get the arg values into the argument rregs.  If we run
522      out of arg regs, give up. */
523
524   if (go_fast) {
525
526      /* FAST SCHEME */
527      nextArgReg = 0;
528
529      for (i = 0; i < n_args; i++) {
530         IRExpr* arg = args[i];
531
532         IRType  aTy = Ity_INVALID;
533         if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
534            aTy = typeOfIRExpr(env->type_env, arg);
535
536         if (nextArgReg >= ARM_N_ARGREGS)
537            return False; /* out of argregs */
538
539         if (aTy == Ity_I32) {
540            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
541                                        iselIntExpr_R(env, arg) ));
542            nextArgReg++;
543         }
544         else if (aTy == Ity_I64) {
545            /* 64-bit args must be passed in an a reg-pair of the form
546               n:n+1, where n is even.  Hence either r0:r1 or r2:r3.
547               On a little-endian host, the less significant word is
548               passed in the lower-numbered register. */
549            if (nextArgReg & 1) {
550               if (nextArgReg >= ARM_N_ARGREGS)
551                  return False; /* out of argregs */
552               addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
553               nextArgReg++;
554            }
555            if (nextArgReg >= ARM_N_ARGREGS)
556               return False; /* out of argregs */
557            HReg raHi, raLo;
558            iselInt64Expr(&raHi, &raLo, env, arg);
559            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
560            nextArgReg++;
561            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
562            nextArgReg++;
563         }
564         else if (arg->tag == Iex_BBPTR) {
565            vassert(0); //ATC
566            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
567                                        hregARM_R8() ));
568            nextArgReg++;
569         }
570         else if (arg->tag == Iex_VECRET) {
571            // If this happens, it denotes ill-formed IR
572            vassert(0);
573         }
574         else
575            return False; /* unhandled arg type */
576      }
577
578      /* Fast scheme only applies for unconditional calls.  Hence: */
579      cc = ARMcc_AL;
580
581   } else {
582
583      /* SLOW SCHEME; move via temporaries */
584      nextArgReg = 0;
585
586      for (i = 0; i < n_args; i++) {
587         IRExpr* arg = args[i];
588
589         IRType  aTy = Ity_INVALID;
590         if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
591            aTy  = typeOfIRExpr(env->type_env, arg);
592
593         if (nextArgReg >= ARM_N_ARGREGS)
594            return False; /* out of argregs */
595
596         if (aTy == Ity_I32) {
597            tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
598            nextArgReg++;
599         }
600         else if (aTy == Ity_I64) {
601            /* Same comment applies as in the Fast-scheme case. */
602            if (nextArgReg & 1)
603               nextArgReg++;
604            if (nextArgReg + 1 >= ARM_N_ARGREGS)
605               return False; /* out of argregs */
606            HReg raHi, raLo;
607            iselInt64Expr(&raHi, &raLo, env, args[i]);
608            tmpregs[nextArgReg] = raLo;
609            nextArgReg++;
610            tmpregs[nextArgReg] = raHi;
611            nextArgReg++;
612         }
613         else if (arg->tag == Iex_BBPTR) {
614            vassert(0); //ATC
615            tmpregs[nextArgReg] = hregARM_R8();
616            nextArgReg++;
617         }
618         else if (arg->tag == Iex_VECRET) {
619            // If this happens, it denotes ill-formed IR
620            vassert(0);
621         }
622         else
623            return False; /* unhandled arg type */
624      }
625
626      /* Now we can compute the condition.  We can't do it earlier
627         because the argument computations could trash the condition
628         codes.  Be a bit clever to handle the common case where the
629         guard is 1:Bit. */
630      cc = ARMcc_AL;
631      if (guard) {
632         if (guard->tag == Iex_Const
633             && guard->Iex.Const.con->tag == Ico_U1
634             && guard->Iex.Const.con->Ico.U1 == True) {
635            /* unconditional -- do nothing */
636         } else {
637            cc = iselCondCode( env, guard );
638         }
639      }
640
641      /* Move the args to their final destinations. */
642      for (i = 0; i < nextArgReg; i++) {
643         if (hregIsInvalid(tmpregs[i])) { // Skip invalid regs
644            addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
645            continue;
646         }
647         /* None of these insns, including any spill code that might
648            be generated, may alter the condition codes. */
649         addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
650      }
651
652   }
653
654   /* Should be assured by checks above */
655   vassert(nextArgReg <= ARM_N_ARGREGS);
656
657   /* Do final checks, set the return values, and generate the call
658      instruction proper. */
659   vassert(nBBPTRs == 0 || nBBPTRs == 1);
660   vassert(nVECRETs == (retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0);
661   vassert(*stackAdjustAfterCall == 0);
662   vassert(is_RetLoc_INVALID(*retloc));
663   switch (retTy) {
664         case Ity_INVALID:
665            /* Function doesn't return a value. */
666            *retloc = mk_RetLoc_simple(RLPri_None);
667            break;
668         case Ity_I64:
669            *retloc = mk_RetLoc_simple(RLPri_2Int);
670            break;
671         case Ity_I32: case Ity_I16: case Ity_I8:
672            *retloc = mk_RetLoc_simple(RLPri_Int);
673            break;
674         case Ity_V128:
675            vassert(0); // ATC
676            *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
677            *stackAdjustAfterCall = 16;
678            break;
679         case Ity_V256:
680            vassert(0); // ATC
681            *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
682            *stackAdjustAfterCall = 32;
683            break;
684         default:
685            /* IR can denote other possible return types, but we don't
686               handle those here. */
687           vassert(0);
688   }
689
690   /* Finally, generate the call itself.  This needs the *retloc value
691      set in the switch above, which is why it's at the end. */
692
693   /* nextArgReg doles out argument registers.  Since these are
694      assigned in the order r0, r1, r2, r3, its numeric value at this
695      point, which must be between 0 and 4 inclusive, is going to be
696      equal to the number of arg regs in use for the call.  Hence bake
697      that number into the call (we'll need to know it when doing
698      register allocation, to know what regs the call reads.)
699
700      There is a bit of a twist -- harmless but worth recording.
701      Suppose the arg types are (Ity_I32, Ity_I64).  Then we will have
702      the first arg in r0 and the second in r3:r2, but r1 isn't used.
703      We nevertheless have nextArgReg==4 and bake that into the call
704      instruction.  This will mean the register allocator wil believe
705      this insn reads r1 when in fact it doesn't.  But that's
706      harmless; it just artificially extends the live range of r1
707      unnecessarily.  The best fix would be to put into the
708      instruction, a bitmask indicating which of r0/1/2/3 carry live
709      values.  But that's too much hassle. */
710
711   target = (HWord)Ptr_to_ULong(cee->addr);
712   addInstr(env, ARMInstr_Call( cc, target, nextArgReg, *retloc ));
713
714   return True; /* success */
715}
716
717
718/*---------------------------------------------------------*/
719/*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
720/*---------------------------------------------------------*/
721
722/* Select insns for an integer-typed expression, and add them to the
723   code list.  Return a reg holding the result.  This reg will be a
724   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
725   want to modify it, ask for a new vreg, copy it in there, and modify
726   the copy.  The register allocator will do its best to map both
727   vregs to the same real register, so the copies will often disappear
728   later in the game.
729
730   This should handle expressions of 32, 16 and 8-bit type.  All
731   results are returned in a 32-bit register.  For 16- and 8-bit
732   expressions, the upper 16/24 bits are arbitrary, so you should mask
733   or sign extend partial values if necessary.
734*/
735
736/* --------------------- AMode1 --------------------- */
737
738/* Return an AMode1 which computes the value of the specified
739   expression, possibly also adding insns to the code list as a
740   result.  The expression may only be a 32-bit one.
741*/
742
743static Bool sane_AMode1 ( ARMAMode1* am )
744{
745   switch (am->tag) {
746      case ARMam1_RI:
747         return
748            toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
749                    && (hregIsVirtual(am->ARMam1.RI.reg)
750                        || sameHReg(am->ARMam1.RI.reg, hregARM_R8()))
751                    && am->ARMam1.RI.simm13 >= -4095
752                    && am->ARMam1.RI.simm13 <= 4095 );
753      case ARMam1_RRS:
754         return
755            toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
756                    && hregIsVirtual(am->ARMam1.RRS.base)
757                    && hregClass(am->ARMam1.RRS.index) == HRcInt32
758                    && hregIsVirtual(am->ARMam1.RRS.index)
759                    && am->ARMam1.RRS.shift >= 0
760                    && am->ARMam1.RRS.shift <= 3 );
761      default:
762         vpanic("sane_AMode: unknown ARM AMode1 tag");
763   }
764}
765
/* Checked entry point: computes an AMode1 for |e| by calling
   iselIntExpr_AMode1_wrk, then asserts that the result passes
   sane_AMode1 before returning it. */
static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
{
   ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
   vassert(sane_AMode1(am));
   return am;
}
772
773static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
774{
775   IRType ty = typeOfIRExpr(env->type_env,e);
776   vassert(ty == Ity_I32);
777
778   /* FIXME: add RRS matching */
779
780   /* {Add32,Sub32}(expr,simm13) */
781   if (e->tag == Iex_Binop
782       && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
783       && e->Iex.Binop.arg2->tag == Iex_Const
784       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
785      Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
786      if (simm >= -4095 && simm <= 4095) {
787         HReg reg;
788         if (e->Iex.Binop.op == Iop_Sub32)
789            simm = -simm;
790         reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
791         return ARMAMode1_RI(reg, simm);
792      }
793   }
794
795   /* Doesn't match anything in particular.  Generate it into
796      a register and use that. */
797   {
798      HReg reg = iselIntExpr_R(env, e);
799      return ARMAMode1_RI(reg, 0);
800   }
801
802}
803
804
805/* --------------------- AMode2 --------------------- */
806
807/* Return an AMode2 which computes the value of the specified
808   expression, possibly also adding insns to the code list as a
809   result.  The expression may only be a 32-bit one.
810*/
811
812static Bool sane_AMode2 ( ARMAMode2* am )
813{
814   switch (am->tag) {
815      case ARMam2_RI:
816         return
817            toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
818                    && hregIsVirtual(am->ARMam2.RI.reg)
819                    && am->ARMam2.RI.simm9 >= -255
820                    && am->ARMam2.RI.simm9 <= 255 );
821      case ARMam2_RR:
822         return
823            toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
824                    && hregIsVirtual(am->ARMam2.RR.base)
825                    && hregClass(am->ARMam2.RR.index) == HRcInt32
826                    && hregIsVirtual(am->ARMam2.RR.index) );
827      default:
828         vpanic("sane_AMode: unknown ARM AMode2 tag");
829   }
830}
831
/* Checked entry point: computes an AMode2 for |e| by calling
   iselIntExpr_AMode2_wrk, then asserts that the result passes
   sane_AMode2 before returning it. */
static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
{
   ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
   vassert(sane_AMode2(am));
   return am;
}
838
839static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
840{
841   IRType ty = typeOfIRExpr(env->type_env,e);
842   vassert(ty == Ity_I32);
843
844   /* FIXME: add RR matching */
845
846   /* {Add32,Sub32}(expr,simm8) */
847   if (e->tag == Iex_Binop
848       && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
849       && e->Iex.Binop.arg2->tag == Iex_Const
850       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
851      Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
852      if (simm >= -255 && simm <= 255) {
853         HReg reg;
854         if (e->Iex.Binop.op == Iop_Sub32)
855            simm = -simm;
856         reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
857         return ARMAMode2_RI(reg, simm);
858      }
859   }
860
861   /* Doesn't match anything in particular.  Generate it into
862      a register and use that. */
863   {
864      HReg reg = iselIntExpr_R(env, e);
865      return ARMAMode2_RI(reg, 0);
866   }
867
868}
869
870
871/* --------------------- AModeV --------------------- */
872
873/* Return an AModeV which computes the value of the specified
874   expression, possibly also adding insns to the code list as a
875   result.  The expression may only be a 32-bit one.
876*/
877
878static Bool sane_AModeV ( ARMAModeV* am )
879{
880  return toBool( hregClass(am->reg) == HRcInt32
881                 && hregIsVirtual(am->reg)
882                 && am->simm11 >= -1020 && am->simm11 <= 1020
883                 && 0 == (am->simm11 & 3) );
884}
885
886static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
887{
888   ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
889   vassert(sane_AModeV(am));
890   return am;
891}
892
893static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
894{
895   IRType ty = typeOfIRExpr(env->type_env,e);
896   vassert(ty == Ity_I32);
897
898   /* {Add32,Sub32}(expr, simm8 << 2) */
899   if (e->tag == Iex_Binop
900       && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
901       && e->Iex.Binop.arg2->tag == Iex_Const
902       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
903      Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
904      if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
905         HReg reg;
906         if (e->Iex.Binop.op == Iop_Sub32)
907            simm = -simm;
908         reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
909         return mkARMAModeV(reg, simm);
910      }
911   }
912
913   /* Doesn't match anything in particular.  Generate it into
914      a register and use that. */
915   {
916      HReg reg = iselIntExpr_R(env, e);
917      return mkARMAModeV(reg, 0);
918   }
919
920}
921
922/* -------------------- AModeN -------------------- */
923
924static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
925{
926   return iselIntExpr_AModeN_wrk(env, e);
927}
928
929static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
930{
931   HReg reg = iselIntExpr_R(env, e);
932   return mkARMAModeN_R(reg);
933}
934
935
936/* --------------------- RI84 --------------------- */
937
/* Select instructions to generate 'e' into a RI84.  If mayInv is
   true, then the caller will also accept an I84 form that denotes
   'not e'.  In this case didInv may not be NULL, and *didInv is set
   to True if the returned I84 denotes 'not e', and to False
   otherwise.  This complication is so as to allow generation of an
   RI84 which is suitable for use in either an AND or BIC instruction,
   without knowing (before this call) which one.
*/
945static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
946                                   ISelEnv* env, IRExpr* e )
947{
948   ARMRI84* ri;
949   if (mayInv)
950      vassert(didInv != NULL);
951   ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
952   /* sanity checks ... */
953   switch (ri->tag) {
954      case ARMri84_I84:
955         return ri;
956      case ARMri84_R:
957         vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
958         vassert(hregIsVirtual(ri->ARMri84.R.reg));
959         return ri;
960      default:
961         vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
962   }
963}
964
965/* DO NOT CALL THIS DIRECTLY ! */
966static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
967                                       ISelEnv* env, IRExpr* e )
968{
969   IRType ty = typeOfIRExpr(env->type_env,e);
970   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
971
972   if (didInv) *didInv = False;
973
974   /* special case: immediate */
975   if (e->tag == Iex_Const) {
976      UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
977      switch (e->Iex.Const.con->tag) {
978         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
979         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
980         case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
981         default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
982      }
983      if (fitsIn8x4(&u8, &u4, u)) {
984         return ARMRI84_I84( (UShort)u8, (UShort)u4 );
985      }
986      if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
987         vassert(didInv);
988         *didInv = True;
989         return ARMRI84_I84( (UShort)u8, (UShort)u4 );
990      }
991      /* else fail, fall through to default case */
992   }
993
994   /* default case: calculate into a register and return that */
995   {
996      HReg r = iselIntExpr_R ( env, e );
997      return ARMRI84_R(r);
998   }
999}
1000
1001
1002/* --------------------- RI5 --------------------- */
1003
1004/* Select instructions to generate 'e' into a RI5. */
1005
1006static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
1007{
1008   ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
1009   /* sanity checks ... */
1010   switch (ri->tag) {
1011      case ARMri5_I5:
1012         return ri;
1013      case ARMri5_R:
1014         vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
1015         vassert(hregIsVirtual(ri->ARMri5.R.reg));
1016         return ri;
1017      default:
1018         vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
1019   }
1020}
1021
1022/* DO NOT CALL THIS DIRECTLY ! */
1023static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
1024{
1025   IRType ty = typeOfIRExpr(env->type_env,e);
1026   vassert(ty == Ity_I32 || ty == Ity_I8);
1027
1028   /* special case: immediate */
1029   if (e->tag == Iex_Const) {
1030      UInt u; /* both invalid */
1031      switch (e->Iex.Const.con->tag) {
1032         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1033         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1034         case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
1035         default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
1036      }
1037      if (u >= 1 && u <= 31) {
1038         return ARMRI5_I5(u);
1039      }
1040      /* else fail, fall through to default case */
1041   }
1042
1043   /* default case: calculate into a register and return that */
1044   {
1045      HReg r = iselIntExpr_R ( env, e );
1046      return ARMRI5_R(r);
1047   }
1048}
1049
1050
1051/* ------------------- CondCode ------------------- */
1052
/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */
1056
1057static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1058{
1059   ARMCondCode cc = iselCondCode_wrk(env,e);
1060   vassert(cc != ARMcc_NV);
1061   return cc;
1062}
1063
1064static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1065{
1066   vassert(e);
1067   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1068
1069   /* var */
1070   if (e->tag == Iex_RdTmp) {
1071      HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1072      /* CmpOrTst doesn't modify rTmp; so this is OK. */
1073      ARMRI84* one  = ARMRI84_I84(1,0);
1074      addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
1075      return ARMcc_NE;
1076   }
1077
1078   /* Not1(e) */
1079   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1080      /* Generate code for the arg, and negate the test condition */
1081      return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
1082   }
1083
1084   /* --- patterns rooted at: 32to1 --- */
1085
1086   if (e->tag == Iex_Unop
1087       && e->Iex.Unop.op == Iop_32to1) {
1088      HReg     rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
1089      ARMRI84* one  = ARMRI84_I84(1,0);
1090      addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
1091      return ARMcc_NE;
1092   }
1093
1094   /* --- patterns rooted at: CmpNEZ8 --- */
1095
1096   if (e->tag == Iex_Unop
1097       && e->Iex.Unop.op == Iop_CmpNEZ8) {
1098      HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1099      ARMRI84* xFF  = ARMRI84_I84(0xFF,0);
1100      addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
1101      return ARMcc_NE;
1102   }
1103
1104   /* --- patterns rooted at: CmpNEZ32 --- */
1105
1106   if (e->tag == Iex_Unop
1107       && e->Iex.Unop.op == Iop_CmpNEZ32) {
1108      HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1109      ARMRI84* zero = ARMRI84_I84(0,0);
1110      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
1111      return ARMcc_NE;
1112   }
1113
1114   /* --- patterns rooted at: CmpNEZ64 --- */
1115
1116   if (e->tag == Iex_Unop
1117       && e->Iex.Unop.op == Iop_CmpNEZ64) {
1118      HReg     tHi, tLo;
1119      HReg     tmp  = newVRegI(env);
1120      ARMRI84* zero = ARMRI84_I84(0,0);
1121      iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
1122      addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
1123      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
1124      return ARMcc_NE;
1125   }
1126
1127   /* --- Cmp*32*(x,y) --- */
1128   if (e->tag == Iex_Binop
1129       && (e->Iex.Binop.op == Iop_CmpEQ32
1130           || e->Iex.Binop.op == Iop_CmpNE32
1131           || e->Iex.Binop.op == Iop_CmpLT32S
1132           || e->Iex.Binop.op == Iop_CmpLT32U
1133           || e->Iex.Binop.op == Iop_CmpLE32S
1134           || e->Iex.Binop.op == Iop_CmpLE32U)) {
1135      HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1136      ARMRI84* argR = iselIntExpr_RI84(NULL,False,
1137                                       env, e->Iex.Binop.arg2);
1138      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
1139      switch (e->Iex.Binop.op) {
1140         case Iop_CmpEQ32:  return ARMcc_EQ;
1141         case Iop_CmpNE32:  return ARMcc_NE;
1142         case Iop_CmpLT32S: return ARMcc_LT;
1143         case Iop_CmpLT32U: return ARMcc_LO;
1144         case Iop_CmpLE32S: return ARMcc_LE;
1145         case Iop_CmpLE32U: return ARMcc_LS;
1146         default: vpanic("iselCondCode(arm): CmpXX32");
1147      }
1148   }
1149
1150   /* const */
1151   /* Constant 1:Bit */
1152   if (e->tag == Iex_Const) {
1153      HReg r;
1154      vassert(e->Iex.Const.con->tag == Ico_U1);
1155      vassert(e->Iex.Const.con->Ico.U1 == True
1156              || e->Iex.Const.con->Ico.U1 == False);
1157      r = newVRegI(env);
1158      addInstr(env, ARMInstr_Imm32(r, 0));
1159      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r, ARMRI84_R(r)));
1160      return e->Iex.Const.con->Ico.U1 ? ARMcc_EQ : ARMcc_NE;
1161   }
1162
1163   // JRS 2013-Jan-03: this seems completely nonsensical
1164   /* --- CasCmpEQ* --- */
1165   /* Ist_Cas has a dummy argument to compare with, so comparison is
1166      always true. */
1167   //if (e->tag == Iex_Binop
1168   //    && (e->Iex.Binop.op == Iop_CasCmpEQ32
1169   //        || e->Iex.Binop.op == Iop_CasCmpEQ16
1170   //        || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1171   //   return ARMcc_AL;
1172   //}
1173
1174   ppIRExpr(e);
1175   vpanic("iselCondCode");
1176}
1177
1178
1179/* --------------------- Reg --------------------- */
1180
1181static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1182{
1183   HReg r = iselIntExpr_R_wrk(env, e);
1184   /* sanity checks ... */
1185#  if 0
1186   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1187#  endif
1188   vassert(hregClass(r) == HRcInt32);
1189   vassert(hregIsVirtual(r));
1190   return r;
1191}
1192
1193/* DO NOT CALL THIS DIRECTLY ! */
1194static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1195{
1196   IRType ty = typeOfIRExpr(env->type_env,e);
1197   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1198
1199   switch (e->tag) {
1200
1201   /* --------- TEMP --------- */
1202   case Iex_RdTmp: {
1203      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1204   }
1205
1206   /* --------- LOAD --------- */
1207   case Iex_Load: {
1208      HReg dst  = newVRegI(env);
1209
1210      if (e->Iex.Load.end != Iend_LE)
1211         goto irreducible;
1212
1213      if (ty == Ity_I32) {
1214         ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1215         addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, dst, amode));
1216         return dst;
1217      }
1218      if (ty == Ity_I16) {
1219         ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
1220         addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
1221                                       True/*isLoad*/, False/*!signedLoad*/,
1222                                       dst, amode));
1223         return dst;
1224      }
1225      if (ty == Ity_I8) {
1226         ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1227         addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, True/*isLoad*/, dst, amode));
1228         return dst;
1229      }
1230      break;
1231   }
1232
1233//zz   /* --------- TERNARY OP --------- */
1234//zz   case Iex_Triop: {
1235//zz      IRTriop *triop = e->Iex.Triop.details;
1236//zz      /* C3210 flags following FPU partial remainder (fprem), both
1237//zz         IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1238//zz      if (triop->op == Iop_PRemC3210F64
1239//zz          || triop->op == Iop_PRem1C3210F64) {
1240//zz         HReg junk = newVRegF(env);
1241//zz         HReg dst  = newVRegI(env);
1242//zz         HReg srcL = iselDblExpr(env, triop->arg2);
1243//zz         HReg srcR = iselDblExpr(env, triop->arg3);
1244//zz         /* XXXROUNDINGFIXME */
1245//zz         /* set roundingmode here */
1246//zz         addInstr(env, X86Instr_FpBinary(
1247//zz                           e->Iex.Binop.op==Iop_PRemC3210F64
1248//zz                              ? Xfp_PREM : Xfp_PREM1,
1249//zz                           srcL,srcR,junk
1250//zz                 ));
1251//zz         /* The previous pseudo-insn will have left the FPU's C3210
1252//zz            flags set correctly.  So bag them. */
1253//zz         addInstr(env, X86Instr_FpStSW_AX());
1254//zz         addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1255//zz         addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1256//zz         return dst;
1257//zz      }
1258//zz
1259//zz      break;
1260//zz   }
1261
1262   /* --------- BINARY OP --------- */
1263   case Iex_Binop: {
1264
1265      ARMAluOp   aop = 0; /* invalid */
1266      ARMShiftOp sop = 0; /* invalid */
1267
1268      /* ADD/SUB/AND/OR/XOR */
1269      switch (e->Iex.Binop.op) {
1270         case Iop_And32: {
1271            Bool     didInv = False;
1272            HReg     dst    = newVRegI(env);
1273            HReg     argL   = iselIntExpr_R(env, e->Iex.Binop.arg1);
1274            ARMRI84* argR   = iselIntExpr_RI84(&didInv, True/*mayInv*/,
1275                                               env, e->Iex.Binop.arg2);
1276            addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
1277                                       dst, argL, argR));
1278            return dst;
1279         }
1280         case Iop_Or32:  aop = ARMalu_OR;  goto std_binop;
1281         case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
1282         case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
1283         case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
1284         std_binop: {
1285            HReg     dst  = newVRegI(env);
1286            HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1287            ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
1288                                             env, e->Iex.Binop.arg2);
1289            addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
1290            return dst;
1291         }
1292         default: break;
1293      }
1294
1295      /* SHL/SHR/SAR */
1296      switch (e->Iex.Binop.op) {
1297         case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
1298         case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
1299         case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
1300         sh_binop: {
1301            HReg    dst  = newVRegI(env);
1302            HReg    argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1303            ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
1304            addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
1305            vassert(ty == Ity_I32); /* else the IR is ill-typed */
1306            return dst;
1307         }
1308         default: break;
1309      }
1310
1311      /* MUL */
1312      if (e->Iex.Binop.op == Iop_Mul32) {
1313         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1314         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1315         HReg dst  = newVRegI(env);
1316         addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1317         addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1318         addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
1319         addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1320         return dst;
1321      }
1322
1323      /* Handle misc other ops. */
1324
1325      if (e->Iex.Binop.op == Iop_Max32U) {
1326         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1327         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1328         HReg dst  = newVRegI(env);
1329         addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
1330                                         ARMRI84_R(argR)));
1331         addInstr(env, mk_iMOVds_RR(dst, argL));
1332         addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
1333         return dst;
1334      }
1335
1336      if (e->Iex.Binop.op == Iop_CmpF64) {
1337         HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
1338         HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
1339         HReg dst = newVRegI(env);
1340         /* Do the compare (FCMPD) and set NZCV in FPSCR.  Then also do
1341            FMSTAT, so we can examine the results directly. */
1342         addInstr(env, ARMInstr_VCmpD(dL, dR));
1343         /* Create in dst, the IRCmpF64Result encoded result. */
1344         addInstr(env, ARMInstr_Imm32(dst, 0));
1345         addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
1346         addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
1347         addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
1348         addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
1349         return dst;
1350      }
1351
1352      if (e->Iex.Binop.op == Iop_F64toI32S
1353          || e->Iex.Binop.op == Iop_F64toI32U) {
1354         /* Wretched uglyness all round, due to having to deal
1355            with rounding modes.  Oh well. */
1356         /* FIXME: if arg1 is a constant indicating round-to-zero,
1357            then we could skip all this arsing around with FPSCR and
1358            simply emit FTO{S,U}IZD. */
1359         Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
1360         HReg valD  = iselDblExpr(env, e->Iex.Binop.arg2);
1361         set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
1362         /* FTO{S,U}ID valF, valD */
1363         HReg valF = newVRegF(env);
1364         addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
1365                                       valF, valD));
1366         set_VFP_rounding_default(env);
1367         /* VMOV dst, valF */
1368         HReg dst = newVRegI(env);
1369         addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
1370         return dst;
1371      }
1372
1373      if (e->Iex.Binop.op == Iop_GetElem8x8
1374          || e->Iex.Binop.op == Iop_GetElem16x4
1375          || e->Iex.Binop.op == Iop_GetElem32x2) {
1376         HReg res = newVRegI(env);
1377         HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
1378         UInt index, size;
1379         if (e->Iex.Binop.arg2->tag != Iex_Const ||
1380             typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1381            vpanic("ARM target supports GetElem with constant "
1382                   "second argument only\n");
1383         }
1384         index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1385         switch (e->Iex.Binop.op) {
1386            case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1387            case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1388            case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1389            default: vassert(0);
1390         }
1391         addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1392                                        mkARMNRS(ARMNRS_Reg, res, 0),
1393                                        mkARMNRS(ARMNRS_Scalar, arg, index),
1394                                        size, False));
1395         return res;
1396      }
1397
1398      if (e->Iex.Binop.op == Iop_GetElem8x16
1399          || e->Iex.Binop.op == Iop_GetElem16x8
1400          || e->Iex.Binop.op == Iop_GetElem32x4) {
1401         HReg res = newVRegI(env);
1402         HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
1403         UInt index, size;
1404         if (e->Iex.Binop.arg2->tag != Iex_Const ||
1405             typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1406            vpanic("ARM target supports GetElem with constant "
1407                   "second argument only\n");
1408         }
1409         index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1410         switch (e->Iex.Binop.op) {
1411            case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1412            case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1413            case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1414            default: vassert(0);
1415         }
1416         addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1417                                        mkARMNRS(ARMNRS_Reg, res, 0),
1418                                        mkARMNRS(ARMNRS_Scalar, arg, index),
1419                                        size, True));
1420         return res;
1421      }
1422
1423      /* All cases involving host-side helper calls. */
1424      void* fn = NULL;
1425      switch (e->Iex.Binop.op) {
1426         case Iop_Add16x2:
1427            fn = &h_generic_calc_Add16x2; break;
1428         case Iop_Sub16x2:
1429            fn = &h_generic_calc_Sub16x2; break;
1430         case Iop_HAdd16Ux2:
1431            fn = &h_generic_calc_HAdd16Ux2; break;
1432         case Iop_HAdd16Sx2:
1433            fn = &h_generic_calc_HAdd16Sx2; break;
1434         case Iop_HSub16Ux2:
1435            fn = &h_generic_calc_HSub16Ux2; break;
1436         case Iop_HSub16Sx2:
1437            fn = &h_generic_calc_HSub16Sx2; break;
1438         case Iop_QAdd16Sx2:
1439            fn = &h_generic_calc_QAdd16Sx2; break;
1440         case Iop_QAdd16Ux2:
1441            fn = &h_generic_calc_QAdd16Ux2; break;
1442         case Iop_QSub16Sx2:
1443            fn = &h_generic_calc_QSub16Sx2; break;
1444         case Iop_Add8x4:
1445            fn = &h_generic_calc_Add8x4; break;
1446         case Iop_Sub8x4:
1447            fn = &h_generic_calc_Sub8x4; break;
1448         case Iop_HAdd8Ux4:
1449            fn = &h_generic_calc_HAdd8Ux4; break;
1450         case Iop_HAdd8Sx4:
1451            fn = &h_generic_calc_HAdd8Sx4; break;
1452         case Iop_HSub8Ux4:
1453            fn = &h_generic_calc_HSub8Ux4; break;
1454         case Iop_HSub8Sx4:
1455            fn = &h_generic_calc_HSub8Sx4; break;
1456         case Iop_QAdd8Sx4:
1457            fn = &h_generic_calc_QAdd8Sx4; break;
1458         case Iop_QAdd8Ux4:
1459            fn = &h_generic_calc_QAdd8Ux4; break;
1460         case Iop_QSub8Sx4:
1461            fn = &h_generic_calc_QSub8Sx4; break;
1462         case Iop_QSub8Ux4:
1463            fn = &h_generic_calc_QSub8Ux4; break;
1464         case Iop_Sad8Ux4:
1465            fn = &h_generic_calc_Sad8Ux4; break;
1466         case Iop_QAdd32S:
1467            fn = &h_generic_calc_QAdd32S; break;
1468         case Iop_QSub32S:
1469            fn = &h_generic_calc_QSub32S; break;
1470         case Iop_QSub16Ux2:
1471            fn = &h_generic_calc_QSub16Ux2; break;
1472         case Iop_DivU32:
1473            fn = &h_calc_udiv32_w_arm_semantics; break;
1474         case Iop_DivS32:
1475            fn = &h_calc_sdiv32_w_arm_semantics; break;
1476         default:
1477            break;
1478      }
1479
1480      if (fn) {
1481         HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1482         HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1483         HReg res  = newVRegI(env);
1484         addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
1485         addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
1486         addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn),
1487                                      2, mk_RetLoc_simple(RLPri_Int) ));
1488         addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1489         return res;
1490      }
1491
1492      break;
1493   }
1494
1495   /* --------- UNARY OP --------- */
1496   case Iex_Unop: {
1497
1498//zz      /* 1Uto8(32to1(expr32)) */
1499//zz      if (e->Iex.Unop.op == Iop_1Uto8) {
1500//zz         DECLARE_PATTERN(p_32to1_then_1Uto8);
1501//zz         DEFINE_PATTERN(p_32to1_then_1Uto8,
1502//zz                        unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1503//zz         if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1504//zz            IRExpr* expr32 = mi.bindee[0];
1505//zz            HReg dst = newVRegI(env);
1506//zz            HReg src = iselIntExpr_R(env, expr32);
1507//zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
1508//zz            addInstr(env, X86Instr_Alu32R(Xalu_AND,
1509//zz                                          X86RMI_Imm(1), dst));
1510//zz            return dst;
1511//zz         }
1512//zz      }
1513//zz
1514//zz      /* 8Uto32(LDle(expr32)) */
1515//zz      if (e->Iex.Unop.op == Iop_8Uto32) {
1516//zz         DECLARE_PATTERN(p_LDle8_then_8Uto32);
1517//zz         DEFINE_PATTERN(p_LDle8_then_8Uto32,
1518//zz                        unop(Iop_8Uto32,
1519//zz                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1520//zz         if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1521//zz            HReg dst = newVRegI(env);
1522//zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1523//zz            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1524//zz            return dst;
1525//zz         }
1526//zz      }
1527//zz
1528//zz      /* 8Sto32(LDle(expr32)) */
1529//zz      if (e->Iex.Unop.op == Iop_8Sto32) {
1530//zz         DECLARE_PATTERN(p_LDle8_then_8Sto32);
1531//zz         DEFINE_PATTERN(p_LDle8_then_8Sto32,
1532//zz                        unop(Iop_8Sto32,
1533//zz                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1534//zz         if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1535//zz            HReg dst = newVRegI(env);
1536//zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1537//zz            addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1538//zz            return dst;
1539//zz         }
1540//zz      }
1541//zz
1542//zz      /* 16Uto32(LDle(expr32)) */
1543//zz      if (e->Iex.Unop.op == Iop_16Uto32) {
1544//zz         DECLARE_PATTERN(p_LDle16_then_16Uto32);
1545//zz         DEFINE_PATTERN(p_LDle16_then_16Uto32,
1546//zz                        unop(Iop_16Uto32,
1547//zz                             IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1548//zz         if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1549//zz            HReg dst = newVRegI(env);
1550//zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1551//zz            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1552//zz            return dst;
1553//zz         }
1554//zz      }
1555//zz
1556//zz      /* 8Uto32(GET:I8) */
1557//zz      if (e->Iex.Unop.op == Iop_8Uto32) {
1558//zz         if (e->Iex.Unop.arg->tag == Iex_Get) {
1559//zz            HReg      dst;
1560//zz            X86AMode* amode;
1561//zz            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1562//zz            dst = newVRegI(env);
1563//zz            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1564//zz                                hregX86_EBP());
1565//zz            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1566//zz            return dst;
1567//zz         }
1568//zz      }
1569//zz
1570//zz      /* 16to32(GET:I16) */
1571//zz      if (e->Iex.Unop.op == Iop_16Uto32) {
1572//zz         if (e->Iex.Unop.arg->tag == Iex_Get) {
1573//zz            HReg      dst;
1574//zz            X86AMode* amode;
1575//zz            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1576//zz            dst = newVRegI(env);
1577//zz            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1578//zz                                hregX86_EBP());
1579//zz            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1580//zz            return dst;
1581//zz         }
1582//zz      }
1583
1584      switch (e->Iex.Unop.op) {
1585         case Iop_8Uto32: {
1586            HReg dst = newVRegI(env);
1587            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1588            addInstr(env, ARMInstr_Alu(ARMalu_AND,
1589                                       dst, src, ARMRI84_I84(0xFF,0)));
1590            return dst;
1591         }
1592//zz         case Iop_8Uto16:
1593//zz         case Iop_8Uto32:
1594//zz         case Iop_16Uto32: {
1595//zz            HReg dst = newVRegI(env);
1596//zz            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1597//zz            UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1598//zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
1599//zz            addInstr(env, X86Instr_Alu32R(Xalu_AND,
1600//zz                                          X86RMI_Imm(mask), dst));
1601//zz            return dst;
1602//zz         }
1603//zz         case Iop_8Sto16:
1604//zz         case Iop_8Sto32:
1605         case Iop_16Uto32: {
1606            HReg dst = newVRegI(env);
1607            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1608            ARMRI5* amt = ARMRI5_I5(16);
1609            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1610            addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
1611            return dst;
1612         }
1613         case Iop_8Sto32:
1614         case Iop_16Sto32: {
1615            HReg dst = newVRegI(env);
1616            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1617            ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
1618            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1619            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1620            return dst;
1621         }
1622//zz         case Iop_Not8:
1623//zz         case Iop_Not16:
1624         case Iop_Not32: {
1625            HReg dst = newVRegI(env);
1626            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1627            addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
1628            return dst;
1629         }
1630         case Iop_64HIto32: {
1631            HReg rHi, rLo;
1632            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1633            return rHi; /* and abandon rLo .. poor wee thing :-) */
1634         }
1635         case Iop_64to32: {
1636            HReg rHi, rLo;
1637            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1638            return rLo; /* similar stupid comment to the above ... */
1639         }
1640         case Iop_64to8: {
1641            HReg rHi, rLo;
1642            if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1643               HReg tHi = newVRegI(env);
1644               HReg tLo = newVRegI(env);
1645               HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
1646               addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1647               rHi = tHi;
1648               rLo = tLo;
1649            } else {
1650               iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1651            }
1652            return rLo;
1653         }
1654
1655         case Iop_1Uto32:
1656            /* 1Uto32(tmp).  Since I1 values generated into registers
1657               are guaranteed to have value either only zero or one,
1658               we can simply return the value of the register in this
1659               case. */
1660            if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
1661               HReg dst = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
1662               return dst;
1663            }
1664            /* else fall through */
1665         case Iop_1Uto8: {
1666            HReg        dst  = newVRegI(env);
1667            ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1668            addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1669            addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1670            return dst;
1671         }
1672
1673         case Iop_1Sto32: {
1674            HReg        dst  = newVRegI(env);
1675            ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1676            ARMRI5*     amt  = ARMRI5_I5(31);
1677            /* This is really rough.  We could do much better here;
1678               perhaps mvn{cond} dst, #0 as the second insn?
1679               (same applies to 1Sto64) */
1680            addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1681            addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1682            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1683            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1684            return dst;
1685         }
1686
1687
1688//zz         case Iop_1Sto8:
1689//zz         case Iop_1Sto16:
1690//zz         case Iop_1Sto32: {
1691//zz            /* could do better than this, but for now ... */
1692//zz            HReg dst         = newVRegI(env);
1693//zz            X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1694//zz            addInstr(env, X86Instr_Set32(cond,dst));
1695//zz            addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1696//zz            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1697//zz            return dst;
1698//zz         }
1699//zz         case Iop_Ctz32: {
1700//zz            /* Count trailing zeroes, implemented by x86 'bsfl' */
1701//zz            HReg dst = newVRegI(env);
1702//zz            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1703//zz            addInstr(env, X86Instr_Bsfr32(True,src,dst));
1704//zz            return dst;
1705//zz         }
1706         case Iop_Clz32: {
1707            /* Count leading zeroes; easy on ARM. */
1708            HReg dst = newVRegI(env);
1709            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1710            addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
1711            return dst;
1712         }
1713
1714         case Iop_CmpwNEZ32: {
1715            HReg dst = newVRegI(env);
1716            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1717            addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1718            addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1719            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
1720            return dst;
1721         }
1722
1723         case Iop_Left32: {
1724            HReg dst = newVRegI(env);
1725            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1726            addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1727            addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1728            return dst;
1729         }
1730
1731//zz         case Iop_V128to32: {
1732//zz            HReg      dst  = newVRegI(env);
1733//zz            HReg      vec  = iselVecExpr(env, e->Iex.Unop.arg);
1734//zz            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1735//zz            sub_from_esp(env, 16);
1736//zz            addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1737//zz            addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1738//zz            add_to_esp(env, 16);
1739//zz            return dst;
1740//zz         }
1741//zz
1742         case Iop_ReinterpF32asI32: {
1743            HReg dst = newVRegI(env);
1744            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1745            addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
1746            return dst;
1747         }
1748
1749//zz
1750//zz         case Iop_16to8:
1751         case Iop_32to8:
1752         case Iop_32to16:
1753            /* These are no-ops. */
1754            return iselIntExpr_R(env, e->Iex.Unop.arg);
1755
1756         default:
1757            break;
1758      }
1759
1760      /* All Unop cases involving host-side helper calls. */
1761      void* fn = NULL;
1762      switch (e->Iex.Unop.op) {
1763         case Iop_CmpNEZ16x2:
1764            fn = &h_generic_calc_CmpNEZ16x2; break;
1765         case Iop_CmpNEZ8x4:
1766            fn = &h_generic_calc_CmpNEZ8x4; break;
1767         default:
1768            break;
1769      }
1770
1771      if (fn) {
1772         HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1773         HReg res = newVRegI(env);
1774         addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
1775         addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn),
1776                                      1, mk_RetLoc_simple(RLPri_Int) ));
1777         addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1778         return res;
1779      }
1780
1781      break;
1782   }
1783
1784   /* --------- GET --------- */
1785   case Iex_Get: {
1786      if (ty == Ity_I32
1787          && 0 == (e->Iex.Get.offset & 3)
1788          && e->Iex.Get.offset < 4096-4) {
1789         HReg dst = newVRegI(env);
1790         addInstr(env, ARMInstr_LdSt32(
1791                          ARMcc_AL, True/*isLoad*/,
1792                          dst,
1793                          ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
1794         return dst;
1795      }
1796//zz      if (ty == Ity_I8 || ty == Ity_I16) {
1797//zz         HReg dst = newVRegI(env);
1798//zz         addInstr(env, X86Instr_LoadEX(
1799//zz                          toUChar(ty==Ity_I8 ? 1 : 2),
1800//zz                          False,
1801//zz                          X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1802//zz                          dst));
1803//zz         return dst;
1804//zz      }
1805      break;
1806   }
1807
1808//zz   case Iex_GetI: {
1809//zz      X86AMode* am
1810//zz         = genGuestArrayOffset(
1811//zz              env, e->Iex.GetI.descr,
1812//zz                   e->Iex.GetI.ix, e->Iex.GetI.bias );
1813//zz      HReg dst = newVRegI(env);
1814//zz      if (ty == Ity_I8) {
1815//zz         addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1816//zz         return dst;
1817//zz      }
1818//zz      if (ty == Ity_I32) {
1819//zz         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1820//zz         return dst;
1821//zz      }
1822//zz      break;
1823//zz   }
1824
1825   /* --------- CCALL --------- */
1826   case Iex_CCall: {
1827      HReg    dst = newVRegI(env);
1828      vassert(ty == e->Iex.CCall.retty);
1829
1830      /* be very restrictive for now.  Only 32/64-bit ints allowed for
1831         args, and 32 bits for return type.  Don't forget to change
1832         the RetLoc if more types are allowed in future. */
1833      if (e->Iex.CCall.retty != Ity_I32)
1834         goto irreducible;
1835
1836      /* Marshal args, do the call, clear stack. */
1837      UInt   addToSp = 0;
1838      RetLoc rloc    = mk_RetLoc_INVALID();
1839      Bool   ok      = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1840                                     e->Iex.CCall.cee, e->Iex.CCall.retty,
1841                                     e->Iex.CCall.args );
1842      /* */
1843      if (ok) {
1844         vassert(is_sane_RetLoc(rloc));
1845         vassert(rloc.pri == RLPri_Int);
1846         vassert(addToSp == 0);
1847         addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1848         return dst;
1849      }
1850      /* else fall through; will hit the irreducible: label */
1851   }
1852
1853   /* --------- LITERAL --------- */
1854   /* 32 literals */
1855   case Iex_Const: {
1856      UInt u   = 0;
1857      HReg dst = newVRegI(env);
1858      switch (e->Iex.Const.con->tag) {
1859         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1860         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1861         case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
1862         default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
1863      }
1864      addInstr(env, ARMInstr_Imm32(dst, u));
1865      return dst;
1866   }
1867
1868   /* --------- MULTIPLEX --------- */
1869   case Iex_ITE: { // VFD
1870      /* ITE(ccexpr, iftrue, iffalse) */
1871      if (ty == Ity_I32) {
1872         ARMCondCode cc;
1873         HReg     r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
1874         ARMRI84* r0  = iselIntExpr_RI84(NULL, False, env, e->Iex.ITE.iffalse);
1875         HReg     dst = newVRegI(env);
1876         addInstr(env, mk_iMOVds_RR(dst, r1));
1877         cc = iselCondCode(env, e->Iex.ITE.cond);
1878         addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
1879         return dst;
1880      }
1881      break;
1882   }
1883
1884   default:
1885   break;
1886   } /* switch (e->tag) */
1887
1888   /* We get here if no pattern matched. */
1889  irreducible:
1890   ppIRExpr(e);
1891   vpanic("iselIntExpr_R: cannot reduce tree");
1892}
1893
1894
1895/* -------------------- 64-bit -------------------- */
1896
1897/* Compute a 64-bit value into a register pair, which is returned as
1898   the first two parameters.  As with iselIntExpr_R, these may be
1899   either real or virtual regs; in any case they must not be changed
1900   by subsequent code emitted by the caller.  */
1901
1902static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1903{
1904   iselInt64Expr_wrk(rHi, rLo, env, e);
1905#  if 0
1906   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1907#  endif
1908   vassert(hregClass(*rHi) == HRcInt32);
1909   vassert(hregIsVirtual(*rHi));
1910   vassert(hregClass(*rLo) == HRcInt32);
1911   vassert(hregIsVirtual(*rLo));
1912}
1913
1914/* DO NOT CALL THIS DIRECTLY ! */
1915static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1916{
1917   vassert(e);
1918   vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
1919
1920   /* 64-bit literal */
1921   if (e->tag == Iex_Const) {
1922      ULong   w64 = e->Iex.Const.con->Ico.U64;
1923      UInt    wHi = toUInt(w64 >> 32);
1924      UInt    wLo = toUInt(w64);
1925      HReg    tHi = newVRegI(env);
1926      HReg    tLo = newVRegI(env);
1927      vassert(e->Iex.Const.con->tag == Ico_U64);
1928      addInstr(env, ARMInstr_Imm32(tHi, wHi));
1929      addInstr(env, ARMInstr_Imm32(tLo, wLo));
1930      *rHi = tHi;
1931      *rLo = tLo;
1932      return;
1933   }
1934
1935   /* read 64-bit IRTemp */
1936   if (e->tag == Iex_RdTmp) {
1937      if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1938         HReg tHi = newVRegI(env);
1939         HReg tLo = newVRegI(env);
1940         HReg tmp = iselNeon64Expr(env, e);
1941         addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1942         *rHi = tHi;
1943         *rLo = tLo;
1944      } else {
1945         lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
1946      }
1947      return;
1948   }
1949
1950   /* 64-bit load */
1951   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
1952      HReg      tLo, tHi, rA;
1953      vassert(e->Iex.Load.ty == Ity_I64);
1954      rA  = iselIntExpr_R(env, e->Iex.Load.addr);
1955      tHi = newVRegI(env);
1956      tLo = newVRegI(env);
1957      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
1958                                    tHi, ARMAMode1_RI(rA, 4)));
1959      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
1960                                    tLo, ARMAMode1_RI(rA, 0)));
1961      *rHi = tHi;
1962      *rLo = tLo;
1963      return;
1964   }
1965
1966   /* 64-bit GET */
1967   if (e->tag == Iex_Get) {
1968      ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
1969      ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
1970      HReg tHi = newVRegI(env);
1971      HReg tLo = newVRegI(env);
1972      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tHi, am4));
1973      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tLo, am0));
1974      *rHi = tHi;
1975      *rLo = tLo;
1976      return;
1977   }
1978
1979   /* --------- BINARY ops --------- */
1980   if (e->tag == Iex_Binop) {
1981      switch (e->Iex.Binop.op) {
1982
1983         /* 32 x 32 -> 64 multiply */
1984         case Iop_MullS32:
1985         case Iop_MullU32: {
1986            HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1987            HReg     argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1988            HReg     tHi  = newVRegI(env);
1989            HReg     tLo  = newVRegI(env);
1990            ARMMulOp mop  = e->Iex.Binop.op == Iop_MullS32
1991                               ? ARMmul_SX : ARMmul_ZX;
1992            addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1993            addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1994            addInstr(env, ARMInstr_Mul(mop));
1995            addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
1996            addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
1997            *rHi = tHi;
1998            *rLo = tLo;
1999            return;
2000         }
2001
2002         case Iop_Or64: {
2003            HReg xLo, xHi, yLo, yHi;
2004            HReg tHi = newVRegI(env);
2005            HReg tLo = newVRegI(env);
2006            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2007            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2008            addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
2009            addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
2010            *rHi = tHi;
2011            *rLo = tLo;
2012            return;
2013         }
2014
2015         case Iop_Add64: {
2016            HReg xLo, xHi, yLo, yHi;
2017            HReg tHi = newVRegI(env);
2018            HReg tLo = newVRegI(env);
2019            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2020            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2021            addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
2022            addInstr(env, ARMInstr_Alu(ARMalu_ADC,  tHi, xHi, ARMRI84_R(yHi)));
2023            *rHi = tHi;
2024            *rLo = tLo;
2025            return;
2026         }
2027
2028         /* 32HLto64(e1,e2) */
2029         case Iop_32HLto64: {
2030            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2031            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2032            return;
2033         }
2034
2035         default:
2036            break;
2037      }
2038   }
2039
2040   /* --------- UNARY ops --------- */
2041   if (e->tag == Iex_Unop) {
2042      switch (e->Iex.Unop.op) {
2043
2044         /* ReinterpF64asI64 */
2045         case Iop_ReinterpF64asI64: {
2046            HReg dstHi = newVRegI(env);
2047            HReg dstLo = newVRegI(env);
2048            HReg src   = iselDblExpr(env, e->Iex.Unop.arg);
2049            addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
2050            *rHi = dstHi;
2051            *rLo = dstLo;
2052            return;
2053         }
2054
2055         /* Left64(e) */
2056         case Iop_Left64: {
2057            HReg yLo, yHi;
2058            HReg tHi  = newVRegI(env);
2059            HReg tLo  = newVRegI(env);
2060            HReg zero = newVRegI(env);
2061            /* yHi:yLo = arg */
2062            iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
2063            /* zero = 0 */
2064            addInstr(env, ARMInstr_Imm32(zero, 0));
2065            /* tLo = 0 - yLo, and set carry */
2066            addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
2067                                       tLo, zero, ARMRI84_R(yLo)));
2068            /* tHi = 0 - yHi - carry */
2069            addInstr(env, ARMInstr_Alu(ARMalu_SBC,
2070                                       tHi, zero, ARMRI84_R(yHi)));
2071            /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
2072               back in, so as to give the final result
2073               tHi:tLo = arg | -arg. */
2074            addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
2075            addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
2076            *rHi = tHi;
2077            *rLo = tLo;
2078            return;
2079         }
2080
2081         /* CmpwNEZ64(e) */
2082         case Iop_CmpwNEZ64: {
2083            HReg srcLo, srcHi;
2084            HReg tmp1 = newVRegI(env);
2085            HReg tmp2 = newVRegI(env);
2086            /* srcHi:srcLo = arg */
2087            iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2088            /* tmp1 = srcHi | srcLo */
2089            addInstr(env, ARMInstr_Alu(ARMalu_OR,
2090                                       tmp1, srcHi, ARMRI84_R(srcLo)));
2091            /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2092            addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
2093            addInstr(env, ARMInstr_Alu(ARMalu_OR,
2094                                       tmp2, tmp2, ARMRI84_R(tmp1)));
2095            addInstr(env, ARMInstr_Shift(ARMsh_SAR,
2096                                         tmp2, tmp2, ARMRI5_I5(31)));
2097            *rHi = tmp2;
2098            *rLo = tmp2;
2099            return;
2100         }
2101
2102         case Iop_1Sto64: {
2103            HReg        dst  = newVRegI(env);
2104            ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2105            ARMRI5*     amt  = ARMRI5_I5(31);
2106            /* This is really rough.  We could do much better here;
2107               perhaps mvn{cond} dst, #0 as the second insn?
2108               (same applies to 1Sto32) */
2109            addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2110            addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2111            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
2112            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
2113            *rHi = dst;
2114            *rLo = dst;
2115            return;
2116         }
2117
2118         default:
2119            break;
2120      }
2121   } /* if (e->tag == Iex_Unop) */
2122
2123   /* --------- MULTIPLEX --------- */
2124   if (e->tag == Iex_ITE) { // VFD
2125      IRType tyC;
2126      HReg   r1hi, r1lo, r0hi, r0lo, dstHi, dstLo;
2127      ARMCondCode cc;
2128      tyC = typeOfIRExpr(env->type_env,e->Iex.ITE.cond);
2129      vassert(tyC == Ity_I1);
2130      iselInt64Expr(&r1hi, &r1lo, env, e->Iex.ITE.iftrue);
2131      iselInt64Expr(&r0hi, &r0lo, env, e->Iex.ITE.iffalse);
2132      dstHi = newVRegI(env);
2133      dstLo = newVRegI(env);
2134      addInstr(env, mk_iMOVds_RR(dstHi, r1hi));
2135      addInstr(env, mk_iMOVds_RR(dstLo, r1lo));
2136      cc = iselCondCode(env, e->Iex.ITE.cond);
2137      addInstr(env, ARMInstr_CMov(cc ^ 1, dstHi, ARMRI84_R(r0hi)));
2138      addInstr(env, ARMInstr_CMov(cc ^ 1, dstLo, ARMRI84_R(r0lo)));
2139      *rHi = dstHi;
2140      *rLo = dstLo;
2141      return;
2142   }
2143
2144   /* It is convenient sometimes to call iselInt64Expr even when we
2145      have NEON support (e.g. in do_helper_call we need 64-bit
2146      arguments as 2 x 32 regs). */
2147   if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
2148      HReg tHi = newVRegI(env);
2149      HReg tLo = newVRegI(env);
2150      HReg tmp = iselNeon64Expr(env, e);
2151      addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2152      *rHi = tHi;
2153      *rLo = tLo;
2154      return ;
2155   }
2156
2157   ppIRExpr(e);
2158   vpanic("iselInt64Expr");
2159}
2160
2161
2162/*---------------------------------------------------------*/
2163/*--- ISEL: Vector (NEON) expressions (64 or 128 bit)   ---*/
2164/*---------------------------------------------------------*/
2165
2166static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
2167{
2168   HReg r = iselNeon64Expr_wrk( env, e );
2169   vassert(hregClass(r) == HRcFlt64);
2170   vassert(hregIsVirtual(r));
2171   return r;
2172}
2173
2174/* DO NOT CALL THIS DIRECTLY */
2175static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
2176{
2177   IRType ty = typeOfIRExpr(env->type_env, e);
2178   MatchInfo mi;
2179   vassert(e);
2180   vassert(ty == Ity_I64);
2181
2182   if (e->tag == Iex_RdTmp) {
2183      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2184   }
2185
2186   if (e->tag == Iex_Const) {
2187      HReg rLo, rHi;
2188      HReg res = newVRegD(env);
2189      iselInt64Expr(&rHi, &rLo, env, e);
2190      addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2191      return res;
2192   }
2193
2194   /* 64-bit load */
2195   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2196      HReg res = newVRegD(env);
2197      ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2198      vassert(ty == Ity_I64);
2199      addInstr(env, ARMInstr_NLdStD(True, res, am));
2200      return res;
2201   }
2202
2203   /* 64-bit GET */
2204   if (e->tag == Iex_Get) {
2205      HReg addr = newVRegI(env);
2206      HReg res = newVRegD(env);
2207      vassert(ty == Ity_I64);
2208      addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2209      addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2210      return res;
2211   }
2212
2213   /* --------- BINARY ops --------- */
2214   if (e->tag == Iex_Binop) {
2215      switch (e->Iex.Binop.op) {
2216
2217         /* 32 x 32 -> 64 multiply */
2218         case Iop_MullS32:
2219         case Iop_MullU32: {
2220            HReg rLo, rHi;
2221            HReg res = newVRegD(env);
2222            iselInt64Expr(&rHi, &rLo, env, e);
2223            addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2224            return res;
2225         }
2226
2227         case Iop_And64: {
2228            HReg res = newVRegD(env);
2229            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2230            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2231            addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2232                                           res, argL, argR, 4, False));
2233            return res;
2234         }
2235         case Iop_Or64: {
2236            HReg res = newVRegD(env);
2237            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2238            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2239            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2240                                           res, argL, argR, 4, False));
2241            return res;
2242         }
2243         case Iop_Xor64: {
2244            HReg res = newVRegD(env);
2245            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2246            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2247            addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2248                                           res, argL, argR, 4, False));
2249            return res;
2250         }
2251
2252         /* 32HLto64(e1,e2) */
2253         case Iop_32HLto64: {
2254            HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2255            HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2256            HReg res = newVRegD(env);
2257            addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2258            return res;
2259         }
2260
2261         case Iop_Add8x8:
2262         case Iop_Add16x4:
2263         case Iop_Add32x2:
2264         case Iop_Add64: {
2265            HReg res = newVRegD(env);
2266            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2267            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2268            UInt size;
2269            switch (e->Iex.Binop.op) {
2270               case Iop_Add8x8: size = 0; break;
2271               case Iop_Add16x4: size = 1; break;
2272               case Iop_Add32x2: size = 2; break;
2273               case Iop_Add64: size = 3; break;
2274               default: vassert(0);
2275            }
2276            addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2277                                           res, argL, argR, size, False));
2278            return res;
2279         }
2280         case Iop_Add32Fx2: {
2281            HReg res = newVRegD(env);
2282            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2283            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2284            UInt size = 0;
2285            addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2286                                           res, argL, argR, size, False));
2287            return res;
2288         }
2289         case Iop_Recps32Fx2: {
2290            HReg res = newVRegD(env);
2291            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2292            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2293            UInt size = 0;
2294            addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2295                                           res, argL, argR, size, False));
2296            return res;
2297         }
2298         case Iop_Rsqrts32Fx2: {
2299            HReg res = newVRegD(env);
2300            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2301            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2302            UInt size = 0;
2303            addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2304                                           res, argL, argR, size, False));
2305            return res;
2306         }
2307
2308         // These 6 verified 18 Apr 2013
2309         case Iop_InterleaveHI32x2:
2310         case Iop_InterleaveLO32x2:
2311         case Iop_InterleaveOddLanes8x8:
2312         case Iop_InterleaveEvenLanes8x8:
2313         case Iop_InterleaveOddLanes16x4:
2314         case Iop_InterleaveEvenLanes16x4: {
2315            HReg rD   = newVRegD(env);
2316            HReg rM   = newVRegD(env);
2317            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2318            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2319            UInt size;
2320            Bool resRd;  // is the result in rD or rM ?
2321            switch (e->Iex.Binop.op) {
2322               case Iop_InterleaveOddLanes8x8:   resRd = False; size = 0; break;
2323               case Iop_InterleaveEvenLanes8x8:  resRd = True;  size = 0; break;
2324               case Iop_InterleaveOddLanes16x4:  resRd = False; size = 1; break;
2325               case Iop_InterleaveEvenLanes16x4: resRd = True;  size = 1; break;
2326               case Iop_InterleaveHI32x2:        resRd = False; size = 2; break;
2327               case Iop_InterleaveLO32x2:        resRd = True;  size = 2; break;
2328               default: vassert(0);
2329            }
2330            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2331            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2332            addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, False));
2333            return resRd ? rD : rM;
2334         }
2335
2336         // These 4 verified 18 Apr 2013
2337         case Iop_InterleaveHI8x8:
2338         case Iop_InterleaveLO8x8:
2339         case Iop_InterleaveHI16x4:
2340         case Iop_InterleaveLO16x4: {
2341            HReg rD   = newVRegD(env);
2342            HReg rM   = newVRegD(env);
2343            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2344            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2345            UInt size;
2346            Bool resRd;  // is the result in rD or rM ?
2347            switch (e->Iex.Binop.op) {
2348               case Iop_InterleaveHI8x8:  resRd = False; size = 0; break;
2349               case Iop_InterleaveLO8x8:  resRd = True;  size = 0; break;
2350               case Iop_InterleaveHI16x4: resRd = False; size = 1; break;
2351               case Iop_InterleaveLO16x4: resRd = True;  size = 1; break;
2352               default: vassert(0);
2353            }
2354            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2355            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2356            addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, False));
2357            return resRd ? rD : rM;
2358         }
2359
2360         // These 4 verified 18 Apr 2013
2361         case Iop_CatOddLanes8x8:
2362         case Iop_CatEvenLanes8x8:
2363         case Iop_CatOddLanes16x4:
2364         case Iop_CatEvenLanes16x4: {
2365            HReg rD   = newVRegD(env);
2366            HReg rM   = newVRegD(env);
2367            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2368            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2369            UInt size;
2370            Bool resRd;  // is the result in rD or rM ?
2371            switch (e->Iex.Binop.op) {
2372               case Iop_CatOddLanes8x8:   resRd = False; size = 0; break;
2373               case Iop_CatEvenLanes8x8:  resRd = True;  size = 0; break;
2374               case Iop_CatOddLanes16x4:  resRd = False; size = 1; break;
2375               case Iop_CatEvenLanes16x4: resRd = True;  size = 1; break;
2376               default: vassert(0);
2377            }
2378            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2379            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2380            addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, False));
2381            return resRd ? rD : rM;
2382         }
2383
2384         case Iop_QAdd8Ux8:
2385         case Iop_QAdd16Ux4:
2386         case Iop_QAdd32Ux2:
2387         case Iop_QAdd64Ux1: {
2388            HReg res = newVRegD(env);
2389            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2390            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2391            UInt size;
2392            switch (e->Iex.Binop.op) {
2393               case Iop_QAdd8Ux8: size = 0; break;
2394               case Iop_QAdd16Ux4: size = 1; break;
2395               case Iop_QAdd32Ux2: size = 2; break;
2396               case Iop_QAdd64Ux1: size = 3; break;
2397               default: vassert(0);
2398            }
2399            addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2400                                           res, argL, argR, size, False));
2401            return res;
2402         }
2403         case Iop_QAdd8Sx8:
2404         case Iop_QAdd16Sx4:
2405         case Iop_QAdd32Sx2:
2406         case Iop_QAdd64Sx1: {
2407            HReg res = newVRegD(env);
2408            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2409            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2410            UInt size;
2411            switch (e->Iex.Binop.op) {
2412               case Iop_QAdd8Sx8: size = 0; break;
2413               case Iop_QAdd16Sx4: size = 1; break;
2414               case Iop_QAdd32Sx2: size = 2; break;
2415               case Iop_QAdd64Sx1: size = 3; break;
2416               default: vassert(0);
2417            }
2418            addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2419                                           res, argL, argR, size, False));
2420            return res;
2421         }
2422         case Iop_Sub8x8:
2423         case Iop_Sub16x4:
2424         case Iop_Sub32x2:
2425         case Iop_Sub64: {
2426            HReg res = newVRegD(env);
2427            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2428            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2429            UInt size;
2430            switch (e->Iex.Binop.op) {
2431               case Iop_Sub8x8: size = 0; break;
2432               case Iop_Sub16x4: size = 1; break;
2433               case Iop_Sub32x2: size = 2; break;
2434               case Iop_Sub64: size = 3; break;
2435               default: vassert(0);
2436            }
2437            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2438                                           res, argL, argR, size, False));
2439            return res;
2440         }
2441         case Iop_Sub32Fx2: {
2442            HReg res = newVRegD(env);
2443            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2444            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2445            UInt size = 0;
2446            addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2447                                           res, argL, argR, size, False));
2448            return res;
2449         }
2450         case Iop_QSub8Ux8:
2451         case Iop_QSub16Ux4:
2452         case Iop_QSub32Ux2:
2453         case Iop_QSub64Ux1: {
2454            HReg res = newVRegD(env);
2455            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2456            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2457            UInt size;
2458            switch (e->Iex.Binop.op) {
2459               case Iop_QSub8Ux8: size = 0; break;
2460               case Iop_QSub16Ux4: size = 1; break;
2461               case Iop_QSub32Ux2: size = 2; break;
2462               case Iop_QSub64Ux1: size = 3; break;
2463               default: vassert(0);
2464            }
2465            addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
2466                                           res, argL, argR, size, False));
2467            return res;
2468         }
2469         case Iop_QSub8Sx8:
2470         case Iop_QSub16Sx4:
2471         case Iop_QSub32Sx2:
2472         case Iop_QSub64Sx1: {
2473            HReg res = newVRegD(env);
2474            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2475            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2476            UInt size;
2477            switch (e->Iex.Binop.op) {
2478               case Iop_QSub8Sx8: size = 0; break;
2479               case Iop_QSub16Sx4: size = 1; break;
2480               case Iop_QSub32Sx2: size = 2; break;
2481               case Iop_QSub64Sx1: size = 3; break;
2482               default: vassert(0);
2483            }
2484            addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
2485                                           res, argL, argR, size, False));
2486            return res;
2487         }
2488         case Iop_Max8Ux8:
2489         case Iop_Max16Ux4:
2490         case Iop_Max32Ux2: {
2491            HReg res = newVRegD(env);
2492            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2493            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2494            UInt size;
2495            switch (e->Iex.Binop.op) {
2496               case Iop_Max8Ux8: size = 0; break;
2497               case Iop_Max16Ux4: size = 1; break;
2498               case Iop_Max32Ux2: size = 2; break;
2499               default: vassert(0);
2500            }
2501            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
2502                                           res, argL, argR, size, False));
2503            return res;
2504         }
2505         case Iop_Max8Sx8:
2506         case Iop_Max16Sx4:
2507         case Iop_Max32Sx2: {
2508            HReg res = newVRegD(env);
2509            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2510            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2511            UInt size;
2512            switch (e->Iex.Binop.op) {
2513               case Iop_Max8Sx8: size = 0; break;
2514               case Iop_Max16Sx4: size = 1; break;
2515               case Iop_Max32Sx2: size = 2; break;
2516               default: vassert(0);
2517            }
2518            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
2519                                           res, argL, argR, size, False));
2520            return res;
2521         }
2522         case Iop_Min8Ux8:
2523         case Iop_Min16Ux4:
2524         case Iop_Min32Ux2: {
2525            HReg res = newVRegD(env);
2526            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2527            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2528            UInt size;
2529            switch (e->Iex.Binop.op) {
2530               case Iop_Min8Ux8: size = 0; break;
2531               case Iop_Min16Ux4: size = 1; break;
2532               case Iop_Min32Ux2: size = 2; break;
2533               default: vassert(0);
2534            }
2535            addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
2536                                           res, argL, argR, size, False));
2537            return res;
2538         }
2539         case Iop_Min8Sx8:
2540         case Iop_Min16Sx4:
2541         case Iop_Min32Sx2: {
2542            HReg res = newVRegD(env);
2543            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2544            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2545            UInt size;
2546            switch (e->Iex.Binop.op) {
2547               case Iop_Min8Sx8: size = 0; break;
2548               case Iop_Min16Sx4: size = 1; break;
2549               case Iop_Min32Sx2: size = 2; break;
2550               default: vassert(0);
2551            }
2552            addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
2553                                           res, argL, argR, size, False));
2554            return res;
2555         }
2556         case Iop_Sar8x8:
2557         case Iop_Sar16x4:
2558         case Iop_Sar32x2: {
2559            HReg res = newVRegD(env);
2560            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2561            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2562            HReg argR2 = newVRegD(env);
2563            HReg zero = newVRegD(env);
2564            UInt size;
2565            switch (e->Iex.Binop.op) {
2566               case Iop_Sar8x8: size = 0; break;
2567               case Iop_Sar16x4: size = 1; break;
2568               case Iop_Sar32x2: size = 2; break;
2569               case Iop_Sar64: size = 3; break;
2570               default: vassert(0);
2571            }
2572            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2573            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2574                                           argR2, zero, argR, size, False));
2575            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2576                                          res, argL, argR2, size, False));
2577            return res;
2578         }
2579         case Iop_Sal8x8:
2580         case Iop_Sal16x4:
2581         case Iop_Sal32x2:
2582         case Iop_Sal64x1: {
2583            HReg res = newVRegD(env);
2584            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2585            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2586            UInt size;
2587            switch (e->Iex.Binop.op) {
2588               case Iop_Sal8x8: size = 0; break;
2589               case Iop_Sal16x4: size = 1; break;
2590               case Iop_Sal32x2: size = 2; break;
2591               case Iop_Sal64x1: size = 3; break;
2592               default: vassert(0);
2593            }
2594            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2595                                          res, argL, argR, size, False));
2596            return res;
2597         }
2598         case Iop_Shr8x8:
2599         case Iop_Shr16x4:
2600         case Iop_Shr32x2: {
2601            HReg res = newVRegD(env);
2602            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2603            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2604            HReg argR2 = newVRegD(env);
2605            HReg zero = newVRegD(env);
2606            UInt size;
2607            switch (e->Iex.Binop.op) {
2608               case Iop_Shr8x8: size = 0; break;
2609               case Iop_Shr16x4: size = 1; break;
2610               case Iop_Shr32x2: size = 2; break;
2611               default: vassert(0);
2612            }
2613            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2614            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2615                                           argR2, zero, argR, size, False));
2616            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2617                                          res, argL, argR2, size, False));
2618            return res;
2619         }
2620         case Iop_Shl8x8:
2621         case Iop_Shl16x4:
2622         case Iop_Shl32x2: {
2623            HReg res = newVRegD(env);
2624            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2625            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2626            UInt size;
2627            switch (e->Iex.Binop.op) {
2628               case Iop_Shl8x8: size = 0; break;
2629               case Iop_Shl16x4: size = 1; break;
2630               case Iop_Shl32x2: size = 2; break;
2631               default: vassert(0);
2632            }
2633            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2634                                          res, argL, argR, size, False));
2635            return res;
2636         }
2637         case Iop_QShl8x8:
2638         case Iop_QShl16x4:
2639         case Iop_QShl32x2:
2640         case Iop_QShl64x1: {
2641            HReg res = newVRegD(env);
2642            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2643            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2644            UInt size;
2645            switch (e->Iex.Binop.op) {
2646               case Iop_QShl8x8: size = 0; break;
2647               case Iop_QShl16x4: size = 1; break;
2648               case Iop_QShl32x2: size = 2; break;
2649               case Iop_QShl64x1: size = 3; break;
2650               default: vassert(0);
2651            }
2652            addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
2653                                          res, argL, argR, size, False));
2654            return res;
2655         }
2656         case Iop_QSal8x8:
2657         case Iop_QSal16x4:
2658         case Iop_QSal32x2:
2659         case Iop_QSal64x1: {
2660            HReg res = newVRegD(env);
2661            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2662            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2663            UInt size;
2664            switch (e->Iex.Binop.op) {
2665               case Iop_QSal8x8: size = 0; break;
2666               case Iop_QSal16x4: size = 1; break;
2667               case Iop_QSal32x2: size = 2; break;
2668               case Iop_QSal64x1: size = 3; break;
2669               default: vassert(0);
2670            }
2671            addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
2672                                          res, argL, argR, size, False));
2673            return res;
2674         }
2675         case Iop_QShlN8x8:
2676         case Iop_QShlN16x4:
2677         case Iop_QShlN32x2:
2678         case Iop_QShlN64x1: {
2679            HReg res = newVRegD(env);
2680            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2681            UInt size, imm;
2682            if (e->Iex.Binop.arg2->tag != Iex_Const ||
2683                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2684               vpanic("ARM taget supports Iop_QShlNAxB with constant "
2685                      "second argument only\n");
2686            }
2687            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2688            switch (e->Iex.Binop.op) {
2689               case Iop_QShlN8x8: size = 8 | imm; break;
2690               case Iop_QShlN16x4: size = 16 | imm; break;
2691               case Iop_QShlN32x2: size = 32 | imm; break;
2692               case Iop_QShlN64x1: size = 64 | imm; break;
2693               default: vassert(0);
2694            }
2695            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
2696                                          res, argL, size, False));
2697            return res;
2698         }
2699         case Iop_QShlN8Sx8:
2700         case Iop_QShlN16Sx4:
2701         case Iop_QShlN32Sx2:
2702         case Iop_QShlN64Sx1: {
2703            HReg res = newVRegD(env);
2704            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2705            UInt size, imm;
2706            if (e->Iex.Binop.arg2->tag != Iex_Const ||
2707                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2708               vpanic("ARM taget supports Iop_QShlNAxB with constant "
2709                      "second argument only\n");
2710            }
2711            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2712            switch (e->Iex.Binop.op) {
2713               case Iop_QShlN8Sx8: size = 8 | imm; break;
2714               case Iop_QShlN16Sx4: size = 16 | imm; break;
2715               case Iop_QShlN32Sx2: size = 32 | imm; break;
2716               case Iop_QShlN64Sx1: size = 64 | imm; break;
2717               default: vassert(0);
2718            }
2719            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
2720                                          res, argL, size, False));
2721            return res;
2722         }
2723         case Iop_QSalN8x8:
2724         case Iop_QSalN16x4:
2725         case Iop_QSalN32x2:
2726         case Iop_QSalN64x1: {
2727            HReg res = newVRegD(env);
2728            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2729            UInt size, imm;
2730            if (e->Iex.Binop.arg2->tag != Iex_Const ||
2731                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2732               vpanic("ARM taget supports Iop_QShlNAxB with constant "
2733                      "second argument only\n");
2734            }
2735            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2736            switch (e->Iex.Binop.op) {
2737               case Iop_QSalN8x8: size = 8 | imm; break;
2738               case Iop_QSalN16x4: size = 16 | imm; break;
2739               case Iop_QSalN32x2: size = 32 | imm; break;
2740               case Iop_QSalN64x1: size = 64 | imm; break;
2741               default: vassert(0);
2742            }
2743            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
2744                                          res, argL, size, False));
2745            return res;
2746         }
2747         case Iop_ShrN8x8:
2748         case Iop_ShrN16x4:
2749         case Iop_ShrN32x2:
2750         case Iop_Shr64: {
2751            HReg res = newVRegD(env);
2752            HReg tmp = newVRegD(env);
2753            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2754            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2755            HReg argR2 = newVRegI(env);
2756            UInt size;
2757            switch (e->Iex.Binop.op) {
2758               case Iop_ShrN8x8: size = 0; break;
2759               case Iop_ShrN16x4: size = 1; break;
2760               case Iop_ShrN32x2: size = 2; break;
2761               case Iop_Shr64: size = 3; break;
2762               default: vassert(0);
2763            }
2764            addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2765            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2766            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2767                                          res, argL, tmp, size, False));
2768            return res;
2769         }
2770         case Iop_ShlN8x8:
2771         case Iop_ShlN16x4:
2772         case Iop_ShlN32x2:
2773         case Iop_Shl64: {
2774            HReg res = newVRegD(env);
2775            HReg tmp = newVRegD(env);
2776            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2777            /* special-case Shl64(x, imm8) since the Neon front
2778               end produces a lot of those for V{LD,ST}{1,2,3,4}. */
2779            if (e->Iex.Binop.op == Iop_Shl64
2780                && e->Iex.Binop.arg2->tag == Iex_Const) {
2781               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
2782               Int nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2783               if (nshift >= 1 && nshift <= 63) {
2784                  addInstr(env, ARMInstr_NShl64(res, argL, nshift));
2785                  return res;
2786               }
2787               /* else fall through to general case */
2788            }
2789            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2790            UInt size;
2791            switch (e->Iex.Binop.op) {
2792               case Iop_ShlN8x8:  size = 0; break;
2793               case Iop_ShlN16x4: size = 1; break;
2794               case Iop_ShlN32x2: size = 2; break;
2795               case Iop_Shl64:    size = 3; break;
2796               default: vassert(0);
2797            }
2798            addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
2799                                          tmp, argR, 0, False));
2800            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2801                                          res, argL, tmp, size, False));
2802            return res;
2803         }
2804         case Iop_SarN8x8:
2805         case Iop_SarN16x4:
2806         case Iop_SarN32x2:
2807         case Iop_Sar64: {
2808            HReg res = newVRegD(env);
2809            HReg tmp = newVRegD(env);
2810            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2811            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2812            HReg argR2 = newVRegI(env);
2813            UInt size;
2814            switch (e->Iex.Binop.op) {
2815               case Iop_SarN8x8: size = 0; break;
2816               case Iop_SarN16x4: size = 1; break;
2817               case Iop_SarN32x2: size = 2; break;
2818               case Iop_Sar64: size = 3; break;
2819               default: vassert(0);
2820            }
2821            addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2822            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2823            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2824                                          res, argL, tmp, size, False));
2825            return res;
2826         }
2827         case Iop_CmpGT8Ux8:
2828         case Iop_CmpGT16Ux4:
2829         case Iop_CmpGT32Ux2: {
2830            HReg res = newVRegD(env);
2831            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2832            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2833            UInt size;
2834            switch (e->Iex.Binop.op) {
2835               case Iop_CmpGT8Ux8: size = 0; break;
2836               case Iop_CmpGT16Ux4: size = 1; break;
2837               case Iop_CmpGT32Ux2: size = 2; break;
2838               default: vassert(0);
2839            }
2840            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
2841                                           res, argL, argR, size, False));
2842            return res;
2843         }
2844         case Iop_CmpGT8Sx8:
2845         case Iop_CmpGT16Sx4:
2846         case Iop_CmpGT32Sx2: {
2847            HReg res = newVRegD(env);
2848            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2849            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2850            UInt size;
2851            switch (e->Iex.Binop.op) {
2852               case Iop_CmpGT8Sx8: size = 0; break;
2853               case Iop_CmpGT16Sx4: size = 1; break;
2854               case Iop_CmpGT32Sx2: size = 2; break;
2855               default: vassert(0);
2856            }
2857            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
2858                                           res, argL, argR, size, False));
2859            return res;
2860         }
2861         case Iop_CmpEQ8x8:
2862         case Iop_CmpEQ16x4:
2863         case Iop_CmpEQ32x2: {
2864            HReg res = newVRegD(env);
2865            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2866            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2867            UInt size;
2868            switch (e->Iex.Binop.op) {
2869               case Iop_CmpEQ8x8: size = 0; break;
2870               case Iop_CmpEQ16x4: size = 1; break;
2871               case Iop_CmpEQ32x2: size = 2; break;
2872               default: vassert(0);
2873            }
2874            addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
2875                                           res, argL, argR, size, False));
2876            return res;
2877         }
2878         case Iop_Mul8x8:
2879         case Iop_Mul16x4:
2880         case Iop_Mul32x2: {
2881            HReg res = newVRegD(env);
2882            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2883            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2884            UInt size = 0;
2885            switch(e->Iex.Binop.op) {
2886               case Iop_Mul8x8: size = 0; break;
2887               case Iop_Mul16x4: size = 1; break;
2888               case Iop_Mul32x2: size = 2; break;
2889               default: vassert(0);
2890            }
2891            addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
2892                                           res, argL, argR, size, False));
2893            return res;
2894         }
2895         case Iop_Mul32Fx2: {
2896            HReg res = newVRegD(env);
2897            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2898            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2899            UInt size = 0;
2900            addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
2901                                           res, argL, argR, size, False));
2902            return res;
2903         }
2904         case Iop_QDMulHi16Sx4:
2905         case Iop_QDMulHi32Sx2: {
2906            HReg res = newVRegD(env);
2907            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2908            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2909            UInt size = 0;
2910            switch(e->Iex.Binop.op) {
2911               case Iop_QDMulHi16Sx4: size = 1; break;
2912               case Iop_QDMulHi32Sx2: size = 2; break;
2913               default: vassert(0);
2914            }
2915            addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
2916                                           res, argL, argR, size, False));
2917            return res;
2918         }
2919
2920         case Iop_QRDMulHi16Sx4:
2921         case Iop_QRDMulHi32Sx2: {
2922            HReg res = newVRegD(env);
2923            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2924            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2925            UInt size = 0;
2926            switch(e->Iex.Binop.op) {
2927               case Iop_QRDMulHi16Sx4: size = 1; break;
2928               case Iop_QRDMulHi32Sx2: size = 2; break;
2929               default: vassert(0);
2930            }
2931            addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
2932                                           res, argL, argR, size, False));
2933            return res;
2934         }
2935
2936         case Iop_PwAdd8x8:
2937         case Iop_PwAdd16x4:
2938         case Iop_PwAdd32x2: {
2939            HReg res = newVRegD(env);
2940            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2941            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2942            UInt size = 0;
2943            switch(e->Iex.Binop.op) {
2944               case Iop_PwAdd8x8: size = 0; break;
2945               case Iop_PwAdd16x4: size = 1; break;
2946               case Iop_PwAdd32x2: size = 2; break;
2947               default: vassert(0);
2948            }
2949            addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
2950                                           res, argL, argR, size, False));
2951            return res;
2952         }
2953         case Iop_PwAdd32Fx2: {
2954            HReg res = newVRegD(env);
2955            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2956            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2957            UInt size = 0;
2958            addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
2959                                           res, argL, argR, size, False));
2960            return res;
2961         }
2962         case Iop_PwMin8Ux8:
2963         case Iop_PwMin16Ux4:
2964         case Iop_PwMin32Ux2: {
2965            HReg res = newVRegD(env);
2966            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2967            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2968            UInt size = 0;
2969            switch(e->Iex.Binop.op) {
2970               case Iop_PwMin8Ux8: size = 0; break;
2971               case Iop_PwMin16Ux4: size = 1; break;
2972               case Iop_PwMin32Ux2: size = 2; break;
2973               default: vassert(0);
2974            }
2975            addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
2976                                           res, argL, argR, size, False));
2977            return res;
2978         }
2979         case Iop_PwMin8Sx8:
2980         case Iop_PwMin16Sx4:
2981         case Iop_PwMin32Sx2: {
2982            HReg res = newVRegD(env);
2983            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2984            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2985            UInt size = 0;
2986            switch(e->Iex.Binop.op) {
2987               case Iop_PwMin8Sx8: size = 0; break;
2988               case Iop_PwMin16Sx4: size = 1; break;
2989               case Iop_PwMin32Sx2: size = 2; break;
2990               default: vassert(0);
2991            }
2992            addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
2993                                           res, argL, argR, size, False));
2994            return res;
2995         }
2996         case Iop_PwMax8Ux8:
2997         case Iop_PwMax16Ux4:
2998         case Iop_PwMax32Ux2: {
2999            HReg res = newVRegD(env);
3000            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3001            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3002            UInt size = 0;
3003            switch(e->Iex.Binop.op) {
3004               case Iop_PwMax8Ux8: size = 0; break;
3005               case Iop_PwMax16Ux4: size = 1; break;
3006               case Iop_PwMax32Ux2: size = 2; break;
3007               default: vassert(0);
3008            }
3009            addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
3010                                           res, argL, argR, size, False));
3011            return res;
3012         }
3013         case Iop_PwMax8Sx8:
3014         case Iop_PwMax16Sx4:
3015         case Iop_PwMax32Sx2: {
3016            HReg res = newVRegD(env);
3017            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3018            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3019            UInt size = 0;
3020            switch(e->Iex.Binop.op) {
3021               case Iop_PwMax8Sx8: size = 0; break;
3022               case Iop_PwMax16Sx4: size = 1; break;
3023               case Iop_PwMax32Sx2: size = 2; break;
3024               default: vassert(0);
3025            }
3026            addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
3027                                           res, argL, argR, size, False));
3028            return res;
3029         }
3030         case Iop_Perm8x8: {
3031            HReg res = newVRegD(env);
3032            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3033            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3034            addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
3035                                           res, argL, argR, 0, False));
3036            return res;
3037         }
3038         case Iop_PolynomialMul8x8: {
3039            HReg res = newVRegD(env);
3040            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3041            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3042            UInt size = 0;
3043            addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
3044                                           res, argL, argR, size, False));
3045            return res;
3046         }
3047         case Iop_Max32Fx2: {
3048            HReg res = newVRegD(env);
3049            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3050            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3051            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
3052                                           res, argL, argR, 2, False));
3053            return res;
3054         }
3055         case Iop_Min32Fx2: {
3056            HReg res = newVRegD(env);
3057            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3058            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3059            addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
3060                                           res, argL, argR, 2, False));
3061            return res;
3062         }
3063         case Iop_PwMax32Fx2: {
3064            HReg res = newVRegD(env);
3065            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3066            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3067            addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
3068                                           res, argL, argR, 2, False));
3069            return res;
3070         }
3071         case Iop_PwMin32Fx2: {
3072            HReg res = newVRegD(env);
3073            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3074            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3075            addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
3076                                           res, argL, argR, 2, False));
3077            return res;
3078         }
3079         case Iop_CmpGT32Fx2: {
3080            HReg res = newVRegD(env);
3081            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3082            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3083            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
3084                                           res, argL, argR, 2, False));
3085            return res;
3086         }
3087         case Iop_CmpGE32Fx2: {
3088            HReg res = newVRegD(env);
3089            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3090            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3091            addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
3092                                           res, argL, argR, 2, False));
3093            return res;
3094         }
3095         case Iop_CmpEQ32Fx2: {
3096            HReg res = newVRegD(env);
3097            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3098            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3099            addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3100                                           res, argL, argR, 2, False));
3101            return res;
3102         }
3103         case Iop_F32ToFixed32Ux2_RZ:
3104         case Iop_F32ToFixed32Sx2_RZ:
3105         case Iop_Fixed32UToF32x2_RN:
3106         case Iop_Fixed32SToF32x2_RN: {
3107            HReg res = newVRegD(env);
3108            HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3109            ARMNeonUnOp op;
3110            UInt imm6;
3111            if (e->Iex.Binop.arg2->tag != Iex_Const ||
3112               typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3113                  vpanic("ARM supports FP <-> Fixed conversion with constant "
3114                         "second argument less than 33 only\n");
3115            }
3116            imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3117            vassert(imm6 <= 32 && imm6 > 0);
3118            imm6 = 64 - imm6;
3119            switch(e->Iex.Binop.op) {
3120               case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3121               case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3122               case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3123               case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3124               default: vassert(0);
3125            }
3126            addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3127            return res;
3128         }
3129         /*
3130         FIXME: is this here or not?
3131         case Iop_VDup8x8:
3132         case Iop_VDup16x4:
3133         case Iop_VDup32x2: {
3134            HReg res = newVRegD(env);
3135            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3136            UInt index;
3137            UInt imm4;
3138            UInt size = 0;
3139            if (e->Iex.Binop.arg2->tag != Iex_Const ||
3140               typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3141                  vpanic("ARM supports Iop_VDup with constant "
3142                         "second argument less than 16 only\n");
3143            }
3144            index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3145            switch(e->Iex.Binop.op) {
3146               case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3147               case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3148               case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3149               default: vassert(0);
3150            }
3151            if (imm4 >= 16) {
3152               vpanic("ARM supports Iop_VDup with constant "
3153                      "second argument less than 16 only\n");
3154            }
3155            addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3156                                          res, argL, imm4, False));
3157            return res;
3158         }
3159         */
3160         default:
3161            break;
3162      }
3163   }
3164
3165   /* --------- UNARY ops --------- */
3166   if (e->tag == Iex_Unop) {
3167      switch (e->Iex.Unop.op) {
3168
3169         /* 32Uto64 */
3170         case Iop_32Uto64: {
3171            HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3172            HReg rHi = newVRegI(env);
3173            HReg res = newVRegD(env);
3174            addInstr(env, ARMInstr_Imm32(rHi, 0));
3175            addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3176            return res;
3177         }
3178
3179         /* 32Sto64 */
3180         case Iop_32Sto64: {
3181            HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3182            HReg rHi = newVRegI(env);
3183            addInstr(env, mk_iMOVds_RR(rHi, rLo));
3184            addInstr(env, ARMInstr_Shift(ARMsh_SAR, rHi, rHi, ARMRI5_I5(31)));
3185            HReg res = newVRegD(env);
3186            addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3187            return res;
3188         }
3189
3190         /* The next 3 are pass-throughs */
3191         /* ReinterpF64asI64 */
3192         case Iop_ReinterpF64asI64:
3193         /* Left64(e) */
3194         case Iop_Left64:
3195         /* CmpwNEZ64(e) */
3196         case Iop_1Sto64: {
3197            HReg rLo, rHi;
3198            HReg res = newVRegD(env);
3199            iselInt64Expr(&rHi, &rLo, env, e);
3200            addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3201            return res;
3202         }
3203
3204         case Iop_Not64: {
3205            DECLARE_PATTERN(p_veqz_8x8);
3206            DECLARE_PATTERN(p_veqz_16x4);
3207            DECLARE_PATTERN(p_veqz_32x2);
3208            DECLARE_PATTERN(p_vcge_8sx8);
3209            DECLARE_PATTERN(p_vcge_16sx4);
3210            DECLARE_PATTERN(p_vcge_32sx2);
3211            DECLARE_PATTERN(p_vcge_8ux8);
3212            DECLARE_PATTERN(p_vcge_16ux4);
3213            DECLARE_PATTERN(p_vcge_32ux2);
3214            DEFINE_PATTERN(p_veqz_8x8,
3215                  unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3216            DEFINE_PATTERN(p_veqz_16x4,
3217                  unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3218            DEFINE_PATTERN(p_veqz_32x2,
3219                  unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3220            DEFINE_PATTERN(p_vcge_8sx8,
3221                  unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3222            DEFINE_PATTERN(p_vcge_16sx4,
3223                  unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3224            DEFINE_PATTERN(p_vcge_32sx2,
3225                  unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3226            DEFINE_PATTERN(p_vcge_8ux8,
3227                  unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3228            DEFINE_PATTERN(p_vcge_16ux4,
3229                  unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3230            DEFINE_PATTERN(p_vcge_32ux2,
3231                  unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3232            if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3233               HReg res = newVRegD(env);
3234               HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3235               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3236               return res;
3237            } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3238               HReg res = newVRegD(env);
3239               HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3240               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3241               return res;
3242            } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3243               HReg res = newVRegD(env);
3244               HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3245               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3246               return res;
3247            } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3248               HReg res = newVRegD(env);
3249               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3250               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3251               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3252                                              res, argL, argR, 0, False));
3253               return res;
3254            } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3255               HReg res = newVRegD(env);
3256               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3257               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3258               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3259                                              res, argL, argR, 1, False));
3260               return res;
3261            } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3262               HReg res = newVRegD(env);
3263               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3264               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3265               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3266                                              res, argL, argR, 2, False));
3267               return res;
3268            } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3269               HReg res = newVRegD(env);
3270               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3271               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3272               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3273                                              res, argL, argR, 0, False));
3274               return res;
3275            } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3276               HReg res = newVRegD(env);
3277               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3278               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3279               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3280                                              res, argL, argR, 1, False));
3281               return res;
3282            } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3283               HReg res = newVRegD(env);
3284               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3285               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3286               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3287                                              res, argL, argR, 2, False));
3288               return res;
3289            } else {
3290               HReg res = newVRegD(env);
3291               HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3292               addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3293               return res;
3294            }
3295         }
3296         case Iop_Dup8x8:
3297         case Iop_Dup16x4:
3298         case Iop_Dup32x2: {
3299            HReg res, arg;
3300            UInt size;
3301            DECLARE_PATTERN(p_vdup_8x8);
3302            DECLARE_PATTERN(p_vdup_16x4);
3303            DECLARE_PATTERN(p_vdup_32x2);
3304            DEFINE_PATTERN(p_vdup_8x8,
3305                  unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3306            DEFINE_PATTERN(p_vdup_16x4,
3307                  unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3308            DEFINE_PATTERN(p_vdup_32x2,
3309                  unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3310            if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3311               UInt index;
3312               UInt imm4;
3313               if (mi.bindee[1]->tag == Iex_Const &&
3314                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3315                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3316                  imm4 = (index << 1) + 1;
3317                  if (index < 8) {
3318                     res = newVRegD(env);
3319                     arg = iselNeon64Expr(env, mi.bindee[0]);
3320                     addInstr(env, ARMInstr_NUnaryS(
3321                                      ARMneon_VDUP,
3322                                      mkARMNRS(ARMNRS_Reg, res, 0),
3323                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3324                                      imm4, False
3325                             ));
3326                     return res;
3327                  }
3328               }
3329            } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3330               UInt index;
3331               UInt imm4;
3332               if (mi.bindee[1]->tag == Iex_Const &&
3333                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3334                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3335                  imm4 = (index << 2) + 2;
3336                  if (index < 4) {
3337                     res = newVRegD(env);
3338                     arg = iselNeon64Expr(env, mi.bindee[0]);
3339                     addInstr(env, ARMInstr_NUnaryS(
3340                                      ARMneon_VDUP,
3341                                      mkARMNRS(ARMNRS_Reg, res, 0),
3342                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3343                                      imm4, False
3344                             ));
3345                     return res;
3346                  }
3347               }
3348            } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3349               UInt index;
3350               UInt imm4;
3351               if (mi.bindee[1]->tag == Iex_Const &&
3352                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3353                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3354                  imm4 = (index << 3) + 4;
3355                  if (index < 2) {
3356                     res = newVRegD(env);
3357                     arg = iselNeon64Expr(env, mi.bindee[0]);
3358                     addInstr(env, ARMInstr_NUnaryS(
3359                                      ARMneon_VDUP,
3360                                      mkARMNRS(ARMNRS_Reg, res, 0),
3361                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3362                                      imm4, False
3363                             ));
3364                     return res;
3365                  }
3366               }
3367            }
3368            arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3369            res = newVRegD(env);
3370            switch (e->Iex.Unop.op) {
3371               case Iop_Dup8x8: size = 0; break;
3372               case Iop_Dup16x4: size = 1; break;
3373               case Iop_Dup32x2: size = 2; break;
3374               default: vassert(0);
3375            }
3376            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3377            return res;
3378         }
3379         case Iop_Abs8x8:
3380         case Iop_Abs16x4:
3381         case Iop_Abs32x2: {
3382            HReg res = newVRegD(env);
3383            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3384            UInt size = 0;
3385            switch(e->Iex.Binop.op) {
3386               case Iop_Abs8x8: size = 0; break;
3387               case Iop_Abs16x4: size = 1; break;
3388               case Iop_Abs32x2: size = 2; break;
3389               default: vassert(0);
3390            }
3391            addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3392            return res;
3393         }
3394         case Iop_Reverse64_8x8:
3395         case Iop_Reverse64_16x4:
3396         case Iop_Reverse64_32x2: {
3397            HReg res = newVRegD(env);
3398            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3399            UInt size = 0;
3400            switch(e->Iex.Binop.op) {
3401               case Iop_Reverse64_8x8: size = 0; break;
3402               case Iop_Reverse64_16x4: size = 1; break;
3403               case Iop_Reverse64_32x2: size = 2; break;
3404               default: vassert(0);
3405            }
3406            addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3407                                          res, arg, size, False));
3408            return res;
3409         }
3410         case Iop_Reverse32_8x8:
3411         case Iop_Reverse32_16x4: {
3412            HReg res = newVRegD(env);
3413            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3414            UInt size = 0;
3415            switch(e->Iex.Binop.op) {
3416               case Iop_Reverse32_8x8: size = 0; break;
3417               case Iop_Reverse32_16x4: size = 1; break;
3418               default: vassert(0);
3419            }
3420            addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3421                                          res, arg, size, False));
3422            return res;
3423         }
3424         case Iop_Reverse16_8x8: {
3425            HReg res = newVRegD(env);
3426            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3427            UInt size = 0;
3428            addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3429                                          res, arg, size, False));
3430            return res;
3431         }
3432         case Iop_CmpwNEZ64: {
3433            HReg x_lsh = newVRegD(env);
3434            HReg x_rsh = newVRegD(env);
3435            HReg lsh_amt = newVRegD(env);
3436            HReg rsh_amt = newVRegD(env);
3437            HReg zero = newVRegD(env);
3438            HReg tmp = newVRegD(env);
3439            HReg tmp2 = newVRegD(env);
3440            HReg res = newVRegD(env);
3441            HReg x = newVRegD(env);
3442            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3443            addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3444            addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3445            addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3446            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3447            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3448                                           rsh_amt, zero, lsh_amt, 2, False));
3449            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3450                                          x_lsh, x, lsh_amt, 3, False));
3451            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3452                                          x_rsh, x, rsh_amt, 3, False));
3453            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3454                                           tmp, x_lsh, x_rsh, 0, False));
3455            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3456                                           res, tmp, x, 0, False));
3457            return res;
3458         }
3459         case Iop_CmpNEZ8x8:
3460         case Iop_CmpNEZ16x4:
3461         case Iop_CmpNEZ32x2: {
3462            HReg res = newVRegD(env);
3463            HReg tmp = newVRegD(env);
3464            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3465            UInt size;
3466            switch (e->Iex.Unop.op) {
3467               case Iop_CmpNEZ8x8: size = 0; break;
3468               case Iop_CmpNEZ16x4: size = 1; break;
3469               case Iop_CmpNEZ32x2: size = 2; break;
3470               default: vassert(0);
3471            }
3472            addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
3473            addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
3474            return res;
3475         }
3476         case Iop_NarrowUn16to8x8:
3477         case Iop_NarrowUn32to16x4:
3478         case Iop_NarrowUn64to32x2: {
3479            HReg res = newVRegD(env);
3480            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3481            UInt size = 0;
3482            switch(e->Iex.Binop.op) {
3483               case Iop_NarrowUn16to8x8:  size = 0; break;
3484               case Iop_NarrowUn32to16x4: size = 1; break;
3485               case Iop_NarrowUn64to32x2: size = 2; break;
3486               default: vassert(0);
3487            }
3488            addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
3489                                          res, arg, size, False));
3490            return res;
3491         }
3492         case Iop_QNarrowUn16Sto8Sx8:
3493         case Iop_QNarrowUn32Sto16Sx4:
3494         case Iop_QNarrowUn64Sto32Sx2: {
3495            HReg res = newVRegD(env);
3496            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3497            UInt size = 0;
3498            switch(e->Iex.Binop.op) {
3499               case Iop_QNarrowUn16Sto8Sx8:  size = 0; break;
3500               case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
3501               case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
3502               default: vassert(0);
3503            }
3504            addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
3505                                          res, arg, size, False));
3506            return res;
3507         }
3508         case Iop_QNarrowUn16Sto8Ux8:
3509         case Iop_QNarrowUn32Sto16Ux4:
3510         case Iop_QNarrowUn64Sto32Ux2: {
3511            HReg res = newVRegD(env);
3512            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3513            UInt size = 0;
3514            switch(e->Iex.Binop.op) {
3515               case Iop_QNarrowUn16Sto8Ux8:  size = 0; break;
3516               case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
3517               case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
3518               default: vassert(0);
3519            }
3520            addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
3521                                          res, arg, size, False));
3522            return res;
3523         }
3524         case Iop_QNarrowUn16Uto8Ux8:
3525         case Iop_QNarrowUn32Uto16Ux4:
3526         case Iop_QNarrowUn64Uto32Ux2: {
3527            HReg res = newVRegD(env);
3528            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3529            UInt size = 0;
3530            switch(e->Iex.Binop.op) {
3531               case Iop_QNarrowUn16Uto8Ux8:  size = 0; break;
3532               case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
3533               case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
3534               default: vassert(0);
3535            }
3536            addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
3537                                          res, arg, size, False));
3538            return res;
3539         }
3540         case Iop_PwAddL8Sx8:
3541         case Iop_PwAddL16Sx4:
3542         case Iop_PwAddL32Sx2: {
3543            HReg res = newVRegD(env);
3544            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3545            UInt size = 0;
3546            switch(e->Iex.Binop.op) {
3547               case Iop_PwAddL8Sx8: size = 0; break;
3548               case Iop_PwAddL16Sx4: size = 1; break;
3549               case Iop_PwAddL32Sx2: size = 2; break;
3550               default: vassert(0);
3551            }
3552            addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
3553                                          res, arg, size, False));
3554            return res;
3555         }
3556         case Iop_PwAddL8Ux8:
3557         case Iop_PwAddL16Ux4:
3558         case Iop_PwAddL32Ux2: {
3559            HReg res = newVRegD(env);
3560            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3561            UInt size = 0;
3562            switch(e->Iex.Binop.op) {
3563               case Iop_PwAddL8Ux8: size = 0; break;
3564               case Iop_PwAddL16Ux4: size = 1; break;
3565               case Iop_PwAddL32Ux2: size = 2; break;
3566               default: vassert(0);
3567            }
3568            addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
3569                                          res, arg, size, False));
3570            return res;
3571         }
3572         case Iop_Cnt8x8: {
3573            HReg res = newVRegD(env);
3574            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3575            UInt size = 0;
3576            addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
3577                                          res, arg, size, False));
3578            return res;
3579         }
3580         case Iop_Clz8Sx8:
3581         case Iop_Clz16Sx4:
3582         case Iop_Clz32Sx2: {
3583            HReg res = newVRegD(env);
3584            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3585            UInt size = 0;
3586            switch(e->Iex.Binop.op) {
3587               case Iop_Clz8Sx8: size = 0; break;
3588               case Iop_Clz16Sx4: size = 1; break;
3589               case Iop_Clz32Sx2: size = 2; break;
3590               default: vassert(0);
3591            }
3592            addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
3593                                          res, arg, size, False));
3594            return res;
3595         }
3596         case Iop_Cls8Sx8:
3597         case Iop_Cls16Sx4:
3598         case Iop_Cls32Sx2: {
3599            HReg res = newVRegD(env);
3600            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3601            UInt size = 0;
3602            switch(e->Iex.Binop.op) {
3603               case Iop_Cls8Sx8: size = 0; break;
3604               case Iop_Cls16Sx4: size = 1; break;
3605               case Iop_Cls32Sx2: size = 2; break;
3606               default: vassert(0);
3607            }
3608            addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
3609                                          res, arg, size, False));
3610            return res;
3611         }
3612         case Iop_FtoI32Sx2_RZ: {
3613            HReg res = newVRegD(env);
3614            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3615            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
3616                                          res, arg, 2, False));
3617            return res;
3618         }
3619         case Iop_FtoI32Ux2_RZ: {
3620            HReg res = newVRegD(env);
3621            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3622            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
3623                                          res, arg, 2, False));
3624            return res;
3625         }
3626         case Iop_I32StoFx2: {
3627            HReg res = newVRegD(env);
3628            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3629            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
3630                                          res, arg, 2, False));
3631            return res;
3632         }
3633         case Iop_I32UtoFx2: {
3634            HReg res = newVRegD(env);
3635            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3636            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
3637                                          res, arg, 2, False));
3638            return res;
3639         }
3640         case Iop_F32toF16x4: {
3641            HReg res = newVRegD(env);
3642            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3643            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
3644                                          res, arg, 2, False));
3645            return res;
3646         }
3647         case Iop_Recip32Fx2: {
3648            HReg res = newVRegD(env);
3649            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3650            addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
3651                                          res, argL, 0, False));
3652            return res;
3653         }
3654         case Iop_Recip32x2: {
3655            HReg res = newVRegD(env);
3656            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3657            addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
3658                                          res, argL, 0, False));
3659            return res;
3660         }
3661         case Iop_Abs32Fx2: {
3662            DECLARE_PATTERN(p_vabd_32fx2);
3663            DEFINE_PATTERN(p_vabd_32fx2,
3664                           unop(Iop_Abs32Fx2,
3665                                binop(Iop_Sub32Fx2,
3666                                      bind(0),
3667                                      bind(1))));
3668            if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
3669               HReg res = newVRegD(env);
3670               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3671               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3672               addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
3673                                              res, argL, argR, 0, False));
3674               return res;
3675            } else {
3676               HReg res = newVRegD(env);
3677               HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3678               addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
3679                                             res, arg, 0, False));
3680               return res;
3681            }
3682         }
3683         case Iop_Rsqrte32Fx2: {
3684            HReg res = newVRegD(env);
3685            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3686            addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
3687                                          res, arg, 0, False));
3688            return res;
3689         }
3690         case Iop_Rsqrte32x2: {
3691            HReg res = newVRegD(env);
3692            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3693            addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
3694                                          res, arg, 0, False));
3695            return res;
3696         }
3697         case Iop_Neg32Fx2: {
3698            HReg res = newVRegD(env);
3699            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3700            addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
3701                                          res, arg, 0, False));
3702            return res;
3703         }
3704         default:
3705            break;
3706      }
3707   } /* if (e->tag == Iex_Unop) */
3708
3709   if (e->tag == Iex_Triop) {
3710      IRTriop *triop = e->Iex.Triop.details;
3711
3712      switch (triop->op) {
3713         case Iop_Extract64: {
3714            HReg res = newVRegD(env);
3715            HReg argL = iselNeon64Expr(env, triop->arg1);
3716            HReg argR = iselNeon64Expr(env, triop->arg2);
3717            UInt imm4;
3718            if (triop->arg3->tag != Iex_Const ||
3719                typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
3720               vpanic("ARM target supports Iop_Extract64 with constant "
3721                      "third argument less than 16 only\n");
3722            }
3723            imm4 = triop->arg3->Iex.Const.con->Ico.U8;
3724            if (imm4 >= 8) {
3725               vpanic("ARM target supports Iop_Extract64 with constant "
3726                      "third argument less than 16 only\n");
3727            }
3728            addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
3729                                           res, argL, argR, imm4, False));
3730            return res;
3731         }
3732         case Iop_SetElem8x8:
3733         case Iop_SetElem16x4:
3734         case Iop_SetElem32x2: {
3735            HReg res = newVRegD(env);
3736            HReg dreg = iselNeon64Expr(env, triop->arg1);
3737            HReg arg = iselIntExpr_R(env, triop->arg3);
3738            UInt index, size;
3739            if (triop->arg2->tag != Iex_Const ||
3740                typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
3741               vpanic("ARM target supports SetElem with constant "
3742                      "second argument only\n");
3743            }
3744            index = triop->arg2->Iex.Const.con->Ico.U8;
3745            switch (triop->op) {
3746               case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
3747               case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
3748               case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
3749               default: vassert(0);
3750            }
3751            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
3752            addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
3753                                           mkARMNRS(ARMNRS_Scalar, res, index),
3754                                           mkARMNRS(ARMNRS_Reg, arg, 0),
3755                                           size, False));
3756            return res;
3757         }
3758         default:
3759            break;
3760      }
3761   }
3762
3763   /* --------- MULTIPLEX --------- */
3764   if (e->tag == Iex_ITE) { // VFD
3765      HReg rLo, rHi;
3766      HReg res = newVRegD(env);
3767      iselInt64Expr(&rHi, &rLo, env, e);
3768      addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3769      return res;
3770   }
3771
3772   ppIRExpr(e);
3773   vpanic("iselNeon64Expr");
3774}
3775
3776static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e )
3777{
3778   HReg r = iselNeonExpr_wrk( env, e );
3779   vassert(hregClass(r) == HRcVec128);
3780   vassert(hregIsVirtual(r));
3781   return r;
3782}
3783
3784/* DO NOT CALL THIS DIRECTLY */
3785static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e )
3786{
3787   IRType ty = typeOfIRExpr(env->type_env, e);
3788   MatchInfo mi;
3789   vassert(e);
3790   vassert(ty == Ity_V128);
3791
3792   if (e->tag == Iex_RdTmp) {
3793      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3794   }
3795
3796   if (e->tag == Iex_Const) {
3797      /* At the moment there should be no 128-bit constants in IR for ARM
3798         generated during disassemble. They are represented as Iop_64HLtoV128
3799         binary operation and are handled among binary ops. */
3800      /* But zero can be created by valgrind internal optimizer */
3801      if (e->Iex.Const.con->Ico.V128 == 0x0000) {
3802         HReg res = newVRegV(env);
3803         addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(6, 0)));
3804         return res;
3805      }
3806      if (e->Iex.Const.con->Ico.V128 == 0xFFFF) {
3807         HReg res = newVRegV(env);
3808         addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(6, 255)));
3809         return res;
3810      }
3811      ppIRExpr(e);
3812      vpanic("128-bit constant is not implemented");
3813   }
3814
3815   if (e->tag == Iex_Load) {
3816      HReg res = newVRegV(env);
3817      ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
3818      vassert(ty == Ity_V128);
3819      addInstr(env, ARMInstr_NLdStQ(True, res, am));
3820      return res;
3821   }
3822
3823   if (e->tag == Iex_Get) {
3824      HReg addr = newVRegI(env);
3825      HReg res = newVRegV(env);
3826      vassert(ty == Ity_V128);
3827      addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
3828      addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
3829      return res;
3830   }
3831
3832   if (e->tag == Iex_Unop) {
3833      switch (e->Iex.Unop.op) {
3834         case Iop_NotV128: {
3835            DECLARE_PATTERN(p_veqz_8x16);
3836            DECLARE_PATTERN(p_veqz_16x8);
3837            DECLARE_PATTERN(p_veqz_32x4);
3838            DECLARE_PATTERN(p_vcge_8sx16);
3839            DECLARE_PATTERN(p_vcge_16sx8);
3840            DECLARE_PATTERN(p_vcge_32sx4);
3841            DECLARE_PATTERN(p_vcge_8ux16);
3842            DECLARE_PATTERN(p_vcge_16ux8);
3843            DECLARE_PATTERN(p_vcge_32ux4);
3844            DEFINE_PATTERN(p_veqz_8x16,
3845                  unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
3846            DEFINE_PATTERN(p_veqz_16x8,
3847                  unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
3848            DEFINE_PATTERN(p_veqz_32x4,
3849                  unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
3850            DEFINE_PATTERN(p_vcge_8sx16,
3851                  unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
3852            DEFINE_PATTERN(p_vcge_16sx8,
3853                  unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
3854            DEFINE_PATTERN(p_vcge_32sx4,
3855                  unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
3856            DEFINE_PATTERN(p_vcge_8ux16,
3857                  unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
3858            DEFINE_PATTERN(p_vcge_16ux8,
3859                  unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
3860            DEFINE_PATTERN(p_vcge_32ux4,
3861                  unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
3862            if (matchIRExpr(&mi, p_veqz_8x16, e)) {
3863               HReg res = newVRegV(env);
3864               HReg arg = iselNeonExpr(env, mi.bindee[0]);
3865               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
3866               return res;
3867            } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
3868               HReg res = newVRegV(env);
3869               HReg arg = iselNeonExpr(env, mi.bindee[0]);
3870               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
3871               return res;
3872            } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
3873               HReg res = newVRegV(env);
3874               HReg arg = iselNeonExpr(env, mi.bindee[0]);
3875               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
3876               return res;
3877            } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
3878               HReg res = newVRegV(env);
3879               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3880               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3881               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3882                                              res, argL, argR, 0, True));
3883               return res;
3884            } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
3885               HReg res = newVRegV(env);
3886               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3887               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3888               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3889                                              res, argL, argR, 1, True));
3890               return res;
3891            } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
3892               HReg res = newVRegV(env);
3893               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3894               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3895               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3896                                              res, argL, argR, 2, True));
3897               return res;
3898            } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
3899               HReg res = newVRegV(env);
3900               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3901               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3902               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3903                                              res, argL, argR, 0, True));
3904               return res;
3905            } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
3906               HReg res = newVRegV(env);
3907               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3908               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3909               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3910                                              res, argL, argR, 1, True));
3911               return res;
3912            } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
3913               HReg res = newVRegV(env);
3914               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3915               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3916               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3917                                              res, argL, argR, 2, True));
3918               return res;
3919            } else {
3920               HReg res = newVRegV(env);
3921               HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3922               addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
3923               return res;
3924            }
3925         }
3926         case Iop_Dup8x16:
3927         case Iop_Dup16x8:
3928         case Iop_Dup32x4: {
3929            HReg res, arg;
3930            UInt size;
3931            DECLARE_PATTERN(p_vdup_8x16);
3932            DECLARE_PATTERN(p_vdup_16x8);
3933            DECLARE_PATTERN(p_vdup_32x4);
3934            DEFINE_PATTERN(p_vdup_8x16,
3935                  unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
3936            DEFINE_PATTERN(p_vdup_16x8,
3937                  unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
3938            DEFINE_PATTERN(p_vdup_32x4,
3939                  unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
3940            if (matchIRExpr(&mi, p_vdup_8x16, e)) {
3941               UInt index;
3942               UInt imm4;
3943               if (mi.bindee[1]->tag == Iex_Const &&
3944                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3945                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3946                  imm4 = (index << 1) + 1;
3947                  if (index < 8) {
3948                     res = newVRegV(env);
3949                     arg = iselNeon64Expr(env, mi.bindee[0]);
3950                     addInstr(env, ARMInstr_NUnaryS(
3951                                      ARMneon_VDUP,
3952                                      mkARMNRS(ARMNRS_Reg, res, 0),
3953                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3954                                      imm4, True
3955                             ));
3956                     return res;
3957                  }
3958               }
3959            } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
3960               UInt index;
3961               UInt imm4;
3962               if (mi.bindee[1]->tag == Iex_Const &&
3963                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3964                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3965                  imm4 = (index << 2) + 2;
3966                  if (index < 4) {
3967                     res = newVRegV(env);
3968                     arg = iselNeon64Expr(env, mi.bindee[0]);
3969                     addInstr(env, ARMInstr_NUnaryS(
3970                                      ARMneon_VDUP,
3971                                      mkARMNRS(ARMNRS_Reg, res, 0),
3972                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3973                                      imm4, True
3974                             ));
3975                     return res;
3976                  }
3977               }
3978            } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
3979               UInt index;
3980               UInt imm4;
3981               if (mi.bindee[1]->tag == Iex_Const &&
3982                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3983                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3984                  imm4 = (index << 3) + 4;
3985                  if (index < 2) {
3986                     res = newVRegV(env);
3987                     arg = iselNeon64Expr(env, mi.bindee[0]);
3988                     addInstr(env, ARMInstr_NUnaryS(
3989                                      ARMneon_VDUP,
3990                                      mkARMNRS(ARMNRS_Reg, res, 0),
3991                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3992                                      imm4, True
3993                             ));
3994                     return res;
3995                  }
3996               }
3997            }
3998            arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3999            res = newVRegV(env);
4000            switch (e->Iex.Unop.op) {
4001               case Iop_Dup8x16: size = 0; break;
4002               case Iop_Dup16x8: size = 1; break;
4003               case Iop_Dup32x4: size = 2; break;
4004               default: vassert(0);
4005            }
4006            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
4007            return res;
4008         }
4009         case Iop_Abs8x16:
4010         case Iop_Abs16x8:
4011         case Iop_Abs32x4: {
4012            HReg res = newVRegV(env);
4013            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4014            UInt size = 0;
4015            switch(e->Iex.Binop.op) {
4016               case Iop_Abs8x16: size = 0; break;
4017               case Iop_Abs16x8: size = 1; break;
4018               case Iop_Abs32x4: size = 2; break;
4019               default: vassert(0);
4020            }
4021            addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
4022            return res;
4023         }
4024         case Iop_Reverse64_8x16:
4025         case Iop_Reverse64_16x8:
4026         case Iop_Reverse64_32x4: {
4027            HReg res = newVRegV(env);
4028            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4029            UInt size = 0;
4030            switch(e->Iex.Binop.op) {
4031               case Iop_Reverse64_8x16: size = 0; break;
4032               case Iop_Reverse64_16x8: size = 1; break;
4033               case Iop_Reverse64_32x4: size = 2; break;
4034               default: vassert(0);
4035            }
4036            addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
4037                                          res, arg, size, True));
4038            return res;
4039         }
4040         case Iop_Reverse32_8x16:
4041         case Iop_Reverse32_16x8: {
4042            HReg res = newVRegV(env);
4043            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4044            UInt size = 0;
4045            switch(e->Iex.Binop.op) {
4046               case Iop_Reverse32_8x16: size = 0; break;
4047               case Iop_Reverse32_16x8: size = 1; break;
4048               default: vassert(0);
4049            }
4050            addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
4051                                          res, arg, size, True));
4052            return res;
4053         }
4054         case Iop_Reverse16_8x16: {
4055            HReg res = newVRegV(env);
4056            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4057            UInt size = 0;
4058            addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
4059                                          res, arg, size, True));
4060            return res;
4061         }
4062         case Iop_CmpNEZ64x2: {
4063            HReg x_lsh = newVRegV(env);
4064            HReg x_rsh = newVRegV(env);
4065            HReg lsh_amt = newVRegV(env);
4066            HReg rsh_amt = newVRegV(env);
4067            HReg zero = newVRegV(env);
4068            HReg tmp = newVRegV(env);
4069            HReg tmp2 = newVRegV(env);
4070            HReg res = newVRegV(env);
4071            HReg x = newVRegV(env);
4072            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4073            addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
4074            addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
4075            addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
4076            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
4077            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4078                                           rsh_amt, zero, lsh_amt, 2, True));
4079            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4080                                          x_lsh, x, lsh_amt, 3, True));
4081            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4082                                          x_rsh, x, rsh_amt, 3, True));
4083            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4084                                           tmp, x_lsh, x_rsh, 0, True));
4085            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4086                                           res, tmp, x, 0, True));
4087            return res;
4088         }
4089         case Iop_CmpNEZ8x16:
4090         case Iop_CmpNEZ16x8:
4091         case Iop_CmpNEZ32x4: {
4092            HReg res = newVRegV(env);
4093            HReg tmp = newVRegV(env);
4094            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4095            UInt size;
4096            switch (e->Iex.Unop.op) {
4097               case Iop_CmpNEZ8x16: size = 0; break;
4098               case Iop_CmpNEZ16x8: size = 1; break;
4099               case Iop_CmpNEZ32x4: size = 2; break;
4100               default: vassert(0);
4101            }
4102            addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
4103            addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
4104            return res;
4105         }
4106         case Iop_Widen8Uto16x8:
4107         case Iop_Widen16Uto32x4:
4108         case Iop_Widen32Uto64x2: {
4109            HReg res = newVRegV(env);
4110            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4111            UInt size;
4112            switch (e->Iex.Unop.op) {
4113               case Iop_Widen8Uto16x8:  size = 0; break;
4114               case Iop_Widen16Uto32x4: size = 1; break;
4115               case Iop_Widen32Uto64x2: size = 2; break;
4116               default: vassert(0);
4117            }
4118            addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
4119                                          res, arg, size, True));
4120            return res;
4121         }
4122         case Iop_Widen8Sto16x8:
4123         case Iop_Widen16Sto32x4:
4124         case Iop_Widen32Sto64x2: {
4125            HReg res = newVRegV(env);
4126            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4127            UInt size;
4128            switch (e->Iex.Unop.op) {
4129               case Iop_Widen8Sto16x8:  size = 0; break;
4130               case Iop_Widen16Sto32x4: size = 1; break;
4131               case Iop_Widen32Sto64x2: size = 2; break;
4132               default: vassert(0);
4133            }
4134            addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4135                                          res, arg, size, True));
4136            return res;
4137         }
4138         case Iop_PwAddL8Sx16:
4139         case Iop_PwAddL16Sx8:
4140         case Iop_PwAddL32Sx4: {
4141            HReg res = newVRegV(env);
4142            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4143            UInt size = 0;
4144            switch(e->Iex.Binop.op) {
4145               case Iop_PwAddL8Sx16: size = 0; break;
4146               case Iop_PwAddL16Sx8: size = 1; break;
4147               case Iop_PwAddL32Sx4: size = 2; break;
4148               default: vassert(0);
4149            }
4150            addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4151                                          res, arg, size, True));
4152            return res;
4153         }
4154         case Iop_PwAddL8Ux16:
4155         case Iop_PwAddL16Ux8:
4156         case Iop_PwAddL32Ux4: {
4157            HReg res = newVRegV(env);
4158            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4159            UInt size = 0;
4160            switch(e->Iex.Binop.op) {
4161               case Iop_PwAddL8Ux16: size = 0; break;
4162               case Iop_PwAddL16Ux8: size = 1; break;
4163               case Iop_PwAddL32Ux4: size = 2; break;
4164               default: vassert(0);
4165            }
4166            addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4167                                          res, arg, size, True));
4168            return res;
4169         }
4170         case Iop_Cnt8x16: {
4171            HReg res = newVRegV(env);
4172            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4173            UInt size = 0;
4174            addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4175            return res;
4176         }
4177         case Iop_Clz8Sx16:
4178         case Iop_Clz16Sx8:
4179         case Iop_Clz32Sx4: {
4180            HReg res = newVRegV(env);
4181            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4182            UInt size = 0;
4183            switch(e->Iex.Binop.op) {
4184               case Iop_Clz8Sx16: size = 0; break;
4185               case Iop_Clz16Sx8: size = 1; break;
4186               case Iop_Clz32Sx4: size = 2; break;
4187               default: vassert(0);
4188            }
4189            addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4190            return res;
4191         }
4192         case Iop_Cls8Sx16:
4193         case Iop_Cls16Sx8:
4194         case Iop_Cls32Sx4: {
4195            HReg res = newVRegV(env);
4196            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4197            UInt size = 0;
4198            switch(e->Iex.Binop.op) {
4199               case Iop_Cls8Sx16: size = 0; break;
4200               case Iop_Cls16Sx8: size = 1; break;
4201               case Iop_Cls32Sx4: size = 2; break;
4202               default: vassert(0);
4203            }
4204            addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4205            return res;
4206         }
4207         case Iop_FtoI32Sx4_RZ: {
4208            HReg res = newVRegV(env);
4209            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4210            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4211                                          res, arg, 2, True));
4212            return res;
4213         }
4214         case Iop_FtoI32Ux4_RZ: {
4215            HReg res = newVRegV(env);
4216            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4217            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4218                                          res, arg, 2, True));
4219            return res;
4220         }
4221         case Iop_I32StoFx4: {
4222            HReg res = newVRegV(env);
4223            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4224            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4225                                          res, arg, 2, True));
4226            return res;
4227         }
4228         case Iop_I32UtoFx4: {
4229            HReg res = newVRegV(env);
4230            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4231            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4232                                          res, arg, 2, True));
4233            return res;
4234         }
4235         case Iop_F16toF32x4: {
4236            HReg res = newVRegV(env);
4237            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4238            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4239                                          res, arg, 2, True));
4240            return res;
4241         }
4242         case Iop_Recip32Fx4: {
4243            HReg res = newVRegV(env);
4244            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4245            addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4246                                          res, argL, 0, True));
4247            return res;
4248         }
4249         case Iop_Recip32x4: {
4250            HReg res = newVRegV(env);
4251            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4252            addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4253                                          res, argL, 0, True));
4254            return res;
4255         }
4256         case Iop_Abs32Fx4: {
4257            HReg res = newVRegV(env);
4258            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4259            addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4260                                          res, argL, 0, True));
4261            return res;
4262         }
4263         case Iop_Rsqrte32Fx4: {
4264            HReg res = newVRegV(env);
4265            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4266            addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4267                                          res, argL, 0, True));
4268            return res;
4269         }
4270         case Iop_Rsqrte32x4: {
4271            HReg res = newVRegV(env);
4272            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4273            addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4274                                          res, argL, 0, True));
4275            return res;
4276         }
4277         case Iop_Neg32Fx4: {
4278            HReg res = newVRegV(env);
4279            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4280            addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4281                                          res, arg, 0, True));
4282            return res;
4283         }
4284         /* ... */
4285         default:
4286            break;
4287      }
4288   }
4289
4290   if (e->tag == Iex_Binop) {
4291      switch (e->Iex.Binop.op) {
4292         case Iop_64HLtoV128:
4293            /* Try to match into single "VMOV reg, imm" instruction */
4294            if (e->Iex.Binop.arg1->tag == Iex_Const &&
4295                e->Iex.Binop.arg2->tag == Iex_Const &&
4296                typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
4297                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
4298                e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
4299                           e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
4300               ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
4301               ARMNImm *imm = Imm64_to_ARMNImm(imm64);
4302               if (imm) {
4303                  HReg res = newVRegV(env);
4304                  addInstr(env, ARMInstr_NeonImm(res, imm));
4305                  return res;
4306               }
4307               if ((imm64 >> 32) == 0LL &&
4308                   (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
4309                  HReg tmp1 = newVRegV(env);
4310                  HReg tmp2 = newVRegV(env);
4311                  HReg res = newVRegV(env);
4312                  if (imm->type < 10) {
4313                     addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
4314                     addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4315                     addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4316                                                    res, tmp1, tmp2, 4, True));
4317                     return res;
4318                  }
4319               }
4320               if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
4321                   (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
4322                  HReg tmp1 = newVRegV(env);
4323                  HReg tmp2 = newVRegV(env);
4324                  HReg res = newVRegV(env);
4325                  if (imm->type < 10) {
4326                     addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
4327                     addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4328                     addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4329                                                    res, tmp1, tmp2, 4, True));
4330                     return res;
4331                  }
4332               }
4333            }
4334            /* Does not match "VMOV Reg, Imm" form.  We'll have to do
4335               it the slow way. */
4336            {
4337               /* local scope */
4338               /* Done via the stack for ease of use. */
4339               /* FIXME: assumes little endian host */
4340               HReg       w3, w2, w1, w0;
4341               HReg       res  = newVRegV(env);
4342               ARMAMode1* sp_0  = ARMAMode1_RI(hregARM_R13(), 0);
4343               ARMAMode1* sp_4  = ARMAMode1_RI(hregARM_R13(), 4);
4344               ARMAMode1* sp_8  = ARMAMode1_RI(hregARM_R13(), 8);
4345               ARMAMode1* sp_12 = ARMAMode1_RI(hregARM_R13(), 12);
4346               ARMRI84*   c_16  = ARMRI84_I84(16,0);
4347               /* Make space for SP */
4348               addInstr(env, ARMInstr_Alu(ARMalu_SUB, hregARM_R13(),
4349                                                      hregARM_R13(), c_16));
4350
4351               /* Store the less significant 64 bits */
4352               iselInt64Expr(&w1, &w0, env, e->Iex.Binop.arg2);
4353               addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
4354                                             w0, sp_0));
4355               addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
4356                                             w1, sp_4));
4357
4358               /* Store the more significant 64 bits */
4359               iselInt64Expr(&w3, &w2, env, e->Iex.Binop.arg1);
4360               addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
4361                                             w2, sp_8));
4362               addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
4363                                             w3, sp_12));
4364
4365                /* Load result back from stack. */
4366                addInstr(env, ARMInstr_NLdStQ(True/*load*/, res,
4367                                              mkARMAModeN_R(hregARM_R13())));
4368
4369                /* Restore SP */
4370                addInstr(env, ARMInstr_Alu(ARMalu_ADD, hregARM_R13(),
4371                                           hregARM_R13(), c_16));
4372                return res;
4373            } /* local scope */
4374            goto neon_expr_bad;
4375         case Iop_AndV128: {
4376            HReg res = newVRegV(env);
4377            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4378            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4379            addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4380                                           res, argL, argR, 4, True));
4381            return res;
4382         }
4383         case Iop_OrV128: {
4384            HReg res = newVRegV(env);
4385            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4386            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4387            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4388                                           res, argL, argR, 4, True));
4389            return res;
4390         }
4391         case Iop_XorV128: {
4392            HReg res = newVRegV(env);
4393            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4394            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4395            addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4396                                           res, argL, argR, 4, True));
4397            return res;
4398         }
4399         case Iop_Add8x16:
4400         case Iop_Add16x8:
4401         case Iop_Add32x4:
4402         case Iop_Add64x2: {
4403            /*
4404            FIXME: remove this if not used
4405            DECLARE_PATTERN(p_vrhadd_32sx4);
4406            ULong one = (1LL << 32) | 1LL;
4407            DEFINE_PATTERN(p_vrhadd_32sx4,
4408                  binop(Iop_Add32x4,
4409                        binop(Iop_Add32x4,
4410                              binop(Iop_SarN32x4,
4411                                    bind(0),
4412                                    mkU8(1)),
4413                              binop(Iop_SarN32x4,
4414                                    bind(1),
4415                                    mkU8(1))),
4416                        binop(Iop_SarN32x4,
4417                              binop(Iop_Add32x4,
4418                                    binop(Iop_Add32x4,
4419                                          binop(Iop_AndV128,
4420                                                bind(0),
4421                                                mkU128(one)),
4422                                          binop(Iop_AndV128,
4423                                                bind(1),
4424                                                mkU128(one))),
4425                                    mkU128(one)),
4426                              mkU8(1))));
4427            */
4428            HReg res = newVRegV(env);
4429            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4430            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4431            UInt size;
4432            switch (e->Iex.Binop.op) {
4433               case Iop_Add8x16: size = 0; break;
4434               case Iop_Add16x8: size = 1; break;
4435               case Iop_Add32x4: size = 2; break;
4436               case Iop_Add64x2: size = 3; break;
4437               default:
4438                  ppIROp(e->Iex.Binop.op);
4439                  vpanic("Illegal element size in VADD");
4440            }
4441            addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
4442                                           res, argL, argR, size, True));
4443            return res;
4444         }
4445         case Iop_Recps32Fx4: {
4446            HReg res = newVRegV(env);
4447            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4448            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4449            UInt size = 0;
4450            addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
4451                                           res, argL, argR, size, True));
4452            return res;
4453         }
4454         case Iop_Rsqrts32Fx4: {
4455            HReg res = newVRegV(env);
4456            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4457            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4458            UInt size = 0;
4459            addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
4460                                           res, argL, argR, size, True));
4461            return res;
4462         }
4463
4464         // These 6 verified 18 Apr 2013
4465         case Iop_InterleaveEvenLanes8x16:
4466         case Iop_InterleaveOddLanes8x16:
4467         case Iop_InterleaveEvenLanes16x8:
4468         case Iop_InterleaveOddLanes16x8:
4469         case Iop_InterleaveEvenLanes32x4:
4470         case Iop_InterleaveOddLanes32x4: { /* handled with ARMneon_TRN */
4471            HReg regD = newVRegV(env);
4472            HReg regM = newVRegV(env);
4473            HReg lhs  = iselNeonExpr(env, e->Iex.Binop.arg1);
4474            HReg rhs  = iselNeonExpr(env, e->Iex.Binop.arg2);
4475            UInt sz;
4476            Bool pickD;  // True: hand back regD, False: hand back regM
4477            switch (e->Iex.Binop.op) {
4478               case Iop_InterleaveEvenLanes8x16: sz = 0; pickD = True;  break;
4479               case Iop_InterleaveEvenLanes16x8: sz = 1; pickD = True;  break;
4480               case Iop_InterleaveEvenLanes32x4: sz = 2; pickD = True;  break;
4481               case Iop_InterleaveOddLanes8x16:  sz = 0; pickD = False; break;
4482               case Iop_InterleaveOddLanes16x8:  sz = 1; pickD = False; break;
4483               case Iop_InterleaveOddLanes32x4:  sz = 2; pickD = False; break;
4484               default: vassert(0);
4485            }
4486            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, regM, lhs, 4, True));
4487            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, regD, rhs, 4, True));
4488            addInstr(env, ARMInstr_NDual(ARMneon_TRN, regD, regM, sz, True));
4489            return pickD ? regD : regM;
4490         }
4491
4492         // These 6 verified 18 Apr 2013
4493         case Iop_InterleaveHI8x16:
4494         case Iop_InterleaveLO8x16:
4495         case Iop_InterleaveHI16x8:
4496         case Iop_InterleaveLO16x8:
4497         case Iop_InterleaveHI32x4:
4498         case Iop_InterleaveLO32x4: { /* handled with ARMneon_ZIP */
4499            HReg regD = newVRegV(env);
4500            HReg regM = newVRegV(env);
4501            HReg lhs  = iselNeonExpr(env, e->Iex.Binop.arg1);
4502            HReg rhs  = iselNeonExpr(env, e->Iex.Binop.arg2);
4503            UInt sz;
4504            Bool pickD;  // True: hand back regD, False: hand back regM
4505            switch (e->Iex.Binop.op) {
4506               case Iop_InterleaveLO8x16: sz = 0; pickD = True;  break;
4507               case Iop_InterleaveLO16x8: sz = 1; pickD = True;  break;
4508               case Iop_InterleaveLO32x4: sz = 2; pickD = True;  break;
4509               case Iop_InterleaveHI8x16: sz = 0; pickD = False; break;
4510               case Iop_InterleaveHI16x8: sz = 1; pickD = False; break;
4511               case Iop_InterleaveHI32x4: sz = 2; pickD = False; break;
4512               default: vassert(0);
4513            }
4514            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, regM, lhs, 4, True));
4515            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, regD, rhs, 4, True));
4516            addInstr(env, ARMInstr_NDual(ARMneon_ZIP, regD, regM, sz, True));
4517            return pickD ? regD : regM;
4518         }
4519
4520         // These 6 verified 18 Apr 2013
4521         case Iop_CatOddLanes8x16:
4522         case Iop_CatEvenLanes8x16:
4523         case Iop_CatOddLanes16x8:
4524         case Iop_CatEvenLanes16x8:
4525         case Iop_CatOddLanes32x4:
4526         case Iop_CatEvenLanes32x4: { /* handled with ARMneon_UZP */
4527            HReg regD = newVRegV(env);
4528            HReg regM = newVRegV(env);
4529            HReg lhs  = iselNeonExpr(env, e->Iex.Binop.arg1);
4530            HReg rhs  = iselNeonExpr(env, e->Iex.Binop.arg2);
4531            UInt sz;
4532            Bool pickD;  // True: hand back regD, False: hand back regM
4533            switch (e->Iex.Binop.op) {
4534               case Iop_CatEvenLanes8x16: sz = 0; pickD = True;  break;
4535               case Iop_CatEvenLanes16x8: sz = 1; pickD = True;  break;
4536               case Iop_CatEvenLanes32x4: sz = 2; pickD = True;  break;
4537               case Iop_CatOddLanes8x16:  sz = 0; pickD = False; break;
4538               case Iop_CatOddLanes16x8:  sz = 1; pickD = False; break;
4539               case Iop_CatOddLanes32x4:  sz = 2; pickD = False; break;
4540               default: vassert(0);
4541            }
4542            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, regM, lhs, 4, True));
4543            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, regD, rhs, 4, True));
4544            addInstr(env, ARMInstr_NDual(ARMneon_UZP, regD, regM, sz, True));
4545            return pickD ? regD : regM;
4546         }
4547
4548         case Iop_QAdd8Ux16:
4549         case Iop_QAdd16Ux8:
4550         case Iop_QAdd32Ux4:
4551         case Iop_QAdd64Ux2: {
4552            HReg dst = newVRegV(env);
4553            HReg lhs = iselNeonExpr(env, e->Iex.Binop.arg1);
4554            HReg rhs = iselNeonExpr(env, e->Iex.Binop.arg2);
4555            UInt sz;
4556            /* Size code 0..3 is chosen by the op; then one ARMneon_VQADDU. */
4557            if      (e->Iex.Binop.op == Iop_QAdd8Ux16) sz = 0;
4558            else if (e->Iex.Binop.op == Iop_QAdd16Ux8) sz = 1;
4559            else if (e->Iex.Binop.op == Iop_QAdd32Ux4) sz = 2;
4560            else if (e->Iex.Binop.op == Iop_QAdd64Ux2) sz = 3;
4561            else {
4562               ppIROp(e->Iex.Binop.op);
4563               vpanic("Illegal element size in VQADDU");
4564            }
4565            addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
4566                                           dst, lhs, rhs, sz, True));
4567            return dst;
4568         }
4569         case Iop_QAdd8Sx16:
4570         case Iop_QAdd16Sx8:
4571         case Iop_QAdd32Sx4:
4572         case Iop_QAdd64Sx2: {
4573            HReg dst = newVRegV(env);
4574            HReg lhs = iselNeonExpr(env, e->Iex.Binop.arg1);
4575            HReg rhs = iselNeonExpr(env, e->Iex.Binop.arg2);
4576            UInt sz;
4577            /* Size code 0..3 is chosen by the op; then one ARMneon_VQADDS. */
4578            if      (e->Iex.Binop.op == Iop_QAdd8Sx16) sz = 0;
4579            else if (e->Iex.Binop.op == Iop_QAdd16Sx8) sz = 1;
4580            else if (e->Iex.Binop.op == Iop_QAdd32Sx4) sz = 2;
4581            else if (e->Iex.Binop.op == Iop_QAdd64Sx2) sz = 3;
4582            else {
4583               ppIROp(e->Iex.Binop.op);
4584               vpanic("Illegal element size in VQADDS");
4585            }
4586            addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
4587                                           dst, lhs, rhs, sz, True));
4588            return dst;
4589         }
4590         case Iop_Sub8x16:
4591         case Iop_Sub16x8:
4592         case Iop_Sub32x4:
4593         case Iop_Sub64x2: {
4594            HReg dst = newVRegV(env);
4595            HReg lhs = iselNeonExpr(env, e->Iex.Binop.arg1);
4596            HReg rhs = iselNeonExpr(env, e->Iex.Binop.arg2);
4597            UInt sz;
4598            /* Size code 0..3 is chosen by the op; then one ARMneon_VSUB. */
4599            if      (e->Iex.Binop.op == Iop_Sub8x16) sz = 0;
4600            else if (e->Iex.Binop.op == Iop_Sub16x8) sz = 1;
4601            else if (e->Iex.Binop.op == Iop_Sub32x4) sz = 2;
4602            else if (e->Iex.Binop.op == Iop_Sub64x2) sz = 3;
4603            else {
4604               ppIROp(e->Iex.Binop.op);
4605               vpanic("Illegal element size in VSUB");
4606            }
4607            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4608                                           dst, lhs, rhs, sz, True));
4609            return dst;
4610         }
4611         case Iop_QSub8Ux16:
4612         case Iop_QSub16Ux8:
4613         case Iop_QSub32Ux4:
4614         case Iop_QSub64Ux2: {
4615            HReg dst = newVRegV(env);
4616            HReg lhs = iselNeonExpr(env, e->Iex.Binop.arg1);
4617            HReg rhs = iselNeonExpr(env, e->Iex.Binop.arg2);
4618            UInt sz;
4619            /* Size code 0..3 is chosen by the op; then one ARMneon_VQSUBU. */
4620            if      (e->Iex.Binop.op == Iop_QSub8Ux16) sz = 0;
4621            else if (e->Iex.Binop.op == Iop_QSub16Ux8) sz = 1;
4622            else if (e->Iex.Binop.op == Iop_QSub32Ux4) sz = 2;
4623            else if (e->Iex.Binop.op == Iop_QSub64Ux2) sz = 3;
4624            else {
4625               ppIROp(e->Iex.Binop.op);
4626               vpanic("Illegal element size in VQSUBU");
4627            }
4628            addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
4629                                           dst, lhs, rhs, sz, True));
4630            return dst;
4631         }
4632         case Iop_QSub8Sx16:
4633         case Iop_QSub16Sx8:
4634         case Iop_QSub32Sx4:
4635         case Iop_QSub64Sx2: {
4636            HReg dst = newVRegV(env);
4637            HReg lhs = iselNeonExpr(env, e->Iex.Binop.arg1);
4638            HReg rhs = iselNeonExpr(env, e->Iex.Binop.arg2);
4639            UInt sz;
4640            /* Size code 0..3 is chosen by the op; then one ARMneon_VQSUBS. */
4641            if      (e->Iex.Binop.op == Iop_QSub8Sx16) sz = 0;
4642            else if (e->Iex.Binop.op == Iop_QSub16Sx8) sz = 1;
4643            else if (e->Iex.Binop.op == Iop_QSub32Sx4) sz = 2;
4644            else if (e->Iex.Binop.op == Iop_QSub64Sx2) sz = 3;
4645            else {
4646               ppIROp(e->Iex.Binop.op);
4647               vpanic("Illegal element size in VQSUBS");
4648            }
4649            addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
4650                                           dst, lhs, rhs, sz, True));
4651            return dst;
4652         }
4653         case Iop_Max8Ux16:
4654         case Iop_Max16Ux8:
4655         case Iop_Max32Ux4: {
4656            HReg dst = newVRegV(env);
4657            HReg lhs = iselNeonExpr(env, e->Iex.Binop.arg1);
4658            HReg rhs = iselNeonExpr(env, e->Iex.Binop.arg2);
4659            UInt sz;
4660            /* Size code 0..2 is chosen by the op; then one ARMneon_VMAXU. */
4661            if      (e->Iex.Binop.op == Iop_Max8Ux16) sz = 0;
4662            else if (e->Iex.Binop.op == Iop_Max16Ux8) sz = 1;
4663            else if (e->Iex.Binop.op == Iop_Max32Ux4) sz = 2;
4664            else
4665               vpanic("Illegal element size in VMAXU");
4666            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
4667                                           dst, lhs, rhs, sz, True));
4668            return dst;
4669         }
4670         case Iop_Max8Sx16:
4671         case Iop_Max16Sx8:
4672         case Iop_Max32Sx4: {
4673            HReg res = newVRegV(env);
4674            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4675            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4676            UInt size;
4677            /* Size code 0..2 by op; the panic names the insn emitted here. */
4678            if      (e->Iex.Binop.op == Iop_Max8Sx16) size = 0;
4679            else if (e->Iex.Binop.op == Iop_Max16Sx8) size = 1;
4680            else if (e->Iex.Binop.op == Iop_Max32Sx4) size = 2;
4681            else
4682               vpanic("Illegal element size in VMAXS");
4683            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
4684                                           res, argL, argR, size, True));
4685            return res;
4686         }
4687         case Iop_Min8Ux16:
4688         case Iop_Min16Ux8:
4689         case Iop_Min32Ux4: {
4690            HReg res = newVRegV(env);
4691            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4692            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4693            UInt size;
4694            /* Size code 0..2 by op; the panic names the insn emitted here. */
4695            if      (e->Iex.Binop.op == Iop_Min8Ux16) size = 0;
4696            else if (e->Iex.Binop.op == Iop_Min16Ux8) size = 1;
4697            else if (e->Iex.Binop.op == Iop_Min32Ux4) size = 2;
4698            else
4699               vpanic("Illegal element size in VMINU");
4700            addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
4701                                           res, argL, argR, size, True));
4702            return res;
4703         }
4704         case Iop_Min8Sx16:
4705         case Iop_Min16Sx8:
4706         case Iop_Min32Sx4: {
4707            HReg res = newVRegV(env);
4708            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4709            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4710            UInt size;
4711            /* Size code 0..2 by op; the panic names the insn emitted here. */
4712            if      (e->Iex.Binop.op == Iop_Min8Sx16) size = 0;
4713            else if (e->Iex.Binop.op == Iop_Min16Sx8) size = 1;
4714            else if (e->Iex.Binop.op == Iop_Min32Sx4) size = 2;
4715            else
4716               vpanic("Illegal element size in VMINS");
4717            addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
4718                                           res, argL, argR, size, True));
4719            return res;
4720         }
4721         case Iop_Sar8x16:
4722         case Iop_Sar16x8:
4723         case Iop_Sar32x4:
4724         case Iop_Sar64x2: {
4725            HReg dst   = newVRegV(env);
4726            HReg val   = iselNeonExpr(env, e->Iex.Binop.arg1);
4727            HReg amt   = iselNeonExpr(env, e->Iex.Binop.arg2);
4728            HReg negV  = newVRegV(env);
4729            HReg zeroV = newVRegV(env);
4730            UInt sz;
4731            /* Size code 0..3 by op; any other op trips vassert(0). */
4732            if      (e->Iex.Binop.op == Iop_Sar8x16) sz = 0;
4733            else if (e->Iex.Binop.op == Iop_Sar16x8) sz = 1;
4734            else if (e->Iex.Binop.op == Iop_Sar32x4) sz = 2;
4735            else if (e->Iex.Binop.op == Iop_Sar64x2) sz = 3;
4736            else vassert(0);
4737            /* negV = zeroV - amt via VSUB, then VSAL of val by negV. */
4738            addInstr(env, ARMInstr_NeonImm(zeroV, ARMNImm_TI(0,0)));
4739            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4740                                           negV, zeroV, amt, sz, True));
4741            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4742                                          dst, val, negV, sz, True));
4743            return dst;
4744         }
4745         case Iop_Sal8x16:
4746         case Iop_Sal16x8:
4747         case Iop_Sal32x4:
4748         case Iop_Sal64x2: {
4749            HReg dst = newVRegV(env);
4750            HReg val = iselNeonExpr(env, e->Iex.Binop.arg1);
4751            HReg amt = iselNeonExpr(env, e->Iex.Binop.arg2);
4752            UInt sz;
4753            /* Size code 0..3 by op; any other op trips vassert(0). */
4754            if      (e->Iex.Binop.op == Iop_Sal8x16) sz = 0;
4755            else if (e->Iex.Binop.op == Iop_Sal16x8) sz = 1;
4756            else if (e->Iex.Binop.op == Iop_Sal32x4) sz = 2;
4757            else if (e->Iex.Binop.op == Iop_Sal64x2) sz = 3;
4758            else vassert(0);
4759            /* The shift counts are used as selected: one ARMneon_VSAL. */
4760            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4761                                          dst, val, amt, sz, True));
4762            return dst;
4763         }
4764         case Iop_Shr8x16:
4765         case Iop_Shr16x8:
4766         case Iop_Shr32x4:
4767         case Iop_Shr64x2: {
4768            HReg dst   = newVRegV(env);
4769            HReg val   = iselNeonExpr(env, e->Iex.Binop.arg1);
4770            HReg amt   = iselNeonExpr(env, e->Iex.Binop.arg2);
4771            HReg negV  = newVRegV(env);
4772            HReg zeroV = newVRegV(env);
4773            UInt sz;
4774            /* Size code 0..3 by op; any other op trips vassert(0). */
4775            if      (e->Iex.Binop.op == Iop_Shr8x16) sz = 0;
4776            else if (e->Iex.Binop.op == Iop_Shr16x8) sz = 1;
4777            else if (e->Iex.Binop.op == Iop_Shr32x4) sz = 2;
4778            else if (e->Iex.Binop.op == Iop_Shr64x2) sz = 3;
4779            else vassert(0);
4780            /* negV = zeroV - amt via VSUB, then VSHL of val by negV. */
4781            addInstr(env, ARMInstr_NeonImm(zeroV, ARMNImm_TI(0,0)));
4782            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4783                                           negV, zeroV, amt, sz, True));
4784            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4785                                          dst, val, negV, sz, True));
4786            return dst;
4787         }
4788         case Iop_Shl8x16:
4789         case Iop_Shl16x8:
4790         case Iop_Shl32x4:
4791         case Iop_Shl64x2: {
4792            HReg dst = newVRegV(env);
4793            HReg val = iselNeonExpr(env, e->Iex.Binop.arg1);
4794            HReg amt = iselNeonExpr(env, e->Iex.Binop.arg2);
4795            UInt sz;
4796            /* Size code 0..3 by op; any other op trips vassert(0). */
4797            if      (e->Iex.Binop.op == Iop_Shl8x16) sz = 0;
4798            else if (e->Iex.Binop.op == Iop_Shl16x8) sz = 1;
4799            else if (e->Iex.Binop.op == Iop_Shl32x4) sz = 2;
4800            else if (e->Iex.Binop.op == Iop_Shl64x2) sz = 3;
4801            else vassert(0);
4802            /* The shift counts are used as selected: one ARMneon_VSHL. */
4803            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4804                                          dst, val, amt, sz, True));
4805            return dst;
4806         }
4807         case Iop_QShl8x16:
4808         case Iop_QShl16x8:
4809         case Iop_QShl32x4:
4810         case Iop_QShl64x2: {
4811            HReg dst = newVRegV(env);
4812            HReg val = iselNeonExpr(env, e->Iex.Binop.arg1);
4813            HReg amt = iselNeonExpr(env, e->Iex.Binop.arg2);
4814            UInt sz;
4815            /* Size code 0..3 by op; any other op trips vassert(0). */
4816            if      (e->Iex.Binop.op == Iop_QShl8x16) sz = 0;
4817            else if (e->Iex.Binop.op == Iop_QShl16x8) sz = 1;
4818            else if (e->Iex.Binop.op == Iop_QShl32x4) sz = 2;
4819            else if (e->Iex.Binop.op == Iop_QShl64x2) sz = 3;
4820            else vassert(0);
4821            /* The shift counts are used as selected: one ARMneon_VQSHL. */
4822            addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
4823                                          dst, val, amt, sz, True));
4824            return dst;
4825         }
4826         case Iop_QSal8x16:
4827         case Iop_QSal16x8:
4828         case Iop_QSal32x4:
4829         case Iop_QSal64x2: {
4830            HReg dst = newVRegV(env);
4831            HReg val = iselNeonExpr(env, e->Iex.Binop.arg1);
4832            HReg amt = iselNeonExpr(env, e->Iex.Binop.arg2);
4833            UInt sz;
4834            /* Size code 0..3 by op; any other op trips vassert(0). */
4835            if      (e->Iex.Binop.op == Iop_QSal8x16) sz = 0;
4836            else if (e->Iex.Binop.op == Iop_QSal16x8) sz = 1;
4837            else if (e->Iex.Binop.op == Iop_QSal32x4) sz = 2;
4838            else if (e->Iex.Binop.op == Iop_QSal64x2) sz = 3;
4839            else vassert(0);
4840            /* The shift counts are used as selected: one ARMneon_VQSAL. */
4841            addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
4842                                          dst, val, amt, sz, True));
4843            return dst;
4844         }
4845         case Iop_QShlN8x16:
4846         case Iop_QShlN16x8:
4847         case Iop_QShlN32x4:
4848         case Iop_QShlN64x2: {
4849            HReg res = newVRegV(env);
4850            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4851            UInt size, imm;
4852            /* Only an Ity_I8 constant is accepted as the second operand. */
4853            if (e->Iex.Binop.arg2->tag != Iex_Const ||
4854                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4855               vpanic("ARM target supports Iop_QShlNAxB with constant "
4856                      "second argument only\n");
4857            }
4858            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4859            /* The size operand is the lane width in bits OR-ed with imm. */
4860            if      (e->Iex.Binop.op == Iop_QShlN8x16) size = 8 | imm;
4861            else if (e->Iex.Binop.op == Iop_QShlN16x8) size = 16 | imm;
4862            else if (e->Iex.Binop.op == Iop_QShlN32x4) size = 32 | imm;
4863            else if (e->Iex.Binop.op == Iop_QShlN64x2) size = 64 | imm;
4864            else vassert(0);
4865            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
4866                                          res, argL, size, True));
4867            return res;
4868         }
4869         case Iop_QShlN8Sx16:
4870         case Iop_QShlN16Sx8:
4871         case Iop_QShlN32Sx4:
4872         case Iop_QShlN64Sx2: {
4873            HReg res = newVRegV(env);
4874            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4875            UInt size, imm;
4876            /* Only an Ity_I8 constant is accepted as the second operand. */
4877            if (e->Iex.Binop.arg2->tag != Iex_Const ||
4878                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4879               vpanic("ARM target supports Iop_QShlNASxB with constant "
4880                      "second argument only\n");
4881            }
4882            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4883            /* The size operand is the lane width in bits OR-ed with imm. */
4884            if      (e->Iex.Binop.op == Iop_QShlN8Sx16) size = 8 | imm;
4885            else if (e->Iex.Binop.op == Iop_QShlN16Sx8) size = 16 | imm;
4886            else if (e->Iex.Binop.op == Iop_QShlN32Sx4) size = 32 | imm;
4887            else if (e->Iex.Binop.op == Iop_QShlN64Sx2) size = 64 | imm;
4888            else vassert(0);
4889            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
4890                                          res, argL, size, True));
4891            return res;
4892         }
4893         case Iop_QSalN8x16:
4894         case Iop_QSalN16x8:
4895         case Iop_QSalN32x4:
4896         case Iop_QSalN64x2: {
4897            HReg res = newVRegV(env);
4898            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4899            UInt size, imm;
4900            /* Only an Ity_I8 constant is accepted as the second operand. */
4901            if (e->Iex.Binop.arg2->tag != Iex_Const ||
4902                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4903               vpanic("ARM target supports Iop_QSalNAxB with constant "
4904                      "second argument only\n");
4905            }
4906            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4907            /* The size operand is the lane width in bits OR-ed with imm. */
4908            if      (e->Iex.Binop.op == Iop_QSalN8x16) size = 8 | imm;
4909            else if (e->Iex.Binop.op == Iop_QSalN16x8) size = 16 | imm;
4910            else if (e->Iex.Binop.op == Iop_QSalN32x4) size = 32 | imm;
4911            else if (e->Iex.Binop.op == Iop_QSalN64x2) size = 64 | imm;
4912            else vassert(0);
4913            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
4914                                          res, argL, size, True));
4915            return res;
4916         }
4917         case Iop_ShrN8x16:
4918         case Iop_ShrN16x8:
4919         case Iop_ShrN32x4:
4920         case Iop_ShrN64x2: {
4921            HReg dst  = newVRegV(env);
4922            HReg shV  = newVRegV(env);
4923            HReg val  = iselNeonExpr(env, e->Iex.Binop.arg1);
4924            HReg amt  = iselIntExpr_R(env, e->Iex.Binop.arg2);
4925            HReg negR = newVRegI(env);
4926            UInt sz;
4927            /* Size code 0..3 by op; any other op trips vassert(0). */
4928            if      (e->Iex.Binop.op == Iop_ShrN8x16) sz = 0;
4929            else if (e->Iex.Binop.op == Iop_ShrN16x8) sz = 1;
4930            else if (e->Iex.Binop.op == Iop_ShrN32x4) sz = 2;
4931            else if (e->Iex.Binop.op == Iop_ShrN64x2) sz = 3;
4932            else vassert(0);
4933            /* negR = NEG(amt) in an integer vreg; ARMneon_DUP moves it
4934               into shV; val is then shifted with ARMneon_VSHL by shV. */
4935            addInstr(env, ARMInstr_Unary(ARMun_NEG, negR, amt));
4936            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, shV, negR, 0, True));
4937            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4938                                          dst, val, shV, sz, True));
4939            return dst;
4940         }
4941         case Iop_ShlN8x16:
4942         case Iop_ShlN16x8:
4943         case Iop_ShlN32x4:
4944         case Iop_ShlN64x2: {
4945            HReg dst = newVRegV(env);
4946            HReg shV = newVRegV(env);
4947            HReg val = iselNeonExpr(env, e->Iex.Binop.arg1);
4948            HReg amt = iselIntExpr_R(env, e->Iex.Binop.arg2);
4949            UInt sz;
4950            /* Size code 0..3 by op; any other op trips vassert(0). */
4951            if      (e->Iex.Binop.op == Iop_ShlN8x16) sz = 0;
4952            else if (e->Iex.Binop.op == Iop_ShlN16x8) sz = 1;
4953            else if (e->Iex.Binop.op == Iop_ShlN32x4) sz = 2;
4954            else if (e->Iex.Binop.op == Iop_ShlN64x2) sz = 3;
4955            else vassert(0);
4956            /* ARMneon_DUP moves amt into shV; then ARMneon_VSHL by shV. */
4957            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, shV, amt, 0, True));
4958            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4959                                          dst, val, shV, sz, True));
4960            return dst;
4961         }
4962         case Iop_SarN8x16:
4963         case Iop_SarN16x8:
4964         case Iop_SarN32x4:
4965         case Iop_SarN64x2: {
4966            HReg dst  = newVRegV(env);
4967            HReg shV  = newVRegV(env);
4968            HReg val  = iselNeonExpr(env, e->Iex.Binop.arg1);
4969            HReg amt  = iselIntExpr_R(env, e->Iex.Binop.arg2);
4970            HReg negR = newVRegI(env);
4971            UInt sz;
4972            /* Size code 0..3 by op; any other op trips vassert(0). */
4973            if      (e->Iex.Binop.op == Iop_SarN8x16) sz = 0;
4974            else if (e->Iex.Binop.op == Iop_SarN16x8) sz = 1;
4975            else if (e->Iex.Binop.op == Iop_SarN32x4) sz = 2;
4976            else if (e->Iex.Binop.op == Iop_SarN64x2) sz = 3;
4977            else vassert(0);
4978            /* negR = NEG(amt); DUP it into shV; then ARMneon_VSAL by shV. */
4979            addInstr(env, ARMInstr_Unary(ARMun_NEG, negR, amt));
4980            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, shV, negR, 0, True));
4981            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4982                                          dst, val, shV, sz, True));
4983            return dst;
4984         }
4985         case Iop_CmpGT8Ux16:
4986         case Iop_CmpGT16Ux8:
4987         case Iop_CmpGT32Ux4: {
4988            HReg dst = newVRegV(env);
4989            HReg lhs = iselNeonExpr(env, e->Iex.Binop.arg1);
4990            HReg rhs = iselNeonExpr(env, e->Iex.Binop.arg2);
4991            UInt sz;
4992            /* Size code 0..2 by op; any other op trips vassert(0). */
4993            if      (e->Iex.Binop.op == Iop_CmpGT8Ux16) sz = 0;
4994            else if (e->Iex.Binop.op == Iop_CmpGT16Ux8) sz = 1;
4995            else if (e->Iex.Binop.op == Iop_CmpGT32Ux4) sz = 2;
4996            else vassert(0);
4997            /* Operands are passed in IR order to one ARMneon_VCGTU. */
4998            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
4999                                           dst, lhs, rhs, sz, True));
5000            return dst;
5001         }
5002         case Iop_CmpGT8Sx16:
5003         case Iop_CmpGT16Sx8:
5004         case Iop_CmpGT32Sx4: {
5005            HReg dst = newVRegV(env);
5006            HReg lhs = iselNeonExpr(env, e->Iex.Binop.arg1);
5007            HReg rhs = iselNeonExpr(env, e->Iex.Binop.arg2);
5008            UInt sz;
5009            /* Size code 0..2 by op; any other op trips vassert(0). */
5010            if      (e->Iex.Binop.op == Iop_CmpGT8Sx16) sz = 0;
5011            else if (e->Iex.Binop.op == Iop_CmpGT16Sx8) sz = 1;
5012            else if (e->Iex.Binop.op == Iop_CmpGT32Sx4) sz = 2;
5013            else vassert(0);
5014            /* Operands are passed in IR order to one ARMneon_VCGTS. */
5015            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
5016                                           dst, lhs, rhs, sz, True));
5017            return dst;
5018         }
5019         case Iop_CmpEQ8x16:
5020         case Iop_CmpEQ16x8:
5021         case Iop_CmpEQ32x4: {
5022            HReg dst = newVRegV(env);
5023            HReg lhs = iselNeonExpr(env, e->Iex.Binop.arg1);
5024            HReg rhs = iselNeonExpr(env, e->Iex.Binop.arg2);
5025            UInt sz;
5026            /* Size code 0..2 by op; any other op trips vassert(0). */
5027            if      (e->Iex.Binop.op == Iop_CmpEQ8x16) sz = 0;
5028            else if (e->Iex.Binop.op == Iop_CmpEQ16x8) sz = 1;
5029            else if (e->Iex.Binop.op == Iop_CmpEQ32x4) sz = 2;
5030            else vassert(0);
5031            /* Operands are passed in IR order to one ARMneon_VCEQ. */
5032            addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
5033                                           dst, lhs, rhs, sz, True));
5034            return dst;
5035         }
5036         case Iop_Mul8x16:
5037         case Iop_Mul16x8:
5038         case Iop_Mul32x4: {
5039            HReg dst = newVRegV(env);
5040            HReg lhs = iselNeonExpr(env, e->Iex.Binop.arg1);
5041            HReg rhs = iselNeonExpr(env, e->Iex.Binop.arg2);
5042            UInt sz = 0;
5043            /* Size code 0..2 by op; any other op trips vassert(0). */
5044            if      (e->Iex.Binop.op == Iop_Mul8x16) sz = 0;
5045            else if (e->Iex.Binop.op == Iop_Mul16x8) sz = 1;
5046            else if (e->Iex.Binop.op == Iop_Mul32x4) sz = 2;
5047            else vassert(0);
5048            /* Both operands are 128-bit selections (iselNeonExpr). */
5049            addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
5050                                           dst, lhs, rhs, sz, True));
5051            return dst;
5052         }
5053         case Iop_Mull8Ux8:
5054         case Iop_Mull16Ux4:
5055         case Iop_Mull32Ux2: {
5056            HReg dst = newVRegV(env);
5057            HReg lhs = iselNeon64Expr(env, e->Iex.Binop.arg1);
5058            HReg rhs = iselNeon64Expr(env, e->Iex.Binop.arg2);
5059            UInt sz = 0;
5060            /* Size code 0..2 by op; any other op trips vassert(0). */
5061            if      (e->Iex.Binop.op == Iop_Mull8Ux8)  sz = 0;
5062            else if (e->Iex.Binop.op == Iop_Mull16Ux4) sz = 1;
5063            else if (e->Iex.Binop.op == Iop_Mull32Ux2) sz = 2;
5064            else vassert(0);
5065            /* Operands come from iselNeon64Expr; dst is a newVRegV reg. */
5066            addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
5067                                           dst, lhs, rhs, sz, True));
5068            return dst;
5069         }
5070
5071         case Iop_Mull8Sx8:
5072         case Iop_Mull16Sx4:
5073         case Iop_Mull32Sx2: {
5074            HReg dst = newVRegV(env);
5075            HReg lhs = iselNeon64Expr(env, e->Iex.Binop.arg1);
5076            HReg rhs = iselNeon64Expr(env, e->Iex.Binop.arg2);
5077            UInt sz = 0;
5078            /* Size code 0..2 by op; any other op trips vassert(0). */
5079            if      (e->Iex.Binop.op == Iop_Mull8Sx8)  sz = 0;
5080            else if (e->Iex.Binop.op == Iop_Mull16Sx4) sz = 1;
5081            else if (e->Iex.Binop.op == Iop_Mull32Sx2) sz = 2;
5082            else vassert(0);
5083            /* Operands come from iselNeon64Expr; dst is a newVRegV reg. */
5084            addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
5085                                           dst, lhs, rhs, sz, True));
5086            return dst;
5087         }
5088
5089         case Iop_QDMulHi16Sx8:
5090         case Iop_QDMulHi32Sx4: {
5091            HReg dst = newVRegV(env);
5092            HReg lhs = iselNeonExpr(env, e->Iex.Binop.arg1);
5093            HReg rhs = iselNeonExpr(env, e->Iex.Binop.arg2);
5094            UInt sz = 0;
5095            /* Only size codes 1 and 2 occur; other ops trip vassert(0). */
5096            if      (e->Iex.Binop.op == Iop_QDMulHi16Sx8) sz = 1;
5097            else if (e->Iex.Binop.op == Iop_QDMulHi32Sx4) sz = 2;
5098            else vassert(0);
5099            /* One ARMneon_VQDMULH on the two 128-bit selections. */
5100            addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5101                                           dst, lhs, rhs, sz, True));
5102            return dst;
5103         }
5104
5105         case Iop_QRDMulHi16Sx8:
5106         case Iop_QRDMulHi32Sx4: {
5107            HReg dst = newVRegV(env);
5108            HReg lhs = iselNeonExpr(env, e->Iex.Binop.arg1);
5109            HReg rhs = iselNeonExpr(env, e->Iex.Binop.arg2);
5110            UInt sz = 0;
5111            /* Only size codes 1 and 2 occur; other ops trip vassert(0). */
5112            if      (e->Iex.Binop.op == Iop_QRDMulHi16Sx8) sz = 1;
5113            else if (e->Iex.Binop.op == Iop_QRDMulHi32Sx4) sz = 2;
5114            else vassert(0);
5115            /* One ARMneon_VQRDMULH on the two 128-bit selections. */
5116            addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5117                                           dst, lhs, rhs, sz, True));
5118            return dst;
5119         }
5120
5121         case Iop_QDMulLong16Sx4:
5122         case Iop_QDMulLong32Sx2: {
5123            HReg dst = newVRegV(env);
5124            HReg lhs = iselNeon64Expr(env, e->Iex.Binop.arg1);
5125            HReg rhs = iselNeon64Expr(env, e->Iex.Binop.arg2);
5126            UInt sz = 0;
5127            /* Only size codes 1 and 2 occur; other ops trip vassert(0). */
5128            if      (e->Iex.Binop.op == Iop_QDMulLong16Sx4) sz = 1;
5129            else if (e->Iex.Binop.op == Iop_QDMulLong32Sx2) sz = 2;
5130            else vassert(0);
5131            /* Operands come from iselNeon64Expr; dst is a newVRegV reg. */
5132            addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5133                                           dst, lhs, rhs, sz, True));
5134            return dst;
5135         }
5136         case Iop_PolynomialMul8x16: {
5137            /* One ARMneon_VMULP; the size operand is fixed at 0 here. */
5138            HReg dst = newVRegV(env);
5139            HReg lhs = iselNeonExpr(env, e->Iex.Binop.arg1);
5140            HReg rhs = iselNeonExpr(env, e->Iex.Binop.arg2);
5141            addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5142                                           dst, lhs, rhs, 0, True));
5143            return dst;
5144         }
5145         case Iop_Max32Fx4: { /* one ARMneon_VMAXF, size operand 2 */
5146            HReg dst = newVRegV(env);
5147            HReg lhs = iselNeonExpr(env, e->Iex.Binop.arg1);
5148            HReg rhs = iselNeonExpr(env, e->Iex.Binop.arg2);
5149            addInstr(env,
5150                     ARMInstr_NBinary(ARMneon_VMAXF, dst, lhs, rhs, 2, True));
5151            return dst;
5152         }
5153         case Iop_Min32Fx4: { /* one ARMneon_VMINF, size operand 2 */
5154            HReg dst = newVRegV(env);
5155            HReg lhs = iselNeonExpr(env, e->Iex.Binop.arg1);
5156            HReg rhs = iselNeonExpr(env, e->Iex.Binop.arg2);
5157            addInstr(env,
5158                     ARMInstr_NBinary(ARMneon_VMINF, dst, lhs, rhs, 2, True));
5159            return dst;
5160         }
5161         case Iop_PwMax32Fx4: { /* one ARMneon_VPMAXF, size operand 2 */
5162            HReg dst = newVRegV(env);
5163            HReg lhs = iselNeonExpr(env, e->Iex.Binop.arg1);
5164            HReg rhs = iselNeonExpr(env, e->Iex.Binop.arg2);
5165            addInstr(env,
5166                     ARMInstr_NBinary(ARMneon_VPMAXF, dst, lhs, rhs, 2, True));
5167            return dst;
5168         }
5169         case Iop_PwMin32Fx4: { /* one ARMneon_VPMINF, size operand 2 */
5170            HReg dst = newVRegV(env);
5171            HReg lhs = iselNeonExpr(env, e->Iex.Binop.arg1);
5172            HReg rhs = iselNeonExpr(env, e->Iex.Binop.arg2);
5173            addInstr(env,
5174                     ARMInstr_NBinary(ARMneon_VPMINF, dst, lhs, rhs, 2, True));
5175            return dst;
5176         }
5177         case Iop_CmpGT32Fx4: { /* one ARMneon_VCGTF, size operand 2 */
5178            HReg dst = newVRegV(env);
5179            HReg lhs = iselNeonExpr(env, e->Iex.Binop.arg1);
5180            HReg rhs = iselNeonExpr(env, e->Iex.Binop.arg2);
5181            addInstr(env,
5182                     ARMInstr_NBinary(ARMneon_VCGTF, dst, lhs, rhs, 2, True));
5183            return dst;
5184         }
5185         case Iop_CmpGE32Fx4: { /* one ARMneon_VCGEF, size operand 2 */
5186            HReg dst = newVRegV(env);
5187            HReg lhs = iselNeonExpr(env, e->Iex.Binop.arg1);
5188            HReg rhs = iselNeonExpr(env, e->Iex.Binop.arg2);
5189            addInstr(env,
5190                     ARMInstr_NBinary(ARMneon_VCGEF, dst, lhs, rhs, 2, True));
5191            return dst;
5192         }
5193         case Iop_CmpEQ32Fx4: {
5194            HReg res = newVRegV(env);
5195            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5196            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5197            addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5198                                           res, argL, argR, 2, True));
5199            return res;
5200         }
5201
5202         case Iop_PolynomialMull8x8: {
5203            HReg res = newVRegV(env);
5204            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5205            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5206            UInt size = 0;
5207            addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5208                                           res, argL, argR, size, True));
5209            return res;
5210         }
5211         case Iop_F32ToFixed32Ux4_RZ:
5212         case Iop_F32ToFixed32Sx4_RZ:
5213         case Iop_Fixed32UToF32x4_RN:
5214         case Iop_Fixed32SToF32x4_RN: {
5215            HReg res = newVRegV(env);
5216            HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5217            ARMNeonUnOp op;
5218            UInt imm6;
5219            if (e->Iex.Binop.arg2->tag != Iex_Const ||
5220               typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5221                  vpanic("ARM supports FP <-> Fixed conversion with constant "
5222                         "second argument less than 33 only\n");
5223            }
5224            imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5225            vassert(imm6 <= 32 && imm6 > 0);
5226            imm6 = 64 - imm6;
5227            switch(e->Iex.Binop.op) {
5228               case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5229               case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5230               case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5231               case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5232               default: vassert(0);
5233            }
5234            addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5235            return res;
5236         }
5237         /*
5238         FIXME remove if not used
5239         case Iop_VDup8x16:
5240         case Iop_VDup16x8:
5241         case Iop_VDup32x4: {
5242            HReg res = newVRegV(env);
5243            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5244            UInt imm4;
5245            UInt index;
5246            if (e->Iex.Binop.arg2->tag != Iex_Const ||
5247               typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5248                  vpanic("ARM supports Iop_VDup with constant "
5249                         "second argument less than 16 only\n");
5250            }
5251            index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5252            switch(e->Iex.Binop.op) {
5253               case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5254               case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5255               case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5256               default: vassert(0);
5257            }
5258            if (imm4 >= 16) {
5259               vpanic("ARM supports Iop_VDup with constant "
5260                      "second argument less than 16 only\n");
5261            }
5262            addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5263                                          res, argL, imm4, True));
5264            return res;
5265         }
5266         */
5267         case Iop_PwAdd8x16:
5268         case Iop_PwAdd16x8:
5269         case Iop_PwAdd32x4: {
5270            HReg res = newVRegV(env);
5271            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5272            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5273            UInt size = 0;
5274            switch(e->Iex.Binop.op) {
5275               case Iop_PwAdd8x16: size = 0; break;
5276               case Iop_PwAdd16x8: size = 1; break;
5277               case Iop_PwAdd32x4: size = 2; break;
5278               default: vassert(0);
5279            }
5280            addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5281                                           res, argL, argR, size, True));
5282            return res;
5283         }
5284         /* ... */
5285         default:
5286            break;
5287      }
5288   }
5289
5290   if (e->tag == Iex_Triop) {
5291      IRTriop *triop = e->Iex.Triop.details;
5292
5293      switch (triop->op) {
5294         case Iop_ExtractV128: {
5295            HReg res = newVRegV(env);
5296            HReg argL = iselNeonExpr(env, triop->arg1);
5297            HReg argR = iselNeonExpr(env, triop->arg2);
5298            UInt imm4;
5299            if (triop->arg3->tag != Iex_Const ||
5300                typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
5301               vpanic("ARM target supports Iop_ExtractV128 with constant "
5302                      "third argument less than 16 only\n");
5303            }
5304            imm4 = triop->arg3->Iex.Const.con->Ico.U8;
5305            if (imm4 >= 16) {
5306               vpanic("ARM target supports Iop_ExtractV128 with constant "
5307                      "third argument less than 16 only\n");
5308            }
5309            addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5310                                           res, argL, argR, imm4, True));
5311            return res;
5312         }
5313         case Iop_Mul32Fx4:
5314         case Iop_Sub32Fx4:
5315         case Iop_Add32Fx4: {
5316            HReg res = newVRegV(env);
5317            HReg argL = iselNeonExpr(env, triop->arg2);
5318            HReg argR = iselNeonExpr(env, triop->arg3);
5319            UInt size = 0;
5320            ARMNeonBinOp op = ARMneon_INVALID;
5321            switch (triop->op) {
5322               case Iop_Mul32Fx4: op = ARMneon_VMULFP; break;
5323               case Iop_Sub32Fx4: op = ARMneon_VSUBFP; break;
5324               case Iop_Add32Fx4: op = ARMneon_VADDFP; break;
5325               default: vassert(0);
5326            }
5327            addInstr(env, ARMInstr_NBinary(op, res, argL, argR, size, True));
5328            return res;
5329         }
5330         default:
5331            break;
5332      }
5333   }
5334
5335   if (e->tag == Iex_ITE) { // VFD
5336      ARMCondCode cc;
5337      HReg r1  = iselNeonExpr(env, e->Iex.ITE.iftrue);
5338      HReg r0  = iselNeonExpr(env, e->Iex.ITE.iffalse);
5339      HReg dst = newVRegV(env);
5340      addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, r1, 4, True));
5341      cc = iselCondCode(env, e->Iex.ITE.cond);
5342      addInstr(env, ARMInstr_NCMovQ(cc ^ 1, dst, r0));
5343      return dst;
5344   }
5345
5346  neon_expr_bad:
5347   ppIRExpr(e);
5348   vpanic("iselNeonExpr_wrk");
5349}
5350
5351/*---------------------------------------------------------*/
5352/*--- ISEL: Floating point expressions (64 bit)         ---*/
5353/*---------------------------------------------------------*/
5354
/* Compute a 64-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the register must not
   be changed by subsequent code emitted by the caller.  Note that, as
   asserted in the function body, the register returned here is always
   a virtual register of class HRcFlt64.  */
5359
5360static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5361{
5362   HReg r = iselDblExpr_wrk( env, e );
5363#  if 0
5364   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5365#  endif
5366   vassert(hregClass(r) == HRcFlt64);
5367   vassert(hregIsVirtual(r));
5368   return r;
5369}
5370
5371/* DO NOT CALL THIS DIRECTLY */
5372static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
5373{
5374   IRType ty = typeOfIRExpr(env->type_env,e);
5375   vassert(e);
5376   vassert(ty == Ity_F64);
5377
5378   if (e->tag == Iex_RdTmp) {
5379      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5380   }
5381
5382   if (e->tag == Iex_Const) {
5383      /* Just handle the zero case. */
5384      IRConst* con = e->Iex.Const.con;
5385      if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
5386         HReg z32 = newVRegI(env);
5387         HReg dst = newVRegD(env);
5388         addInstr(env, ARMInstr_Imm32(z32, 0));
5389         addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
5390         return dst;
5391      }
5392   }
5393
5394   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5395      ARMAModeV* am;
5396      HReg res = newVRegD(env);
5397      vassert(e->Iex.Load.ty == Ity_F64);
5398      am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5399      addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5400      return res;
5401   }
5402
5403   if (e->tag == Iex_Get) {
5404      // XXX This won't work if offset > 1020 or is not 0 % 4.
5405      // In which case we'll have to generate more longwinded code.
5406      ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5407      HReg       res = newVRegD(env);
5408      addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5409      return res;
5410   }
5411
5412   if (e->tag == Iex_Unop) {
5413      switch (e->Iex.Unop.op) {
5414         case Iop_ReinterpI64asF64: {
5415            if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5416               return iselNeon64Expr(env, e->Iex.Unop.arg);
5417            } else {
5418               HReg srcHi, srcLo;
5419               HReg dst = newVRegD(env);
5420               iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
5421               addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
5422               return dst;
5423            }
5424         }
5425         case Iop_NegF64: {
5426            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5427            HReg dst = newVRegD(env);
5428            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
5429            return dst;
5430         }
5431         case Iop_AbsF64: {
5432            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5433            HReg dst = newVRegD(env);
5434            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
5435            return dst;
5436         }
5437         case Iop_F32toF64: {
5438            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5439            HReg dst = newVRegD(env);
5440            addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
5441            return dst;
5442         }
5443         case Iop_I32UtoF64:
5444         case Iop_I32StoF64: {
5445            HReg src   = iselIntExpr_R(env, e->Iex.Unop.arg);
5446            HReg f32   = newVRegF(env);
5447            HReg dst   = newVRegD(env);
5448            Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
5449            /* VMOV f32, src */
5450            addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
5451            /* FSITOD dst, f32 */
5452            addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
5453                                          dst, f32));
5454            return dst;
5455         }
5456         default:
5457            break;
5458      }
5459   }
5460
5461   if (e->tag == Iex_Binop) {
5462      switch (e->Iex.Binop.op) {
5463         case Iop_SqrtF64: {
5464            /* first arg is rounding mode; we ignore it. */
5465            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
5466            HReg dst = newVRegD(env);
5467            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
5468            return dst;
5469         }
5470         default:
5471            break;
5472      }
5473   }
5474
5475   if (e->tag == Iex_Triop) {
5476      IRTriop *triop = e->Iex.Triop.details;
5477
5478      switch (triop->op) {
5479         case Iop_DivF64:
5480         case Iop_MulF64:
5481         case Iop_AddF64:
5482         case Iop_SubF64: {
5483            ARMVfpOp op = 0; /*INVALID*/
5484            HReg argL = iselDblExpr(env, triop->arg2);
5485            HReg argR = iselDblExpr(env, triop->arg3);
5486            HReg dst  = newVRegD(env);
5487            switch (triop->op) {
5488               case Iop_DivF64: op = ARMvfp_DIV; break;
5489               case Iop_MulF64: op = ARMvfp_MUL; break;
5490               case Iop_AddF64: op = ARMvfp_ADD; break;
5491               case Iop_SubF64: op = ARMvfp_SUB; break;
5492               default: vassert(0);
5493            }
5494            addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
5495            return dst;
5496         }
5497         default:
5498            break;
5499      }
5500   }
5501
5502   if (e->tag == Iex_ITE) { // VFD
5503      if (ty == Ity_F64
5504          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
5505         HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
5506         HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
5507         HReg dst = newVRegD(env);
5508         addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, r1));
5509         ARMCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
5510         addInstr(env, ARMInstr_VCMovD(cc ^ 1, dst, r0));
5511         return dst;
5512      }
5513   }
5514
5515   ppIRExpr(e);
5516   vpanic("iselDblExpr_wrk");
5517}
5518
5519
5520/*---------------------------------------------------------*/
5521/*--- ISEL: Floating point expressions (32 bit)         ---*/
5522/*---------------------------------------------------------*/
5523
/* Compute a 32-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the register must not
   be changed by subsequent code emitted by the caller.  Note that, as
   asserted in the function body, the register returned here is always
   a virtual register of class HRcFlt32.  */
5528
5529static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
5530{
5531   HReg r = iselFltExpr_wrk( env, e );
5532#  if 0
5533   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5534#  endif
5535   vassert(hregClass(r) == HRcFlt32);
5536   vassert(hregIsVirtual(r));
5537   return r;
5538}
5539
5540/* DO NOT CALL THIS DIRECTLY */
5541static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
5542{
5543   IRType ty = typeOfIRExpr(env->type_env,e);
5544   vassert(e);
5545   vassert(ty == Ity_F32);
5546
5547   if (e->tag == Iex_RdTmp) {
5548      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5549   }
5550
5551   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5552      ARMAModeV* am;
5553      HReg res = newVRegF(env);
5554      vassert(e->Iex.Load.ty == Ity_F32);
5555      am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5556      addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5557      return res;
5558   }
5559
5560   if (e->tag == Iex_Get) {
5561      // XXX This won't work if offset > 1020 or is not 0 % 4.
5562      // In which case we'll have to generate more longwinded code.
5563      ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5564      HReg       res = newVRegF(env);
5565      addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5566      return res;
5567   }
5568
5569   if (e->tag == Iex_Unop) {
5570      switch (e->Iex.Unop.op) {
5571         case Iop_ReinterpI32asF32: {
5572            HReg dst = newVRegF(env);
5573            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5574            addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
5575            return dst;
5576         }
5577         case Iop_NegF32: {
5578            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5579            HReg dst = newVRegF(env);
5580            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
5581            return dst;
5582         }
5583         case Iop_AbsF32: {
5584            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5585            HReg dst = newVRegF(env);
5586            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
5587            return dst;
5588         }
5589         default:
5590            break;
5591      }
5592   }
5593
5594   if (e->tag == Iex_Binop) {
5595      switch (e->Iex.Binop.op) {
5596         case Iop_SqrtF32: {
5597            /* first arg is rounding mode; we ignore it. */
5598            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
5599            HReg dst = newVRegF(env);
5600            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
5601            return dst;
5602         }
5603         case Iop_F64toF32: {
5604            HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
5605            set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5606            HReg valS = newVRegF(env);
5607            /* FCVTSD valS, valD */
5608            addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
5609            set_VFP_rounding_default(env);
5610            return valS;
5611         }
5612         default:
5613            break;
5614      }
5615   }
5616
5617   if (e->tag == Iex_Triop) {
5618      IRTriop *triop = e->Iex.Triop.details;
5619
5620      switch (triop->op) {
5621         case Iop_DivF32:
5622         case Iop_MulF32:
5623         case Iop_AddF32:
5624         case Iop_SubF32: {
5625            ARMVfpOp op = 0; /*INVALID*/
5626            HReg argL = iselFltExpr(env, triop->arg2);
5627            HReg argR = iselFltExpr(env, triop->arg3);
5628            HReg dst  = newVRegF(env);
5629            switch (triop->op) {
5630               case Iop_DivF32: op = ARMvfp_DIV; break;
5631               case Iop_MulF32: op = ARMvfp_MUL; break;
5632               case Iop_AddF32: op = ARMvfp_ADD; break;
5633               case Iop_SubF32: op = ARMvfp_SUB; break;
5634               default: vassert(0);
5635            }
5636            addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
5637            return dst;
5638         }
5639         default:
5640            break;
5641      }
5642   }
5643
5644   if (e->tag == Iex_ITE) { // VFD
5645      if (ty == Ity_F32
5646          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
5647         ARMCondCode cc;
5648         HReg r1  = iselFltExpr(env, e->Iex.ITE.iftrue);
5649         HReg r0  = iselFltExpr(env, e->Iex.ITE.iffalse);
5650         HReg dst = newVRegF(env);
5651         addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, r1));
5652         cc = iselCondCode(env, e->Iex.ITE.cond);
5653         addInstr(env, ARMInstr_VCMovS(cc ^ 1, dst, r0));
5654         return dst;
5655      }
5656   }
5657
5658   ppIRExpr(e);
5659   vpanic("iselFltExpr_wrk");
5660}
5661
5662
5663/*---------------------------------------------------------*/
5664/*--- ISEL: Statements                                  ---*/
5665/*---------------------------------------------------------*/
5666
5667static void iselStmt ( ISelEnv* env, IRStmt* stmt )
5668{
5669   if (vex_traceflags & VEX_TRACE_VCODE) {
5670      vex_printf("\n-- ");
5671      ppIRStmt(stmt);
5672      vex_printf("\n");
5673   }
5674   switch (stmt->tag) {
5675
5676   /* --------- STORE --------- */
5677   /* little-endian write to memory */
5678   case Ist_Store: {
5679      IRType    tya  = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
5680      IRType    tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
5681      IREndness end  = stmt->Ist.Store.end;
5682
5683      if (tya != Ity_I32 || end != Iend_LE)
5684         goto stmt_fail;
5685
5686      if (tyd == Ity_I32) {
5687         HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5688         ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5689         addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am));
5690         return;
5691      }
5692      if (tyd == Ity_I16) {
5693         HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5694         ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
5695         addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
5696                                       False/*!isLoad*/,
5697                                       False/*!isSignedLoad*/, rD, am));
5698         return;
5699      }
5700      if (tyd == Ity_I8) {
5701         HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5702         ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5703         addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, False/*!isLoad*/, rD, am));
5704         return;
5705      }
5706      if (tyd == Ity_I64) {
5707         if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5708            HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
5709            ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5710            addInstr(env, ARMInstr_NLdStD(False, dD, am));
5711         } else {
5712            HReg rDhi, rDlo, rA;
5713            iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
5714            rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
5715            addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDhi,
5716                                          ARMAMode1_RI(rA,4)));
5717            addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDlo,
5718                                          ARMAMode1_RI(rA,0)));
5719         }
5720         return;
5721      }
5722      if (tyd == Ity_F64) {
5723         HReg       dD = iselDblExpr(env, stmt->Ist.Store.data);
5724         ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5725         addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
5726         return;
5727      }
5728      if (tyd == Ity_F32) {
5729         HReg       fD = iselFltExpr(env, stmt->Ist.Store.data);
5730         ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5731         addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
5732         return;
5733      }
5734      if (tyd == Ity_V128) {
5735         HReg       qD = iselNeonExpr(env, stmt->Ist.Store.data);
5736         ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5737         addInstr(env, ARMInstr_NLdStQ(False, qD, am));
5738         return;
5739      }
5740
5741      break;
5742   }
5743
5744   /* --------- CONDITIONAL STORE --------- */
5745   /* conditional little-endian write to memory */
5746   case Ist_StoreG: {
5747      IRStoreG* sg   = stmt->Ist.StoreG.details;
5748      IRType    tya  = typeOfIRExpr(env->type_env, sg->addr);
5749      IRType    tyd  = typeOfIRExpr(env->type_env, sg->data);
5750      IREndness end  = sg->end;
5751
5752      if (tya != Ity_I32 || end != Iend_LE)
5753         goto stmt_fail;
5754
5755      switch (tyd) {
5756         case Ity_I8:
5757         case Ity_I32: {
5758            HReg        rD = iselIntExpr_R(env, sg->data);
5759            ARMAMode1*  am = iselIntExpr_AMode1(env, sg->addr);
5760            ARMCondCode cc = iselCondCode(env, sg->guard);
5761            addInstr(env, (tyd == Ity_I32 ? ARMInstr_LdSt32 : ARMInstr_LdSt8U)
5762                             (cc, False/*!isLoad*/, rD, am));
5763            return;
5764         }
5765         case Ity_I16: {
5766            HReg        rD = iselIntExpr_R(env, sg->data);
5767            ARMAMode2*  am = iselIntExpr_AMode2(env, sg->addr);
5768            ARMCondCode cc = iselCondCode(env, sg->guard);
5769            addInstr(env, ARMInstr_LdSt16(cc,
5770                                          False/*!isLoad*/,
5771                                          False/*!isSignedLoad*/, rD, am));
5772            return;
5773         }
5774         default:
5775            break;
5776      }
5777      break;
5778   }
5779
5780   /* --------- CONDITIONAL LOAD --------- */
5781   /* conditional little-endian load from memory */
5782   case Ist_LoadG: {
5783      IRLoadG*  lg   = stmt->Ist.LoadG.details;
5784      IRType    tya  = typeOfIRExpr(env->type_env, lg->addr);
5785      IREndness end  = lg->end;
5786
5787      if (tya != Ity_I32 || end != Iend_LE)
5788         goto stmt_fail;
5789
5790      switch (lg->cvt) {
5791         case ILGop_8Uto32:
5792         case ILGop_Ident32: {
5793            HReg        rAlt = iselIntExpr_R(env, lg->alt);
5794            ARMAMode1*  am   = iselIntExpr_AMode1(env, lg->addr);
5795            HReg        rD   = lookupIRTemp(env, lg->dst);
5796            addInstr(env, mk_iMOVds_RR(rD, rAlt));
5797            ARMCondCode cc   = iselCondCode(env, lg->guard);
5798            addInstr(env, (lg->cvt == ILGop_Ident32 ? ARMInstr_LdSt32
5799                                                    : ARMInstr_LdSt8U)
5800                             (cc, True/*isLoad*/, rD, am));
5801            return;
5802         }
5803         case ILGop_16Sto32:
5804         case ILGop_16Uto32:
5805         case ILGop_8Sto32: {
5806            HReg        rAlt = iselIntExpr_R(env, lg->alt);
5807            ARMAMode2*  am   = iselIntExpr_AMode2(env, lg->addr);
5808            HReg        rD   = lookupIRTemp(env, lg->dst);
5809            addInstr(env, mk_iMOVds_RR(rD, rAlt));
5810            ARMCondCode cc   = iselCondCode(env, lg->guard);
5811            if (lg->cvt == ILGop_8Sto32) {
5812               addInstr(env, ARMInstr_Ld8S(cc, rD, am));
5813            } else {
5814               vassert(lg->cvt == ILGop_16Sto32 || lg->cvt == ILGop_16Uto32);
5815               Bool sx = lg->cvt == ILGop_16Sto32;
5816               addInstr(env, ARMInstr_LdSt16(cc, True/*isLoad*/, sx, rD, am));
5817            }
5818            return;
5819         }
5820         default:
5821            break;
5822      }
5823      break;
5824   }
5825
5826   /* --------- PUT --------- */
5827   /* write guest state, fixed offset */
5828   case Ist_Put: {
5829       IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
5830
5831       if (tyd == Ity_I32) {
5832           HReg       rD = iselIntExpr_R(env, stmt->Ist.Put.data);
5833           ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
5834           addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am));
5835           return;
5836       }
5837       if (tyd == Ity_I64) {
5838          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5839             HReg addr = newVRegI(env);
5840             HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
5841             addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5842                                                stmt->Ist.Put.offset));
5843             addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
5844          } else {
5845             HReg rDhi, rDlo;
5846             ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
5847                                           stmt->Ist.Put.offset + 0);
5848             ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
5849                                           stmt->Ist.Put.offset + 4);
5850             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
5851             addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
5852                                           rDhi, am4));
5853             addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
5854                                           rDlo, am0));
5855          }
5856          return;
5857       }
5858       if (tyd == Ity_F64) {
5859          // XXX This won't work if offset > 1020 or is not 0 % 4.
5860          // In which case we'll have to generate more longwinded code.
5861          ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5862          HReg       rD = iselDblExpr(env, stmt->Ist.Put.data);
5863          addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
5864          return;
5865       }
5866       if (tyd == Ity_F32) {
5867          // XXX This won't work if offset > 1020 or is not 0 % 4.
5868          // In which case we'll have to generate more longwinded code.
5869          ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5870          HReg       rD = iselFltExpr(env, stmt->Ist.Put.data);
5871          addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
5872          return;
5873       }
5874       if (tyd == Ity_V128) {
5875          HReg addr = newVRegI(env);
5876          HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
5877          addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5878                                       stmt->Ist.Put.offset));
5879          addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
5880          return;
5881       }
5882       break;
5883   }
5884
5885   /* --------- TMP --------- */
5886   /* assign value to temporary */
5887   case Ist_WrTmp: {
5888      IRTemp tmp = stmt->Ist.WrTmp.tmp;
5889      IRType ty = typeOfIRTemp(env->type_env, tmp);
5890
5891      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
5892         ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
5893                                          env, stmt->Ist.WrTmp.data);
5894         HReg     dst  = lookupIRTemp(env, tmp);
5895         addInstr(env, ARMInstr_Mov(dst,ri84));
5896         return;
5897      }
5898      if (ty == Ity_I1) {
5899         /* Here, we are generating a I1 value into a 32 bit register.
5900            Make sure the value in the register is only zero or one,
5901            but no other.  This allows optimisation of the
5902            1Uto32(tmp:I1) case, by making it simply a copy of the
5903            register holding 'tmp'.  The point being that the value in
5904            the register holding 'tmp' can only have been created
5905            here. */
5906         HReg        dst  = lookupIRTemp(env, tmp);
5907         ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
5908         addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
5909         addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
5910         return;
5911      }
5912      if (ty == Ity_I64) {
5913         if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5914            HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
5915            HReg dst = lookupIRTemp(env, tmp);
5916            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
5917         } else {
5918            HReg rHi, rLo, dstHi, dstLo;
5919            iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
5920            lookupIRTemp64( &dstHi, &dstLo, env, tmp);
5921            addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
5922            addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
5923         }
5924         return;
5925      }
5926      if (ty == Ity_F64) {
5927         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
5928         HReg dst = lookupIRTemp(env, tmp);
5929         addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
5930         return;
5931      }
5932      if (ty == Ity_F32) {
5933         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
5934         HReg dst = lookupIRTemp(env, tmp);
5935         addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
5936         return;
5937      }
5938      if (ty == Ity_V128) {
5939         HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
5940         HReg dst = lookupIRTemp(env, tmp);
5941         addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
5942         return;
5943      }
5944      break;
5945   }
5946
5947   /* --------- Call to DIRTY helper --------- */
5948   /* call complex ("dirty") helper function */
5949   case Ist_Dirty: {
5950      IRDirty* d = stmt->Ist.Dirty.details;
5951
5952      /* Figure out the return type, if any. */
5953      IRType retty = Ity_INVALID;
5954      if (d->tmp != IRTemp_INVALID)
5955         retty = typeOfIRTemp(env->type_env, d->tmp);
5956
5957      Bool retty_ok = False;
5958      switch (retty) {
5959         case Ity_INVALID: /* function doesn't return anything */
5960         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
5961         //case Ity_V128: //ATC
5962            retty_ok = True; break;
5963         default:
5964            break;
5965      }
5966      if (!retty_ok)
5967         break; /* will go to stmt_fail: */
5968
5969      /* Marshal args, do the call, and set the return value to 0x555..555
5970         if this is a conditional call that returns a value and the
5971         call is skipped. */
5972      UInt   addToSp = 0;
5973      RetLoc rloc    = mk_RetLoc_INVALID();
5974      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
5975      vassert(is_sane_RetLoc(rloc));
5976
5977      /* Now figure out what to do with the returned value, if any. */
5978      switch (retty) {
5979         case Ity_INVALID: {
5980            /* No return value.  Nothing to do. */
5981            vassert(d->tmp == IRTemp_INVALID);
5982            vassert(rloc.pri == RLPri_None);
5983            vassert(addToSp == 0);
5984            return;
5985         }
5986         case Ity_I64: {
5987            vassert(rloc.pri == RLPri_2Int);
5988            vassert(addToSp == 0);
5989            if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5990               HReg tmp = lookupIRTemp(env, d->tmp);
5991               addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
5992                                                        hregARM_R0()));
5993            } else {
5994               HReg dstHi, dstLo;
5995               /* The returned value is in r1:r0.  Park it in the
5996                  register-pair associated with tmp. */
5997               lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
5998               addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
5999               addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
6000            }
6001            return;
6002         }
6003         case Ity_I32: case Ity_I16: case Ity_I8: {
6004            vassert(rloc.pri == RLPri_Int);
6005            vassert(addToSp == 0);
6006            /* The returned value is in r0.  Park it in the register
6007               associated with tmp. */
6008            HReg dst = lookupIRTemp(env, d->tmp);
6009            addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
6010            return;
6011         }
6012         case Ity_V128: {
6013            vassert(0); // ATC.  The code that this produces really
6014            // needs to be looked at, to verify correctness.
6015            // I don't think this can ever happen though, since the
6016            // ARM front end never produces 128-bit loads/stores.
6017            // Hence the following is mostly theoretical.
6018            /* The returned value is on the stack, and *retloc tells
6019               us where.  Fish it off the stack and then move the
6020               stack pointer upwards to clear it, as directed by
6021               doHelperCall. */
6022            vassert(rloc.pri == RLPri_V128SpRel);
6023            vassert(rloc.spOff < 256); // else ARMRI84_I84(_,0) can't encode it
6024            vassert(addToSp >= 16);
6025            vassert(addToSp < 256); // ditto reason as for rloc.spOff
6026            HReg dst = lookupIRTemp(env, d->tmp);
6027            HReg tmp = newVRegI(env);
6028            HReg r13 = hregARM_R13(); // sp
6029            addInstr(env, ARMInstr_Alu(ARMalu_ADD,
6030                                       tmp, r13, ARMRI84_I84(rloc.spOff,0)));
6031            ARMAModeN* am = mkARMAModeN_R(tmp);
6032            addInstr(env, ARMInstr_NLdStQ(True/*load*/, dst, am));
6033            addInstr(env, ARMInstr_Alu(ARMalu_ADD,
6034                                       r13, r13, ARMRI84_I84(addToSp,0)));
6035            return;
6036         }
6037         default:
6038            /*NOTREACHED*/
6039            vassert(0);
6040      }
6041      break;
6042   }
6043
6044   /* --------- Load Linked and Store Conditional --------- */
6045   case Ist_LLSC: {
6046      if (stmt->Ist.LLSC.storedata == NULL) {
6047         /* LL */
6048         IRTemp res = stmt->Ist.LLSC.result;
6049         IRType ty  = typeOfIRTemp(env->type_env, res);
6050         if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
6051            Int  szB   = 0;
6052            HReg r_dst = lookupIRTemp(env, res);
6053            HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6054            switch (ty) {
6055               case Ity_I8:  szB = 1; break;
6056               case Ity_I16: szB = 2; break;
6057               case Ity_I32: szB = 4; break;
6058               default:      vassert(0);
6059            }
6060            addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6061            addInstr(env, ARMInstr_LdrEX(szB));
6062            addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
6063            return;
6064         }
6065         if (ty == Ity_I64) {
6066            HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6067            addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6068            addInstr(env, ARMInstr_LdrEX(8));
6069            /* Result is in r3:r2.  On a non-NEON capable CPU, we must
6070               move it into a result register pair.  On a NEON capable
6071               CPU, the result register will be a 64 bit NEON
6072               register, so we must move it there instead. */
6073            if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6074               HReg dst = lookupIRTemp(env, res);
6075               addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
6076                                                        hregARM_R2()));
6077            } else {
6078               HReg r_dst_hi, r_dst_lo;
6079               lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
6080               addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
6081               addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
6082            }
6083            return;
6084         }
6085         /*NOTREACHED*/
6086         vassert(0);
6087      } else {
6088         /* SC */
6089         IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
6090         if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
6091            Int  szB = 0;
6092            HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
6093            HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6094            switch (tyd) {
6095               case Ity_I8:  szB = 1; break;
6096               case Ity_I16: szB = 2; break;
6097               case Ity_I32: szB = 4; break;
6098               default:      vassert(0);
6099            }
6100            addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
6101            addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6102            addInstr(env, ARMInstr_StrEX(szB));
6103         } else {
6104            vassert(tyd == Ity_I64);
6105            /* This is really ugly.  There is no is/is-not NEON
6106               decision akin to the case for LL, because iselInt64Expr
6107               fudges this for us, and always gets the result into two
6108               GPRs even if this means moving it from a NEON
6109               register. */
6110            HReg rDhi, rDlo;
6111            iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
6112            HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6113            addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
6114            addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
6115            addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6116            addInstr(env, ARMInstr_StrEX(8));
6117         }
6118         /* now r0 is 1 if failed, 0 if success.  Change to IR
6119            conventions (0 is fail, 1 is success).  Also transfer
6120            result to r_res. */
6121         IRTemp   res   = stmt->Ist.LLSC.result;
6122         IRType   ty    = typeOfIRTemp(env->type_env, res);
6123         HReg     r_res = lookupIRTemp(env, res);
6124         ARMRI84* one   = ARMRI84_I84(1,0);
6125         vassert(ty == Ity_I1);
6126         addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
6127         /* And be conservative -- mask off all but the lowest bit */
6128         addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
6129         return;
6130      }
6131      break;
6132   }
6133
6134   /* --------- MEM FENCE --------- */
6135   case Ist_MBE:
6136      switch (stmt->Ist.MBE.event) {
6137         case Imbe_Fence:
6138            addInstr(env, ARMInstr_MFence());
6139            return;
6140         case Imbe_CancelReservation:
6141            addInstr(env, ARMInstr_CLREX());
6142            return;
6143         default:
6144            break;
6145      }
6146      break;
6147
6148   /* --------- INSTR MARK --------- */
6149   /* Doesn't generate any executable code ... */
6150   case Ist_IMark:
6151       return;
6152
6153   /* --------- NO-OP --------- */
6154   case Ist_NoOp:
6155       return;
6156
6157   /* --------- EXIT --------- */
6158   case Ist_Exit: {
6159      if (stmt->Ist.Exit.dst->tag != Ico_U32)
6160         vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
6161
6162      ARMCondCode cc     = iselCondCode(env, stmt->Ist.Exit.guard);
6163      ARMAMode1*  amR15T = ARMAMode1_RI(hregARM_R8(),
6164                                        stmt->Ist.Exit.offsIP);
6165
6166      /* Case: boring transfer to known address */
6167      if (stmt->Ist.Exit.jk == Ijk_Boring
6168          || stmt->Ist.Exit.jk == Ijk_Call
6169          || stmt->Ist.Exit.jk == Ijk_Ret) {
6170         if (env->chainingAllowed) {
6171            /* .. almost always true .. */
6172            /* Skip the event check at the dst if this is a forwards
6173               edge. */
6174            Bool toFastEP
6175               = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
6176            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
6177            addInstr(env, ARMInstr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
6178                                           amR15T, cc, toFastEP));
6179         } else {
6180            /* .. very occasionally .. */
6181            /* We can't use chaining, so ask for an assisted transfer,
6182               as that's the only alternative that is allowable. */
6183            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6184            addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, Ijk_Boring));
6185         }
6186         return;
6187      }
6188
6189      /* Case: assisted transfer to arbitrary address */
6190      switch (stmt->Ist.Exit.jk) {
6191         /* Keep this list in sync with that in iselNext below */
6192         case Ijk_ClientReq:
6193         case Ijk_NoDecode:
6194         case Ijk_NoRedir:
6195         case Ijk_Sys_syscall:
6196         case Ijk_InvalICache:
6197         case Ijk_Yield:
6198         {
6199            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6200            addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
6201                                             stmt->Ist.Exit.jk));
6202            return;
6203         }
6204         default:
6205            break;
6206      }
6207
6208      /* Do we ever expect to see any other kind? */
6209      goto stmt_fail;
6210   }
6211
6212   default: break;
6213   }
6214  stmt_fail:
6215   ppIRStmt(stmt);
6216   vpanic("iselStmt");
6217}
6218
6219
6220/*---------------------------------------------------------*/
6221/*--- ISEL: Basic block terminators (Nexts)             ---*/
6222/*---------------------------------------------------------*/
6223
6224static void iselNext ( ISelEnv* env,
6225                       IRExpr* next, IRJumpKind jk, Int offsIP )
6226{
6227   if (vex_traceflags & VEX_TRACE_VCODE) {
6228      vex_printf( "\n-- PUT(%d) = ", offsIP);
6229      ppIRExpr( next );
6230      vex_printf( "; exit-");
6231      ppIRJumpKind(jk);
6232      vex_printf( "\n");
6233   }
6234
6235   /* Case: boring transfer to known address */
6236   if (next->tag == Iex_Const) {
6237      IRConst* cdst = next->Iex.Const.con;
6238      vassert(cdst->tag == Ico_U32);
6239      if (jk == Ijk_Boring || jk == Ijk_Call) {
6240         /* Boring transfer to known address */
6241         ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6242         if (env->chainingAllowed) {
6243            /* .. almost always true .. */
6244            /* Skip the event check at the dst if this is a forwards
6245               edge. */
6246            Bool toFastEP
6247               = ((Addr64)cdst->Ico.U32) > env->max_ga;
6248            if (0) vex_printf("%s", toFastEP ? "X" : ".");
6249            addInstr(env, ARMInstr_XDirect(cdst->Ico.U32,
6250                                           amR15T, ARMcc_AL,
6251                                           toFastEP));
6252         } else {
6253            /* .. very occasionally .. */
6254            /* We can't use chaining, so ask for an assisted transfer,
6255               as that's the only alternative that is allowable. */
6256            HReg r = iselIntExpr_R(env, next);
6257            addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
6258                                             Ijk_Boring));
6259         }
6260         return;
6261      }
6262   }
6263
6264   /* Case: call/return (==boring) transfer to any address */
6265   switch (jk) {
6266      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
6267         HReg       r      = iselIntExpr_R(env, next);
6268         ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6269         if (env->chainingAllowed) {
6270            addInstr(env, ARMInstr_XIndir(r, amR15T, ARMcc_AL));
6271         } else {
6272            addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
6273                                                Ijk_Boring));
6274         }
6275         return;
6276      }
6277      default:
6278         break;
6279   }
6280
6281   /* Case: assisted transfer to arbitrary address */
6282   switch (jk) {
6283      /* Keep this list in sync with that for Ist_Exit above */
6284      case Ijk_ClientReq:
6285      case Ijk_NoDecode:
6286      case Ijk_NoRedir:
6287      case Ijk_Sys_syscall:
6288      case Ijk_InvalICache:
6289      case Ijk_Yield:
6290      {
6291         HReg       r      = iselIntExpr_R(env, next);
6292         ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6293         addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, jk));
6294         return;
6295      }
6296      default:
6297         break;
6298   }
6299
6300   vex_printf( "\n-- PUT(%d) = ", offsIP);
6301   ppIRExpr( next );
6302   vex_printf( "; exit-");
6303   ppIRJumpKind(jk);
6304   vex_printf( "\n");
6305   vassert(0); // are we expecting any other kind?
6306}
6307
6308
6309/*---------------------------------------------------------*/
6310/*--- Insn selector top-level                           ---*/
6311/*---------------------------------------------------------*/
6312
6313/* Translate an entire SB to arm code. */
6314
6315HInstrArray* iselSB_ARM ( IRSB* bb,
6316                          VexArch      arch_host,
6317                          VexArchInfo* archinfo_host,
6318                          VexAbiInfo*  vbi/*UNUSED*/,
6319                          Int offs_Host_EvC_Counter,
6320                          Int offs_Host_EvC_FailAddr,
6321                          Bool chainingAllowed,
6322                          Bool addProfInc,
6323                          Addr64 max_ga )
6324{
6325   Int       i, j;
6326   HReg      hreg, hregHI;
6327   ISelEnv*  env;
6328   UInt      hwcaps_host = archinfo_host->hwcaps;
6329   ARMAMode1 *amCounter, *amFailAddr;
6330
6331   /* sanity ... */
6332   vassert(arch_host == VexArchARM);
6333
6334   /* guard against unexpected space regressions */
6335   vassert(sizeof(ARMInstr) <= 28);
6336
6337   /* hwcaps should not change from one ISEL call to another. */
6338   arm_hwcaps = hwcaps_host; // JRS 2012 Mar 31: FIXME (RM)
6339
6340   /* Make up an initial environment to use. */
6341   env = LibVEX_Alloc(sizeof(ISelEnv));
6342   env->vreg_ctr = 0;
6343
6344   /* Set up output code array. */
6345   env->code = newHInstrArray();
6346
6347   /* Copy BB's type env. */
6348   env->type_env = bb->tyenv;
6349
6350   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
6351      change as we go along. */
6352   env->n_vregmap = bb->tyenv->types_used;
6353   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
6354   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
6355
6356   /* and finally ... */
6357   env->chainingAllowed = chainingAllowed;
6358   env->hwcaps          = hwcaps_host;
6359   env->max_ga          = max_ga;
6360
6361   /* For each IR temporary, allocate a suitably-kinded virtual
6362      register. */
6363   j = 0;
6364   for (i = 0; i < env->n_vregmap; i++) {
6365      hregHI = hreg = INVALID_HREG;
6366      switch (bb->tyenv->types[i]) {
6367         case Ity_I1:
6368         case Ity_I8:
6369         case Ity_I16:
6370         case Ity_I32:  hreg   = mkHReg(j++, HRcInt32, True); break;
6371         case Ity_I64:
6372            if (hwcaps_host & VEX_HWCAPS_ARM_NEON) {
6373               hreg = mkHReg(j++, HRcFlt64, True);
6374            } else {
6375               hregHI = mkHReg(j++, HRcInt32, True);
6376               hreg   = mkHReg(j++, HRcInt32, True);
6377            }
6378            break;
6379         case Ity_F32:  hreg   = mkHReg(j++, HRcFlt32, True); break;
6380         case Ity_F64:  hreg   = mkHReg(j++, HRcFlt64, True); break;
6381         case Ity_V128: hreg   = mkHReg(j++, HRcVec128, True); break;
6382         default: ppIRType(bb->tyenv->types[i]);
6383                  vpanic("iselBB: IRTemp type");
6384      }
6385      env->vregmap[i]   = hreg;
6386      env->vregmapHI[i] = hregHI;
6387   }
6388   env->vreg_ctr = j;
6389
6390   /* The very first instruction must be an event check. */
6391   amCounter  = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter);
6392   amFailAddr = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr);
6393   addInstr(env, ARMInstr_EvCheck(amCounter, amFailAddr));
6394
6395   /* Possibly a block counter increment (for profiling).  At this
6396      point we don't know the address of the counter, so just pretend
6397      it is zero.  It will have to be patched later, but before this
6398      translation is used, by a call to LibVEX_patchProfCtr. */
6399   if (addProfInc) {
6400      addInstr(env, ARMInstr_ProfInc());
6401   }
6402
6403   /* Ok, finally we can iterate over the statements. */
6404   for (i = 0; i < bb->stmts_used; i++)
6405      iselStmt(env, bb->stmts[i]);
6406
6407   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
6408
6409   /* record the number of vregs we used. */
6410   env->code->n_vregs = env->vreg_ctr;
6411   return env->code;
6412}
6413
6414
6415/*---------------------------------------------------------------*/
6416/*--- end                                     host_arm_isel.c ---*/
6417/*---------------------------------------------------------------*/
6418