1
2/*---------------------------------------------------------------*/
3/*--- begin                                   host_arm_isel.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2013 OpenWorks LLP
11      info@open-works.net
12
13   NEON support is
14   Copyright (C) 2010-2013 Samsung Electronics
15   contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16              and Kirill Batuzov <batuzovk@ispras.ru>
17
18   This program is free software; you can redistribute it and/or
19   modify it under the terms of the GNU General Public License as
20   published by the Free Software Foundation; either version 2 of the
21   License, or (at your option) any later version.
22
23   This program is distributed in the hope that it will be useful, but
24   WITHOUT ANY WARRANTY; without even the implied warranty of
25   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
26   General Public License for more details.
27
28   You should have received a copy of the GNU General Public License
29   along with this program; if not, write to the Free Software
30   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31   02110-1301, USA.
32
33   The GNU General Public License is contained in the file COPYING.
34*/
35
36#include "libvex_basictypes.h"
37#include "libvex_ir.h"
38#include "libvex.h"
39#include "ir_match.h"
40
41#include "main_util.h"
42#include "main_globals.h"
43#include "host_generic_regs.h"
44#include "host_generic_simd64.h"  // for 32-bit SIMD helpers
45#include "host_arm_defs.h"
46
47
48/*---------------------------------------------------------*/
49/*--- ARMvfp control word stuff                         ---*/
50/*---------------------------------------------------------*/
51
/* Vex-generated code expects to run with the FPU set as follows: all
   exceptions masked, round-to-nearest, non-vector mode, with the NZCV
   flags cleared, and FZ (flush to zero) disabled.  Curiously enough,
   this corresponds to a FPSCR value of zero.

   fpscr should therefore be zero on entry to Vex-generated code, and
   should be unchanged at exit.  (Or at least the bottom 28 bits
   should be zero).

   DEFAULT_FPSCR names that value; set_VFP_rounding_default() emits
   code that writes it back into FPSCR.
*/

#define DEFAULT_FPSCR 0
63
64
65/*---------------------------------------------------------*/
66/*--- ISelEnv                                           ---*/
67/*---------------------------------------------------------*/
68
69/* This carries around:
70
71   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
72     might encounter.  This is computed before insn selection starts,
73     and does not change.
74
75   - A mapping from IRTemp to HReg.  This tells the insn selector
76     which virtual register(s) are associated with each IRTemp
77     temporary.  This is computed before insn selection starts, and
78     does not change.  We expect this mapping to map precisely the
79     same set of IRTemps as the type mapping does.
80
81        - vregmap   holds the primary register for the IRTemp.
82        - vregmapHI is only used for 64-bit integer-typed
83             IRTemps.  It holds the identity of a second
84             32-bit virtual HReg, which holds the high half
85             of the value.
86
87   - The code array, that is, the insns selected so far.
88
89   - A counter, for generating new virtual registers.
90
91   - The host hardware capabilities word.  This is set at the start
92     and does not change.
93
94   - A Bool for indicating whether we may generate chain-me
95     instructions for control flow transfers, or whether we must use
96     XAssisted.
97
98   - The maximum guest address of any guest insn in this block.
99     Actually, the address of the highest-addressed byte from any insn
100     in this block.  Is set at the start and does not change.  This is
101     used for detecting jumps which are definitely forward-edges from
102     this block, and therefore can be made (chained) to the fast entry
103     point of the destination, thereby avoiding the destination's
104     event check.
105
106   Note, this is all (well, mostly) host-independent.
107*/
108
109typedef
110   struct {
111      /* Constant -- are set at the start and do not change. */
112      IRTypeEnv*   type_env;
113
114      HReg*        vregmap;
115      HReg*        vregmapHI;
116      Int          n_vregmap;
117
118      UInt         hwcaps;
119
120      Bool         chainingAllowed;
121      Addr32       max_ga;
122
123      /* These are modified as we go along. */
124      HInstrArray* code;
125      Int          vreg_ctr;
126   }
127   ISelEnv;
128
129static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
130{
131   vassert(tmp >= 0);
132   vassert(tmp < env->n_vregmap);
133   return env->vregmap[tmp];
134}
135
136static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
137{
138   vassert(tmp >= 0);
139   vassert(tmp < env->n_vregmap);
140   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
141   *vrLO = env->vregmap[tmp];
142   *vrHI = env->vregmapHI[tmp];
143}
144
145static void addInstr ( ISelEnv* env, ARMInstr* instr )
146{
147   addHInstr(env->code, instr);
148   if (vex_traceflags & VEX_TRACE_VCODE) {
149      ppARMInstr(instr);
150      vex_printf("\n");
151   }
152}
153
154static HReg newVRegI ( ISelEnv* env )
155{
156   HReg reg = mkHReg(True/*virtual reg*/, HRcInt32, 0/*enc*/, env->vreg_ctr);
157   env->vreg_ctr++;
158   return reg;
159}
160
161static HReg newVRegD ( ISelEnv* env )
162{
163   HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0/*enc*/, env->vreg_ctr);
164   env->vreg_ctr++;
165   return reg;
166}
167
168static HReg newVRegF ( ISelEnv* env )
169{
170   HReg reg = mkHReg(True/*virtual reg*/, HRcFlt32, 0/*enc*/, env->vreg_ctr);
171   env->vreg_ctr++;
172   return reg;
173}
174
175static HReg newVRegV ( ISelEnv* env )
176{
177   HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0/*enc*/, env->vreg_ctr);
178   env->vreg_ctr++;
179   return reg;
180}
181
182/* These are duplicated in guest_arm_toIR.c */
183static IRExpr* unop ( IROp op, IRExpr* a )
184{
185   return IRExpr_Unop(op, a);
186}
187
188static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
189{
190   return IRExpr_Binop(op, a1, a2);
191}
192
193static IRExpr* bind ( Int binder )
194{
195   return IRExpr_Binder(binder);
196}
197
198
199/*---------------------------------------------------------*/
200/*--- ISEL: Forward declarations                        ---*/
201/*---------------------------------------------------------*/
202
/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk functions do the real work, but must not be called
   directly.  For each XXX, iselXXX calls its iselXXX_wrk counterpart
   and then sanity-checks what it returned (for example, that the
   returned registers are virtual).
*/
209static ARMAMode1*  iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
210static ARMAMode1*  iselIntExpr_AMode1     ( ISelEnv* env, IRExpr* e );
211
212static ARMAMode2*  iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
213static ARMAMode2*  iselIntExpr_AMode2     ( ISelEnv* env, IRExpr* e );
214
215static ARMAModeV*  iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
216static ARMAModeV*  iselIntExpr_AModeV     ( ISelEnv* env, IRExpr* e );
217
218static ARMAModeN*  iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
219static ARMAModeN*  iselIntExpr_AModeN     ( ISelEnv* env, IRExpr* e );
220
221static ARMRI84*    iselIntExpr_RI84_wrk
222        ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
223static ARMRI84*    iselIntExpr_RI84
224        ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
225
226static ARMRI5*     iselIntExpr_RI5_wrk    ( ISelEnv* env, IRExpr* e );
227static ARMRI5*     iselIntExpr_RI5        ( ISelEnv* env, IRExpr* e );
228
229static ARMCondCode iselCondCode_wrk       ( ISelEnv* env, IRExpr* e );
230static ARMCondCode iselCondCode           ( ISelEnv* env, IRExpr* e );
231
232static HReg        iselIntExpr_R_wrk      ( ISelEnv* env, IRExpr* e );
233static HReg        iselIntExpr_R          ( ISelEnv* env, IRExpr* e );
234
235static void        iselInt64Expr_wrk      ( HReg* rHi, HReg* rLo,
236                                            ISelEnv* env, IRExpr* e );
237static void        iselInt64Expr          ( HReg* rHi, HReg* rLo,
238                                            ISelEnv* env, IRExpr* e );
239
240static HReg        iselDblExpr_wrk        ( ISelEnv* env, IRExpr* e );
241static HReg        iselDblExpr            ( ISelEnv* env, IRExpr* e );
242
243static HReg        iselFltExpr_wrk        ( ISelEnv* env, IRExpr* e );
244static HReg        iselFltExpr            ( ISelEnv* env, IRExpr* e );
245
246static HReg        iselNeon64Expr_wrk     ( ISelEnv* env, IRExpr* e );
247static HReg        iselNeon64Expr         ( ISelEnv* env, IRExpr* e );
248
249static HReg        iselNeonExpr_wrk       ( ISelEnv* env, IRExpr* e );
250static HReg        iselNeonExpr           ( ISelEnv* env, IRExpr* e );
251
252/*---------------------------------------------------------*/
253/*--- ISEL: Misc helpers                                ---*/
254/*---------------------------------------------------------*/
255
256static UInt ROR32 ( UInt x, UInt sh ) {
257   vassert(sh >= 0 && sh < 32);
258   if (sh == 0)
259      return x;
260   else
261      return (x << (32-sh)) | (x >> sh);
262}
263
264/* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
265   form, and if so return the components. */
266static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
267{
268   UInt i;
269   for (i = 0; i < 16; i++) {
270      if (0 == (u & 0xFFFFFF00)) {
271         *u8 = u;
272         *u4 = i;
273         return True;
274      }
275      u = ROR32(u, 30);
276   }
277   vassert(i == 16);
278   return False;
279}
280
281/* Make a int reg-reg move. */
282static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
283{
284   vassert(hregClass(src) == HRcInt32);
285   vassert(hregClass(dst) == HRcInt32);
286   return ARMInstr_Mov(dst, ARMRI84_R(src));
287}
288
289/* Set the VFP unit's rounding mode to default (round to nearest). */
290static void set_VFP_rounding_default ( ISelEnv* env )
291{
292   /* mov rTmp, #DEFAULT_FPSCR
293      fmxr fpscr, rTmp
294   */
295   HReg rTmp = newVRegI(env);
296   addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
297   addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
298}
299
300/* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
301   expression denoting a value in the range 0 .. 3, indicating a round
302   mode encoded as per type IRRoundingMode.  Set FPSCR to have the
303   same rounding.
304*/
305static
306void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
307{
308   /* This isn't simple, because 'mode' carries an IR rounding
309      encoding, and we need to translate that to an ARMvfp one:
310      The IR encoding:
311         00  to nearest (the default)
312         10  to +infinity
313         01  to -infinity
314         11  to zero
315      The ARMvfp encoding:
316         00  to nearest
317         01  to +infinity
318         10  to -infinity
319         11  to zero
320      Easy enough to do; just swap the two bits.
321   */
322   HReg irrm = iselIntExpr_R(env, mode);
323   HReg tL   = newVRegI(env);
324   HReg tR   = newVRegI(env);
325   HReg t3   = newVRegI(env);
326   /* tL = irrm << 1;
327      tR = irrm >> 1;  if we're lucky, these will issue together
328      tL &= 2;
329      tR &= 1;         ditto
330      t3 = tL | tR;
331      t3 <<= 22;
332      fmxr fpscr, t3
333   */
334   addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
335   addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
336   addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
337   addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
338   addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
339   addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
340   addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
341}
342
343
344/*---------------------------------------------------------*/
345/*--- ISEL: Function call helpers                       ---*/
346/*---------------------------------------------------------*/
347
348/* Used only in doHelperCall.  See big comment in doHelperCall re
349   handling of register-parameter args.  This function figures out
350   whether evaluation of an expression might require use of a fixed
351   register.  If in doubt return True (safe but suboptimal).
352*/
353static
354Bool mightRequireFixedRegs ( IRExpr* e )
355{
356   if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) {
357      // These are always "safe" -- either a copy of r13(sp) in some
358      // arbitrary vreg, or a copy of r8, respectively.
359      return False;
360   }
361   /* Else it's a "normal" expression. */
362   switch (e->tag) {
363   case Iex_RdTmp: case Iex_Const: case Iex_Get:
364      return False;
365   default:
366      return True;
367   }
368}
369
370
371/* Do a complete function call.  |guard| is a Ity_Bit expression
372   indicating whether or not the call happens.  If guard==NULL, the
373   call is unconditional.  |retloc| is set to indicate where the
374   return value is after the call.  The caller (of this fn) must
375   generate code to add |stackAdjustAfterCall| to the stack pointer
376   after the call is done.  Returns True iff it managed to handle this
377   combination of arg/return types, else returns False. */
378
379static
380Bool doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
381                    /*OUT*/RetLoc* retloc,
382                    ISelEnv* env,
383                    IRExpr* guard,
384                    IRCallee* cee, IRType retTy, IRExpr** args )
385{
386   ARMCondCode cc;
387   HReg        argregs[ARM_N_ARGREGS];
388   HReg        tmpregs[ARM_N_ARGREGS];
389   Bool        go_fast;
390   Int         n_args, i, nextArgReg;
391   Addr32      target;
392
393   vassert(ARM_N_ARGREGS == 4);
394
395   /* Set default returns.  We'll update them later if needed. */
396   *stackAdjustAfterCall = 0;
397   *retloc               = mk_RetLoc_INVALID();
398
399   /* These are used for cross-checking that IR-level constraints on
400      the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
401   UInt nVECRETs = 0;
402   UInt nBBPTRs  = 0;
403
404   /* Marshal args for a call and do the call.
405
406      This function only deals with a tiny set of possibilities, which
407      cover all helpers in practice.  The restrictions are that only
408      arguments in registers are supported, hence only ARM_N_REGPARMS
409      x 32 integer bits in total can be passed.  In fact the only
410      supported arg types are I32 and I64.
411
412      The return type can be I{64,32} or V128.  In the V128 case, it
413      is expected that |args| will contain the special node
414      IRExpr_VECRET(), in which case this routine generates code to
415      allocate space on the stack for the vector return value.  Since
416      we are not passing any scalars on the stack, it is enough to
417      preallocate the return space before marshalling any arguments,
418      in this case.
419
420      |args| may also contain IRExpr_BBPTR(), in which case the
421      value in r8 is passed as the corresponding argument.
422
423      Generating code which is both efficient and correct when
424      parameters are to be passed in registers is difficult, for the
425      reasons elaborated in detail in comments attached to
426      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
427      of the method described in those comments.
428
429      The problem is split into two cases: the fast scheme and the
430      slow scheme.  In the fast scheme, arguments are computed
431      directly into the target (real) registers.  This is only safe
432      when we can be sure that computation of each argument will not
433      trash any real registers set by computation of any other
434      argument.
435
436      In the slow scheme, all args are first computed into vregs, and
437      once they are all done, they are moved to the relevant real
438      regs.  This always gives correct code, but it also gives a bunch
439      of vreg-to-rreg moves which are usually redundant but are hard
440      for the register allocator to get rid of.
441
442      To decide which scheme to use, all argument expressions are
443      first examined.  If they are all so simple that it is clear they
444      will be evaluated without use of any fixed registers, use the
445      fast scheme, else use the slow scheme.  Note also that only
446      unconditional calls may use the fast scheme, since having to
447      compute a condition expression could itself trash real
448      registers.
449
450      Note this requires being able to examine an expression and
451      determine whether or not evaluation of it might use a fixed
452      register.  That requires knowledge of how the rest of this insn
453      selector works.  Currently just the following 3 are regarded as
454      safe -- hopefully they cover the majority of arguments in
455      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
456   */
457
458   /* Note that the cee->regparms field is meaningless on ARM hosts
459      (since there is only one calling convention) and so we always
460      ignore it. */
461
462   n_args = 0;
463   for (i = 0; args[i]; i++) {
464      IRExpr* arg = args[i];
465      if (UNLIKELY(arg->tag == Iex_VECRET)) {
466         nVECRETs++;
467      } else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
468         nBBPTRs++;
469      }
470      n_args++;
471   }
472
473   argregs[0] = hregARM_R0();
474   argregs[1] = hregARM_R1();
475   argregs[2] = hregARM_R2();
476   argregs[3] = hregARM_R3();
477
478   tmpregs[0] = tmpregs[1] = tmpregs[2] =
479   tmpregs[3] = INVALID_HREG;
480
481   /* First decide which scheme (slow or fast) is to be used.  First
482      assume the fast scheme, and select slow if any contraindications
483      (wow) appear. */
484
485   go_fast = True;
486
487   if (guard) {
488      if (guard->tag == Iex_Const
489          && guard->Iex.Const.con->tag == Ico_U1
490          && guard->Iex.Const.con->Ico.U1 == True) {
491         /* unconditional */
492      } else {
493         /* Not manifestly unconditional -- be conservative. */
494         go_fast = False;
495      }
496   }
497
498   if (go_fast) {
499      for (i = 0; i < n_args; i++) {
500         if (mightRequireFixedRegs(args[i])) {
501            go_fast = False;
502            break;
503         }
504      }
505   }
506
507   if (go_fast) {
508      if (retTy == Ity_V128 || retTy == Ity_V256)
509         go_fast = False;
510   }
511
512   /* At this point the scheme to use has been established.  Generate
513      code to get the arg values into the argument rregs.  If we run
514      out of arg regs, give up. */
515
516   if (go_fast) {
517
518      /* FAST SCHEME */
519      nextArgReg = 0;
520
521      for (i = 0; i < n_args; i++) {
522         IRExpr* arg = args[i];
523
524         IRType  aTy = Ity_INVALID;
525         if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
526            aTy = typeOfIRExpr(env->type_env, arg);
527
528         if (nextArgReg >= ARM_N_ARGREGS)
529            return False; /* out of argregs */
530
531         if (aTy == Ity_I32) {
532            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
533                                        iselIntExpr_R(env, arg) ));
534            nextArgReg++;
535         }
536         else if (aTy == Ity_I64) {
537            /* 64-bit args must be passed in an a reg-pair of the form
538               n:n+1, where n is even.  Hence either r0:r1 or r2:r3.
539               On a little-endian host, the less significant word is
540               passed in the lower-numbered register. */
541            if (nextArgReg & 1) {
542               if (nextArgReg >= ARM_N_ARGREGS)
543                  return False; /* out of argregs */
544               addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
545               nextArgReg++;
546            }
547            if (nextArgReg >= ARM_N_ARGREGS)
548               return False; /* out of argregs */
549            HReg raHi, raLo;
550            iselInt64Expr(&raHi, &raLo, env, arg);
551            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
552            nextArgReg++;
553            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
554            nextArgReg++;
555         }
556         else if (arg->tag == Iex_BBPTR) {
557            vassert(0); //ATC
558            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
559                                        hregARM_R8() ));
560            nextArgReg++;
561         }
562         else if (arg->tag == Iex_VECRET) {
563            // If this happens, it denotes ill-formed IR
564            vassert(0);
565         }
566         else
567            return False; /* unhandled arg type */
568      }
569
570      /* Fast scheme only applies for unconditional calls.  Hence: */
571      cc = ARMcc_AL;
572
573   } else {
574
575      /* SLOW SCHEME; move via temporaries */
576      nextArgReg = 0;
577
578      for (i = 0; i < n_args; i++) {
579         IRExpr* arg = args[i];
580
581         IRType  aTy = Ity_INVALID;
582         if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
583            aTy  = typeOfIRExpr(env->type_env, arg);
584
585         if (nextArgReg >= ARM_N_ARGREGS)
586            return False; /* out of argregs */
587
588         if (aTy == Ity_I32) {
589            tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
590            nextArgReg++;
591         }
592         else if (aTy == Ity_I64) {
593            /* Same comment applies as in the Fast-scheme case. */
594            if (nextArgReg & 1)
595               nextArgReg++;
596            if (nextArgReg + 1 >= ARM_N_ARGREGS)
597               return False; /* out of argregs */
598            HReg raHi, raLo;
599            iselInt64Expr(&raHi, &raLo, env, args[i]);
600            tmpregs[nextArgReg] = raLo;
601            nextArgReg++;
602            tmpregs[nextArgReg] = raHi;
603            nextArgReg++;
604         }
605         else if (arg->tag == Iex_BBPTR) {
606            vassert(0); //ATC
607            tmpregs[nextArgReg] = hregARM_R8();
608            nextArgReg++;
609         }
610         else if (arg->tag == Iex_VECRET) {
611            // If this happens, it denotes ill-formed IR
612            vassert(0);
613         }
614         else
615            return False; /* unhandled arg type */
616      }
617
618      /* Now we can compute the condition.  We can't do it earlier
619         because the argument computations could trash the condition
620         codes.  Be a bit clever to handle the common case where the
621         guard is 1:Bit. */
622      cc = ARMcc_AL;
623      if (guard) {
624         if (guard->tag == Iex_Const
625             && guard->Iex.Const.con->tag == Ico_U1
626             && guard->Iex.Const.con->Ico.U1 == True) {
627            /* unconditional -- do nothing */
628         } else {
629            cc = iselCondCode( env, guard );
630         }
631      }
632
633      /* Move the args to their final destinations. */
634      for (i = 0; i < nextArgReg; i++) {
635         if (hregIsInvalid(tmpregs[i])) { // Skip invalid regs
636            addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
637            continue;
638         }
639         /* None of these insns, including any spill code that might
640            be generated, may alter the condition codes. */
641         addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
642      }
643
644   }
645
646   /* Should be assured by checks above */
647   vassert(nextArgReg <= ARM_N_ARGREGS);
648
649   /* Do final checks, set the return values, and generate the call
650      instruction proper. */
651   vassert(nBBPTRs == 0 || nBBPTRs == 1);
652   vassert(nVECRETs == (retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0);
653   vassert(*stackAdjustAfterCall == 0);
654   vassert(is_RetLoc_INVALID(*retloc));
655   switch (retTy) {
656         case Ity_INVALID:
657            /* Function doesn't return a value. */
658            *retloc = mk_RetLoc_simple(RLPri_None);
659            break;
660         case Ity_I64:
661            *retloc = mk_RetLoc_simple(RLPri_2Int);
662            break;
663         case Ity_I32: case Ity_I16: case Ity_I8:
664            *retloc = mk_RetLoc_simple(RLPri_Int);
665            break;
666         case Ity_V128:
667            vassert(0); // ATC
668            *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
669            *stackAdjustAfterCall = 16;
670            break;
671         case Ity_V256:
672            vassert(0); // ATC
673            *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
674            *stackAdjustAfterCall = 32;
675            break;
676         default:
677            /* IR can denote other possible return types, but we don't
678               handle those here. */
679           vassert(0);
680   }
681
682   /* Finally, generate the call itself.  This needs the *retloc value
683      set in the switch above, which is why it's at the end. */
684
685   /* nextArgReg doles out argument registers.  Since these are
686      assigned in the order r0, r1, r2, r3, its numeric value at this
687      point, which must be between 0 and 4 inclusive, is going to be
688      equal to the number of arg regs in use for the call.  Hence bake
689      that number into the call (we'll need to know it when doing
690      register allocation, to know what regs the call reads.)
691
692      There is a bit of a twist -- harmless but worth recording.
693      Suppose the arg types are (Ity_I32, Ity_I64).  Then we will have
694      the first arg in r0 and the second in r3:r2, but r1 isn't used.
695      We nevertheless have nextArgReg==4 and bake that into the call
696      instruction.  This will mean the register allocator wil believe
697      this insn reads r1 when in fact it doesn't.  But that's
698      harmless; it just artificially extends the live range of r1
699      unnecessarily.  The best fix would be to put into the
700      instruction, a bitmask indicating which of r0/1/2/3 carry live
701      values.  But that's too much hassle. */
702
703   target = (Addr)cee->addr;
704   addInstr(env, ARMInstr_Call( cc, target, nextArgReg, *retloc ));
705
706   return True; /* success */
707}
708
709
710/*---------------------------------------------------------*/
711/*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
712/*---------------------------------------------------------*/
713
714/* Select insns for an integer-typed expression, and add them to the
715   code list.  Return a reg holding the result.  This reg will be a
716   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
717   want to modify it, ask for a new vreg, copy it in there, and modify
718   the copy.  The register allocator will do its best to map both
719   vregs to the same real register, so the copies will often disappear
720   later in the game.
721
722   This should handle expressions of 32, 16 and 8-bit type.  All
723   results are returned in a 32-bit register.  For 16- and 8-bit
724   expressions, the upper 16/24 bits are arbitrary, so you should mask
725   or sign extend partial values if necessary.
726*/
727
728/* --------------------- AMode1 --------------------- */
729
730/* Return an AMode1 which computes the value of the specified
731   expression, possibly also adding insns to the code list as a
732   result.  The expression may only be a 32-bit one.
733*/
734
735static Bool sane_AMode1 ( ARMAMode1* am )
736{
737   switch (am->tag) {
738      case ARMam1_RI:
739         return
740            toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
741                    && (hregIsVirtual(am->ARMam1.RI.reg)
742                        || sameHReg(am->ARMam1.RI.reg, hregARM_R8()))
743                    && am->ARMam1.RI.simm13 >= -4095
744                    && am->ARMam1.RI.simm13 <= 4095 );
745      case ARMam1_RRS:
746         return
747            toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
748                    && hregIsVirtual(am->ARMam1.RRS.base)
749                    && hregClass(am->ARMam1.RRS.index) == HRcInt32
750                    && hregIsVirtual(am->ARMam1.RRS.index)
751                    && am->ARMam1.RRS.shift >= 0
752                    && am->ARMam1.RRS.shift <= 3 );
753      default:
754         vpanic("sane_AMode: unknown ARM AMode1 tag");
755   }
756}
757
758static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
759{
760   ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
761   vassert(sane_AMode1(am));
762   return am;
763}
764
765static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
766{
767   IRType ty = typeOfIRExpr(env->type_env,e);
768   vassert(ty == Ity_I32);
769
770   /* FIXME: add RRS matching */
771
772   /* {Add32,Sub32}(expr,simm13) */
773   if (e->tag == Iex_Binop
774       && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
775       && e->Iex.Binop.arg2->tag == Iex_Const
776       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
777      Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
778      if (simm >= -4095 && simm <= 4095) {
779         HReg reg;
780         if (e->Iex.Binop.op == Iop_Sub32)
781            simm = -simm;
782         reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
783         return ARMAMode1_RI(reg, simm);
784      }
785   }
786
787   /* Doesn't match anything in particular.  Generate it into
788      a register and use that. */
789   {
790      HReg reg = iselIntExpr_R(env, e);
791      return ARMAMode1_RI(reg, 0);
792   }
793
794}
795
796
797/* --------------------- AMode2 --------------------- */
798
799/* Return an AMode2 which computes the value of the specified
800   expression, possibly also adding insns to the code list as a
801   result.  The expression may only be a 32-bit one.
802*/
803
804static Bool sane_AMode2 ( ARMAMode2* am )
805{
806   switch (am->tag) {
807      case ARMam2_RI:
808         return
809            toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
810                    && hregIsVirtual(am->ARMam2.RI.reg)
811                    && am->ARMam2.RI.simm9 >= -255
812                    && am->ARMam2.RI.simm9 <= 255 );
813      case ARMam2_RR:
814         return
815            toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
816                    && hregIsVirtual(am->ARMam2.RR.base)
817                    && hregClass(am->ARMam2.RR.index) == HRcInt32
818                    && hregIsVirtual(am->ARMam2.RR.index) );
819      default:
820         vpanic("sane_AMode: unknown ARM AMode2 tag");
821   }
822}
823
824static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
825{
826   ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
827   vassert(sane_AMode2(am));
828   return am;
829}
830
831static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
832{
833   IRType ty = typeOfIRExpr(env->type_env,e);
834   vassert(ty == Ity_I32);
835
836   /* FIXME: add RR matching */
837
838   /* {Add32,Sub32}(expr,simm8) */
839   if (e->tag == Iex_Binop
840       && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
841       && e->Iex.Binop.arg2->tag == Iex_Const
842       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
843      Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
844      if (simm >= -255 && simm <= 255) {
845         HReg reg;
846         if (e->Iex.Binop.op == Iop_Sub32)
847            simm = -simm;
848         reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
849         return ARMAMode2_RI(reg, simm);
850      }
851   }
852
853   /* Doesn't match anything in particular.  Generate it into
854      a register and use that. */
855   {
856      HReg reg = iselIntExpr_R(env, e);
857      return ARMAMode2_RI(reg, 0);
858   }
859
860}
861
862
863/* --------------------- AModeV --------------------- */
864
865/* Return an AModeV which computes the value of the specified
866   expression, possibly also adding insns to the code list as a
867   result.  The expression may only be a 32-bit one.
868*/
869
870static Bool sane_AModeV ( ARMAModeV* am )
871{
872  return toBool( hregClass(am->reg) == HRcInt32
873                 && hregIsVirtual(am->reg)
874                 && am->simm11 >= -1020 && am->simm11 <= 1020
875                 && 0 == (am->simm11 & 3) );
876}
877
878static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
879{
880   ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
881   vassert(sane_AModeV(am));
882   return am;
883}
884
885static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
886{
887   IRType ty = typeOfIRExpr(env->type_env,e);
888   vassert(ty == Ity_I32);
889
890   /* {Add32,Sub32}(expr, simm8 << 2) */
891   if (e->tag == Iex_Binop
892       && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
893       && e->Iex.Binop.arg2->tag == Iex_Const
894       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
895      Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
896      if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
897         HReg reg;
898         if (e->Iex.Binop.op == Iop_Sub32)
899            simm = -simm;
900         reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
901         return mkARMAModeV(reg, simm);
902      }
903   }
904
905   /* Doesn't match anything in particular.  Generate it into
906      a register and use that. */
907   {
908      HReg reg = iselIntExpr_R(env, e);
909      return mkARMAModeV(reg, 0);
910   }
911
912}
913
914/* -------------------- AModeN -------------------- */
915
916static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
917{
918   return iselIntExpr_AModeN_wrk(env, e);
919}
920
921static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
922{
923   HReg reg = iselIntExpr_R(env, e);
924   return mkARMAModeN_R(reg);
925}
926
927
928/* --------------------- RI84 --------------------- */
929
/* Select instructions to generate 'e' into a RI84.  If mayInv is
   true, then the caller will also accept an I84 form that denotes
   'not e'.  In this case didInv must not be NULL; *didInv is set to
   True if the returned I84 denotes 'not e', and to False otherwise.
   This complication is so as to allow generation of an RI84
   which is suitable for use in either an AND or BIC instruction,
   without knowing (before this call) which one.
*/
937static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
938                                   ISelEnv* env, IRExpr* e )
939{
940   ARMRI84* ri;
941   if (mayInv)
942      vassert(didInv != NULL);
943   ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
944   /* sanity checks ... */
945   switch (ri->tag) {
946      case ARMri84_I84:
947         return ri;
948      case ARMri84_R:
949         vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
950         vassert(hregIsVirtual(ri->ARMri84.R.reg));
951         return ri;
952      default:
953         vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
954   }
955}
956
957/* DO NOT CALL THIS DIRECTLY ! */
958static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
959                                       ISelEnv* env, IRExpr* e )
960{
961   IRType ty = typeOfIRExpr(env->type_env,e);
962   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
963
964   if (didInv) *didInv = False;
965
966   /* special case: immediate */
967   if (e->tag == Iex_Const) {
968      UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
969      switch (e->Iex.Const.con->tag) {
970         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
971         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
972         case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
973         default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
974      }
975      if (fitsIn8x4(&u8, &u4, u)) {
976         return ARMRI84_I84( (UShort)u8, (UShort)u4 );
977      }
978      if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
979         vassert(didInv);
980         *didInv = True;
981         return ARMRI84_I84( (UShort)u8, (UShort)u4 );
982      }
983      /* else fail, fall through to default case */
984   }
985
986   /* default case: calculate into a register and return that */
987   {
988      HReg r = iselIntExpr_R ( env, e );
989      return ARMRI84_R(r);
990   }
991}
992
993
994/* --------------------- RI5 --------------------- */
995
996/* Select instructions to generate 'e' into a RI5. */
997
998static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
999{
1000   ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
1001   /* sanity checks ... */
1002   switch (ri->tag) {
1003      case ARMri5_I5:
1004         return ri;
1005      case ARMri5_R:
1006         vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
1007         vassert(hregIsVirtual(ri->ARMri5.R.reg));
1008         return ri;
1009      default:
1010         vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
1011   }
1012}
1013
1014/* DO NOT CALL THIS DIRECTLY ! */
1015static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
1016{
1017   IRType ty = typeOfIRExpr(env->type_env,e);
1018   vassert(ty == Ity_I32 || ty == Ity_I8);
1019
1020   /* special case: immediate */
1021   if (e->tag == Iex_Const) {
1022      UInt u; /* both invalid */
1023      switch (e->Iex.Const.con->tag) {
1024         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1025         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1026         case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
1027         default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
1028      }
1029      if (u >= 1 && u <= 31) {
1030         return ARMRI5_I5(u);
1031      }
1032      /* else fail, fall through to default case */
1033   }
1034
1035   /* default case: calculate into a register and return that */
1036   {
1037      HReg r = iselIntExpr_R ( env, e );
1038      return ARMRI5_R(r);
1039   }
1040}
1041
1042
1043/* ------------------- CondCode ------------------- */
1044
/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */
1048
1049static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1050{
1051   ARMCondCode cc = iselCondCode_wrk(env,e);
1052   vassert(cc != ARMcc_NV);
1053   return cc;
1054}
1055
1056static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1057{
1058   vassert(e);
1059   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1060
1061   /* var */
1062   if (e->tag == Iex_RdTmp) {
1063      HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1064      /* CmpOrTst doesn't modify rTmp; so this is OK. */
1065      ARMRI84* one  = ARMRI84_I84(1,0);
1066      addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
1067      return ARMcc_NE;
1068   }
1069
1070   /* Not1(e) */
1071   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1072      /* Generate code for the arg, and negate the test condition */
1073      return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
1074   }
1075
1076   /* --- patterns rooted at: 32to1 --- */
1077
1078   if (e->tag == Iex_Unop
1079       && e->Iex.Unop.op == Iop_32to1) {
1080      HReg     rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
1081      ARMRI84* one  = ARMRI84_I84(1,0);
1082      addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
1083      return ARMcc_NE;
1084   }
1085
1086   /* --- patterns rooted at: CmpNEZ8 --- */
1087
1088   if (e->tag == Iex_Unop
1089       && e->Iex.Unop.op == Iop_CmpNEZ8) {
1090      HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1091      ARMRI84* xFF  = ARMRI84_I84(0xFF,0);
1092      addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
1093      return ARMcc_NE;
1094   }
1095
1096   /* --- patterns rooted at: CmpNEZ32 --- */
1097
1098   if (e->tag == Iex_Unop
1099       && e->Iex.Unop.op == Iop_CmpNEZ32) {
1100      HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1101      ARMRI84* zero = ARMRI84_I84(0,0);
1102      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
1103      return ARMcc_NE;
1104   }
1105
1106   /* --- patterns rooted at: CmpNEZ64 --- */
1107
1108   if (e->tag == Iex_Unop
1109       && e->Iex.Unop.op == Iop_CmpNEZ64) {
1110      HReg     tHi, tLo;
1111      HReg     tmp  = newVRegI(env);
1112      ARMRI84* zero = ARMRI84_I84(0,0);
1113      iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
1114      addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
1115      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
1116      return ARMcc_NE;
1117   }
1118
1119   /* --- Cmp*32*(x,y) --- */
1120   if (e->tag == Iex_Binop
1121       && (e->Iex.Binop.op == Iop_CmpEQ32
1122           || e->Iex.Binop.op == Iop_CmpNE32
1123           || e->Iex.Binop.op == Iop_CmpLT32S
1124           || e->Iex.Binop.op == Iop_CmpLT32U
1125           || e->Iex.Binop.op == Iop_CmpLE32S
1126           || e->Iex.Binop.op == Iop_CmpLE32U)) {
1127      HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1128      ARMRI84* argR = iselIntExpr_RI84(NULL,False,
1129                                       env, e->Iex.Binop.arg2);
1130      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
1131      switch (e->Iex.Binop.op) {
1132         case Iop_CmpEQ32:  return ARMcc_EQ;
1133         case Iop_CmpNE32:  return ARMcc_NE;
1134         case Iop_CmpLT32S: return ARMcc_LT;
1135         case Iop_CmpLT32U: return ARMcc_LO;
1136         case Iop_CmpLE32S: return ARMcc_LE;
1137         case Iop_CmpLE32U: return ARMcc_LS;
1138         default: vpanic("iselCondCode(arm): CmpXX32");
1139      }
1140   }
1141
1142   /* const */
1143   /* Constant 1:Bit */
1144   if (e->tag == Iex_Const) {
1145      HReg r;
1146      vassert(e->Iex.Const.con->tag == Ico_U1);
1147      vassert(e->Iex.Const.con->Ico.U1 == True
1148              || e->Iex.Const.con->Ico.U1 == False);
1149      r = newVRegI(env);
1150      addInstr(env, ARMInstr_Imm32(r, 0));
1151      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r, ARMRI84_R(r)));
1152      return e->Iex.Const.con->Ico.U1 ? ARMcc_EQ : ARMcc_NE;
1153   }
1154
1155   // JRS 2013-Jan-03: this seems completely nonsensical
1156   /* --- CasCmpEQ* --- */
1157   /* Ist_Cas has a dummy argument to compare with, so comparison is
1158      always true. */
1159   //if (e->tag == Iex_Binop
1160   //    && (e->Iex.Binop.op == Iop_CasCmpEQ32
1161   //        || e->Iex.Binop.op == Iop_CasCmpEQ16
1162   //        || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1163   //   return ARMcc_AL;
1164   //}
1165
1166   ppIRExpr(e);
1167   vpanic("iselCondCode");
1168}
1169
1170
1171/* --------------------- Reg --------------------- */
1172
1173static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1174{
1175   HReg r = iselIntExpr_R_wrk(env, e);
1176   /* sanity checks ... */
1177#  if 0
1178   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1179#  endif
1180   vassert(hregClass(r) == HRcInt32);
1181   vassert(hregIsVirtual(r));
1182   return r;
1183}
1184
1185/* DO NOT CALL THIS DIRECTLY ! */
1186static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1187{
1188   IRType ty = typeOfIRExpr(env->type_env,e);
1189   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1190
1191   switch (e->tag) {
1192
1193   /* --------- TEMP --------- */
1194   case Iex_RdTmp: {
1195      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1196   }
1197
1198   /* --------- LOAD --------- */
1199   case Iex_Load: {
1200      HReg dst  = newVRegI(env);
1201
1202      if (e->Iex.Load.end != Iend_LE)
1203         goto irreducible;
1204
1205      if (ty == Ity_I32) {
1206         ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1207         addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, dst, amode));
1208         return dst;
1209      }
1210      if (ty == Ity_I16) {
1211         ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
1212         addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
1213                                       True/*isLoad*/, False/*!signedLoad*/,
1214                                       dst, amode));
1215         return dst;
1216      }
1217      if (ty == Ity_I8) {
1218         ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1219         addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, True/*isLoad*/, dst, amode));
1220         return dst;
1221      }
1222      break;
1223   }
1224
1225//zz   /* --------- TERNARY OP --------- */
1226//zz   case Iex_Triop: {
1227//zz      IRTriop *triop = e->Iex.Triop.details;
1228//zz      /* C3210 flags following FPU partial remainder (fprem), both
1229//zz         IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1230//zz      if (triop->op == Iop_PRemC3210F64
1231//zz          || triop->op == Iop_PRem1C3210F64) {
1232//zz         HReg junk = newVRegF(env);
1233//zz         HReg dst  = newVRegI(env);
1234//zz         HReg srcL = iselDblExpr(env, triop->arg2);
1235//zz         HReg srcR = iselDblExpr(env, triop->arg3);
1236//zz         /* XXXROUNDINGFIXME */
1237//zz         /* set roundingmode here */
1238//zz         addInstr(env, X86Instr_FpBinary(
1239//zz                           e->Iex.Binop.op==Iop_PRemC3210F64
1240//zz                              ? Xfp_PREM : Xfp_PREM1,
1241//zz                           srcL,srcR,junk
1242//zz                 ));
1243//zz         /* The previous pseudo-insn will have left the FPU's C3210
1244//zz            flags set correctly.  So bag them. */
1245//zz         addInstr(env, X86Instr_FpStSW_AX());
1246//zz         addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1247//zz         addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1248//zz         return dst;
1249//zz      }
1250//zz
1251//zz      break;
1252//zz   }
1253
1254   /* --------- BINARY OP --------- */
1255   case Iex_Binop: {
1256
1257      ARMAluOp   aop = 0; /* invalid */
1258      ARMShiftOp sop = 0; /* invalid */
1259
1260      /* ADD/SUB/AND/OR/XOR */
1261      switch (e->Iex.Binop.op) {
1262         case Iop_And32: {
1263            Bool     didInv = False;
1264            HReg     dst    = newVRegI(env);
1265            HReg     argL   = iselIntExpr_R(env, e->Iex.Binop.arg1);
1266            ARMRI84* argR   = iselIntExpr_RI84(&didInv, True/*mayInv*/,
1267                                               env, e->Iex.Binop.arg2);
1268            addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
1269                                       dst, argL, argR));
1270            return dst;
1271         }
1272         case Iop_Or32:  aop = ARMalu_OR;  goto std_binop;
1273         case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
1274         case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
1275         case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
1276         std_binop: {
1277            HReg     dst  = newVRegI(env);
1278            HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1279            ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
1280                                             env, e->Iex.Binop.arg2);
1281            addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
1282            return dst;
1283         }
1284         default: break;
1285      }
1286
1287      /* SHL/SHR/SAR */
1288      switch (e->Iex.Binop.op) {
1289         case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
1290         case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
1291         case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
1292         sh_binop: {
1293            HReg    dst  = newVRegI(env);
1294            HReg    argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1295            ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
1296            addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
1297            vassert(ty == Ity_I32); /* else the IR is ill-typed */
1298            return dst;
1299         }
1300         default: break;
1301      }
1302
1303      /* MUL */
1304      if (e->Iex.Binop.op == Iop_Mul32) {
1305         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1306         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1307         HReg dst  = newVRegI(env);
1308         addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1309         addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1310         addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
1311         addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1312         return dst;
1313      }
1314
1315      /* Handle misc other ops. */
1316
1317      if (e->Iex.Binop.op == Iop_Max32U) {
1318         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1319         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1320         HReg dst  = newVRegI(env);
1321         addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
1322                                         ARMRI84_R(argR)));
1323         addInstr(env, mk_iMOVds_RR(dst, argL));
1324         addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
1325         return dst;
1326      }
1327
1328      if (e->Iex.Binop.op == Iop_CmpF64) {
1329         HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
1330         HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
1331         HReg dst = newVRegI(env);
1332         /* Do the compare (FCMPD) and set NZCV in FPSCR.  Then also do
1333            FMSTAT, so we can examine the results directly. */
1334         addInstr(env, ARMInstr_VCmpD(dL, dR));
1335         /* Create in dst, the IRCmpF64Result encoded result. */
1336         addInstr(env, ARMInstr_Imm32(dst, 0));
1337         addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
1338         addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
1339         addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
1340         addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
1341         return dst;
1342      }
1343
1344      if (e->Iex.Binop.op == Iop_F64toI32S
1345          || e->Iex.Binop.op == Iop_F64toI32U) {
1346         /* Wretched uglyness all round, due to having to deal
1347            with rounding modes.  Oh well. */
1348         /* FIXME: if arg1 is a constant indicating round-to-zero,
1349            then we could skip all this arsing around with FPSCR and
1350            simply emit FTO{S,U}IZD. */
1351         Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
1352         HReg valD  = iselDblExpr(env, e->Iex.Binop.arg2);
1353         set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
1354         /* FTO{S,U}ID valF, valD */
1355         HReg valF = newVRegF(env);
1356         addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
1357                                       valF, valD));
1358         set_VFP_rounding_default(env);
1359         /* VMOV dst, valF */
1360         HReg dst = newVRegI(env);
1361         addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
1362         return dst;
1363      }
1364
1365      if (e->Iex.Binop.op == Iop_GetElem8x8
1366          || e->Iex.Binop.op == Iop_GetElem16x4
1367          || e->Iex.Binop.op == Iop_GetElem32x2) {
1368         if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1369            HReg res = newVRegI(env);
1370            HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
1371            UInt index, size;
1372            if (e->Iex.Binop.arg2->tag != Iex_Const ||
1373                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1374               vpanic("ARM target supports GetElem with constant "
1375                      "second argument only (neon)\n");
1376            }
1377            index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1378            switch (e->Iex.Binop.op) {
1379               case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1380               case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1381               case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1382               default: vassert(0);
1383            }
1384            addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1385                                           mkARMNRS(ARMNRS_Reg, res, 0),
1386                                           mkARMNRS(ARMNRS_Scalar, arg, index),
1387                                           size, False));
1388            return res;
1389         }
1390      }
1391
1392      if (e->Iex.Binop.op == Iop_GetElem32x2
1393          && e->Iex.Binop.arg2->tag == Iex_Const
1394          && !(env->hwcaps & VEX_HWCAPS_ARM_NEON)) {
1395         /* We may have to do GetElem32x2 on a non-NEON capable
1396            target. */
1397         IRConst* con = e->Iex.Binop.arg2->Iex.Const.con;
1398         vassert(con->tag == Ico_U8); /* else IR is ill-typed */
1399         UInt index = con->Ico.U8;
1400         if (index >= 0 && index <= 1) {
1401            HReg rHi, rLo;
1402            iselInt64Expr(&rHi, &rLo, env, e->Iex.Binop.arg1);
1403            return index == 0 ? rLo : rHi;
1404         }
1405      }
1406
1407      if (e->Iex.Binop.op == Iop_GetElem8x16
1408          || e->Iex.Binop.op == Iop_GetElem16x8
1409          || e->Iex.Binop.op == Iop_GetElem32x4) {
1410         if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1411            HReg res = newVRegI(env);
1412            HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
1413            UInt index, size;
1414            if (e->Iex.Binop.arg2->tag != Iex_Const ||
1415                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1416               vpanic("ARM target supports GetElem with constant "
1417                      "second argument only (neon)\n");
1418            }
1419            index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1420            switch (e->Iex.Binop.op) {
1421               case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1422               case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1423               case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1424               default: vassert(0);
1425            }
1426            addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1427                                           mkARMNRS(ARMNRS_Reg, res, 0),
1428                                           mkARMNRS(ARMNRS_Scalar, arg, index),
1429                                           size, True));
1430            return res;
1431         }
1432      }
1433
1434      /* All cases involving host-side helper calls. */
1435      void* fn = NULL;
1436      switch (e->Iex.Binop.op) {
1437         case Iop_Add16x2:
1438            fn = &h_generic_calc_Add16x2; break;
1439         case Iop_Sub16x2:
1440            fn = &h_generic_calc_Sub16x2; break;
1441         case Iop_HAdd16Ux2:
1442            fn = &h_generic_calc_HAdd16Ux2; break;
1443         case Iop_HAdd16Sx2:
1444            fn = &h_generic_calc_HAdd16Sx2; break;
1445         case Iop_HSub16Ux2:
1446            fn = &h_generic_calc_HSub16Ux2; break;
1447         case Iop_HSub16Sx2:
1448            fn = &h_generic_calc_HSub16Sx2; break;
1449         case Iop_QAdd16Sx2:
1450            fn = &h_generic_calc_QAdd16Sx2; break;
1451         case Iop_QAdd16Ux2:
1452            fn = &h_generic_calc_QAdd16Ux2; break;
1453         case Iop_QSub16Sx2:
1454            fn = &h_generic_calc_QSub16Sx2; break;
1455         case Iop_Add8x4:
1456            fn = &h_generic_calc_Add8x4; break;
1457         case Iop_Sub8x4:
1458            fn = &h_generic_calc_Sub8x4; break;
1459         case Iop_HAdd8Ux4:
1460            fn = &h_generic_calc_HAdd8Ux4; break;
1461         case Iop_HAdd8Sx4:
1462            fn = &h_generic_calc_HAdd8Sx4; break;
1463         case Iop_HSub8Ux4:
1464            fn = &h_generic_calc_HSub8Ux4; break;
1465         case Iop_HSub8Sx4:
1466            fn = &h_generic_calc_HSub8Sx4; break;
1467         case Iop_QAdd8Sx4:
1468            fn = &h_generic_calc_QAdd8Sx4; break;
1469         case Iop_QAdd8Ux4:
1470            fn = &h_generic_calc_QAdd8Ux4; break;
1471         case Iop_QSub8Sx4:
1472            fn = &h_generic_calc_QSub8Sx4; break;
1473         case Iop_QSub8Ux4:
1474            fn = &h_generic_calc_QSub8Ux4; break;
1475         case Iop_Sad8Ux4:
1476            fn = &h_generic_calc_Sad8Ux4; break;
1477         case Iop_QAdd32S:
1478            fn = &h_generic_calc_QAdd32S; break;
1479         case Iop_QSub32S:
1480            fn = &h_generic_calc_QSub32S; break;
1481         case Iop_QSub16Ux2:
1482            fn = &h_generic_calc_QSub16Ux2; break;
1483         case Iop_DivU32:
1484            fn = &h_calc_udiv32_w_arm_semantics; break;
1485         case Iop_DivS32:
1486            fn = &h_calc_sdiv32_w_arm_semantics; break;
1487         default:
1488            break;
1489      }
1490
1491      if (fn) {
1492         HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1493         HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1494         HReg res  = newVRegI(env);
1495         addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
1496         addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
1497         addInstr(env, ARMInstr_Call( ARMcc_AL, (Addr)fn,
1498                                      2, mk_RetLoc_simple(RLPri_Int) ));
1499         addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1500         return res;
1501      }
1502
1503      break;
1504   }
1505
1506   /* --------- UNARY OP --------- */
1507   case Iex_Unop: {
1508
1509//zz      /* 1Uto8(32to1(expr32)) */
1510//zz      if (e->Iex.Unop.op == Iop_1Uto8) {
1511//zz         DECLARE_PATTERN(p_32to1_then_1Uto8);
1512//zz         DEFINE_PATTERN(p_32to1_then_1Uto8,
1513//zz                        unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1514//zz         if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1515//zz            IRExpr* expr32 = mi.bindee[0];
1516//zz            HReg dst = newVRegI(env);
1517//zz            HReg src = iselIntExpr_R(env, expr32);
1518//zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
1519//zz            addInstr(env, X86Instr_Alu32R(Xalu_AND,
1520//zz                                          X86RMI_Imm(1), dst));
1521//zz            return dst;
1522//zz         }
1523//zz      }
1524//zz
1525//zz      /* 8Uto32(LDle(expr32)) */
1526//zz      if (e->Iex.Unop.op == Iop_8Uto32) {
1527//zz         DECLARE_PATTERN(p_LDle8_then_8Uto32);
1528//zz         DEFINE_PATTERN(p_LDle8_then_8Uto32,
1529//zz                        unop(Iop_8Uto32,
1530//zz                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1531//zz         if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1532//zz            HReg dst = newVRegI(env);
1533//zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1534//zz            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1535//zz            return dst;
1536//zz         }
1537//zz      }
1538//zz
1539//zz      /* 8Sto32(LDle(expr32)) */
1540//zz      if (e->Iex.Unop.op == Iop_8Sto32) {
1541//zz         DECLARE_PATTERN(p_LDle8_then_8Sto32);
1542//zz         DEFINE_PATTERN(p_LDle8_then_8Sto32,
1543//zz                        unop(Iop_8Sto32,
1544//zz                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1545//zz         if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1546//zz            HReg dst = newVRegI(env);
1547//zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1548//zz            addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1549//zz            return dst;
1550//zz         }
1551//zz      }
1552//zz
1553//zz      /* 16Uto32(LDle(expr32)) */
1554//zz      if (e->Iex.Unop.op == Iop_16Uto32) {
1555//zz         DECLARE_PATTERN(p_LDle16_then_16Uto32);
1556//zz         DEFINE_PATTERN(p_LDle16_then_16Uto32,
1557//zz                        unop(Iop_16Uto32,
1558//zz                             IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1559//zz         if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1560//zz            HReg dst = newVRegI(env);
1561//zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1562//zz            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1563//zz            return dst;
1564//zz         }
1565//zz      }
1566//zz
1567//zz      /* 8Uto32(GET:I8) */
1568//zz      if (e->Iex.Unop.op == Iop_8Uto32) {
1569//zz         if (e->Iex.Unop.arg->tag == Iex_Get) {
1570//zz            HReg      dst;
1571//zz            X86AMode* amode;
1572//zz            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1573//zz            dst = newVRegI(env);
1574//zz            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1575//zz                                hregX86_EBP());
1576//zz            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1577//zz            return dst;
1578//zz         }
1579//zz      }
1580//zz
1581//zz      /* 16to32(GET:I16) */
1582//zz      if (e->Iex.Unop.op == Iop_16Uto32) {
1583//zz         if (e->Iex.Unop.arg->tag == Iex_Get) {
1584//zz            HReg      dst;
1585//zz            X86AMode* amode;
1586//zz            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1587//zz            dst = newVRegI(env);
1588//zz            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1589//zz                                hregX86_EBP());
1590//zz            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1591//zz            return dst;
1592//zz         }
1593//zz      }
1594
1595      switch (e->Iex.Unop.op) {
1596         case Iop_8Uto32: {
1597            HReg dst = newVRegI(env);
1598            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1599            addInstr(env, ARMInstr_Alu(ARMalu_AND,
1600                                       dst, src, ARMRI84_I84(0xFF,0)));
1601            return dst;
1602         }
1603//zz         case Iop_8Uto16:
1604//zz         case Iop_8Uto32:
1605//zz         case Iop_16Uto32: {
1606//zz            HReg dst = newVRegI(env);
1607//zz            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1608//zz            UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1609//zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
1610//zz            addInstr(env, X86Instr_Alu32R(Xalu_AND,
1611//zz                                          X86RMI_Imm(mask), dst));
1612//zz            return dst;
1613//zz         }
1614//zz         case Iop_8Sto16:
1615//zz         case Iop_8Sto32:
1616         case Iop_16Uto32: {
1617            HReg dst = newVRegI(env);
1618            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1619            ARMRI5* amt = ARMRI5_I5(16);
1620            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1621            addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
1622            return dst;
1623         }
1624         case Iop_8Sto32:
1625         case Iop_16Sto32: {
1626            HReg dst = newVRegI(env);
1627            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1628            ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
1629            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1630            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1631            return dst;
1632         }
1633//zz         case Iop_Not8:
1634//zz         case Iop_Not16:
1635         case Iop_Not32: {
1636            HReg dst = newVRegI(env);
1637            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1638            addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
1639            return dst;
1640         }
1641         case Iop_64HIto32: {
1642            HReg rHi, rLo;
1643            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1644            return rHi; /* and abandon rLo .. poor wee thing :-) */
1645         }
1646         case Iop_64to32: {
1647            HReg rHi, rLo;
1648            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1649            return rLo; /* similar stupid comment to the above ... */
1650         }
1651         case Iop_64to8: {
1652            HReg rHi, rLo;
1653            if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1654               HReg tHi = newVRegI(env);
1655               HReg tLo = newVRegI(env);
1656               HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
1657               addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1658               rHi = tHi;
1659               rLo = tLo;
1660            } else {
1661               iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1662            }
1663            return rLo;
1664         }
1665
1666         case Iop_1Uto32:
1667            /* 1Uto32(tmp).  Since I1 values generated into registers
1668               are guaranteed to have value either only zero or one,
1669               we can simply return the value of the register in this
1670               case. */
1671            if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
1672               HReg dst = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
1673               return dst;
1674            }
1675            /* else fall through */
1676         case Iop_1Uto8: {
1677            HReg        dst  = newVRegI(env);
1678            ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1679            addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1680            addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1681            return dst;
1682         }
1683
1684         case Iop_1Sto32: {
1685            HReg        dst  = newVRegI(env);
1686            ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1687            ARMRI5*     amt  = ARMRI5_I5(31);
1688            /* This is really rough.  We could do much better here;
1689               perhaps mvn{cond} dst, #0 as the second insn?
1690               (same applies to 1Sto64) */
1691            addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1692            addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1693            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1694            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1695            return dst;
1696         }
1697
1698
1699//zz         case Iop_1Sto8:
1700//zz         case Iop_1Sto16:
1701//zz         case Iop_1Sto32: {
1702//zz            /* could do better than this, but for now ... */
1703//zz            HReg dst         = newVRegI(env);
1704//zz            X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1705//zz            addInstr(env, X86Instr_Set32(cond,dst));
1706//zz            addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1707//zz            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1708//zz            return dst;
1709//zz         }
1710//zz         case Iop_Ctz32: {
1711//zz            /* Count trailing zeroes, implemented by x86 'bsfl' */
1712//zz            HReg dst = newVRegI(env);
1713//zz            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1714//zz            addInstr(env, X86Instr_Bsfr32(True,src,dst));
1715//zz            return dst;
1716//zz         }
1717         case Iop_Clz32: {
1718            /* Count leading zeroes; easy on ARM. */
1719            HReg dst = newVRegI(env);
1720            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1721            addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
1722            return dst;
1723         }
1724
1725         case Iop_CmpwNEZ32: {
1726            HReg dst = newVRegI(env);
1727            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1728            addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1729            addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1730            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
1731            return dst;
1732         }
1733
1734         case Iop_Left32: {
1735            HReg dst = newVRegI(env);
1736            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1737            addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1738            addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1739            return dst;
1740         }
1741
1742//zz         case Iop_V128to32: {
1743//zz            HReg      dst  = newVRegI(env);
1744//zz            HReg      vec  = iselVecExpr(env, e->Iex.Unop.arg);
1745//zz            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1746//zz            sub_from_esp(env, 16);
1747//zz            addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1748//zz            addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1749//zz            add_to_esp(env, 16);
1750//zz            return dst;
1751//zz         }
1752//zz
1753         case Iop_ReinterpF32asI32: {
1754            HReg dst = newVRegI(env);
1755            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1756            addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
1757            return dst;
1758         }
1759
1760//zz
1761//zz         case Iop_16to8:
1762         case Iop_32to8:
1763         case Iop_32to16:
1764            /* These are no-ops. */
1765            return iselIntExpr_R(env, e->Iex.Unop.arg);
1766
1767         default:
1768            break;
1769      }
1770
1771      /* All Unop cases involving host-side helper calls. */
1772      void* fn = NULL;
1773      switch (e->Iex.Unop.op) {
1774         case Iop_CmpNEZ16x2:
1775            fn = &h_generic_calc_CmpNEZ16x2; break;
1776         case Iop_CmpNEZ8x4:
1777            fn = &h_generic_calc_CmpNEZ8x4; break;
1778         default:
1779            break;
1780      }
1781
1782      if (fn) {
1783         HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1784         HReg res = newVRegI(env);
1785         addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
1786         addInstr(env, ARMInstr_Call( ARMcc_AL, (Addr)fn,
1787                                      1, mk_RetLoc_simple(RLPri_Int) ));
1788         addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1789         return res;
1790      }
1791
1792      break;
1793   }
1794
1795   /* --------- GET --------- */
1796   case Iex_Get: {
1797      if (ty == Ity_I32
1798          && 0 == (e->Iex.Get.offset & 3)
1799          && e->Iex.Get.offset < 4096-4) {
1800         HReg dst = newVRegI(env);
1801         addInstr(env, ARMInstr_LdSt32(
1802                          ARMcc_AL, True/*isLoad*/,
1803                          dst,
1804                          ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
1805         return dst;
1806      }
1807//zz      if (ty == Ity_I8 || ty == Ity_I16) {
1808//zz         HReg dst = newVRegI(env);
1809//zz         addInstr(env, X86Instr_LoadEX(
1810//zz                          toUChar(ty==Ity_I8 ? 1 : 2),
1811//zz                          False,
1812//zz                          X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1813//zz                          dst));
1814//zz         return dst;
1815//zz      }
1816      break;
1817   }
1818
1819//zz   case Iex_GetI: {
1820//zz      X86AMode* am
1821//zz         = genGuestArrayOffset(
1822//zz              env, e->Iex.GetI.descr,
1823//zz                   e->Iex.GetI.ix, e->Iex.GetI.bias );
1824//zz      HReg dst = newVRegI(env);
1825//zz      if (ty == Ity_I8) {
1826//zz         addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1827//zz         return dst;
1828//zz      }
1829//zz      if (ty == Ity_I32) {
1830//zz         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1831//zz         return dst;
1832//zz      }
1833//zz      break;
1834//zz   }
1835
1836   /* --------- CCALL --------- */
1837   case Iex_CCall: {
1838      HReg    dst = newVRegI(env);
1839      vassert(ty == e->Iex.CCall.retty);
1840
1841      /* be very restrictive for now.  Only 32/64-bit ints allowed for
1842         args, and 32 bits for return type.  Don't forget to change
1843         the RetLoc if more types are allowed in future. */
1844      if (e->Iex.CCall.retty != Ity_I32)
1845         goto irreducible;
1846
1847      /* Marshal args, do the call, clear stack. */
1848      UInt   addToSp = 0;
1849      RetLoc rloc    = mk_RetLoc_INVALID();
1850      Bool   ok      = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
1851                                     e->Iex.CCall.cee, e->Iex.CCall.retty,
1852                                     e->Iex.CCall.args );
1853      /* */
1854      if (ok) {
1855         vassert(is_sane_RetLoc(rloc));
1856         vassert(rloc.pri == RLPri_Int);
1857         vassert(addToSp == 0);
1858         addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1859         return dst;
1860      }
1861      /* else fall through; will hit the irreducible: label */
1862   }
1863
1864   /* --------- LITERAL --------- */
1865   /* 32 literals */
1866   case Iex_Const: {
1867      UInt u   = 0;
1868      HReg dst = newVRegI(env);
1869      switch (e->Iex.Const.con->tag) {
1870         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1871         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1872         case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
1873         default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
1874      }
1875      addInstr(env, ARMInstr_Imm32(dst, u));
1876      return dst;
1877   }
1878
1879   /* --------- MULTIPLEX --------- */
1880   case Iex_ITE: { // VFD
1881      /* ITE(ccexpr, iftrue, iffalse) */
1882      if (ty == Ity_I32) {
1883         ARMCondCode cc;
1884         HReg     r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
1885         ARMRI84* r0  = iselIntExpr_RI84(NULL, False, env, e->Iex.ITE.iffalse);
1886         HReg     dst = newVRegI(env);
1887         addInstr(env, mk_iMOVds_RR(dst, r1));
1888         cc = iselCondCode(env, e->Iex.ITE.cond);
1889         addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
1890         return dst;
1891      }
1892      break;
1893   }
1894
1895   default:
1896   break;
1897   } /* switch (e->tag) */
1898
1899   /* We get here if no pattern matched. */
1900  irreducible:
1901   ppIRExpr(e);
1902   vpanic("iselIntExpr_R: cannot reduce tree");
1903}
1904
1905
1906/* -------------------- 64-bit -------------------- */
1907
/* Compute a 64-bit value into a register pair, which is returned via
   the first two parameters (rHi receives the high 32 bits, rLo the
   low 32 bits).  The wrapper below asserts that both are virtual
   32-bit integer registers.  As with iselIntExpr_R, they must not be
   changed by subsequent code emitted by the caller.  */
1912
1913static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1914{
1915   iselInt64Expr_wrk(rHi, rLo, env, e);
1916#  if 0
1917   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1918#  endif
1919   vassert(hregClass(*rHi) == HRcInt32);
1920   vassert(hregIsVirtual(*rHi));
1921   vassert(hregClass(*rLo) == HRcInt32);
1922   vassert(hregIsVirtual(*rLo));
1923}
1924
1925/* DO NOT CALL THIS DIRECTLY ! */
1926static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1927{
1928   vassert(e);
1929   vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
1930
1931   /* 64-bit literal */
1932   if (e->tag == Iex_Const) {
1933      ULong   w64 = e->Iex.Const.con->Ico.U64;
1934      UInt    wHi = toUInt(w64 >> 32);
1935      UInt    wLo = toUInt(w64);
1936      HReg    tHi = newVRegI(env);
1937      HReg    tLo = newVRegI(env);
1938      vassert(e->Iex.Const.con->tag == Ico_U64);
1939      addInstr(env, ARMInstr_Imm32(tHi, wHi));
1940      addInstr(env, ARMInstr_Imm32(tLo, wLo));
1941      *rHi = tHi;
1942      *rLo = tLo;
1943      return;
1944   }
1945
1946   /* read 64-bit IRTemp */
1947   if (e->tag == Iex_RdTmp) {
1948      if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1949         HReg tHi = newVRegI(env);
1950         HReg tLo = newVRegI(env);
1951         HReg tmp = iselNeon64Expr(env, e);
1952         addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1953         *rHi = tHi;
1954         *rLo = tLo;
1955      } else {
1956         lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
1957      }
1958      return;
1959   }
1960
1961   /* 64-bit load */
1962   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
1963      HReg      tLo, tHi, rA;
1964      vassert(e->Iex.Load.ty == Ity_I64);
1965      rA  = iselIntExpr_R(env, e->Iex.Load.addr);
1966      tHi = newVRegI(env);
1967      tLo = newVRegI(env);
1968      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
1969                                    tHi, ARMAMode1_RI(rA, 4)));
1970      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
1971                                    tLo, ARMAMode1_RI(rA, 0)));
1972      *rHi = tHi;
1973      *rLo = tLo;
1974      return;
1975   }
1976
1977   /* 64-bit GET */
1978   if (e->tag == Iex_Get) {
1979      ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
1980      ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
1981      HReg tHi = newVRegI(env);
1982      HReg tLo = newVRegI(env);
1983      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tHi, am4));
1984      addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tLo, am0));
1985      *rHi = tHi;
1986      *rLo = tLo;
1987      return;
1988   }
1989
1990   /* --------- BINARY ops --------- */
1991   if (e->tag == Iex_Binop) {
1992      switch (e->Iex.Binop.op) {
1993
1994         /* 32 x 32 -> 64 multiply */
1995         case Iop_MullS32:
1996         case Iop_MullU32: {
1997            HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1998            HReg     argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1999            HReg     tHi  = newVRegI(env);
2000            HReg     tLo  = newVRegI(env);
2001            ARMMulOp mop  = e->Iex.Binop.op == Iop_MullS32
2002                               ? ARMmul_SX : ARMmul_ZX;
2003            addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
2004            addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
2005            addInstr(env, ARMInstr_Mul(mop));
2006            addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
2007            addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
2008            *rHi = tHi;
2009            *rLo = tLo;
2010            return;
2011         }
2012
2013         case Iop_Or64: {
2014            HReg xLo, xHi, yLo, yHi;
2015            HReg tHi = newVRegI(env);
2016            HReg tLo = newVRegI(env);
2017            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2018            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2019            addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
2020            addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
2021            *rHi = tHi;
2022            *rLo = tLo;
2023            return;
2024         }
2025
2026         case Iop_Add64: {
2027            HReg xLo, xHi, yLo, yHi;
2028            HReg tHi = newVRegI(env);
2029            HReg tLo = newVRegI(env);
2030            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2031            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2032            addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
2033            addInstr(env, ARMInstr_Alu(ARMalu_ADC,  tHi, xHi, ARMRI84_R(yHi)));
2034            *rHi = tHi;
2035            *rLo = tLo;
2036            return;
2037         }
2038
2039         /* 32HLto64(e1,e2) */
2040         case Iop_32HLto64: {
2041            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2042            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2043            return;
2044         }
2045
2046         default:
2047            break;
2048      }
2049   }
2050
2051   /* --------- UNARY ops --------- */
2052   if (e->tag == Iex_Unop) {
2053      switch (e->Iex.Unop.op) {
2054
2055         /* ReinterpF64asI64 */
2056         case Iop_ReinterpF64asI64: {
2057            HReg dstHi = newVRegI(env);
2058            HReg dstLo = newVRegI(env);
2059            HReg src   = iselDblExpr(env, e->Iex.Unop.arg);
2060            addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
2061            *rHi = dstHi;
2062            *rLo = dstLo;
2063            return;
2064         }
2065
2066         /* Left64(e) */
2067         case Iop_Left64: {
2068            HReg yLo, yHi;
2069            HReg tHi  = newVRegI(env);
2070            HReg tLo  = newVRegI(env);
2071            HReg zero = newVRegI(env);
2072            /* yHi:yLo = arg */
2073            iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
2074            /* zero = 0 */
2075            addInstr(env, ARMInstr_Imm32(zero, 0));
2076            /* tLo = 0 - yLo, and set carry */
2077            addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
2078                                       tLo, zero, ARMRI84_R(yLo)));
2079            /* tHi = 0 - yHi - carry */
2080            addInstr(env, ARMInstr_Alu(ARMalu_SBC,
2081                                       tHi, zero, ARMRI84_R(yHi)));
2082            /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
2083               back in, so as to give the final result
2084               tHi:tLo = arg | -arg. */
2085            addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
2086            addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
2087            *rHi = tHi;
2088            *rLo = tLo;
2089            return;
2090         }
2091
2092         /* CmpwNEZ64(e) */
2093         case Iop_CmpwNEZ64: {
2094            HReg srcLo, srcHi;
2095            HReg tmp1 = newVRegI(env);
2096            HReg tmp2 = newVRegI(env);
2097            /* srcHi:srcLo = arg */
2098            iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2099            /* tmp1 = srcHi | srcLo */
2100            addInstr(env, ARMInstr_Alu(ARMalu_OR,
2101                                       tmp1, srcHi, ARMRI84_R(srcLo)));
2102            /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2103            addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
2104            addInstr(env, ARMInstr_Alu(ARMalu_OR,
2105                                       tmp2, tmp2, ARMRI84_R(tmp1)));
2106            addInstr(env, ARMInstr_Shift(ARMsh_SAR,
2107                                         tmp2, tmp2, ARMRI5_I5(31)));
2108            *rHi = tmp2;
2109            *rLo = tmp2;
2110            return;
2111         }
2112
2113         case Iop_1Sto64: {
2114            HReg        dst  = newVRegI(env);
2115            ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2116            ARMRI5*     amt  = ARMRI5_I5(31);
2117            /* This is really rough.  We could do much better here;
2118               perhaps mvn{cond} dst, #0 as the second insn?
2119               (same applies to 1Sto32) */
2120            addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2121            addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2122            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
2123            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
2124            *rHi = dst;
2125            *rLo = dst;
2126            return;
2127         }
2128
2129         default:
2130            break;
2131      }
2132   } /* if (e->tag == Iex_Unop) */
2133
2134   /* --------- MULTIPLEX --------- */
2135   if (e->tag == Iex_ITE) { // VFD
2136      IRType tyC;
2137      HReg   r1hi, r1lo, r0hi, r0lo, dstHi, dstLo;
2138      ARMCondCode cc;
2139      tyC = typeOfIRExpr(env->type_env,e->Iex.ITE.cond);
2140      vassert(tyC == Ity_I1);
2141      iselInt64Expr(&r1hi, &r1lo, env, e->Iex.ITE.iftrue);
2142      iselInt64Expr(&r0hi, &r0lo, env, e->Iex.ITE.iffalse);
2143      dstHi = newVRegI(env);
2144      dstLo = newVRegI(env);
2145      addInstr(env, mk_iMOVds_RR(dstHi, r1hi));
2146      addInstr(env, mk_iMOVds_RR(dstLo, r1lo));
2147      cc = iselCondCode(env, e->Iex.ITE.cond);
2148      addInstr(env, ARMInstr_CMov(cc ^ 1, dstHi, ARMRI84_R(r0hi)));
2149      addInstr(env, ARMInstr_CMov(cc ^ 1, dstLo, ARMRI84_R(r0lo)));
2150      *rHi = dstHi;
2151      *rLo = dstLo;
2152      return;
2153   }
2154
2155   /* It is convenient sometimes to call iselInt64Expr even when we
2156      have NEON support (e.g. in do_helper_call we need 64-bit
2157      arguments as 2 x 32 regs). */
2158   if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
2159      HReg tHi = newVRegI(env);
2160      HReg tLo = newVRegI(env);
2161      HReg tmp = iselNeon64Expr(env, e);
2162      addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2163      *rHi = tHi;
2164      *rLo = tLo;
2165      return ;
2166   }
2167
2168   ppIRExpr(e);
2169   vpanic("iselInt64Expr");
2170}
2171
2172
2173/*---------------------------------------------------------*/
2174/*--- ISEL: Vector (NEON) expressions (64 or 128 bit)   ---*/
2175/*---------------------------------------------------------*/
2176
2177static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
2178{
2179   HReg r;
2180   vassert(env->hwcaps & VEX_HWCAPS_ARM_NEON);
2181   r = iselNeon64Expr_wrk( env, e );
2182   vassert(hregClass(r) == HRcFlt64);
2183   vassert(hregIsVirtual(r));
2184   return r;
2185}
2186
2187/* DO NOT CALL THIS DIRECTLY */
2188static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
2189{
2190   IRType ty = typeOfIRExpr(env->type_env, e);
2191   MatchInfo mi;
2192   vassert(e);
2193   vassert(ty == Ity_I64);
2194
2195   if (e->tag == Iex_RdTmp) {
2196      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2197   }
2198
2199   if (e->tag == Iex_Const) {
2200      HReg rLo, rHi;
2201      HReg res = newVRegD(env);
2202      iselInt64Expr(&rHi, &rLo, env, e);
2203      addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2204      return res;
2205   }
2206
2207   /* 64-bit load */
2208   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2209      HReg res = newVRegD(env);
2210      ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2211      vassert(ty == Ity_I64);
2212      addInstr(env, ARMInstr_NLdStD(True, res, am));
2213      return res;
2214   }
2215
2216   /* 64-bit GET */
2217   if (e->tag == Iex_Get) {
2218      HReg addr = newVRegI(env);
2219      HReg res = newVRegD(env);
2220      vassert(ty == Ity_I64);
2221      addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2222      addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2223      return res;
2224   }
2225
2226   /* --------- BINARY ops --------- */
2227   if (e->tag == Iex_Binop) {
2228      switch (e->Iex.Binop.op) {
2229
2230         /* 32 x 32 -> 64 multiply */
2231         case Iop_MullS32:
2232         case Iop_MullU32: {
2233            HReg rLo, rHi;
2234            HReg res = newVRegD(env);
2235            iselInt64Expr(&rHi, &rLo, env, e);
2236            addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2237            return res;
2238         }
2239
2240         case Iop_And64: {
2241            HReg res = newVRegD(env);
2242            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2243            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2244            addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2245                                           res, argL, argR, 4, False));
2246            return res;
2247         }
2248         case Iop_Or64: {
2249            HReg res = newVRegD(env);
2250            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2251            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2252            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2253                                           res, argL, argR, 4, False));
2254            return res;
2255         }
2256         case Iop_Xor64: {
2257            HReg res = newVRegD(env);
2258            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2259            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2260            addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2261                                           res, argL, argR, 4, False));
2262            return res;
2263         }
2264
2265         /* 32HLto64(e1,e2) */
2266         case Iop_32HLto64: {
2267            HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2268            HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2269            HReg res = newVRegD(env);
2270            addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2271            return res;
2272         }
2273
2274         case Iop_Add8x8:
2275         case Iop_Add16x4:
2276         case Iop_Add32x2:
2277         case Iop_Add64: {
2278            HReg res = newVRegD(env);
2279            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2280            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2281            UInt size;
2282            switch (e->Iex.Binop.op) {
2283               case Iop_Add8x8: size = 0; break;
2284               case Iop_Add16x4: size = 1; break;
2285               case Iop_Add32x2: size = 2; break;
2286               case Iop_Add64: size = 3; break;
2287               default: vassert(0);
2288            }
2289            addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2290                                           res, argL, argR, size, False));
2291            return res;
2292         }
2293         case Iop_Add32Fx2: {
2294            HReg res = newVRegD(env);
2295            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2296            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2297            UInt size = 0;
2298            addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2299                                           res, argL, argR, size, False));
2300            return res;
2301         }
2302         case Iop_RecipStep32Fx2: {
2303            HReg res = newVRegD(env);
2304            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2305            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2306            UInt size = 0;
2307            addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2308                                           res, argL, argR, size, False));
2309            return res;
2310         }
2311         case Iop_RSqrtStep32Fx2: {
2312            HReg res = newVRegD(env);
2313            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2314            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2315            UInt size = 0;
2316            addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2317                                           res, argL, argR, size, False));
2318            return res;
2319         }
2320
2321         // These 6 verified 18 Apr 2013
2322         case Iop_InterleaveHI32x2:
2323         case Iop_InterleaveLO32x2:
2324         case Iop_InterleaveOddLanes8x8:
2325         case Iop_InterleaveEvenLanes8x8:
2326         case Iop_InterleaveOddLanes16x4:
2327         case Iop_InterleaveEvenLanes16x4: {
2328            HReg rD   = newVRegD(env);
2329            HReg rM   = newVRegD(env);
2330            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2331            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2332            UInt size;
2333            Bool resRd;  // is the result in rD or rM ?
2334            switch (e->Iex.Binop.op) {
2335               case Iop_InterleaveOddLanes8x8:   resRd = False; size = 0; break;
2336               case Iop_InterleaveEvenLanes8x8:  resRd = True;  size = 0; break;
2337               case Iop_InterleaveOddLanes16x4:  resRd = False; size = 1; break;
2338               case Iop_InterleaveEvenLanes16x4: resRd = True;  size = 1; break;
2339               case Iop_InterleaveHI32x2:        resRd = False; size = 2; break;
2340               case Iop_InterleaveLO32x2:        resRd = True;  size = 2; break;
2341               default: vassert(0);
2342            }
2343            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2344            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2345            addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, False));
2346            return resRd ? rD : rM;
2347         }
2348
2349         // These 4 verified 18 Apr 2013
2350         case Iop_InterleaveHI8x8:
2351         case Iop_InterleaveLO8x8:
2352         case Iop_InterleaveHI16x4:
2353         case Iop_InterleaveLO16x4: {
2354            HReg rD   = newVRegD(env);
2355            HReg rM   = newVRegD(env);
2356            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2357            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2358            UInt size;
2359            Bool resRd;  // is the result in rD or rM ?
2360            switch (e->Iex.Binop.op) {
2361               case Iop_InterleaveHI8x8:  resRd = False; size = 0; break;
2362               case Iop_InterleaveLO8x8:  resRd = True;  size = 0; break;
2363               case Iop_InterleaveHI16x4: resRd = False; size = 1; break;
2364               case Iop_InterleaveLO16x4: resRd = True;  size = 1; break;
2365               default: vassert(0);
2366            }
2367            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2368            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2369            addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, False));
2370            return resRd ? rD : rM;
2371         }
2372
2373         // These 4 verified 18 Apr 2013
2374         case Iop_CatOddLanes8x8:
2375         case Iop_CatEvenLanes8x8:
2376         case Iop_CatOddLanes16x4:
2377         case Iop_CatEvenLanes16x4: {
2378            HReg rD   = newVRegD(env);
2379            HReg rM   = newVRegD(env);
2380            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2381            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2382            UInt size;
2383            Bool resRd;  // is the result in rD or rM ?
2384            switch (e->Iex.Binop.op) {
2385               case Iop_CatOddLanes8x8:   resRd = False; size = 0; break;
2386               case Iop_CatEvenLanes8x8:  resRd = True;  size = 0; break;
2387               case Iop_CatOddLanes16x4:  resRd = False; size = 1; break;
2388               case Iop_CatEvenLanes16x4: resRd = True;  size = 1; break;
2389               default: vassert(0);
2390            }
2391            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2392            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2393            addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, False));
2394            return resRd ? rD : rM;
2395         }
2396
2397         case Iop_QAdd8Ux8:
2398         case Iop_QAdd16Ux4:
2399         case Iop_QAdd32Ux2:
2400         case Iop_QAdd64Ux1: {
2401            HReg res = newVRegD(env);
2402            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2403            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2404            UInt size;
2405            switch (e->Iex.Binop.op) {
2406               case Iop_QAdd8Ux8: size = 0; break;
2407               case Iop_QAdd16Ux4: size = 1; break;
2408               case Iop_QAdd32Ux2: size = 2; break;
2409               case Iop_QAdd64Ux1: size = 3; break;
2410               default: vassert(0);
2411            }
2412            addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2413                                           res, argL, argR, size, False));
2414            return res;
2415         }
2416         case Iop_QAdd8Sx8:
2417         case Iop_QAdd16Sx4:
2418         case Iop_QAdd32Sx2:
2419         case Iop_QAdd64Sx1: {
2420            HReg res = newVRegD(env);
2421            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2422            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2423            UInt size;
2424            switch (e->Iex.Binop.op) {
2425               case Iop_QAdd8Sx8: size = 0; break;
2426               case Iop_QAdd16Sx4: size = 1; break;
2427               case Iop_QAdd32Sx2: size = 2; break;
2428               case Iop_QAdd64Sx1: size = 3; break;
2429               default: vassert(0);
2430            }
2431            addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2432                                           res, argL, argR, size, False));
2433            return res;
2434         }
2435         case Iop_Sub8x8:
2436         case Iop_Sub16x4:
2437         case Iop_Sub32x2:
2438         case Iop_Sub64: {
2439            HReg res = newVRegD(env);
2440            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2441            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2442            UInt size;
2443            switch (e->Iex.Binop.op) {
2444               case Iop_Sub8x8: size = 0; break;
2445               case Iop_Sub16x4: size = 1; break;
2446               case Iop_Sub32x2: size = 2; break;
2447               case Iop_Sub64: size = 3; break;
2448               default: vassert(0);
2449            }
2450            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2451                                           res, argL, argR, size, False));
2452            return res;
2453         }
2454         case Iop_Sub32Fx2: {
2455            HReg res = newVRegD(env);
2456            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2457            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2458            UInt size = 0;
2459            addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2460                                           res, argL, argR, size, False));
2461            return res;
2462         }
2463         case Iop_QSub8Ux8:
2464         case Iop_QSub16Ux4:
2465         case Iop_QSub32Ux2:
2466         case Iop_QSub64Ux1: {
2467            HReg res = newVRegD(env);
2468            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2469            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2470            UInt size;
2471            switch (e->Iex.Binop.op) {
2472               case Iop_QSub8Ux8: size = 0; break;
2473               case Iop_QSub16Ux4: size = 1; break;
2474               case Iop_QSub32Ux2: size = 2; break;
2475               case Iop_QSub64Ux1: size = 3; break;
2476               default: vassert(0);
2477            }
2478            addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
2479                                           res, argL, argR, size, False));
2480            return res;
2481         }
2482         case Iop_QSub8Sx8:
2483         case Iop_QSub16Sx4:
2484         case Iop_QSub32Sx2:
2485         case Iop_QSub64Sx1: {
2486            HReg res = newVRegD(env);
2487            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2488            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2489            UInt size;
2490            switch (e->Iex.Binop.op) {
2491               case Iop_QSub8Sx8: size = 0; break;
2492               case Iop_QSub16Sx4: size = 1; break;
2493               case Iop_QSub32Sx2: size = 2; break;
2494               case Iop_QSub64Sx1: size = 3; break;
2495               default: vassert(0);
2496            }
2497            addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
2498                                           res, argL, argR, size, False));
2499            return res;
2500         }
2501         case Iop_Max8Ux8:
2502         case Iop_Max16Ux4:
2503         case Iop_Max32Ux2: {
2504            HReg res = newVRegD(env);
2505            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2506            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2507            UInt size;
2508            switch (e->Iex.Binop.op) {
2509               case Iop_Max8Ux8: size = 0; break;
2510               case Iop_Max16Ux4: size = 1; break;
2511               case Iop_Max32Ux2: size = 2; break;
2512               default: vassert(0);
2513            }
2514            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
2515                                           res, argL, argR, size, False));
2516            return res;
2517         }
2518         case Iop_Max8Sx8:
2519         case Iop_Max16Sx4:
2520         case Iop_Max32Sx2: {
2521            HReg res = newVRegD(env);
2522            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2523            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2524            UInt size;
2525            switch (e->Iex.Binop.op) {
2526               case Iop_Max8Sx8: size = 0; break;
2527               case Iop_Max16Sx4: size = 1; break;
2528               case Iop_Max32Sx2: size = 2; break;
2529               default: vassert(0);
2530            }
2531            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
2532                                           res, argL, argR, size, False));
2533            return res;
2534         }
2535         case Iop_Min8Ux8:
2536         case Iop_Min16Ux4:
2537         case Iop_Min32Ux2: {
2538            HReg res = newVRegD(env);
2539            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2540            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2541            UInt size;
2542            switch (e->Iex.Binop.op) {
2543               case Iop_Min8Ux8: size = 0; break;
2544               case Iop_Min16Ux4: size = 1; break;
2545               case Iop_Min32Ux2: size = 2; break;
2546               default: vassert(0);
2547            }
2548            addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
2549                                           res, argL, argR, size, False));
2550            return res;
2551         }
2552         case Iop_Min8Sx8:
2553         case Iop_Min16Sx4:
2554         case Iop_Min32Sx2: {
2555            HReg res = newVRegD(env);
2556            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2557            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2558            UInt size;
2559            switch (e->Iex.Binop.op) {
2560               case Iop_Min8Sx8: size = 0; break;
2561               case Iop_Min16Sx4: size = 1; break;
2562               case Iop_Min32Sx2: size = 2; break;
2563               default: vassert(0);
2564            }
2565            addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
2566                                           res, argL, argR, size, False));
2567            return res;
2568         }
2569         case Iop_Sar8x8:
2570         case Iop_Sar16x4:
2571         case Iop_Sar32x2: {
2572            HReg res = newVRegD(env);
2573            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2574            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2575            HReg argR2 = newVRegD(env);
2576            HReg zero = newVRegD(env);
2577            UInt size;
2578            switch (e->Iex.Binop.op) {
2579               case Iop_Sar8x8: size = 0; break;
2580               case Iop_Sar16x4: size = 1; break;
2581               case Iop_Sar32x2: size = 2; break;
2582               case Iop_Sar64: size = 3; break;
2583               default: vassert(0);
2584            }
2585            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2586            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2587                                           argR2, zero, argR, size, False));
2588            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2589                                          res, argL, argR2, size, False));
2590            return res;
2591         }
2592         case Iop_Sal8x8:
2593         case Iop_Sal16x4:
2594         case Iop_Sal32x2:
2595         case Iop_Sal64x1: {
2596            HReg res = newVRegD(env);
2597            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2598            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2599            UInt size;
2600            switch (e->Iex.Binop.op) {
2601               case Iop_Sal8x8: size = 0; break;
2602               case Iop_Sal16x4: size = 1; break;
2603               case Iop_Sal32x2: size = 2; break;
2604               case Iop_Sal64x1: size = 3; break;
2605               default: vassert(0);
2606            }
2607            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2608                                          res, argL, argR, size, False));
2609            return res;
2610         }
2611         case Iop_Shr8x8:
2612         case Iop_Shr16x4:
2613         case Iop_Shr32x2: {
2614            HReg res = newVRegD(env);
2615            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2616            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2617            HReg argR2 = newVRegD(env);
2618            HReg zero = newVRegD(env);
2619            UInt size;
2620            switch (e->Iex.Binop.op) {
2621               case Iop_Shr8x8: size = 0; break;
2622               case Iop_Shr16x4: size = 1; break;
2623               case Iop_Shr32x2: size = 2; break;
2624               default: vassert(0);
2625            }
2626            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2627            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2628                                           argR2, zero, argR, size, False));
2629            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2630                                          res, argL, argR2, size, False));
2631            return res;
2632         }
2633         case Iop_Shl8x8:
2634         case Iop_Shl16x4:
2635         case Iop_Shl32x2: {
2636            HReg res = newVRegD(env);
2637            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2638            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2639            UInt size;
2640            switch (e->Iex.Binop.op) {
2641               case Iop_Shl8x8: size = 0; break;
2642               case Iop_Shl16x4: size = 1; break;
2643               case Iop_Shl32x2: size = 2; break;
2644               default: vassert(0);
2645            }
2646            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2647                                          res, argL, argR, size, False));
2648            return res;
2649         }
2650         case Iop_QShl8x8:
2651         case Iop_QShl16x4:
2652         case Iop_QShl32x2:
2653         case Iop_QShl64x1: {
2654            HReg res = newVRegD(env);
2655            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2656            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2657            UInt size;
2658            switch (e->Iex.Binop.op) {
2659               case Iop_QShl8x8: size = 0; break;
2660               case Iop_QShl16x4: size = 1; break;
2661               case Iop_QShl32x2: size = 2; break;
2662               case Iop_QShl64x1: size = 3; break;
2663               default: vassert(0);
2664            }
2665            addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
2666                                          res, argL, argR, size, False));
2667            return res;
2668         }
2669         case Iop_QSal8x8:
2670         case Iop_QSal16x4:
2671         case Iop_QSal32x2:
2672         case Iop_QSal64x1: {
2673            HReg res = newVRegD(env);
2674            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2675            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2676            UInt size;
2677            switch (e->Iex.Binop.op) {
2678               case Iop_QSal8x8: size = 0; break;
2679               case Iop_QSal16x4: size = 1; break;
2680               case Iop_QSal32x2: size = 2; break;
2681               case Iop_QSal64x1: size = 3; break;
2682               default: vassert(0);
2683            }
2684            addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
2685                                          res, argL, argR, size, False));
2686            return res;
2687         }
2688         case Iop_QShlNsatUU8x8:
2689         case Iop_QShlNsatUU16x4:
2690         case Iop_QShlNsatUU32x2:
2691         case Iop_QShlNsatUU64x1: {
2692            HReg res = newVRegD(env);
2693            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2694            UInt size, imm;
2695            if (e->Iex.Binop.arg2->tag != Iex_Const ||
2696                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2697               vpanic("ARM target supports Iop_QShlNsatUUAxB with constant "
2698                      "second argument only\n");
2699            }
2700            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2701            switch (e->Iex.Binop.op) {
2702               case Iop_QShlNsatUU8x8: size = 8 | imm; break;
2703               case Iop_QShlNsatUU16x4: size = 16 | imm; break;
2704               case Iop_QShlNsatUU32x2: size = 32 | imm; break;
2705               case Iop_QShlNsatUU64x1: size = 64 | imm; break;
2706               default: vassert(0);
2707            }
2708            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
2709                                          res, argL, size, False));
2710            return res;
2711         }
2712         case Iop_QShlNsatSU8x8:
2713         case Iop_QShlNsatSU16x4:
2714         case Iop_QShlNsatSU32x2:
2715         case Iop_QShlNsatSU64x1: {
2716            HReg res = newVRegD(env);
2717            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2718            UInt size, imm;
2719            if (e->Iex.Binop.arg2->tag != Iex_Const ||
2720                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2721               vpanic("ARM target supports Iop_QShlNsatSUAxB with constant "
2722                      "second argument only\n");
2723            }
2724            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2725            switch (e->Iex.Binop.op) {
2726               case Iop_QShlNsatSU8x8: size = 8 | imm; break;
2727               case Iop_QShlNsatSU16x4: size = 16 | imm; break;
2728               case Iop_QShlNsatSU32x2: size = 32 | imm; break;
2729               case Iop_QShlNsatSU64x1: size = 64 | imm; break;
2730               default: vassert(0);
2731            }
2732            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
2733                                          res, argL, size, False));
2734            return res;
2735         }
2736         case Iop_QShlNsatSS8x8:
2737         case Iop_QShlNsatSS16x4:
2738         case Iop_QShlNsatSS32x2:
2739         case Iop_QShlNsatSS64x1: {
2740            HReg res = newVRegD(env);
2741            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2742            UInt size, imm;
2743            if (e->Iex.Binop.arg2->tag != Iex_Const ||
2744                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2745               vpanic("ARM target supports Iop_QShlNsatSSAxB with constant "
2746                      "second argument only\n");
2747            }
2748            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2749            switch (e->Iex.Binop.op) {
2750               case Iop_QShlNsatSS8x8: size = 8 | imm; break;
2751               case Iop_QShlNsatSS16x4: size = 16 | imm; break;
2752               case Iop_QShlNsatSS32x2: size = 32 | imm; break;
2753               case Iop_QShlNsatSS64x1: size = 64 | imm; break;
2754               default: vassert(0);
2755            }
2756            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
2757                                          res, argL, size, False));
2758            return res;
2759         }
2760         case Iop_ShrN8x8:
2761         case Iop_ShrN16x4:
2762         case Iop_ShrN32x2:
2763         case Iop_Shr64: {
2764            HReg res = newVRegD(env);
2765            HReg tmp = newVRegD(env);
2766            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2767            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2768            HReg argR2 = newVRegI(env);
2769            UInt size;
2770            switch (e->Iex.Binop.op) {
2771               case Iop_ShrN8x8: size = 0; break;
2772               case Iop_ShrN16x4: size = 1; break;
2773               case Iop_ShrN32x2: size = 2; break;
2774               case Iop_Shr64: size = 3; break;
2775               default: vassert(0);
2776            }
2777            addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2778            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2779            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2780                                          res, argL, tmp, size, False));
2781            return res;
2782         }
2783         case Iop_ShlN8x8:
2784         case Iop_ShlN16x4:
2785         case Iop_ShlN32x2:
2786         case Iop_Shl64: {
2787            HReg res = newVRegD(env);
2788            HReg tmp = newVRegD(env);
2789            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2790            /* special-case Shl64(x, imm8) since the Neon front
2791               end produces a lot of those for V{LD,ST}{1,2,3,4}. */
2792            if (e->Iex.Binop.op == Iop_Shl64
2793                && e->Iex.Binop.arg2->tag == Iex_Const) {
2794               vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
2795               Int nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2796               if (nshift >= 1 && nshift <= 63) {
2797                  addInstr(env, ARMInstr_NShl64(res, argL, nshift));
2798                  return res;
2799               }
2800               /* else fall through to general case */
2801            }
2802            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2803            UInt size;
2804            switch (e->Iex.Binop.op) {
2805               case Iop_ShlN8x8:  size = 0; break;
2806               case Iop_ShlN16x4: size = 1; break;
2807               case Iop_ShlN32x2: size = 2; break;
2808               case Iop_Shl64:    size = 3; break;
2809               default: vassert(0);
2810            }
2811            addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
2812                                          tmp, argR, 0, False));
2813            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2814                                          res, argL, tmp, size, False));
2815            return res;
2816         }
2817         case Iop_SarN8x8:
2818         case Iop_SarN16x4:
2819         case Iop_SarN32x2:
2820         case Iop_Sar64: {
2821            HReg res = newVRegD(env);
2822            HReg tmp = newVRegD(env);
2823            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2824            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2825            HReg argR2 = newVRegI(env);
2826            UInt size;
2827            switch (e->Iex.Binop.op) {
2828               case Iop_SarN8x8: size = 0; break;
2829               case Iop_SarN16x4: size = 1; break;
2830               case Iop_SarN32x2: size = 2; break;
2831               case Iop_Sar64: size = 3; break;
2832               default: vassert(0);
2833            }
2834            addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2835            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2836            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2837                                          res, argL, tmp, size, False));
2838            return res;
2839         }
2840         case Iop_CmpGT8Ux8:
2841         case Iop_CmpGT16Ux4:
2842         case Iop_CmpGT32Ux2: {
2843            HReg res = newVRegD(env);
2844            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2845            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2846            UInt size;
2847            switch (e->Iex.Binop.op) {
2848               case Iop_CmpGT8Ux8: size = 0; break;
2849               case Iop_CmpGT16Ux4: size = 1; break;
2850               case Iop_CmpGT32Ux2: size = 2; break;
2851               default: vassert(0);
2852            }
2853            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
2854                                           res, argL, argR, size, False));
2855            return res;
2856         }
2857         case Iop_CmpGT8Sx8:
2858         case Iop_CmpGT16Sx4:
2859         case Iop_CmpGT32Sx2: {
2860            HReg res = newVRegD(env);
2861            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2862            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2863            UInt size;
2864            switch (e->Iex.Binop.op) {
2865               case Iop_CmpGT8Sx8: size = 0; break;
2866               case Iop_CmpGT16Sx4: size = 1; break;
2867               case Iop_CmpGT32Sx2: size = 2; break;
2868               default: vassert(0);
2869            }
2870            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
2871                                           res, argL, argR, size, False));
2872            return res;
2873         }
2874         case Iop_CmpEQ8x8:
2875         case Iop_CmpEQ16x4:
2876         case Iop_CmpEQ32x2: {
2877            HReg res = newVRegD(env);
2878            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2879            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2880            UInt size;
2881            switch (e->Iex.Binop.op) {
2882               case Iop_CmpEQ8x8: size = 0; break;
2883               case Iop_CmpEQ16x4: size = 1; break;
2884               case Iop_CmpEQ32x2: size = 2; break;
2885               default: vassert(0);
2886            }
2887            addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
2888                                           res, argL, argR, size, False));
2889            return res;
2890         }
2891         case Iop_Mul8x8:
2892         case Iop_Mul16x4:
2893         case Iop_Mul32x2: {
2894            HReg res = newVRegD(env);
2895            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2896            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2897            UInt size = 0;
2898            switch(e->Iex.Binop.op) {
2899               case Iop_Mul8x8: size = 0; break;
2900               case Iop_Mul16x4: size = 1; break;
2901               case Iop_Mul32x2: size = 2; break;
2902               default: vassert(0);
2903            }
2904            addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
2905                                           res, argL, argR, size, False));
2906            return res;
2907         }
2908         case Iop_Mul32Fx2: {
2909            HReg res = newVRegD(env);
2910            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2911            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2912            UInt size = 0;
2913            addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
2914                                           res, argL, argR, size, False));
2915            return res;
2916         }
2917         case Iop_QDMulHi16Sx4:
2918         case Iop_QDMulHi32Sx2: {
2919            HReg res = newVRegD(env);
2920            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2921            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2922            UInt size = 0;
2923            switch(e->Iex.Binop.op) {
2924               case Iop_QDMulHi16Sx4: size = 1; break;
2925               case Iop_QDMulHi32Sx2: size = 2; break;
2926               default: vassert(0);
2927            }
2928            addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
2929                                           res, argL, argR, size, False));
2930            return res;
2931         }
2932
2933         case Iop_QRDMulHi16Sx4:
2934         case Iop_QRDMulHi32Sx2: {
2935            HReg res = newVRegD(env);
2936            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2937            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2938            UInt size = 0;
2939            switch(e->Iex.Binop.op) {
2940               case Iop_QRDMulHi16Sx4: size = 1; break;
2941               case Iop_QRDMulHi32Sx2: size = 2; break;
2942               default: vassert(0);
2943            }
2944            addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
2945                                           res, argL, argR, size, False));
2946            return res;
2947         }
2948
2949         case Iop_PwAdd8x8:
2950         case Iop_PwAdd16x4:
2951         case Iop_PwAdd32x2: {
2952            HReg res = newVRegD(env);
2953            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2954            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2955            UInt size = 0;
2956            switch(e->Iex.Binop.op) {
2957               case Iop_PwAdd8x8: size = 0; break;
2958               case Iop_PwAdd16x4: size = 1; break;
2959               case Iop_PwAdd32x2: size = 2; break;
2960               default: vassert(0);
2961            }
2962            addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
2963                                           res, argL, argR, size, False));
2964            return res;
2965         }
2966         case Iop_PwAdd32Fx2: {
2967            HReg res = newVRegD(env);
2968            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2969            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2970            UInt size = 0;
2971            addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
2972                                           res, argL, argR, size, False));
2973            return res;
2974         }
2975         case Iop_PwMin8Ux8:
2976         case Iop_PwMin16Ux4:
2977         case Iop_PwMin32Ux2: {
2978            HReg res = newVRegD(env);
2979            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2980            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2981            UInt size = 0;
2982            switch(e->Iex.Binop.op) {
2983               case Iop_PwMin8Ux8: size = 0; break;
2984               case Iop_PwMin16Ux4: size = 1; break;
2985               case Iop_PwMin32Ux2: size = 2; break;
2986               default: vassert(0);
2987            }
2988            addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
2989                                           res, argL, argR, size, False));
2990            return res;
2991         }
2992         case Iop_PwMin8Sx8:
2993         case Iop_PwMin16Sx4:
2994         case Iop_PwMin32Sx2: {
2995            HReg res = newVRegD(env);
2996            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2997            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2998            UInt size = 0;
2999            switch(e->Iex.Binop.op) {
3000               case Iop_PwMin8Sx8: size = 0; break;
3001               case Iop_PwMin16Sx4: size = 1; break;
3002               case Iop_PwMin32Sx2: size = 2; break;
3003               default: vassert(0);
3004            }
3005            addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
3006                                           res, argL, argR, size, False));
3007            return res;
3008         }
3009         case Iop_PwMax8Ux8:
3010         case Iop_PwMax16Ux4:
3011         case Iop_PwMax32Ux2: {
3012            HReg res = newVRegD(env);
3013            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3014            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3015            UInt size = 0;
3016            switch(e->Iex.Binop.op) {
3017               case Iop_PwMax8Ux8: size = 0; break;
3018               case Iop_PwMax16Ux4: size = 1; break;
3019               case Iop_PwMax32Ux2: size = 2; break;
3020               default: vassert(0);
3021            }
3022            addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
3023                                           res, argL, argR, size, False));
3024            return res;
3025         }
3026         case Iop_PwMax8Sx8:
3027         case Iop_PwMax16Sx4:
3028         case Iop_PwMax32Sx2: {
3029            HReg res = newVRegD(env);
3030            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3031            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3032            UInt size = 0;
3033            switch(e->Iex.Binop.op) {
3034               case Iop_PwMax8Sx8: size = 0; break;
3035               case Iop_PwMax16Sx4: size = 1; break;
3036               case Iop_PwMax32Sx2: size = 2; break;
3037               default: vassert(0);
3038            }
3039            addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
3040                                           res, argL, argR, size, False));
3041            return res;
3042         }
3043         case Iop_Perm8x8: {
3044            HReg res = newVRegD(env);
3045            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3046            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3047            addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
3048                                           res, argL, argR, 0, False));
3049            return res;
3050         }
3051         case Iop_PolynomialMul8x8: {
3052            HReg res = newVRegD(env);
3053            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3054            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3055            UInt size = 0;
3056            addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
3057                                           res, argL, argR, size, False));
3058            return res;
3059         }
3060         case Iop_Max32Fx2: {
3061            HReg res = newVRegD(env);
3062            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3063            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3064            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
3065                                           res, argL, argR, 2, False));
3066            return res;
3067         }
3068         case Iop_Min32Fx2: {
3069            HReg res = newVRegD(env);
3070            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3071            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3072            addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
3073                                           res, argL, argR, 2, False));
3074            return res;
3075         }
3076         case Iop_PwMax32Fx2: {
3077            HReg res = newVRegD(env);
3078            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3079            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3080            addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
3081                                           res, argL, argR, 2, False));
3082            return res;
3083         }
3084         case Iop_PwMin32Fx2: {
3085            HReg res = newVRegD(env);
3086            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3087            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3088            addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
3089                                           res, argL, argR, 2, False));
3090            return res;
3091         }
3092         case Iop_CmpGT32Fx2: {
3093            HReg res = newVRegD(env);
3094            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3095            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3096            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
3097                                           res, argL, argR, 2, False));
3098            return res;
3099         }
3100         case Iop_CmpGE32Fx2: {
3101            HReg res = newVRegD(env);
3102            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3103            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3104            addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
3105                                           res, argL, argR, 2, False));
3106            return res;
3107         }
3108         case Iop_CmpEQ32Fx2: {
3109            HReg res = newVRegD(env);
3110            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3111            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3112            addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3113                                           res, argL, argR, 2, False));
3114            return res;
3115         }
3116         case Iop_F32ToFixed32Ux2_RZ:
3117         case Iop_F32ToFixed32Sx2_RZ:
3118         case Iop_Fixed32UToF32x2_RN:
3119         case Iop_Fixed32SToF32x2_RN: {
3120            HReg res = newVRegD(env);
3121            HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3122            ARMNeonUnOp op;
3123            UInt imm6;
3124            if (e->Iex.Binop.arg2->tag != Iex_Const ||
3125               typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3126                  vpanic("ARM supports FP <-> Fixed conversion with constant "
3127                         "second argument less than 33 only\n");
3128            }
3129            imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3130            vassert(imm6 <= 32 && imm6 > 0);
3131            imm6 = 64 - imm6;
3132            switch(e->Iex.Binop.op) {
3133               case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3134               case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3135               case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3136               case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3137               default: vassert(0);
3138            }
3139            addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3140            return res;
3141         }
3142         /*
3143         FIXME: is this here or not?
3144         case Iop_VDup8x8:
3145         case Iop_VDup16x4:
3146         case Iop_VDup32x2: {
3147            HReg res = newVRegD(env);
3148            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3149            UInt index;
3150            UInt imm4;
3151            UInt size = 0;
3152            if (e->Iex.Binop.arg2->tag != Iex_Const ||
3153               typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3154                  vpanic("ARM supports Iop_VDup with constant "
3155                         "second argument less than 16 only\n");
3156            }
3157            index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3158            switch(e->Iex.Binop.op) {
3159               case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3160               case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3161               case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3162               default: vassert(0);
3163            }
3164            if (imm4 >= 16) {
3165               vpanic("ARM supports Iop_VDup with constant "
3166                      "second argument less than 16 only\n");
3167            }
3168            addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3169                                          res, argL, imm4, False));
3170            return res;
3171         }
3172         */
3173         default:
3174            break;
3175      }
3176   }
3177
3178   /* --------- UNARY ops --------- */
3179   if (e->tag == Iex_Unop) {
3180      switch (e->Iex.Unop.op) {
3181
3182         /* 32Uto64 */
3183         case Iop_32Uto64: {
3184            HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3185            HReg rHi = newVRegI(env);
3186            HReg res = newVRegD(env);
3187            addInstr(env, ARMInstr_Imm32(rHi, 0));
3188            addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3189            return res;
3190         }
3191
3192         /* 32Sto64 */
3193         case Iop_32Sto64: {
3194            HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3195            HReg rHi = newVRegI(env);
3196            addInstr(env, mk_iMOVds_RR(rHi, rLo));
3197            addInstr(env, ARMInstr_Shift(ARMsh_SAR, rHi, rHi, ARMRI5_I5(31)));
3198            HReg res = newVRegD(env);
3199            addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3200            return res;
3201         }
3202
3203         /* The next 3 are pass-throughs */
3204         /* ReinterpF64asI64 */
3205         case Iop_ReinterpF64asI64:
3206         /* Left64(e) */
3207         case Iop_Left64:
         /* 1Sto64(e) */
3209         case Iop_1Sto64: {
3210            HReg rLo, rHi;
3211            HReg res = newVRegD(env);
3212            iselInt64Expr(&rHi, &rLo, env, e);
3213            addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3214            return res;
3215         }
3216
3217         case Iop_Not64: {
3218            DECLARE_PATTERN(p_veqz_8x8);
3219            DECLARE_PATTERN(p_veqz_16x4);
3220            DECLARE_PATTERN(p_veqz_32x2);
3221            DECLARE_PATTERN(p_vcge_8sx8);
3222            DECLARE_PATTERN(p_vcge_16sx4);
3223            DECLARE_PATTERN(p_vcge_32sx2);
3224            DECLARE_PATTERN(p_vcge_8ux8);
3225            DECLARE_PATTERN(p_vcge_16ux4);
3226            DECLARE_PATTERN(p_vcge_32ux2);
3227            DEFINE_PATTERN(p_veqz_8x8,
3228                  unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3229            DEFINE_PATTERN(p_veqz_16x4,
3230                  unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3231            DEFINE_PATTERN(p_veqz_32x2,
3232                  unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3233            DEFINE_PATTERN(p_vcge_8sx8,
3234                  unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3235            DEFINE_PATTERN(p_vcge_16sx4,
3236                  unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3237            DEFINE_PATTERN(p_vcge_32sx2,
3238                  unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3239            DEFINE_PATTERN(p_vcge_8ux8,
3240                  unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3241            DEFINE_PATTERN(p_vcge_16ux4,
3242                  unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3243            DEFINE_PATTERN(p_vcge_32ux2,
3244                  unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3245            if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3246               HReg res = newVRegD(env);
3247               HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3248               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3249               return res;
3250            } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3251               HReg res = newVRegD(env);
3252               HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3253               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3254               return res;
3255            } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3256               HReg res = newVRegD(env);
3257               HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3258               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3259               return res;
3260            } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3261               HReg res = newVRegD(env);
3262               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3263               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3264               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3265                                              res, argL, argR, 0, False));
3266               return res;
3267            } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3268               HReg res = newVRegD(env);
3269               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3270               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3271               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3272                                              res, argL, argR, 1, False));
3273               return res;
3274            } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3275               HReg res = newVRegD(env);
3276               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3277               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3278               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3279                                              res, argL, argR, 2, False));
3280               return res;
3281            } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3282               HReg res = newVRegD(env);
3283               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3284               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3285               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3286                                              res, argL, argR, 0, False));
3287               return res;
3288            } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3289               HReg res = newVRegD(env);
3290               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3291               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3292               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3293                                              res, argL, argR, 1, False));
3294               return res;
3295            } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3296               HReg res = newVRegD(env);
3297               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3298               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3299               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3300                                              res, argL, argR, 2, False));
3301               return res;
3302            } else {
3303               HReg res = newVRegD(env);
3304               HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3305               addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3306               return res;
3307            }
3308         }
3309         case Iop_Dup8x8:
3310         case Iop_Dup16x4:
3311         case Iop_Dup32x2: {
3312            HReg res, arg;
3313            UInt size;
3314            DECLARE_PATTERN(p_vdup_8x8);
3315            DECLARE_PATTERN(p_vdup_16x4);
3316            DECLARE_PATTERN(p_vdup_32x2);
3317            DEFINE_PATTERN(p_vdup_8x8,
3318                  unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3319            DEFINE_PATTERN(p_vdup_16x4,
3320                  unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3321            DEFINE_PATTERN(p_vdup_32x2,
3322                  unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3323            if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3324               UInt index;
3325               UInt imm4;
3326               if (mi.bindee[1]->tag == Iex_Const &&
3327                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3328                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3329                  imm4 = (index << 1) + 1;
3330                  if (index < 8) {
3331                     res = newVRegD(env);
3332                     arg = iselNeon64Expr(env, mi.bindee[0]);
3333                     addInstr(env, ARMInstr_NUnaryS(
3334                                      ARMneon_VDUP,
3335                                      mkARMNRS(ARMNRS_Reg, res, 0),
3336                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3337                                      imm4, False
3338                             ));
3339                     return res;
3340                  }
3341               }
3342            } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3343               UInt index;
3344               UInt imm4;
3345               if (mi.bindee[1]->tag == Iex_Const &&
3346                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3347                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3348                  imm4 = (index << 2) + 2;
3349                  if (index < 4) {
3350                     res = newVRegD(env);
3351                     arg = iselNeon64Expr(env, mi.bindee[0]);
3352                     addInstr(env, ARMInstr_NUnaryS(
3353                                      ARMneon_VDUP,
3354                                      mkARMNRS(ARMNRS_Reg, res, 0),
3355                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3356                                      imm4, False
3357                             ));
3358                     return res;
3359                  }
3360               }
3361            } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3362               UInt index;
3363               UInt imm4;
3364               if (mi.bindee[1]->tag == Iex_Const &&
3365                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3366                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3367                  imm4 = (index << 3) + 4;
3368                  if (index < 2) {
3369                     res = newVRegD(env);
3370                     arg = iselNeon64Expr(env, mi.bindee[0]);
3371                     addInstr(env, ARMInstr_NUnaryS(
3372                                      ARMneon_VDUP,
3373                                      mkARMNRS(ARMNRS_Reg, res, 0),
3374                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3375                                      imm4, False
3376                             ));
3377                     return res;
3378                  }
3379               }
3380            }
3381            arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3382            res = newVRegD(env);
3383            switch (e->Iex.Unop.op) {
3384               case Iop_Dup8x8: size = 0; break;
3385               case Iop_Dup16x4: size = 1; break;
3386               case Iop_Dup32x2: size = 2; break;
3387               default: vassert(0);
3388            }
3389            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3390            return res;
3391         }
3392         case Iop_Abs8x8:
3393         case Iop_Abs16x4:
3394         case Iop_Abs32x2: {
3395            HReg res = newVRegD(env);
3396            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3397            UInt size = 0;
3398            switch(e->Iex.Binop.op) {
3399               case Iop_Abs8x8: size = 0; break;
3400               case Iop_Abs16x4: size = 1; break;
3401               case Iop_Abs32x2: size = 2; break;
3402               default: vassert(0);
3403            }
3404            addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3405            return res;
3406         }
3407         case Iop_Reverse8sIn64_x1:
3408         case Iop_Reverse16sIn64_x1:
3409         case Iop_Reverse32sIn64_x1: {
3410            HReg res = newVRegD(env);
3411            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3412            UInt size = 0;
3413            switch(e->Iex.Binop.op) {
3414               case Iop_Reverse8sIn64_x1: size = 0; break;
3415               case Iop_Reverse16sIn64_x1: size = 1; break;
3416               case Iop_Reverse32sIn64_x1: size = 2; break;
3417               default: vassert(0);
3418            }
3419            addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3420                                          res, arg, size, False));
3421            return res;
3422         }
3423         case Iop_Reverse8sIn32_x2:
3424         case Iop_Reverse16sIn32_x2: {
3425            HReg res = newVRegD(env);
3426            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3427            UInt size = 0;
3428            switch(e->Iex.Binop.op) {
3429               case Iop_Reverse8sIn32_x2: size = 0; break;
3430               case Iop_Reverse16sIn32_x2: size = 1; break;
3431               default: vassert(0);
3432            }
3433            addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3434                                          res, arg, size, False));
3435            return res;
3436         }
3437         case Iop_Reverse8sIn16_x4: {
3438            HReg res = newVRegD(env);
3439            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3440            UInt size = 0;
3441            addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3442                                          res, arg, size, False));
3443            return res;
3444         }
3445         case Iop_CmpwNEZ64: {
3446            HReg x_lsh = newVRegD(env);
3447            HReg x_rsh = newVRegD(env);
3448            HReg lsh_amt = newVRegD(env);
3449            HReg rsh_amt = newVRegD(env);
3450            HReg zero = newVRegD(env);
3451            HReg tmp = newVRegD(env);
3452            HReg tmp2 = newVRegD(env);
3453            HReg res = newVRegD(env);
3454            HReg x = newVRegD(env);
3455            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3456            addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3457            addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3458            addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3459            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3460            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3461                                           rsh_amt, zero, lsh_amt, 2, False));
3462            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3463                                          x_lsh, x, lsh_amt, 3, False));
3464            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3465                                          x_rsh, x, rsh_amt, 3, False));
3466            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3467                                           tmp, x_lsh, x_rsh, 0, False));
3468            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3469                                           res, tmp, x, 0, False));
3470            return res;
3471         }
3472         case Iop_CmpNEZ8x8:
3473         case Iop_CmpNEZ16x4:
3474         case Iop_CmpNEZ32x2: {
3475            HReg res = newVRegD(env);
3476            HReg tmp = newVRegD(env);
3477            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3478            UInt size;
3479            switch (e->Iex.Unop.op) {
3480               case Iop_CmpNEZ8x8: size = 0; break;
3481               case Iop_CmpNEZ16x4: size = 1; break;
3482               case Iop_CmpNEZ32x2: size = 2; break;
3483               default: vassert(0);
3484            }
3485            addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
3486            addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
3487            return res;
3488         }
3489         case Iop_NarrowUn16to8x8:
3490         case Iop_NarrowUn32to16x4:
3491         case Iop_NarrowUn64to32x2: {
3492            HReg res = newVRegD(env);
3493            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3494            UInt size = 0;
3495            switch(e->Iex.Binop.op) {
3496               case Iop_NarrowUn16to8x8:  size = 0; break;
3497               case Iop_NarrowUn32to16x4: size = 1; break;
3498               case Iop_NarrowUn64to32x2: size = 2; break;
3499               default: vassert(0);
3500            }
3501            addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
3502                                          res, arg, size, False));
3503            return res;
3504         }
3505         case Iop_QNarrowUn16Sto8Sx8:
3506         case Iop_QNarrowUn32Sto16Sx4:
3507         case Iop_QNarrowUn64Sto32Sx2: {
3508            HReg res = newVRegD(env);
3509            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3510            UInt size = 0;
3511            switch(e->Iex.Binop.op) {
3512               case Iop_QNarrowUn16Sto8Sx8:  size = 0; break;
3513               case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
3514               case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
3515               default: vassert(0);
3516            }
3517            addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
3518                                          res, arg, size, False));
3519            return res;
3520         }
3521         case Iop_QNarrowUn16Sto8Ux8:
3522         case Iop_QNarrowUn32Sto16Ux4:
3523         case Iop_QNarrowUn64Sto32Ux2: {
3524            HReg res = newVRegD(env);
3525            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3526            UInt size = 0;
3527            switch(e->Iex.Binop.op) {
3528               case Iop_QNarrowUn16Sto8Ux8:  size = 0; break;
3529               case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
3530               case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
3531               default: vassert(0);
3532            }
3533            addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
3534                                          res, arg, size, False));
3535            return res;
3536         }
3537         case Iop_QNarrowUn16Uto8Ux8:
3538         case Iop_QNarrowUn32Uto16Ux4:
3539         case Iop_QNarrowUn64Uto32Ux2: {
3540            HReg res = newVRegD(env);
3541            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3542            UInt size = 0;
3543            switch(e->Iex.Binop.op) {
3544               case Iop_QNarrowUn16Uto8Ux8:  size = 0; break;
3545               case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
3546               case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
3547               default: vassert(0);
3548            }
3549            addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
3550                                          res, arg, size, False));
3551            return res;
3552         }
3553         case Iop_PwAddL8Sx8:
3554         case Iop_PwAddL16Sx4:
3555         case Iop_PwAddL32Sx2: {
3556            HReg res = newVRegD(env);
3557            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3558            UInt size = 0;
3559            switch(e->Iex.Binop.op) {
3560               case Iop_PwAddL8Sx8: size = 0; break;
3561               case Iop_PwAddL16Sx4: size = 1; break;
3562               case Iop_PwAddL32Sx2: size = 2; break;
3563               default: vassert(0);
3564            }
3565            addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
3566                                          res, arg, size, False));
3567            return res;
3568         }
3569         case Iop_PwAddL8Ux8:
3570         case Iop_PwAddL16Ux4:
3571         case Iop_PwAddL32Ux2: {
3572            HReg res = newVRegD(env);
3573            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3574            UInt size = 0;
3575            switch(e->Iex.Binop.op) {
3576               case Iop_PwAddL8Ux8: size = 0; break;
3577               case Iop_PwAddL16Ux4: size = 1; break;
3578               case Iop_PwAddL32Ux2: size = 2; break;
3579               default: vassert(0);
3580            }
3581            addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
3582                                          res, arg, size, False));
3583            return res;
3584         }
3585         case Iop_Cnt8x8: {
3586            HReg res = newVRegD(env);
3587            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3588            UInt size = 0;
3589            addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
3590                                          res, arg, size, False));
3591            return res;
3592         }
3593         case Iop_Clz8x8:
3594         case Iop_Clz16x4:
3595         case Iop_Clz32x2: {
3596            HReg res = newVRegD(env);
3597            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3598            UInt size = 0;
3599            switch(e->Iex.Binop.op) {
3600               case Iop_Clz8x8: size = 0; break;
3601               case Iop_Clz16x4: size = 1; break;
3602               case Iop_Clz32x2: size = 2; break;
3603               default: vassert(0);
3604            }
3605            addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
3606                                          res, arg, size, False));
3607            return res;
3608         }
3609         case Iop_Cls8x8:
3610         case Iop_Cls16x4:
3611         case Iop_Cls32x2: {
3612            HReg res = newVRegD(env);
3613            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3614            UInt size = 0;
3615            switch(e->Iex.Binop.op) {
3616               case Iop_Cls8x8: size = 0; break;
3617               case Iop_Cls16x4: size = 1; break;
3618               case Iop_Cls32x2: size = 2; break;
3619               default: vassert(0);
3620            }
3621            addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
3622                                          res, arg, size, False));
3623            return res;
3624         }
3625         case Iop_FtoI32Sx2_RZ: {
3626            HReg res = newVRegD(env);
3627            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3628            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
3629                                          res, arg, 2, False));
3630            return res;
3631         }
3632         case Iop_FtoI32Ux2_RZ: {
3633            HReg res = newVRegD(env);
3634            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3635            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
3636                                          res, arg, 2, False));
3637            return res;
3638         }
3639         case Iop_I32StoFx2: {
3640            HReg res = newVRegD(env);
3641            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3642            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
3643                                          res, arg, 2, False));
3644            return res;
3645         }
3646         case Iop_I32UtoFx2: {
3647            HReg res = newVRegD(env);
3648            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3649            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
3650                                          res, arg, 2, False));
3651            return res;
3652         }
3653         case Iop_F32toF16x4: {
3654            HReg res = newVRegD(env);
3655            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3656            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
3657                                          res, arg, 2, False));
3658            return res;
3659         }
3660         case Iop_RecipEst32Fx2: {
3661            HReg res = newVRegD(env);
3662            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3663            addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
3664                                          res, argL, 0, False));
3665            return res;
3666         }
3667         case Iop_RecipEst32Ux2: {
3668            HReg res = newVRegD(env);
3669            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3670            addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
3671                                          res, argL, 0, False));
3672            return res;
3673         }
3674         case Iop_Abs32Fx2: {
3675            DECLARE_PATTERN(p_vabd_32fx2);
3676            DEFINE_PATTERN(p_vabd_32fx2,
3677                           unop(Iop_Abs32Fx2,
3678                                binop(Iop_Sub32Fx2,
3679                                      bind(0),
3680                                      bind(1))));
3681            if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
3682               HReg res = newVRegD(env);
3683               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3684               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3685               addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
3686                                              res, argL, argR, 0, False));
3687               return res;
3688            } else {
3689               HReg res = newVRegD(env);
3690               HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3691               addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
3692                                             res, arg, 0, False));
3693               return res;
3694            }
3695         }
3696         case Iop_RSqrtEst32Fx2: {
3697            HReg res = newVRegD(env);
3698            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3699            addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
3700                                          res, arg, 0, False));
3701            return res;
3702         }
3703         case Iop_RSqrtEst32Ux2: {
3704            HReg res = newVRegD(env);
3705            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3706            addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
3707                                          res, arg, 0, False));
3708            return res;
3709         }
3710         case Iop_Neg32Fx2: {
3711            HReg res = newVRegD(env);
3712            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3713            addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
3714                                          res, arg, 0, False));
3715            return res;
3716         }
3717         default:
3718            break;
3719      }
3720   } /* if (e->tag == Iex_Unop) */
3721
3722   if (e->tag == Iex_Triop) {
3723      IRTriop *triop = e->Iex.Triop.details;
3724
3725      switch (triop->op) {
3726         case Iop_Slice64: {
3727            HReg res = newVRegD(env);
3728            HReg argL = iselNeon64Expr(env, triop->arg2);
3729            HReg argR = iselNeon64Expr(env, triop->arg1);
3730            UInt imm4;
3731            if (triop->arg3->tag != Iex_Const ||
3732                typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
3733               vpanic("ARM target supports Iop_Extract64 with constant "
3734                      "third argument less than 16 only\n");
3735            }
3736            imm4 = triop->arg3->Iex.Const.con->Ico.U8;
3737            if (imm4 >= 8) {
3738               vpanic("ARM target supports Iop_Extract64 with constant "
3739                      "third argument less than 16 only\n");
3740            }
3741            addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
3742                                           res, argL, argR, imm4, False));
3743            return res;
3744         }
3745         case Iop_SetElem8x8:
3746         case Iop_SetElem16x4:
3747         case Iop_SetElem32x2: {
3748            HReg res = newVRegD(env);
3749            HReg dreg = iselNeon64Expr(env, triop->arg1);
3750            HReg arg = iselIntExpr_R(env, triop->arg3);
3751            UInt index, size;
3752            if (triop->arg2->tag != Iex_Const ||
3753                typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
3754               vpanic("ARM target supports SetElem with constant "
3755                      "second argument only\n");
3756            }
3757            index = triop->arg2->Iex.Const.con->Ico.U8;
3758            switch (triop->op) {
3759               case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
3760               case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
3761               case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
3762               default: vassert(0);
3763            }
3764            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
3765            addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
3766                                           mkARMNRS(ARMNRS_Scalar, res, index),
3767                                           mkARMNRS(ARMNRS_Reg, arg, 0),
3768                                           size, False));
3769            return res;
3770         }
3771         default:
3772            break;
3773      }
3774   }
3775
3776   /* --------- MULTIPLEX --------- */
3777   if (e->tag == Iex_ITE) { // VFD
3778      HReg rLo, rHi;
3779      HReg res = newVRegD(env);
3780      iselInt64Expr(&rHi, &rLo, env, e);
3781      addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3782      return res;
3783   }
3784
3785   ppIRExpr(e);
3786   vpanic("iselNeon64Expr");
3787}
3788
3789
3790static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e )
3791{
3792   HReg r;
3793   vassert(env->hwcaps & VEX_HWCAPS_ARM_NEON);
3794   r = iselNeonExpr_wrk( env, e );
3795   vassert(hregClass(r) == HRcVec128);
3796   vassert(hregIsVirtual(r));
3797   return r;
3798}
3799
3800/* DO NOT CALL THIS DIRECTLY */
3801static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e )
3802{
3803   IRType ty = typeOfIRExpr(env->type_env, e);
3804   MatchInfo mi;
3805   vassert(e);
3806   vassert(ty == Ity_V128);
3807
3808   if (e->tag == Iex_RdTmp) {
3809      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3810   }
3811
3812   if (e->tag == Iex_Const) {
      /* At the moment there should be no 128-bit constants in the IR
         generated for ARM during disassembly.  They are represented as
         an Iop_64HLtoV128 binary operation and are handled among the
         binary ops. */
      /* However, a zero constant can be created by Valgrind's internal
         optimiser, so it is handled here; the 0xFFFF constant is
         accepted below as well. */
3817      if (e->Iex.Const.con->Ico.V128 == 0x0000) {
3818         HReg res = newVRegV(env);
3819         addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(6, 0)));
3820         return res;
3821      }
3822      if (e->Iex.Const.con->Ico.V128 == 0xFFFF) {
3823         HReg res = newVRegV(env);
3824         addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(6, 255)));
3825         return res;
3826      }
3827      ppIRExpr(e);
3828      vpanic("128-bit constant is not implemented");
3829   }
3830
3831   if (e->tag == Iex_Load) {
3832      HReg res = newVRegV(env);
3833      ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
3834      vassert(ty == Ity_V128);
3835      addInstr(env, ARMInstr_NLdStQ(True, res, am));
3836      return res;
3837   }
3838
3839   if (e->tag == Iex_Get) {
3840      HReg addr = newVRegI(env);
3841      HReg res = newVRegV(env);
3842      vassert(ty == Ity_V128);
3843      addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
3844      addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
3845      return res;
3846   }
3847
3848   if (e->tag == Iex_Unop) {
3849      switch (e->Iex.Unop.op) {
3850         case Iop_NotV128: {
3851            DECLARE_PATTERN(p_veqz_8x16);
3852            DECLARE_PATTERN(p_veqz_16x8);
3853            DECLARE_PATTERN(p_veqz_32x4);
3854            DECLARE_PATTERN(p_vcge_8sx16);
3855            DECLARE_PATTERN(p_vcge_16sx8);
3856            DECLARE_PATTERN(p_vcge_32sx4);
3857            DECLARE_PATTERN(p_vcge_8ux16);
3858            DECLARE_PATTERN(p_vcge_16ux8);
3859            DECLARE_PATTERN(p_vcge_32ux4);
3860            DEFINE_PATTERN(p_veqz_8x16,
3861                  unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
3862            DEFINE_PATTERN(p_veqz_16x8,
3863                  unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
3864            DEFINE_PATTERN(p_veqz_32x4,
3865                  unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
3866            DEFINE_PATTERN(p_vcge_8sx16,
3867                  unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
3868            DEFINE_PATTERN(p_vcge_16sx8,
3869                  unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
3870            DEFINE_PATTERN(p_vcge_32sx4,
3871                  unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
3872            DEFINE_PATTERN(p_vcge_8ux16,
3873                  unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
3874            DEFINE_PATTERN(p_vcge_16ux8,
3875                  unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
3876            DEFINE_PATTERN(p_vcge_32ux4,
3877                  unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
3878            if (matchIRExpr(&mi, p_veqz_8x16, e)) {
3879               HReg res = newVRegV(env);
3880               HReg arg = iselNeonExpr(env, mi.bindee[0]);
3881               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
3882               return res;
3883            } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
3884               HReg res = newVRegV(env);
3885               HReg arg = iselNeonExpr(env, mi.bindee[0]);
3886               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
3887               return res;
3888            } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
3889               HReg res = newVRegV(env);
3890               HReg arg = iselNeonExpr(env, mi.bindee[0]);
3891               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
3892               return res;
3893            } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
3894               HReg res = newVRegV(env);
3895               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3896               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3897               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3898                                              res, argL, argR, 0, True));
3899               return res;
3900            } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
3901               HReg res = newVRegV(env);
3902               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3903               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3904               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3905                                              res, argL, argR, 1, True));
3906               return res;
3907            } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
3908               HReg res = newVRegV(env);
3909               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3910               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3911               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3912                                              res, argL, argR, 2, True));
3913               return res;
3914            } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
3915               HReg res = newVRegV(env);
3916               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3917               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3918               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3919                                              res, argL, argR, 0, True));
3920               return res;
3921            } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
3922               HReg res = newVRegV(env);
3923               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3924               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3925               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3926                                              res, argL, argR, 1, True));
3927               return res;
3928            } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
3929               HReg res = newVRegV(env);
3930               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3931               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3932               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3933                                              res, argL, argR, 2, True));
3934               return res;
3935            } else {
3936               HReg res = newVRegV(env);
3937               HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3938               addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
3939               return res;
3940            }
3941         }
3942         case Iop_Dup8x16:
3943         case Iop_Dup16x8:
3944         case Iop_Dup32x4: {
3945            HReg res, arg;
3946            UInt size;
3947            DECLARE_PATTERN(p_vdup_8x16);
3948            DECLARE_PATTERN(p_vdup_16x8);
3949            DECLARE_PATTERN(p_vdup_32x4);
3950            DEFINE_PATTERN(p_vdup_8x16,
3951                  unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
3952            DEFINE_PATTERN(p_vdup_16x8,
3953                  unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
3954            DEFINE_PATTERN(p_vdup_32x4,
3955                  unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
3956            if (matchIRExpr(&mi, p_vdup_8x16, e)) {
3957               UInt index;
3958               UInt imm4;
3959               if (mi.bindee[1]->tag == Iex_Const &&
3960                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3961                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3962                  imm4 = (index << 1) + 1;
3963                  if (index < 8) {
3964                     res = newVRegV(env);
3965                     arg = iselNeon64Expr(env, mi.bindee[0]);
3966                     addInstr(env, ARMInstr_NUnaryS(
3967                                      ARMneon_VDUP,
3968                                      mkARMNRS(ARMNRS_Reg, res, 0),
3969                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3970                                      imm4, True
3971                             ));
3972                     return res;
3973                  }
3974               }
3975            } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
3976               UInt index;
3977               UInt imm4;
3978               if (mi.bindee[1]->tag == Iex_Const &&
3979                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3980                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3981                  imm4 = (index << 2) + 2;
3982                  if (index < 4) {
3983                     res = newVRegV(env);
3984                     arg = iselNeon64Expr(env, mi.bindee[0]);
3985                     addInstr(env, ARMInstr_NUnaryS(
3986                                      ARMneon_VDUP,
3987                                      mkARMNRS(ARMNRS_Reg, res, 0),
3988                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3989                                      imm4, True
3990                             ));
3991                     return res;
3992                  }
3993               }
3994            } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
3995               UInt index;
3996               UInt imm4;
3997               if (mi.bindee[1]->tag == Iex_Const &&
3998                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3999                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4000                  imm4 = (index << 3) + 4;
4001                  if (index < 2) {
4002                     res = newVRegV(env);
4003                     arg = iselNeon64Expr(env, mi.bindee[0]);
4004                     addInstr(env, ARMInstr_NUnaryS(
4005                                      ARMneon_VDUP,
4006                                      mkARMNRS(ARMNRS_Reg, res, 0),
4007                                      mkARMNRS(ARMNRS_Scalar, arg, index),
4008                                      imm4, True
4009                             ));
4010                     return res;
4011                  }
4012               }
4013            }
4014            arg = iselIntExpr_R(env, e->Iex.Unop.arg);
4015            res = newVRegV(env);
4016            switch (e->Iex.Unop.op) {
4017               case Iop_Dup8x16: size = 0; break;
4018               case Iop_Dup16x8: size = 1; break;
4019               case Iop_Dup32x4: size = 2; break;
4020               default: vassert(0);
4021            }
4022            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
4023            return res;
4024         }
4025         case Iop_Abs8x16:
4026         case Iop_Abs16x8:
4027         case Iop_Abs32x4: {
4028            HReg res = newVRegV(env);
4029            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4030            UInt size = 0;
4031            switch(e->Iex.Binop.op) {
4032               case Iop_Abs8x16: size = 0; break;
4033               case Iop_Abs16x8: size = 1; break;
4034               case Iop_Abs32x4: size = 2; break;
4035               default: vassert(0);
4036            }
4037            addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
4038            return res;
4039         }
4040         case Iop_Reverse8sIn64_x2:
4041         case Iop_Reverse16sIn64_x2:
4042         case Iop_Reverse32sIn64_x2: {
4043            HReg res = newVRegV(env);
4044            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4045            UInt size = 0;
4046            switch(e->Iex.Binop.op) {
4047               case Iop_Reverse8sIn64_x2: size = 0; break;
4048               case Iop_Reverse16sIn64_x2: size = 1; break;
4049               case Iop_Reverse32sIn64_x2: size = 2; break;
4050               default: vassert(0);
4051            }
4052            addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
4053                                          res, arg, size, True));
4054            return res;
4055         }
4056         case Iop_Reverse8sIn32_x4:
4057         case Iop_Reverse16sIn32_x4: {
4058            HReg res = newVRegV(env);
4059            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4060            UInt size = 0;
4061            switch(e->Iex.Binop.op) {
4062               case Iop_Reverse8sIn32_x4: size = 0; break;
4063               case Iop_Reverse16sIn32_x4: size = 1; break;
4064               default: vassert(0);
4065            }
4066            addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
4067                                          res, arg, size, True));
4068            return res;
4069         }
4070         case Iop_Reverse8sIn16_x8: {
4071            HReg res = newVRegV(env);
4072            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4073            UInt size = 0;
4074            addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
4075                                          res, arg, size, True));
4076            return res;
4077         }
4078         case Iop_CmpNEZ64x2: {
4079            HReg x_lsh = newVRegV(env);
4080            HReg x_rsh = newVRegV(env);
4081            HReg lsh_amt = newVRegV(env);
4082            HReg rsh_amt = newVRegV(env);
4083            HReg zero = newVRegV(env);
4084            HReg tmp = newVRegV(env);
4085            HReg tmp2 = newVRegV(env);
4086            HReg res = newVRegV(env);
4087            HReg x = newVRegV(env);
4088            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4089            addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
4090            addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
4091            addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
4092            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
4093            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4094                                           rsh_amt, zero, lsh_amt, 2, True));
4095            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4096                                          x_lsh, x, lsh_amt, 3, True));
4097            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4098                                          x_rsh, x, rsh_amt, 3, True));
4099            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4100                                           tmp, x_lsh, x_rsh, 0, True));
4101            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4102                                           res, tmp, x, 0, True));
4103            return res;
4104         }
4105         case Iop_CmpNEZ8x16:
4106         case Iop_CmpNEZ16x8:
4107         case Iop_CmpNEZ32x4: {
4108            HReg res = newVRegV(env);
4109            HReg tmp = newVRegV(env);
4110            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4111            UInt size;
4112            switch (e->Iex.Unop.op) {
4113               case Iop_CmpNEZ8x16: size = 0; break;
4114               case Iop_CmpNEZ16x8: size = 1; break;
4115               case Iop_CmpNEZ32x4: size = 2; break;
4116               default: vassert(0);
4117            }
4118            addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
4119            addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
4120            return res;
4121         }
4122         case Iop_Widen8Uto16x8:
4123         case Iop_Widen16Uto32x4:
4124         case Iop_Widen32Uto64x2: {
4125            HReg res = newVRegV(env);
4126            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4127            UInt size;
4128            switch (e->Iex.Unop.op) {
4129               case Iop_Widen8Uto16x8:  size = 0; break;
4130               case Iop_Widen16Uto32x4: size = 1; break;
4131               case Iop_Widen32Uto64x2: size = 2; break;
4132               default: vassert(0);
4133            }
4134            addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
4135                                          res, arg, size, True));
4136            return res;
4137         }
4138         case Iop_Widen8Sto16x8:
4139         case Iop_Widen16Sto32x4:
4140         case Iop_Widen32Sto64x2: {
4141            HReg res = newVRegV(env);
4142            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4143            UInt size;
4144            switch (e->Iex.Unop.op) {
4145               case Iop_Widen8Sto16x8:  size = 0; break;
4146               case Iop_Widen16Sto32x4: size = 1; break;
4147               case Iop_Widen32Sto64x2: size = 2; break;
4148               default: vassert(0);
4149            }
4150            addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4151                                          res, arg, size, True));
4152            return res;
4153         }
4154         case Iop_PwAddL8Sx16:
4155         case Iop_PwAddL16Sx8:
4156         case Iop_PwAddL32Sx4: {
4157            HReg res = newVRegV(env);
4158            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4159            UInt size = 0;
4160            switch(e->Iex.Binop.op) {
4161               case Iop_PwAddL8Sx16: size = 0; break;
4162               case Iop_PwAddL16Sx8: size = 1; break;
4163               case Iop_PwAddL32Sx4: size = 2; break;
4164               default: vassert(0);
4165            }
4166            addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4167                                          res, arg, size, True));
4168            return res;
4169         }
4170         case Iop_PwAddL8Ux16:
4171         case Iop_PwAddL16Ux8:
4172         case Iop_PwAddL32Ux4: {
4173            HReg res = newVRegV(env);
4174            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4175            UInt size = 0;
4176            switch(e->Iex.Binop.op) {
4177               case Iop_PwAddL8Ux16: size = 0; break;
4178               case Iop_PwAddL16Ux8: size = 1; break;
4179               case Iop_PwAddL32Ux4: size = 2; break;
4180               default: vassert(0);
4181            }
4182            addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4183                                          res, arg, size, True));
4184            return res;
4185         }
4186         case Iop_Cnt8x16: {
4187            HReg res = newVRegV(env);
4188            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4189            UInt size = 0;
4190            addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4191            return res;
4192         }
4193         case Iop_Clz8x16:
4194         case Iop_Clz16x8:
4195         case Iop_Clz32x4: {
4196            HReg res = newVRegV(env);
4197            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4198            UInt size = 0;
4199            switch(e->Iex.Binop.op) {
4200               case Iop_Clz8x16: size = 0; break;
4201               case Iop_Clz16x8: size = 1; break;
4202               case Iop_Clz32x4: size = 2; break;
4203               default: vassert(0);
4204            }
4205            addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4206            return res;
4207         }
4208         case Iop_Cls8x16:
4209         case Iop_Cls16x8:
4210         case Iop_Cls32x4: {
4211            HReg res = newVRegV(env);
4212            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4213            UInt size = 0;
4214            switch(e->Iex.Binop.op) {
4215               case Iop_Cls8x16: size = 0; break;
4216               case Iop_Cls16x8: size = 1; break;
4217               case Iop_Cls32x4: size = 2; break;
4218               default: vassert(0);
4219            }
4220            addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4221            return res;
4222         }
4223         case Iop_FtoI32Sx4_RZ: {
4224            HReg res = newVRegV(env);
4225            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4226            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4227                                          res, arg, 2, True));
4228            return res;
4229         }
4230         case Iop_FtoI32Ux4_RZ: {
4231            HReg res = newVRegV(env);
4232            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4233            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4234                                          res, arg, 2, True));
4235            return res;
4236         }
4237         case Iop_I32StoFx4: {
4238            HReg res = newVRegV(env);
4239            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4240            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4241                                          res, arg, 2, True));
4242            return res;
4243         }
4244         case Iop_I32UtoFx4: {
4245            HReg res = newVRegV(env);
4246            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4247            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4248                                          res, arg, 2, True));
4249            return res;
4250         }
4251         case Iop_F16toF32x4: {
4252            HReg res = newVRegV(env);
4253            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4254            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4255                                          res, arg, 2, True));
4256            return res;
4257         }
4258         case Iop_RecipEst32Fx4: {
4259            HReg res = newVRegV(env);
4260            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4261            addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4262                                          res, argL, 0, True));
4263            return res;
4264         }
4265         case Iop_RecipEst32Ux4: {
4266            HReg res = newVRegV(env);
4267            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4268            addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4269                                          res, argL, 0, True));
4270            return res;
4271         }
4272         case Iop_Abs32Fx4: {
4273            HReg res = newVRegV(env);
4274            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4275            addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4276                                          res, argL, 0, True));
4277            return res;
4278         }
4279         case Iop_RSqrtEst32Fx4: {
4280            HReg res = newVRegV(env);
4281            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4282            addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4283                                          res, argL, 0, True));
4284            return res;
4285         }
4286         case Iop_RSqrtEst32Ux4: {
4287            HReg res = newVRegV(env);
4288            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4289            addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4290                                          res, argL, 0, True));
4291            return res;
4292         }
4293         case Iop_Neg32Fx4: {
4294            HReg res = newVRegV(env);
4295            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4296            addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4297                                          res, arg, 0, True));
4298            return res;
4299         }
4300         /* ... */
4301         default:
4302            break;
4303      }
4304   }
4305
4306   if (e->tag == Iex_Binop) {
4307      switch (e->Iex.Binop.op) {
4308         case Iop_64HLtoV128:
4309            /* Try to match into single "VMOV reg, imm" instruction */
4310            if (e->Iex.Binop.arg1->tag == Iex_Const &&
4311                e->Iex.Binop.arg2->tag == Iex_Const &&
4312                typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
4313                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
4314                e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
4315                           e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
4316               ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
4317               ARMNImm *imm = Imm64_to_ARMNImm(imm64);
4318               if (imm) {
4319                  HReg res = newVRegV(env);
4320                  addInstr(env, ARMInstr_NeonImm(res, imm));
4321                  return res;
4322               }
4323               if ((imm64 >> 32) == 0LL &&
4324                   (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
4325                  HReg tmp1 = newVRegV(env);
4326                  HReg tmp2 = newVRegV(env);
4327                  HReg res = newVRegV(env);
4328                  if (imm->type < 10) {
4329                     addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
4330                     addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4331                     addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4332                                                    res, tmp1, tmp2, 4, True));
4333                     return res;
4334                  }
4335               }
4336               if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
4337                   (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
4338                  HReg tmp1 = newVRegV(env);
4339                  HReg tmp2 = newVRegV(env);
4340                  HReg res = newVRegV(env);
4341                  if (imm->type < 10) {
4342                     addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
4343                     addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4344                     addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4345                                                    res, tmp1, tmp2, 4, True));
4346                     return res;
4347                  }
4348               }
4349            }
4350            /* Does not match "VMOV Reg, Imm" form.  We'll have to do
4351               it the slow way. */
4352            {
4353               /* local scope */
4354               /* Done via the stack for ease of use. */
4355               /* FIXME: assumes little endian host */
4356               HReg       w3, w2, w1, w0;
4357               HReg       res  = newVRegV(env);
4358               ARMAMode1* sp_0  = ARMAMode1_RI(hregARM_R13(), 0);
4359               ARMAMode1* sp_4  = ARMAMode1_RI(hregARM_R13(), 4);
4360               ARMAMode1* sp_8  = ARMAMode1_RI(hregARM_R13(), 8);
4361               ARMAMode1* sp_12 = ARMAMode1_RI(hregARM_R13(), 12);
4362               ARMRI84*   c_16  = ARMRI84_I84(16,0);
4363               /* Make space for SP */
4364               addInstr(env, ARMInstr_Alu(ARMalu_SUB, hregARM_R13(),
4365                                                      hregARM_R13(), c_16));
4366
4367               /* Store the less significant 64 bits */
4368               iselInt64Expr(&w1, &w0, env, e->Iex.Binop.arg2);
4369               addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
4370                                             w0, sp_0));
4371               addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
4372                                             w1, sp_4));
4373
4374               /* Store the more significant 64 bits */
4375               iselInt64Expr(&w3, &w2, env, e->Iex.Binop.arg1);
4376               addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
4377                                             w2, sp_8));
4378               addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*store*/,
4379                                             w3, sp_12));
4380
4381                /* Load result back from stack. */
4382                addInstr(env, ARMInstr_NLdStQ(True/*load*/, res,
4383                                              mkARMAModeN_R(hregARM_R13())));
4384
4385                /* Restore SP */
4386                addInstr(env, ARMInstr_Alu(ARMalu_ADD, hregARM_R13(),
4387                                           hregARM_R13(), c_16));
4388                return res;
4389            } /* local scope */
4390            goto neon_expr_bad;
4391         case Iop_AndV128: {
4392            HReg res = newVRegV(env);
4393            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4394            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4395            addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4396                                           res, argL, argR, 4, True));
4397            return res;
4398         }
4399         case Iop_OrV128: {
4400            HReg res = newVRegV(env);
4401            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4402            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4403            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4404                                           res, argL, argR, 4, True));
4405            return res;
4406         }
4407         case Iop_XorV128: {
4408            HReg res = newVRegV(env);
4409            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4410            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4411            addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4412                                           res, argL, argR, 4, True));
4413            return res;
4414         }
4415         case Iop_Add8x16:
4416         case Iop_Add16x8:
4417         case Iop_Add32x4:
4418         case Iop_Add64x2: {
4419            /*
4420            FIXME: remove this if not used
4421            DECLARE_PATTERN(p_vrhadd_32sx4);
4422            ULong one = (1LL << 32) | 1LL;
4423            DEFINE_PATTERN(p_vrhadd_32sx4,
4424                  binop(Iop_Add32x4,
4425                        binop(Iop_Add32x4,
4426                              binop(Iop_SarN32x4,
4427                                    bind(0),
4428                                    mkU8(1)),
4429                              binop(Iop_SarN32x4,
4430                                    bind(1),
4431                                    mkU8(1))),
4432                        binop(Iop_SarN32x4,
4433                              binop(Iop_Add32x4,
4434                                    binop(Iop_Add32x4,
4435                                          binop(Iop_AndV128,
4436                                                bind(0),
4437                                                mkU128(one)),
4438                                          binop(Iop_AndV128,
4439                                                bind(1),
4440                                                mkU128(one))),
4441                                    mkU128(one)),
4442                              mkU8(1))));
4443            */
4444            HReg res = newVRegV(env);
4445            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4446            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4447            UInt size;
4448            switch (e->Iex.Binop.op) {
4449               case Iop_Add8x16: size = 0; break;
4450               case Iop_Add16x8: size = 1; break;
4451               case Iop_Add32x4: size = 2; break;
4452               case Iop_Add64x2: size = 3; break;
4453               default:
4454                  ppIROp(e->Iex.Binop.op);
4455                  vpanic("Illegal element size in VADD");
4456            }
4457            addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
4458                                           res, argL, argR, size, True));
4459            return res;
4460         }
4461         case Iop_RecipStep32Fx4: {
4462            HReg res = newVRegV(env);
4463            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4464            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4465            UInt size = 0;
4466            addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
4467                                           res, argL, argR, size, True));
4468            return res;
4469         }
4470         case Iop_RSqrtStep32Fx4: {
4471            HReg res = newVRegV(env);
4472            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4473            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4474            UInt size = 0;
4475            addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
4476                                           res, argL, argR, size, True));
4477            return res;
4478         }
4479
4480         // These 6 verified 18 Apr 2013
4481         case Iop_InterleaveEvenLanes8x16:
4482         case Iop_InterleaveOddLanes8x16:
4483         case Iop_InterleaveEvenLanes16x8:
4484         case Iop_InterleaveOddLanes16x8:
4485         case Iop_InterleaveEvenLanes32x4:
4486         case Iop_InterleaveOddLanes32x4: {
4487            HReg rD   = newVRegV(env);
4488            HReg rM   = newVRegV(env);
4489            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4490            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4491            UInt size;
4492            Bool resRd;  // is the result in rD or rM ?
4493            switch (e->Iex.Binop.op) {
4494               case Iop_InterleaveOddLanes8x16:  resRd = False; size = 0; break;
4495               case Iop_InterleaveEvenLanes8x16: resRd = True;  size = 0; break;
4496               case Iop_InterleaveOddLanes16x8:  resRd = False; size = 1; break;
4497               case Iop_InterleaveEvenLanes16x8: resRd = True;  size = 1; break;
4498               case Iop_InterleaveOddLanes32x4:  resRd = False; size = 2; break;
4499               case Iop_InterleaveEvenLanes32x4: resRd = True;  size = 2; break;
4500               default: vassert(0);
4501            }
4502            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4503            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4504            addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, True));
4505            return resRd ? rD : rM;
4506         }
4507
4508         // These 6 verified 18 Apr 2013
4509         case Iop_InterleaveHI8x16:
4510         case Iop_InterleaveLO8x16:
4511         case Iop_InterleaveHI16x8:
4512         case Iop_InterleaveLO16x8:
4513         case Iop_InterleaveHI32x4:
4514         case Iop_InterleaveLO32x4: {
4515            HReg rD   = newVRegV(env);
4516            HReg rM   = newVRegV(env);
4517            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4518            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4519            UInt size;
4520            Bool resRd;  // is the result in rD or rM ?
4521            switch (e->Iex.Binop.op) {
4522               case Iop_InterleaveHI8x16: resRd = False; size = 0; break;
4523               case Iop_InterleaveLO8x16: resRd = True;  size = 0; break;
4524               case Iop_InterleaveHI16x8: resRd = False; size = 1; break;
4525               case Iop_InterleaveLO16x8: resRd = True;  size = 1; break;
4526               case Iop_InterleaveHI32x4: resRd = False; size = 2; break;
4527               case Iop_InterleaveLO32x4: resRd = True;  size = 2; break;
4528               default: vassert(0);
4529            }
4530            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4531            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4532            addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, True));
4533            return resRd ? rD : rM;
4534         }
4535
4536         // These 6 verified 18 Apr 2013
4537         case Iop_CatOddLanes8x16:
4538         case Iop_CatEvenLanes8x16:
4539         case Iop_CatOddLanes16x8:
4540         case Iop_CatEvenLanes16x8:
4541         case Iop_CatOddLanes32x4:
4542         case Iop_CatEvenLanes32x4: {
4543            HReg rD   = newVRegV(env);
4544            HReg rM   = newVRegV(env);
4545            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4546            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4547            UInt size;
4548            Bool resRd;  // is the result in rD or rM ?
4549            switch (e->Iex.Binop.op) {
4550               case Iop_CatOddLanes8x16:  resRd = False; size = 0; break;
4551               case Iop_CatEvenLanes8x16: resRd = True;  size = 0; break;
4552               case Iop_CatOddLanes16x8:  resRd = False; size = 1; break;
4553               case Iop_CatEvenLanes16x8: resRd = True;  size = 1; break;
4554               case Iop_CatOddLanes32x4:  resRd = False; size = 2; break;
4555               case Iop_CatEvenLanes32x4: resRd = True;  size = 2; break;
4556               default: vassert(0);
4557            }
4558            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
4559            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
4560            addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, True));
4561            return resRd ? rD : rM;
4562         }
4563
4564         case Iop_QAdd8Ux16:
4565         case Iop_QAdd16Ux8:
4566         case Iop_QAdd32Ux4:
4567         case Iop_QAdd64Ux2: {
4568            HReg res = newVRegV(env);
4569            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4570            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4571            UInt size;
4572            switch (e->Iex.Binop.op) {
4573               case Iop_QAdd8Ux16: size = 0; break;
4574               case Iop_QAdd16Ux8: size = 1; break;
4575               case Iop_QAdd32Ux4: size = 2; break;
4576               case Iop_QAdd64Ux2: size = 3; break;
4577               default:
4578                  ppIROp(e->Iex.Binop.op);
4579                  vpanic("Illegal element size in VQADDU");
4580            }
4581            addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
4582                                           res, argL, argR, size, True));
4583            return res;
4584         }
4585         case Iop_QAdd8Sx16:
4586         case Iop_QAdd16Sx8:
4587         case Iop_QAdd32Sx4:
4588         case Iop_QAdd64Sx2: {
4589            HReg res = newVRegV(env);
4590            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4591            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4592            UInt size;
4593            switch (e->Iex.Binop.op) {
4594               case Iop_QAdd8Sx16: size = 0; break;
4595               case Iop_QAdd16Sx8: size = 1; break;
4596               case Iop_QAdd32Sx4: size = 2; break;
4597               case Iop_QAdd64Sx2: size = 3; break;
4598               default:
4599                  ppIROp(e->Iex.Binop.op);
4600                  vpanic("Illegal element size in VQADDS");
4601            }
4602            addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
4603                                           res, argL, argR, size, True));
4604            return res;
4605         }
4606         case Iop_Sub8x16:
4607         case Iop_Sub16x8:
4608         case Iop_Sub32x4:
4609         case Iop_Sub64x2: {
4610            HReg res = newVRegV(env);
4611            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4612            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4613            UInt size;
4614            switch (e->Iex.Binop.op) {
4615               case Iop_Sub8x16: size = 0; break;
4616               case Iop_Sub16x8: size = 1; break;
4617               case Iop_Sub32x4: size = 2; break;
4618               case Iop_Sub64x2: size = 3; break;
4619               default:
4620                  ppIROp(e->Iex.Binop.op);
4621                  vpanic("Illegal element size in VSUB");
4622            }
4623            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4624                                           res, argL, argR, size, True));
4625            return res;
4626         }
4627         case Iop_QSub8Ux16:
4628         case Iop_QSub16Ux8:
4629         case Iop_QSub32Ux4:
4630         case Iop_QSub64Ux2: {
4631            HReg res = newVRegV(env);
4632            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4633            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4634            UInt size;
4635            switch (e->Iex.Binop.op) {
4636               case Iop_QSub8Ux16: size = 0; break;
4637               case Iop_QSub16Ux8: size = 1; break;
4638               case Iop_QSub32Ux4: size = 2; break;
4639               case Iop_QSub64Ux2: size = 3; break;
4640               default:
4641                  ppIROp(e->Iex.Binop.op);
4642                  vpanic("Illegal element size in VQSUBU");
4643            }
4644            addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
4645                                           res, argL, argR, size, True));
4646            return res;
4647         }
4648         case Iop_QSub8Sx16:
4649         case Iop_QSub16Sx8:
4650         case Iop_QSub32Sx4:
4651         case Iop_QSub64Sx2: {
4652            HReg res = newVRegV(env);
4653            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4654            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4655            UInt size;
4656            switch (e->Iex.Binop.op) {
4657               case Iop_QSub8Sx16: size = 0; break;
4658               case Iop_QSub16Sx8: size = 1; break;
4659               case Iop_QSub32Sx4: size = 2; break;
4660               case Iop_QSub64Sx2: size = 3; break;
4661               default:
4662                  ppIROp(e->Iex.Binop.op);
4663                  vpanic("Illegal element size in VQSUBS");
4664            }
4665            addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
4666                                           res, argL, argR, size, True));
4667            return res;
4668         }
4669         case Iop_Max8Ux16:
4670         case Iop_Max16Ux8:
4671         case Iop_Max32Ux4: {
4672            HReg res = newVRegV(env);
4673            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4674            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4675            UInt size;
4676            switch (e->Iex.Binop.op) {
4677               case Iop_Max8Ux16: size = 0; break;
4678               case Iop_Max16Ux8: size = 1; break;
4679               case Iop_Max32Ux4: size = 2; break;
4680               default: vpanic("Illegal element size in VMAXU");
4681            }
4682            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
4683                                           res, argL, argR, size, True));
4684            return res;
4685         }
4686         case Iop_Max8Sx16:
4687         case Iop_Max16Sx8:
4688         case Iop_Max32Sx4: {
4689            HReg res = newVRegV(env);
4690            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4691            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4692            UInt size;
4693            switch (e->Iex.Binop.op) {
4694               case Iop_Max8Sx16: size = 0; break;
4695               case Iop_Max16Sx8: size = 1; break;
4696               case Iop_Max32Sx4: size = 2; break;
4697               default: vpanic("Illegal element size in VMAXU");
4698            }
4699            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
4700                                           res, argL, argR, size, True));
4701            return res;
4702         }
4703         case Iop_Min8Ux16:
4704         case Iop_Min16Ux8:
4705         case Iop_Min32Ux4: {
4706            HReg res = newVRegV(env);
4707            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4708            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4709            UInt size;
4710            switch (e->Iex.Binop.op) {
4711               case Iop_Min8Ux16: size = 0; break;
4712               case Iop_Min16Ux8: size = 1; break;
4713               case Iop_Min32Ux4: size = 2; break;
4714               default: vpanic("Illegal element size in VMAXU");
4715            }
4716            addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
4717                                           res, argL, argR, size, True));
4718            return res;
4719         }
4720         case Iop_Min8Sx16:
4721         case Iop_Min16Sx8:
4722         case Iop_Min32Sx4: {
4723            HReg res = newVRegV(env);
4724            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4725            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4726            UInt size;
4727            switch (e->Iex.Binop.op) {
4728               case Iop_Min8Sx16: size = 0; break;
4729               case Iop_Min16Sx8: size = 1; break;
4730               case Iop_Min32Sx4: size = 2; break;
4731               default: vpanic("Illegal element size in VMAXU");
4732            }
4733            addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
4734                                           res, argL, argR, size, True));
4735            return res;
4736         }
4737         case Iop_Sar8x16:
4738         case Iop_Sar16x8:
4739         case Iop_Sar32x4:
4740         case Iop_Sar64x2: {
4741            HReg res = newVRegV(env);
4742            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4743            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4744            HReg argR2 = newVRegV(env);
4745            HReg zero = newVRegV(env);
4746            UInt size;
4747            switch (e->Iex.Binop.op) {
4748               case Iop_Sar8x16: size = 0; break;
4749               case Iop_Sar16x8: size = 1; break;
4750               case Iop_Sar32x4: size = 2; break;
4751               case Iop_Sar64x2: size = 3; break;
4752               default: vassert(0);
4753            }
4754            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4755            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4756                                           argR2, zero, argR, size, True));
4757            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4758                                          res, argL, argR2, size, True));
4759            return res;
4760         }
4761         case Iop_Sal8x16:
4762         case Iop_Sal16x8:
4763         case Iop_Sal32x4:
4764         case Iop_Sal64x2: {
4765            HReg res = newVRegV(env);
4766            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4767            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4768            UInt size;
4769            switch (e->Iex.Binop.op) {
4770               case Iop_Sal8x16: size = 0; break;
4771               case Iop_Sal16x8: size = 1; break;
4772               case Iop_Sal32x4: size = 2; break;
4773               case Iop_Sal64x2: size = 3; break;
4774               default: vassert(0);
4775            }
4776            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4777                                          res, argL, argR, size, True));
4778            return res;
4779         }
4780         case Iop_Shr8x16:
4781         case Iop_Shr16x8:
4782         case Iop_Shr32x4:
4783         case Iop_Shr64x2: {
4784            HReg res = newVRegV(env);
4785            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4786            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4787            HReg argR2 = newVRegV(env);
4788            HReg zero = newVRegV(env);
4789            UInt size;
4790            switch (e->Iex.Binop.op) {
4791               case Iop_Shr8x16: size = 0; break;
4792               case Iop_Shr16x8: size = 1; break;
4793               case Iop_Shr32x4: size = 2; break;
4794               case Iop_Shr64x2: size = 3; break;
4795               default: vassert(0);
4796            }
4797            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4798            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4799                                           argR2, zero, argR, size, True));
4800            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4801                                          res, argL, argR2, size, True));
4802            return res;
4803         }
4804         case Iop_Shl8x16:
4805         case Iop_Shl16x8:
4806         case Iop_Shl32x4:
4807         case Iop_Shl64x2: {
4808            HReg res = newVRegV(env);
4809            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4810            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4811            UInt size;
4812            switch (e->Iex.Binop.op) {
4813               case Iop_Shl8x16: size = 0; break;
4814               case Iop_Shl16x8: size = 1; break;
4815               case Iop_Shl32x4: size = 2; break;
4816               case Iop_Shl64x2: size = 3; break;
4817               default: vassert(0);
4818            }
4819            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4820                                          res, argL, argR, size, True));
4821            return res;
4822         }
4823         case Iop_QShl8x16:
4824         case Iop_QShl16x8:
4825         case Iop_QShl32x4:
4826         case Iop_QShl64x2: {
4827            HReg res = newVRegV(env);
4828            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4829            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4830            UInt size;
4831            switch (e->Iex.Binop.op) {
4832               case Iop_QShl8x16: size = 0; break;
4833               case Iop_QShl16x8: size = 1; break;
4834               case Iop_QShl32x4: size = 2; break;
4835               case Iop_QShl64x2: size = 3; break;
4836               default: vassert(0);
4837            }
4838            addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
4839                                          res, argL, argR, size, True));
4840            return res;
4841         }
4842         case Iop_QSal8x16:
4843         case Iop_QSal16x8:
4844         case Iop_QSal32x4:
4845         case Iop_QSal64x2: {
4846            HReg res = newVRegV(env);
4847            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4848            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4849            UInt size;
4850            switch (e->Iex.Binop.op) {
4851               case Iop_QSal8x16: size = 0; break;
4852               case Iop_QSal16x8: size = 1; break;
4853               case Iop_QSal32x4: size = 2; break;
4854               case Iop_QSal64x2: size = 3; break;
4855               default: vassert(0);
4856            }
4857            addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
4858                                          res, argL, argR, size, True));
4859            return res;
4860         }
4861         case Iop_QShlNsatUU8x16:
4862         case Iop_QShlNsatUU16x8:
4863         case Iop_QShlNsatUU32x4:
4864         case Iop_QShlNsatUU64x2: {
4865            HReg res = newVRegV(env);
4866            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4867            UInt size, imm;
4868            if (e->Iex.Binop.arg2->tag != Iex_Const ||
4869                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4870               vpanic("ARM target supports Iop_QShlNsatUUAxB with constant "
4871                      "second argument only\n");
4872            }
4873            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4874            switch (e->Iex.Binop.op) {
4875               case Iop_QShlNsatUU8x16: size = 8 | imm; break;
4876               case Iop_QShlNsatUU16x8: size = 16 | imm; break;
4877               case Iop_QShlNsatUU32x4: size = 32 | imm; break;
4878               case Iop_QShlNsatUU64x2: size = 64 | imm; break;
4879               default: vassert(0);
4880            }
4881            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
4882                                          res, argL, size, True));
4883            return res;
4884         }
4885         case Iop_QShlNsatSU8x16:
4886         case Iop_QShlNsatSU16x8:
4887         case Iop_QShlNsatSU32x4:
4888         case Iop_QShlNsatSU64x2: {
4889            HReg res = newVRegV(env);
4890            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4891            UInt size, imm;
4892            if (e->Iex.Binop.arg2->tag != Iex_Const ||
4893                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4894               vpanic("ARM target supports Iop_QShlNsatSUAxB with constant "
4895                      "second argument only\n");
4896            }
4897            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4898            switch (e->Iex.Binop.op) {
4899               case Iop_QShlNsatSU8x16: size = 8 | imm; break;
4900               case Iop_QShlNsatSU16x8: size = 16 | imm; break;
4901               case Iop_QShlNsatSU32x4: size = 32 | imm; break;
4902               case Iop_QShlNsatSU64x2: size = 64 | imm; break;
4903               default: vassert(0);
4904            }
4905            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
4906                                          res, argL, size, True));
4907            return res;
4908         }
4909         case Iop_QShlNsatSS8x16:
4910         case Iop_QShlNsatSS16x8:
4911         case Iop_QShlNsatSS32x4:
4912         case Iop_QShlNsatSS64x2: {
4913            HReg res = newVRegV(env);
4914            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4915            UInt size, imm;
4916            if (e->Iex.Binop.arg2->tag != Iex_Const ||
4917                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4918               vpanic("ARM target supports Iop_QShlNsatSSAxB with constant "
4919                      "second argument only\n");
4920            }
4921            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4922            switch (e->Iex.Binop.op) {
4923               case Iop_QShlNsatSS8x16: size = 8 | imm; break;
4924               case Iop_QShlNsatSS16x8: size = 16 | imm; break;
4925               case Iop_QShlNsatSS32x4: size = 32 | imm; break;
4926               case Iop_QShlNsatSS64x2: size = 64 | imm; break;
4927               default: vassert(0);
4928            }
4929            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
4930                                          res, argL, size, True));
4931            return res;
4932         }
4933         case Iop_ShrN8x16:
4934         case Iop_ShrN16x8:
4935         case Iop_ShrN32x4:
4936         case Iop_ShrN64x2: {
4937            HReg res = newVRegV(env);
4938            HReg tmp = newVRegV(env);
4939            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4940            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4941            HReg argR2 = newVRegI(env);
4942            UInt size;
4943            switch (e->Iex.Binop.op) {
4944               case Iop_ShrN8x16: size = 0; break;
4945               case Iop_ShrN16x8: size = 1; break;
4946               case Iop_ShrN32x4: size = 2; break;
4947               case Iop_ShrN64x2: size = 3; break;
4948               default: vassert(0);
4949            }
4950            addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4951            addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
4952                                          tmp, argR2, 0, True));
4953            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4954                                          res, argL, tmp, size, True));
4955            return res;
4956         }
4957         case Iop_ShlN8x16:
4958         case Iop_ShlN16x8:
4959         case Iop_ShlN32x4:
4960         case Iop_ShlN64x2: {
4961            HReg res = newVRegV(env);
4962            HReg tmp = newVRegV(env);
4963            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4964            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4965            UInt size;
4966            switch (e->Iex.Binop.op) {
4967               case Iop_ShlN8x16: size = 0; break;
4968               case Iop_ShlN16x8: size = 1; break;
4969               case Iop_ShlN32x4: size = 2; break;
4970               case Iop_ShlN64x2: size = 3; break;
4971               default: vassert(0);
4972            }
4973            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
4974            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4975                                          res, argL, tmp, size, True));
4976            return res;
4977         }
4978         case Iop_SarN8x16:
4979         case Iop_SarN16x8:
4980         case Iop_SarN32x4:
4981         case Iop_SarN64x2: {
4982            HReg res = newVRegV(env);
4983            HReg tmp = newVRegV(env);
4984            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4985            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4986            HReg argR2 = newVRegI(env);
4987            UInt size;
4988            switch (e->Iex.Binop.op) {
4989               case Iop_SarN8x16: size = 0; break;
4990               case Iop_SarN16x8: size = 1; break;
4991               case Iop_SarN32x4: size = 2; break;
4992               case Iop_SarN64x2: size = 3; break;
4993               default: vassert(0);
4994            }
4995            addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4996            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
4997            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4998                                          res, argL, tmp, size, True));
4999            return res;
5000         }
5001         case Iop_CmpGT8Ux16:
5002         case Iop_CmpGT16Ux8:
5003         case Iop_CmpGT32Ux4: {
5004            HReg res = newVRegV(env);
5005            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5006            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5007            UInt size;
5008            switch (e->Iex.Binop.op) {
5009               case Iop_CmpGT8Ux16: size = 0; break;
5010               case Iop_CmpGT16Ux8: size = 1; break;
5011               case Iop_CmpGT32Ux4: size = 2; break;
5012               default: vassert(0);
5013            }
5014            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
5015                                           res, argL, argR, size, True));
5016            return res;
5017         }
5018         case Iop_CmpGT8Sx16:
5019         case Iop_CmpGT16Sx8:
5020         case Iop_CmpGT32Sx4: {
5021            HReg res = newVRegV(env);
5022            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5023            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5024            UInt size;
5025            switch (e->Iex.Binop.op) {
5026               case Iop_CmpGT8Sx16: size = 0; break;
5027               case Iop_CmpGT16Sx8: size = 1; break;
5028               case Iop_CmpGT32Sx4: size = 2; break;
5029               default: vassert(0);
5030            }
5031            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
5032                                           res, argL, argR, size, True));
5033            return res;
5034         }
5035         case Iop_CmpEQ8x16:
5036         case Iop_CmpEQ16x8:
5037         case Iop_CmpEQ32x4: {
5038            HReg res = newVRegV(env);
5039            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5040            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5041            UInt size;
5042            switch (e->Iex.Binop.op) {
5043               case Iop_CmpEQ8x16: size = 0; break;
5044               case Iop_CmpEQ16x8: size = 1; break;
5045               case Iop_CmpEQ32x4: size = 2; break;
5046               default: vassert(0);
5047            }
5048            addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
5049                                           res, argL, argR, size, True));
5050            return res;
5051         }
5052         case Iop_Mul8x16:
5053         case Iop_Mul16x8:
5054         case Iop_Mul32x4: {
5055            HReg res = newVRegV(env);
5056            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5057            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5058            UInt size = 0;
5059            switch(e->Iex.Binop.op) {
5060               case Iop_Mul8x16: size = 0; break;
5061               case Iop_Mul16x8: size = 1; break;
5062               case Iop_Mul32x4: size = 2; break;
5063               default: vassert(0);
5064            }
5065            addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
5066                                           res, argL, argR, size, True));
5067            return res;
5068         }
5069         case Iop_Mull8Ux8:
5070         case Iop_Mull16Ux4:
5071         case Iop_Mull32Ux2: {
5072            HReg res = newVRegV(env);
5073            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5074            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5075            UInt size = 0;
5076            switch(e->Iex.Binop.op) {
5077               case Iop_Mull8Ux8: size = 0; break;
5078               case Iop_Mull16Ux4: size = 1; break;
5079               case Iop_Mull32Ux2: size = 2; break;
5080               default: vassert(0);
5081            }
5082            addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
5083                                           res, argL, argR, size, True));
5084            return res;
5085         }
5086
5087         case Iop_Mull8Sx8:
5088         case Iop_Mull16Sx4:
5089         case Iop_Mull32Sx2: {
5090            HReg res = newVRegV(env);
5091            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5092            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5093            UInt size = 0;
5094            switch(e->Iex.Binop.op) {
5095               case Iop_Mull8Sx8: size = 0; break;
5096               case Iop_Mull16Sx4: size = 1; break;
5097               case Iop_Mull32Sx2: size = 2; break;
5098               default: vassert(0);
5099            }
5100            addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
5101                                           res, argL, argR, size, True));
5102            return res;
5103         }
5104
5105         case Iop_QDMulHi16Sx8:
5106         case Iop_QDMulHi32Sx4: {
5107            HReg res = newVRegV(env);
5108            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5109            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5110            UInt size = 0;
5111            switch(e->Iex.Binop.op) {
5112               case Iop_QDMulHi16Sx8: size = 1; break;
5113               case Iop_QDMulHi32Sx4: size = 2; break;
5114               default: vassert(0);
5115            }
5116            addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5117                                           res, argL, argR, size, True));
5118            return res;
5119         }
5120
5121         case Iop_QRDMulHi16Sx8:
5122         case Iop_QRDMulHi32Sx4: {
5123            HReg res = newVRegV(env);
5124            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5125            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5126            UInt size = 0;
5127            switch(e->Iex.Binop.op) {
5128               case Iop_QRDMulHi16Sx8: size = 1; break;
5129               case Iop_QRDMulHi32Sx4: size = 2; break;
5130               default: vassert(0);
5131            }
5132            addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5133                                           res, argL, argR, size, True));
5134            return res;
5135         }
5136
5137         case Iop_QDMull16Sx4:
5138         case Iop_QDMull32Sx2: {
5139            HReg res = newVRegV(env);
5140            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5141            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5142            UInt size = 0;
5143            switch(e->Iex.Binop.op) {
5144               case Iop_QDMull16Sx4: size = 1; break;
5145               case Iop_QDMull32Sx2: size = 2; break;
5146               default: vassert(0);
5147            }
5148            addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5149                                           res, argL, argR, size, True));
5150            return res;
5151         }
5152         case Iop_PolynomialMul8x16: {
5153            HReg res = newVRegV(env);
5154            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5155            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5156            UInt size = 0;
5157            addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5158                                           res, argL, argR, size, True));
5159            return res;
5160         }
5161         case Iop_Max32Fx4: {
5162            HReg res = newVRegV(env);
5163            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5164            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5165            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5166                                           res, argL, argR, 2, True));
5167            return res;
5168         }
5169         case Iop_Min32Fx4: {
5170            HReg res = newVRegV(env);
5171            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5172            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5173            addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5174                                           res, argL, argR, 2, True));
5175            return res;
5176         }
5177         case Iop_PwMax32Fx4: {
5178            HReg res = newVRegV(env);
5179            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5180            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5181            addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5182                                           res, argL, argR, 2, True));
5183            return res;
5184         }
5185         case Iop_PwMin32Fx4: {
5186            HReg res = newVRegV(env);
5187            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5188            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5189            addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5190                                           res, argL, argR, 2, True));
5191            return res;
5192         }
5193         case Iop_CmpGT32Fx4: {
5194            HReg res = newVRegV(env);
5195            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5196            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5197            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5198                                           res, argL, argR, 2, True));
5199            return res;
5200         }
5201         case Iop_CmpGE32Fx4: {
5202            HReg res = newVRegV(env);
5203            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5204            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5205            addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5206                                           res, argL, argR, 2, True));
5207            return res;
5208         }
5209         case Iop_CmpEQ32Fx4: {
5210            HReg res = newVRegV(env);
5211            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5212            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5213            addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5214                                           res, argL, argR, 2, True));
5215            return res;
5216         }
5217
5218         case Iop_PolynomialMull8x8: {
5219            HReg res = newVRegV(env);
5220            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5221            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5222            UInt size = 0;
5223            addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5224                                           res, argL, argR, size, True));
5225            return res;
5226         }
5227         case Iop_F32ToFixed32Ux4_RZ:
5228         case Iop_F32ToFixed32Sx4_RZ:
5229         case Iop_Fixed32UToF32x4_RN:
5230         case Iop_Fixed32SToF32x4_RN: {
5231            HReg res = newVRegV(env);
5232            HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5233            ARMNeonUnOp op;
5234            UInt imm6;
5235            if (e->Iex.Binop.arg2->tag != Iex_Const ||
5236               typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5237                  vpanic("ARM supports FP <-> Fixed conversion with constant "
5238                         "second argument less than 33 only\n");
5239            }
5240            imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5241            vassert(imm6 <= 32 && imm6 > 0);
5242            imm6 = 64 - imm6;
5243            switch(e->Iex.Binop.op) {
5244               case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5245               case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5246               case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5247               case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5248               default: vassert(0);
5249            }
5250            addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5251            return res;
5252         }
5253         /*
5254         FIXME remove if not used
5255         case Iop_VDup8x16:
5256         case Iop_VDup16x8:
5257         case Iop_VDup32x4: {
5258            HReg res = newVRegV(env);
5259            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5260            UInt imm4;
5261            UInt index;
5262            if (e->Iex.Binop.arg2->tag != Iex_Const ||
5263               typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5264                  vpanic("ARM supports Iop_VDup with constant "
5265                         "second argument less than 16 only\n");
5266            }
5267            index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5268            switch(e->Iex.Binop.op) {
5269               case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5270               case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5271               case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5272               default: vassert(0);
5273            }
5274            if (imm4 >= 16) {
5275               vpanic("ARM supports Iop_VDup with constant "
5276                      "second argument less than 16 only\n");
5277            }
5278            addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5279                                          res, argL, imm4, True));
5280            return res;
5281         }
5282         */
5283         case Iop_PwAdd8x16:
5284         case Iop_PwAdd16x8:
5285         case Iop_PwAdd32x4: {
5286            HReg res = newVRegV(env);
5287            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5288            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5289            UInt size = 0;
5290            switch(e->Iex.Binop.op) {
5291               case Iop_PwAdd8x16: size = 0; break;
5292               case Iop_PwAdd16x8: size = 1; break;
5293               case Iop_PwAdd32x4: size = 2; break;
5294               default: vassert(0);
5295            }
5296            addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5297                                           res, argL, argR, size, True));
5298            return res;
5299         }
5300         /* ... */
5301         default:
5302            break;
5303      }
5304   }
5305
5306   if (e->tag == Iex_Triop) {
5307      IRTriop *triop = e->Iex.Triop.details;
5308
5309      switch (triop->op) {
5310         case Iop_SliceV128: {
5311            HReg res = newVRegV(env);
5312            HReg argL = iselNeonExpr(env, triop->arg2);
5313            HReg argR = iselNeonExpr(env, triop->arg1);
5314            UInt imm4;
5315            if (triop->arg3->tag != Iex_Const ||
5316                typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
5317               vpanic("ARM target supports Iop_ExtractV128 with constant "
5318                      "third argument less than 16 only\n");
5319            }
5320            imm4 = triop->arg3->Iex.Const.con->Ico.U8;
5321            if (imm4 >= 16) {
5322               vpanic("ARM target supports Iop_ExtractV128 with constant "
5323                      "third argument less than 16 only\n");
5324            }
5325            addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5326                                           res, argL, argR, imm4, True));
5327            return res;
5328         }
5329         case Iop_Mul32Fx4:
5330         case Iop_Sub32Fx4:
5331         case Iop_Add32Fx4: {
5332            HReg res = newVRegV(env);
5333            HReg argL = iselNeonExpr(env, triop->arg2);
5334            HReg argR = iselNeonExpr(env, triop->arg3);
5335            UInt size = 0;
5336            ARMNeonBinOp op = ARMneon_INVALID;
5337            switch (triop->op) {
5338               case Iop_Mul32Fx4: op = ARMneon_VMULFP; break;
5339               case Iop_Sub32Fx4: op = ARMneon_VSUBFP; break;
5340               case Iop_Add32Fx4: op = ARMneon_VADDFP; break;
5341               default: vassert(0);
5342            }
5343            addInstr(env, ARMInstr_NBinary(op, res, argL, argR, size, True));
5344            return res;
5345         }
5346         default:
5347            break;
5348      }
5349   }
5350
5351   if (e->tag == Iex_ITE) { // VFD
5352      ARMCondCode cc;
5353      HReg r1  = iselNeonExpr(env, e->Iex.ITE.iftrue);
5354      HReg r0  = iselNeonExpr(env, e->Iex.ITE.iffalse);
5355      HReg dst = newVRegV(env);
5356      addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, r1, 4, True));
5357      cc = iselCondCode(env, e->Iex.ITE.cond);
5358      addInstr(env, ARMInstr_NCMovQ(cc ^ 1, dst, r0));
5359      return dst;
5360   }
5361
5362  neon_expr_bad:
5363   ppIRExpr(e);
5364   vpanic("iselNeonExpr_wrk");
5365}
5366
5367/*---------------------------------------------------------*/
5368/*--- ISEL: Floating point expressions (64 bit)         ---*/
5369/*---------------------------------------------------------*/
5370
5371/* Compute a 64-bit floating point value into a register, the identity
5372   of which is returned.  As with iselIntExpr_R, the reg may be either
5373   real or virtual; in any case it must not be changed by subsequent
5374   code emitted by the caller.  */
5375
5376static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5377{
5378   HReg r = iselDblExpr_wrk( env, e );
5379#  if 0
5380   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5381#  endif
5382   vassert(hregClass(r) == HRcFlt64);
5383   vassert(hregIsVirtual(r));
5384   return r;
5385}
5386
5387/* DO NOT CALL THIS DIRECTLY */
5388static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
5389{
5390   IRType ty = typeOfIRExpr(env->type_env,e);
5391   vassert(e);
5392   vassert(ty == Ity_F64);
5393
5394   if (e->tag == Iex_RdTmp) {
5395      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5396   }
5397
5398   if (e->tag == Iex_Const) {
5399      /* Just handle the zero case. */
5400      IRConst* con = e->Iex.Const.con;
5401      if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
5402         HReg z32 = newVRegI(env);
5403         HReg dst = newVRegD(env);
5404         addInstr(env, ARMInstr_Imm32(z32, 0));
5405         addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
5406         return dst;
5407      }
5408   }
5409
5410   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5411      ARMAModeV* am;
5412      HReg res = newVRegD(env);
5413      vassert(e->Iex.Load.ty == Ity_F64);
5414      am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5415      addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5416      return res;
5417   }
5418
5419   if (e->tag == Iex_Get) {
5420      // XXX This won't work if offset > 1020 or is not 0 % 4.
5421      // In which case we'll have to generate more longwinded code.
5422      ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5423      HReg       res = newVRegD(env);
5424      addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5425      return res;
5426   }
5427
5428   if (e->tag == Iex_Unop) {
5429      switch (e->Iex.Unop.op) {
5430         case Iop_ReinterpI64asF64: {
5431            if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5432               return iselNeon64Expr(env, e->Iex.Unop.arg);
5433            } else {
5434               HReg srcHi, srcLo;
5435               HReg dst = newVRegD(env);
5436               iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
5437               addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
5438               return dst;
5439            }
5440         }
5441         case Iop_NegF64: {
5442            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5443            HReg dst = newVRegD(env);
5444            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
5445            return dst;
5446         }
5447         case Iop_AbsF64: {
5448            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5449            HReg dst = newVRegD(env);
5450            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
5451            return dst;
5452         }
5453         case Iop_F32toF64: {
5454            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5455            HReg dst = newVRegD(env);
5456            addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
5457            return dst;
5458         }
5459         case Iop_I32UtoF64:
5460         case Iop_I32StoF64: {
5461            HReg src   = iselIntExpr_R(env, e->Iex.Unop.arg);
5462            HReg f32   = newVRegF(env);
5463            HReg dst   = newVRegD(env);
5464            Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
5465            /* VMOV f32, src */
5466            addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
5467            /* FSITOD dst, f32 */
5468            addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
5469                                          dst, f32));
5470            return dst;
5471         }
5472         default:
5473            break;
5474      }
5475   }
5476
5477   if (e->tag == Iex_Binop) {
5478      switch (e->Iex.Binop.op) {
5479         case Iop_SqrtF64: {
5480            /* first arg is rounding mode; we ignore it. */
5481            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
5482            HReg dst = newVRegD(env);
5483            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
5484            return dst;
5485         }
5486         default:
5487            break;
5488      }
5489   }
5490
5491   if (e->tag == Iex_Triop) {
5492      IRTriop *triop = e->Iex.Triop.details;
5493
5494      switch (triop->op) {
5495         case Iop_DivF64:
5496         case Iop_MulF64:
5497         case Iop_AddF64:
5498         case Iop_SubF64: {
5499            ARMVfpOp op = 0; /*INVALID*/
5500            HReg argL = iselDblExpr(env, triop->arg2);
5501            HReg argR = iselDblExpr(env, triop->arg3);
5502            HReg dst  = newVRegD(env);
5503            switch (triop->op) {
5504               case Iop_DivF64: op = ARMvfp_DIV; break;
5505               case Iop_MulF64: op = ARMvfp_MUL; break;
5506               case Iop_AddF64: op = ARMvfp_ADD; break;
5507               case Iop_SubF64: op = ARMvfp_SUB; break;
5508               default: vassert(0);
5509            }
5510            addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
5511            return dst;
5512         }
5513         default:
5514            break;
5515      }
5516   }
5517
5518   if (e->tag == Iex_ITE) { // VFD
5519      if (ty == Ity_F64
5520          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
5521         HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
5522         HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
5523         HReg dst = newVRegD(env);
5524         addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, r1));
5525         ARMCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
5526         addInstr(env, ARMInstr_VCMovD(cc ^ 1, dst, r0));
5527         return dst;
5528      }
5529   }
5530
5531   ppIRExpr(e);
5532   vpanic("iselDblExpr_wrk");
5533}
5534
5535
5536/*---------------------------------------------------------*/
5537/*--- ISEL: Floating point expressions (32 bit)         ---*/
5538/*---------------------------------------------------------*/
5539
5540/* Compute a 32-bit floating point value into a register, the identity
5541   of which is returned.  As with iselIntExpr_R, the reg may be either
5542   real or virtual; in any case it must not be changed by subsequent
5543   code emitted by the caller.  */
5544
5545static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
5546{
5547   HReg r = iselFltExpr_wrk( env, e );
5548#  if 0
5549   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5550#  endif
5551   vassert(hregClass(r) == HRcFlt32);
5552   vassert(hregIsVirtual(r));
5553   return r;
5554}
5555
5556/* DO NOT CALL THIS DIRECTLY */
5557static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
5558{
5559   IRType ty = typeOfIRExpr(env->type_env,e);
5560   vassert(e);
5561   vassert(ty == Ity_F32);
5562
5563   if (e->tag == Iex_RdTmp) {
5564      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5565   }
5566
5567   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5568      ARMAModeV* am;
5569      HReg res = newVRegF(env);
5570      vassert(e->Iex.Load.ty == Ity_F32);
5571      am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5572      addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5573      return res;
5574   }
5575
5576   if (e->tag == Iex_Get) {
5577      // XXX This won't work if offset > 1020 or is not 0 % 4.
5578      // In which case we'll have to generate more longwinded code.
5579      ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5580      HReg       res = newVRegF(env);
5581      addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5582      return res;
5583   }
5584
5585   if (e->tag == Iex_Unop) {
5586      switch (e->Iex.Unop.op) {
5587         case Iop_ReinterpI32asF32: {
5588            HReg dst = newVRegF(env);
5589            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5590            addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
5591            return dst;
5592         }
5593         case Iop_NegF32: {
5594            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5595            HReg dst = newVRegF(env);
5596            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
5597            return dst;
5598         }
5599         case Iop_AbsF32: {
5600            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5601            HReg dst = newVRegF(env);
5602            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
5603            return dst;
5604         }
5605         default:
5606            break;
5607      }
5608   }
5609
5610   if (e->tag == Iex_Binop) {
5611      switch (e->Iex.Binop.op) {
5612         case Iop_SqrtF32: {
5613            /* first arg is rounding mode; we ignore it. */
5614            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
5615            HReg dst = newVRegF(env);
5616            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
5617            return dst;
5618         }
5619         case Iop_F64toF32: {
5620            HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
5621            set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5622            HReg valS = newVRegF(env);
5623            /* FCVTSD valS, valD */
5624            addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
5625            set_VFP_rounding_default(env);
5626            return valS;
5627         }
5628         default:
5629            break;
5630      }
5631   }
5632
5633   if (e->tag == Iex_Triop) {
5634      IRTriop *triop = e->Iex.Triop.details;
5635
5636      switch (triop->op) {
5637         case Iop_DivF32:
5638         case Iop_MulF32:
5639         case Iop_AddF32:
5640         case Iop_SubF32: {
5641            ARMVfpOp op = 0; /*INVALID*/
5642            HReg argL = iselFltExpr(env, triop->arg2);
5643            HReg argR = iselFltExpr(env, triop->arg3);
5644            HReg dst  = newVRegF(env);
5645            switch (triop->op) {
5646               case Iop_DivF32: op = ARMvfp_DIV; break;
5647               case Iop_MulF32: op = ARMvfp_MUL; break;
5648               case Iop_AddF32: op = ARMvfp_ADD; break;
5649               case Iop_SubF32: op = ARMvfp_SUB; break;
5650               default: vassert(0);
5651            }
5652            addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
5653            return dst;
5654         }
5655         default:
5656            break;
5657      }
5658   }
5659
5660   if (e->tag == Iex_ITE) { // VFD
5661      if (ty == Ity_F32
5662          && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
5663         ARMCondCode cc;
5664         HReg r1  = iselFltExpr(env, e->Iex.ITE.iftrue);
5665         HReg r0  = iselFltExpr(env, e->Iex.ITE.iffalse);
5666         HReg dst = newVRegF(env);
5667         addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, r1));
5668         cc = iselCondCode(env, e->Iex.ITE.cond);
5669         addInstr(env, ARMInstr_VCMovS(cc ^ 1, dst, r0));
5670         return dst;
5671      }
5672   }
5673
5674   ppIRExpr(e);
5675   vpanic("iselFltExpr_wrk");
5676}
5677
5678
5679/*---------------------------------------------------------*/
5680/*--- ISEL: Statements                                  ---*/
5681/*---------------------------------------------------------*/
5682
5683static void iselStmt ( ISelEnv* env, IRStmt* stmt )
5684{
5685   if (vex_traceflags & VEX_TRACE_VCODE) {
5686      vex_printf("\n-- ");
5687      ppIRStmt(stmt);
5688      vex_printf("\n");
5689   }
5690   switch (stmt->tag) {
5691
5692   /* --------- STORE --------- */
5693   /* little-endian write to memory */
5694   case Ist_Store: {
5695      IRType    tya  = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
5696      IRType    tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
5697      IREndness end  = stmt->Ist.Store.end;
5698
5699      if (tya != Ity_I32 || end != Iend_LE)
5700         goto stmt_fail;
5701
5702      if (tyd == Ity_I32) {
5703         HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5704         ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5705         addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am));
5706         return;
5707      }
5708      if (tyd == Ity_I16) {
5709         HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5710         ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
5711         addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
5712                                       False/*!isLoad*/,
5713                                       False/*!isSignedLoad*/, rD, am));
5714         return;
5715      }
5716      if (tyd == Ity_I8) {
5717         HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5718         ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5719         addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, False/*!isLoad*/, rD, am));
5720         return;
5721      }
5722      if (tyd == Ity_I64) {
5723         if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5724            HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
5725            ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5726            addInstr(env, ARMInstr_NLdStD(False, dD, am));
5727         } else {
5728            HReg rDhi, rDlo, rA;
5729            iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
5730            rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
5731            addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDhi,
5732                                          ARMAMode1_RI(rA,4)));
5733            addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDlo,
5734                                          ARMAMode1_RI(rA,0)));
5735         }
5736         return;
5737      }
5738      if (tyd == Ity_F64) {
5739         HReg       dD = iselDblExpr(env, stmt->Ist.Store.data);
5740         ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5741         addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
5742         return;
5743      }
5744      if (tyd == Ity_F32) {
5745         HReg       fD = iselFltExpr(env, stmt->Ist.Store.data);
5746         ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5747         addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
5748         return;
5749      }
5750      if (tyd == Ity_V128) {
5751         HReg       qD = iselNeonExpr(env, stmt->Ist.Store.data);
5752         ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5753         addInstr(env, ARMInstr_NLdStQ(False, qD, am));
5754         return;
5755      }
5756
5757      break;
5758   }
5759
5760   /* --------- CONDITIONAL STORE --------- */
5761   /* conditional little-endian write to memory */
5762   case Ist_StoreG: {
5763      IRStoreG* sg   = stmt->Ist.StoreG.details;
5764      IRType    tya  = typeOfIRExpr(env->type_env, sg->addr);
5765      IRType    tyd  = typeOfIRExpr(env->type_env, sg->data);
5766      IREndness end  = sg->end;
5767
5768      if (tya != Ity_I32 || end != Iend_LE)
5769         goto stmt_fail;
5770
5771      switch (tyd) {
5772         case Ity_I8:
5773         case Ity_I32: {
5774            HReg        rD = iselIntExpr_R(env, sg->data);
5775            ARMAMode1*  am = iselIntExpr_AMode1(env, sg->addr);
5776            ARMCondCode cc = iselCondCode(env, sg->guard);
5777            addInstr(env, (tyd == Ity_I32 ? ARMInstr_LdSt32 : ARMInstr_LdSt8U)
5778                             (cc, False/*!isLoad*/, rD, am));
5779            return;
5780         }
5781         case Ity_I16: {
5782            HReg        rD = iselIntExpr_R(env, sg->data);
5783            ARMAMode2*  am = iselIntExpr_AMode2(env, sg->addr);
5784            ARMCondCode cc = iselCondCode(env, sg->guard);
5785            addInstr(env, ARMInstr_LdSt16(cc,
5786                                          False/*!isLoad*/,
5787                                          False/*!isSignedLoad*/, rD, am));
5788            return;
5789         }
5790         default:
5791            break;
5792      }
5793      break;
5794   }
5795
5796   /* --------- CONDITIONAL LOAD --------- */
5797   /* conditional little-endian load from memory */
5798   case Ist_LoadG: {
5799      IRLoadG*  lg   = stmt->Ist.LoadG.details;
5800      IRType    tya  = typeOfIRExpr(env->type_env, lg->addr);
5801      IREndness end  = lg->end;
5802
5803      if (tya != Ity_I32 || end != Iend_LE)
5804         goto stmt_fail;
5805
5806      switch (lg->cvt) {
5807         case ILGop_8Uto32:
5808         case ILGop_Ident32: {
5809            HReg        rAlt = iselIntExpr_R(env, lg->alt);
5810            ARMAMode1*  am   = iselIntExpr_AMode1(env, lg->addr);
5811            HReg        rD   = lookupIRTemp(env, lg->dst);
5812            addInstr(env, mk_iMOVds_RR(rD, rAlt));
5813            ARMCondCode cc   = iselCondCode(env, lg->guard);
5814            addInstr(env, (lg->cvt == ILGop_Ident32 ? ARMInstr_LdSt32
5815                                                    : ARMInstr_LdSt8U)
5816                             (cc, True/*isLoad*/, rD, am));
5817            return;
5818         }
5819         case ILGop_16Sto32:
5820         case ILGop_16Uto32:
5821         case ILGop_8Sto32: {
5822            HReg        rAlt = iselIntExpr_R(env, lg->alt);
5823            ARMAMode2*  am   = iselIntExpr_AMode2(env, lg->addr);
5824            HReg        rD   = lookupIRTemp(env, lg->dst);
5825            addInstr(env, mk_iMOVds_RR(rD, rAlt));
5826            ARMCondCode cc   = iselCondCode(env, lg->guard);
5827            if (lg->cvt == ILGop_8Sto32) {
5828               addInstr(env, ARMInstr_Ld8S(cc, rD, am));
5829            } else {
5830               vassert(lg->cvt == ILGop_16Sto32 || lg->cvt == ILGop_16Uto32);
5831               Bool sx = lg->cvt == ILGop_16Sto32;
5832               addInstr(env, ARMInstr_LdSt16(cc, True/*isLoad*/, sx, rD, am));
5833            }
5834            return;
5835         }
5836         default:
5837            break;
5838      }
5839      break;
5840   }
5841
5842   /* --------- PUT --------- */
5843   /* write guest state, fixed offset */
5844   case Ist_Put: {
5845       IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
5846
5847       if (tyd == Ity_I32) {
5848           HReg       rD = iselIntExpr_R(env, stmt->Ist.Put.data);
5849           ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
5850           addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/, rD, am));
5851           return;
5852       }
5853       if (tyd == Ity_I64) {
5854          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5855             HReg addr = newVRegI(env);
5856             HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
5857             addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5858                                                stmt->Ist.Put.offset));
5859             addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
5860          } else {
5861             HReg rDhi, rDlo;
5862             ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
5863                                           stmt->Ist.Put.offset + 0);
5864             ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
5865                                           stmt->Ist.Put.offset + 4);
5866             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
5867             addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
5868                                           rDhi, am4));
5869             addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
5870                                           rDlo, am0));
5871          }
5872          return;
5873       }
5874       if (tyd == Ity_F64) {
5875          // XXX This won't work if offset > 1020 or is not 0 % 4.
5876          // In which case we'll have to generate more longwinded code.
5877          ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5878          HReg       rD = iselDblExpr(env, stmt->Ist.Put.data);
5879          addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
5880          return;
5881       }
5882       if (tyd == Ity_F32) {
5883          // XXX This won't work if offset > 1020 or is not 0 % 4.
5884          // In which case we'll have to generate more longwinded code.
5885          ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5886          HReg       rD = iselFltExpr(env, stmt->Ist.Put.data);
5887          addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
5888          return;
5889       }
5890       if (tyd == Ity_V128) {
5891          HReg addr = newVRegI(env);
5892          HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
5893          addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5894                                       stmt->Ist.Put.offset));
5895          addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
5896          return;
5897       }
5898       break;
5899   }
5900
5901   /* --------- TMP --------- */
5902   /* assign value to temporary */
5903   case Ist_WrTmp: {
5904      IRTemp tmp = stmt->Ist.WrTmp.tmp;
5905      IRType ty = typeOfIRTemp(env->type_env, tmp);
5906
5907      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
5908         ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
5909                                          env, stmt->Ist.WrTmp.data);
5910         HReg     dst  = lookupIRTemp(env, tmp);
5911         addInstr(env, ARMInstr_Mov(dst,ri84));
5912         return;
5913      }
5914      if (ty == Ity_I1) {
5915         /* Here, we are generating a I1 value into a 32 bit register.
5916            Make sure the value in the register is only zero or one,
5917            but no other.  This allows optimisation of the
5918            1Uto32(tmp:I1) case, by making it simply a copy of the
5919            register holding 'tmp'.  The point being that the value in
5920            the register holding 'tmp' can only have been created
5921            here. */
5922         HReg        dst  = lookupIRTemp(env, tmp);
5923         ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
5924         addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
5925         addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
5926         return;
5927      }
5928      if (ty == Ity_I64) {
5929         if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5930            HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
5931            HReg dst = lookupIRTemp(env, tmp);
5932            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
5933         } else {
5934            HReg rHi, rLo, dstHi, dstLo;
5935            iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
5936            lookupIRTemp64( &dstHi, &dstLo, env, tmp);
5937            addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
5938            addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
5939         }
5940         return;
5941      }
5942      if (ty == Ity_F64) {
5943         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
5944         HReg dst = lookupIRTemp(env, tmp);
5945         addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
5946         return;
5947      }
5948      if (ty == Ity_F32) {
5949         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
5950         HReg dst = lookupIRTemp(env, tmp);
5951         addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
5952         return;
5953      }
5954      if (ty == Ity_V128) {
5955         HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
5956         HReg dst = lookupIRTemp(env, tmp);
5957         addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
5958         return;
5959      }
5960      break;
5961   }
5962
5963   /* --------- Call to DIRTY helper --------- */
5964   /* call complex ("dirty") helper function */
5965   case Ist_Dirty: {
5966      IRDirty* d = stmt->Ist.Dirty.details;
5967
5968      /* Figure out the return type, if any. */
5969      IRType retty = Ity_INVALID;
5970      if (d->tmp != IRTemp_INVALID)
5971         retty = typeOfIRTemp(env->type_env, d->tmp);
5972
5973      Bool retty_ok = False;
5974      switch (retty) {
5975         case Ity_INVALID: /* function doesn't return anything */
5976         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
5977         //case Ity_V128: //ATC
5978            retty_ok = True; break;
5979         default:
5980            break;
5981      }
5982      if (!retty_ok)
5983         break; /* will go to stmt_fail: */
5984
5985      /* Marshal args, do the call, and set the return value to 0x555..555
5986         if this is a conditional call that returns a value and the
5987         call is skipped. */
5988      UInt   addToSp = 0;
5989      RetLoc rloc    = mk_RetLoc_INVALID();
5990      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
5991      vassert(is_sane_RetLoc(rloc));
5992
5993      /* Now figure out what to do with the returned value, if any. */
5994      switch (retty) {
5995         case Ity_INVALID: {
5996            /* No return value.  Nothing to do. */
5997            vassert(d->tmp == IRTemp_INVALID);
5998            vassert(rloc.pri == RLPri_None);
5999            vassert(addToSp == 0);
6000            return;
6001         }
6002         case Ity_I64: {
6003            vassert(rloc.pri == RLPri_2Int);
6004            vassert(addToSp == 0);
6005            if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6006               HReg tmp = lookupIRTemp(env, d->tmp);
6007               addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
6008                                                        hregARM_R0()));
6009            } else {
6010               HReg dstHi, dstLo;
6011               /* The returned value is in r1:r0.  Park it in the
6012                  register-pair associated with tmp. */
6013               lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
6014               addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
6015               addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
6016            }
6017            return;
6018         }
6019         case Ity_I32: case Ity_I16: case Ity_I8: {
6020            vassert(rloc.pri == RLPri_Int);
6021            vassert(addToSp == 0);
6022            /* The returned value is in r0.  Park it in the register
6023               associated with tmp. */
6024            HReg dst = lookupIRTemp(env, d->tmp);
6025            addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
6026            return;
6027         }
6028         case Ity_V128: {
6029            vassert(0); // ATC.  The code that this produces really
6030            // needs to be looked at, to verify correctness.
6031            // I don't think this can ever happen though, since the
6032            // ARM front end never produces 128-bit loads/stores.
6033            // Hence the following is mostly theoretical.
6034            /* The returned value is on the stack, and *retloc tells
6035               us where.  Fish it off the stack and then move the
6036               stack pointer upwards to clear it, as directed by
6037               doHelperCall. */
6038            vassert(rloc.pri == RLPri_V128SpRel);
6039            vassert(rloc.spOff < 256); // else ARMRI84_I84(_,0) can't encode it
6040            vassert(addToSp >= 16);
6041            vassert(addToSp < 256); // ditto reason as for rloc.spOff
6042            HReg dst = lookupIRTemp(env, d->tmp);
6043            HReg tmp = newVRegI(env);
6044            HReg r13 = hregARM_R13(); // sp
6045            addInstr(env, ARMInstr_Alu(ARMalu_ADD,
6046                                       tmp, r13, ARMRI84_I84(rloc.spOff,0)));
6047            ARMAModeN* am = mkARMAModeN_R(tmp);
6048            addInstr(env, ARMInstr_NLdStQ(True/*load*/, dst, am));
6049            addInstr(env, ARMInstr_Alu(ARMalu_ADD,
6050                                       r13, r13, ARMRI84_I84(addToSp,0)));
6051            return;
6052         }
6053         default:
6054            /*NOTREACHED*/
6055            vassert(0);
6056      }
6057      break;
6058   }
6059
6060   /* --------- Load Linked and Store Conditional --------- */
6061   case Ist_LLSC: {
6062      if (stmt->Ist.LLSC.storedata == NULL) {
6063         /* LL */
6064         IRTemp res = stmt->Ist.LLSC.result;
6065         IRType ty  = typeOfIRTemp(env->type_env, res);
6066         if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
6067            Int  szB   = 0;
6068            HReg r_dst = lookupIRTemp(env, res);
6069            HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6070            switch (ty) {
6071               case Ity_I8:  szB = 1; break;
6072               case Ity_I16: szB = 2; break;
6073               case Ity_I32: szB = 4; break;
6074               default:      vassert(0);
6075            }
6076            addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6077            addInstr(env, ARMInstr_LdrEX(szB));
6078            addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
6079            return;
6080         }
6081         if (ty == Ity_I64) {
6082            HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6083            addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
6084            addInstr(env, ARMInstr_LdrEX(8));
6085            /* Result is in r3:r2.  On a non-NEON capable CPU, we must
6086               move it into a result register pair.  On a NEON capable
6087               CPU, the result register will be a 64 bit NEON
6088               register, so we must move it there instead. */
6089            if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6090               HReg dst = lookupIRTemp(env, res);
6091               addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
6092                                                        hregARM_R2()));
6093            } else {
6094               HReg r_dst_hi, r_dst_lo;
6095               lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
6096               addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
6097               addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
6098            }
6099            return;
6100         }
6101         /*NOTREACHED*/
6102         vassert(0);
6103      } else {
6104         /* SC */
6105         IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
6106         if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
6107            Int  szB = 0;
6108            HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
6109            HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6110            switch (tyd) {
6111               case Ity_I8:  szB = 1; break;
6112               case Ity_I16: szB = 2; break;
6113               case Ity_I32: szB = 4; break;
6114               default:      vassert(0);
6115            }
6116            addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
6117            addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6118            addInstr(env, ARMInstr_StrEX(szB));
6119         } else {
6120            vassert(tyd == Ity_I64);
6121            /* This is really ugly.  There is no is/is-not NEON
6122               decision akin to the case for LL, because iselInt64Expr
6123               fudges this for us, and always gets the result into two
6124               GPRs even if this means moving it from a NEON
6125               register. */
6126            HReg rDhi, rDlo;
6127            iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
6128            HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
6129            addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
6130            addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
6131            addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
6132            addInstr(env, ARMInstr_StrEX(8));
6133         }
6134         /* now r0 is 1 if failed, 0 if success.  Change to IR
6135            conventions (0 is fail, 1 is success).  Also transfer
6136            result to r_res. */
6137         IRTemp   res   = stmt->Ist.LLSC.result;
6138         IRType   ty    = typeOfIRTemp(env->type_env, res);
6139         HReg     r_res = lookupIRTemp(env, res);
6140         ARMRI84* one   = ARMRI84_I84(1,0);
6141         vassert(ty == Ity_I1);
6142         addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
6143         /* And be conservative -- mask off all but the lowest bit */
6144         addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
6145         return;
6146      }
6147      break;
6148   }
6149
6150   /* --------- MEM FENCE --------- */
6151   case Ist_MBE:
6152      switch (stmt->Ist.MBE.event) {
6153         case Imbe_Fence:
6154            addInstr(env, ARMInstr_MFence());
6155            return;
6156         case Imbe_CancelReservation:
6157            addInstr(env, ARMInstr_CLREX());
6158            return;
6159         default:
6160            break;
6161      }
6162      break;
6163
6164   /* --------- INSTR MARK --------- */
6165   /* Doesn't generate any executable code ... */
6166   case Ist_IMark:
6167       return;
6168
6169   /* --------- NO-OP --------- */
6170   case Ist_NoOp:
6171       return;
6172
6173   /* --------- EXIT --------- */
6174   case Ist_Exit: {
6175      if (stmt->Ist.Exit.dst->tag != Ico_U32)
6176         vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
6177
6178      ARMCondCode cc     = iselCondCode(env, stmt->Ist.Exit.guard);
6179      ARMAMode1*  amR15T = ARMAMode1_RI(hregARM_R8(),
6180                                        stmt->Ist.Exit.offsIP);
6181
6182      /* Case: boring transfer to known address */
6183      if (stmt->Ist.Exit.jk == Ijk_Boring
6184          || stmt->Ist.Exit.jk == Ijk_Call
6185          || stmt->Ist.Exit.jk == Ijk_Ret) {
6186         if (env->chainingAllowed) {
6187            /* .. almost always true .. */
6188            /* Skip the event check at the dst if this is a forwards
6189               edge. */
6190            Bool toFastEP
6191               = stmt->Ist.Exit.dst->Ico.U32 > env->max_ga;
6192            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
6193            addInstr(env, ARMInstr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
6194                                           amR15T, cc, toFastEP));
6195         } else {
6196            /* .. very occasionally .. */
6197            /* We can't use chaining, so ask for an assisted transfer,
6198               as that's the only alternative that is allowable. */
6199            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6200            addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, Ijk_Boring));
6201         }
6202         return;
6203      }
6204
6205      /* Case: assisted transfer to arbitrary address */
6206      switch (stmt->Ist.Exit.jk) {
6207         /* Keep this list in sync with that in iselNext below */
6208         case Ijk_ClientReq:
6209         case Ijk_NoDecode:
6210         case Ijk_NoRedir:
6211         case Ijk_Sys_syscall:
6212         case Ijk_InvalICache:
6213         case Ijk_Yield:
6214         {
6215            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6216            addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
6217                                             stmt->Ist.Exit.jk));
6218            return;
6219         }
6220         default:
6221            break;
6222      }
6223
6224      /* Do we ever expect to see any other kind? */
6225      goto stmt_fail;
6226   }
6227
6228   default: break;
6229   }
6230  stmt_fail:
6231   ppIRStmt(stmt);
6232   vpanic("iselStmt");
6233}
6234
6235
6236/*---------------------------------------------------------*/
6237/*--- ISEL: Basic block terminators (Nexts)             ---*/
6238/*---------------------------------------------------------*/
6239
6240static void iselNext ( ISelEnv* env,
6241                       IRExpr* next, IRJumpKind jk, Int offsIP )
6242{
6243   if (vex_traceflags & VEX_TRACE_VCODE) {
6244      vex_printf( "\n-- PUT(%d) = ", offsIP);
6245      ppIRExpr( next );
6246      vex_printf( "; exit-");
6247      ppIRJumpKind(jk);
6248      vex_printf( "\n");
6249   }
6250
6251   /* Case: boring transfer to known address */
6252   if (next->tag == Iex_Const) {
6253      IRConst* cdst = next->Iex.Const.con;
6254      vassert(cdst->tag == Ico_U32);
6255      if (jk == Ijk_Boring || jk == Ijk_Call) {
6256         /* Boring transfer to known address */
6257         ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6258         if (env->chainingAllowed) {
6259            /* .. almost always true .. */
6260            /* Skip the event check at the dst if this is a forwards
6261               edge. */
6262            Bool toFastEP
6263               = cdst->Ico.U32 > env->max_ga;
6264            if (0) vex_printf("%s", toFastEP ? "X" : ".");
6265            addInstr(env, ARMInstr_XDirect(cdst->Ico.U32,
6266                                           amR15T, ARMcc_AL,
6267                                           toFastEP));
6268         } else {
6269            /* .. very occasionally .. */
6270            /* We can't use chaining, so ask for an assisted transfer,
6271               as that's the only alternative that is allowable. */
6272            HReg r = iselIntExpr_R(env, next);
6273            addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
6274                                             Ijk_Boring));
6275         }
6276         return;
6277      }
6278   }
6279
6280   /* Case: call/return (==boring) transfer to any address */
6281   switch (jk) {
6282      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
6283         HReg       r      = iselIntExpr_R(env, next);
6284         ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6285         if (env->chainingAllowed) {
6286            addInstr(env, ARMInstr_XIndir(r, amR15T, ARMcc_AL));
6287         } else {
6288            addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
6289                                                Ijk_Boring));
6290         }
6291         return;
6292      }
6293      default:
6294         break;
6295   }
6296
6297   /* Case: assisted transfer to arbitrary address */
6298   switch (jk) {
6299      /* Keep this list in sync with that for Ist_Exit above */
6300      case Ijk_ClientReq:
6301      case Ijk_NoDecode:
6302      case Ijk_NoRedir:
6303      case Ijk_Sys_syscall:
6304      case Ijk_InvalICache:
6305      case Ijk_Yield:
6306      {
6307         HReg       r      = iselIntExpr_R(env, next);
6308         ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6309         addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, jk));
6310         return;
6311      }
6312      default:
6313         break;
6314   }
6315
6316   vex_printf( "\n-- PUT(%d) = ", offsIP);
6317   ppIRExpr( next );
6318   vex_printf( "; exit-");
6319   ppIRJumpKind(jk);
6320   vex_printf( "\n");
6321   vassert(0); // are we expecting any other kind?
6322}
6323
6324
6325/*---------------------------------------------------------*/
6326/*--- Insn selector top-level                           ---*/
6327/*---------------------------------------------------------*/
6328
6329/* Translate an entire SB to arm code. */
6330
6331HInstrArray* iselSB_ARM ( const IRSB* bb,
6332                          VexArch      arch_host,
6333                          const VexArchInfo* archinfo_host,
6334                          const VexAbiInfo*  vbi/*UNUSED*/,
6335                          Int offs_Host_EvC_Counter,
6336                          Int offs_Host_EvC_FailAddr,
6337                          Bool chainingAllowed,
6338                          Bool addProfInc,
6339                          Addr max_ga )
6340{
6341   Int       i, j;
6342   HReg      hreg, hregHI;
6343   ISelEnv*  env;
6344   UInt      hwcaps_host = archinfo_host->hwcaps;
6345   ARMAMode1 *amCounter, *amFailAddr;
6346
6347   /* sanity ... */
6348   vassert(arch_host == VexArchARM);
6349
6350   /* Check that the host's endianness is as expected. */
6351   vassert(archinfo_host->endness == VexEndnessLE);
6352
6353   /* guard against unexpected space regressions */
6354   vassert(sizeof(ARMInstr) <= 28);
6355
6356   /* hwcaps should not change from one ISEL call to another. */
6357   arm_hwcaps = hwcaps_host; // JRS 2012 Mar 31: FIXME (RM)
6358
6359   /* Make up an initial environment to use. */
6360   env = LibVEX_Alloc_inline(sizeof(ISelEnv));
6361   env->vreg_ctr = 0;
6362
6363   /* Set up output code array. */
6364   env->code = newHInstrArray();
6365
6366   /* Copy BB's type env. */
6367   env->type_env = bb->tyenv;
6368
6369   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
6370      change as we go along. */
6371   env->n_vregmap = bb->tyenv->types_used;
6372   env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
6373   env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
6374
6375   /* and finally ... */
6376   env->chainingAllowed = chainingAllowed;
6377   env->hwcaps          = hwcaps_host;
6378   env->max_ga          = max_ga;
6379
6380   /* For each IR temporary, allocate a suitably-kinded virtual
6381      register. */
6382   j = 0;
6383   for (i = 0; i < env->n_vregmap; i++) {
6384      hregHI = hreg = INVALID_HREG;
6385      switch (bb->tyenv->types[i]) {
6386         case Ity_I1:
6387         case Ity_I8:
6388         case Ity_I16:
6389         case Ity_I32:  hreg   = mkHReg(True, HRcInt32, 0, j++); break;
6390         case Ity_I64:
6391            if (hwcaps_host & VEX_HWCAPS_ARM_NEON) {
6392               hreg = mkHReg(True, HRcFlt64, 0, j++);
6393            } else {
6394               hregHI = mkHReg(True, HRcInt32, 0, j++);
6395               hreg   = mkHReg(True, HRcInt32, 0, j++);
6396            }
6397            break;
6398         case Ity_F32:  hreg   = mkHReg(True, HRcFlt32,  0, j++); break;
6399         case Ity_F64:  hreg   = mkHReg(True, HRcFlt64,  0, j++); break;
6400         case Ity_V128: hreg   = mkHReg(True, HRcVec128, 0, j++); break;
6401         default: ppIRType(bb->tyenv->types[i]);
6402                  vpanic("iselBB: IRTemp type");
6403      }
6404      env->vregmap[i]   = hreg;
6405      env->vregmapHI[i] = hregHI;
6406   }
6407   env->vreg_ctr = j;
6408
6409   /* The very first instruction must be an event check. */
6410   amCounter  = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter);
6411   amFailAddr = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr);
6412   addInstr(env, ARMInstr_EvCheck(amCounter, amFailAddr));
6413
6414   /* Possibly a block counter increment (for profiling).  At this
6415      point we don't know the address of the counter, so just pretend
6416      it is zero.  It will have to be patched later, but before this
6417      translation is used, by a call to LibVEX_patchProfCtr. */
6418   if (addProfInc) {
6419      addInstr(env, ARMInstr_ProfInc());
6420   }
6421
6422   /* Ok, finally we can iterate over the statements. */
6423   for (i = 0; i < bb->stmts_used; i++)
6424      iselStmt(env, bb->stmts[i]);
6425
6426   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
6427
6428   /* record the number of vregs we used. */
6429   env->code->n_vregs = env->vreg_ctr;
6430   return env->code;
6431}
6432
6433
6434/*---------------------------------------------------------------*/
6435/*--- end                                     host_arm_isel.c ---*/
6436/*---------------------------------------------------------------*/
6437
6438