1
2/*---------------------------------------------------------------*/
3/*--- begin                                   host_arm_isel.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2012 OpenWorks LLP
11      info@open-works.net
12
13   NEON support is
14   Copyright (C) 2010-2012 Samsung Electronics
15   contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16              and Kirill Batuzov <batuzovk@ispras.ru>
17
18   This program is free software; you can redistribute it and/or
19   modify it under the terms of the GNU General Public License as
20   published by the Free Software Foundation; either version 2 of the
21   License, or (at your option) any later version.
22
23   This program is distributed in the hope that it will be useful, but
24   WITHOUT ANY WARRANTY; without even the implied warranty of
25   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
26   General Public License for more details.
27
28   You should have received a copy of the GNU General Public License
29   along with this program; if not, write to the Free Software
30   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31   02110-1301, USA.
32
33   The GNU General Public License is contained in the file COPYING.
34*/
35
36#include "libvex_basictypes.h"
37#include "libvex_ir.h"
38#include "libvex.h"
39#include "ir_match.h"
40
41#include "main_util.h"
42#include "main_globals.h"
43#include "host_generic_regs.h"
44#include "host_generic_simd64.h"  // for 32-bit SIMD helpers
45#include "host_arm_defs.h"
46
47
48/*---------------------------------------------------------*/
49/*--- ARMvfp control word stuff                         ---*/
50/*---------------------------------------------------------*/
51
52/* Vex-generated code expects to run with the FPU set as follows: all
53   exceptions masked, round-to-nearest, non-vector mode, with the NZCV
54   flags cleared, and FZ (flush to zero) disabled.  Curiously enough,
55   this corresponds to a FPSCR value of zero.
56
57   fpscr should therefore be zero on entry to Vex-generated code, and
58   should be unchanged at exit.  (Or at least the bottom 28 bits
59   should be zero).
60*/
61
62#define DEFAULT_FPSCR 0
63
64
65/*---------------------------------------------------------*/
66/*--- ISelEnv                                           ---*/
67/*---------------------------------------------------------*/
68
69/* This carries around:
70
71   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
72     might encounter.  This is computed before insn selection starts,
73     and does not change.
74
75   - A mapping from IRTemp to HReg.  This tells the insn selector
76     which virtual register(s) are associated with each IRTemp
77     temporary.  This is computed before insn selection starts, and
78     does not change.  We expect this mapping to map precisely the
79     same set of IRTemps as the type mapping does.
80
81        - vregmap   holds the primary register for the IRTemp.
82        - vregmapHI is only used for 64-bit integer-typed
83             IRTemps.  It holds the identity of a second
84             32-bit virtual HReg, which holds the high half
85             of the value.
86
87   - The code array, that is, the insns selected so far.
88
89   - A counter, for generating new virtual registers.
90
91   - The host hardware capabilities word.  This is set at the start
92     and does not change.
93
94   - A Bool for indicating whether we may generate chain-me
95     instructions for control flow transfers, or whether we must use
96     XAssisted.
97
98   - The maximum guest address of any guest insn in this block.
99     Actually, the address of the highest-addressed byte from any insn
100     in this block.  Is set at the start and does not change.  This is
101     used for detecting jumps which are definitely forward-edges from
102     this block, and therefore can be made (chained) to the fast entry
103     point of the destination, thereby avoiding the destination's
104     event check.
105
106   Note, this is all (well, mostly) host-independent.
107*/
108
/* Per-translation state threaded through every selector routine in
   this file. */
typedef
   struct {
      /* Constant -- are set at the start and do not change. */

      /* IRTemp -> IRType mapping for the block being translated. */
      IRTypeEnv*   type_env;

      /* IRTemp -> virtual register mapping.  vregmap holds the primary
         register; vregmapHI holds the register for the high half of
         64-bit integer temps.  Lookups assert that the temp index is
         below n_vregmap. */
      HReg*        vregmap;
      HReg*        vregmapHI;
      Int          n_vregmap;

      /* Host hardware capabilities word. */
      UInt         hwcaps;

      /* Whether chain-me instructions may be generated for control
         flow transfers. */
      Bool         chainingAllowed;
      /* Address of the highest-addressed guest insn byte in this
         block. */
      Addr64       max_ga;

      /* These are modified as we go along. */

      /* The instructions selected so far. */
      HInstrArray* code;
      /* Number to give the next newly created virtual register. */
      Int          vreg_ctr;
   }
   ISelEnv;
128
129static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
130{
131   vassert(tmp >= 0);
132   vassert(tmp < env->n_vregmap);
133   return env->vregmap[tmp];
134}
135
136static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
137{
138   vassert(tmp >= 0);
139   vassert(tmp < env->n_vregmap);
140   vassert(env->vregmapHI[tmp] != INVALID_HREG);
141   *vrLO = env->vregmap[tmp];
142   *vrHI = env->vregmapHI[tmp];
143}
144
145static void addInstr ( ISelEnv* env, ARMInstr* instr )
146{
147   addHInstr(env->code, instr);
148   if (vex_traceflags & VEX_TRACE_VCODE) {
149      ppARMInstr(instr);
150      vex_printf("\n");
151   }
152#if 0
153   if (instr->tag == ARMin_NUnary || instr->tag == ARMin_NBinary
154         || instr->tag == ARMin_NUnaryS || instr->tag == ARMin_NBinaryS
155         || instr->tag == ARMin_NDual || instr->tag == ARMin_NShift) {
156      ppARMInstr(instr);
157      vex_printf("\n");
158   }
159#endif
160}
161
162static HReg newVRegI ( ISelEnv* env )
163{
164   HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
165   env->vreg_ctr++;
166   return reg;
167}
168
169static HReg newVRegD ( ISelEnv* env )
170{
171   HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
172   env->vreg_ctr++;
173   return reg;
174}
175
176static HReg newVRegF ( ISelEnv* env )
177{
178   HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
179   env->vreg_ctr++;
180   return reg;
181}
182
183static HReg newVRegV ( ISelEnv* env )
184{
185   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
186   env->vreg_ctr++;
187   return reg;
188}
189
190/* These are duplicated in guest_arm_toIR.c */
/* Shorthand: build a unary-operator IR expression applying 'op' to
   'a' by forwarding to IRExpr_Unop. */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}
195
/* Shorthand: build a binary-operator IR expression applying 'op' to
   'a1' and 'a2' by forwarding to IRExpr_Binop. */
static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}
200
/* Shorthand: build a binder IR expression numbered 'binder' by
   forwarding to IRExpr_Binder. */
static IRExpr* bind ( Int binder )
{
   return IRExpr_Binder(binder);
}
205
206
207/*---------------------------------------------------------*/
208/*--- ISEL: Forward declarations                        ---*/
209/*---------------------------------------------------------*/
210
/* These are organised as iselXXX and iselXXX_wrk pairs.  For each
   XXX, iselXXX_wrk does the real work, and iselXXX calls it and then
   checks that all returned registers are virtual.  Always call
   iselXXX; never call the _wrk version directly.
*/
static ARMAMode1*  iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
static ARMAMode1*  iselIntExpr_AMode1     ( ISelEnv* env, IRExpr* e );

static ARMAMode2*  iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
static ARMAMode2*  iselIntExpr_AMode2     ( ISelEnv* env, IRExpr* e );

static ARMAModeV*  iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
static ARMAModeV*  iselIntExpr_AModeV     ( ISelEnv* env, IRExpr* e );

static ARMAModeN*  iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
static ARMAModeN*  iselIntExpr_AModeN     ( ISelEnv* env, IRExpr* e );

static ARMRI84*    iselIntExpr_RI84_wrk
        ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
static ARMRI84*    iselIntExpr_RI84
        ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );

static ARMRI5*     iselIntExpr_RI5_wrk    ( ISelEnv* env, IRExpr* e );
static ARMRI5*     iselIntExpr_RI5        ( ISelEnv* env, IRExpr* e );

static ARMCondCode iselCondCode_wrk       ( ISelEnv* env, IRExpr* e );
static ARMCondCode iselCondCode           ( ISelEnv* env, IRExpr* e );

static HReg        iselIntExpr_R_wrk      ( ISelEnv* env, IRExpr* e );
static HReg        iselIntExpr_R          ( ISelEnv* env, IRExpr* e );

static void        iselInt64Expr_wrk      ( HReg* rHi, HReg* rLo,
                                            ISelEnv* env, IRExpr* e );
static void        iselInt64Expr          ( HReg* rHi, HReg* rLo,
                                            ISelEnv* env, IRExpr* e );

static HReg        iselDblExpr_wrk        ( ISelEnv* env, IRExpr* e );
static HReg        iselDblExpr            ( ISelEnv* env, IRExpr* e );

static HReg        iselFltExpr_wrk        ( ISelEnv* env, IRExpr* e );
static HReg        iselFltExpr            ( ISelEnv* env, IRExpr* e );

static HReg        iselNeon64Expr_wrk     ( ISelEnv* env, IRExpr* e );
static HReg        iselNeon64Expr         ( ISelEnv* env, IRExpr* e );

static HReg        iselNeonExpr_wrk       ( ISelEnv* env, IRExpr* e );
static HReg        iselNeonExpr           ( ISelEnv* env, IRExpr* e );
259
260/*---------------------------------------------------------*/
261/*--- ISEL: Misc helpers                                ---*/
262/*---------------------------------------------------------*/
263
264static UInt ROR32 ( UInt x, UInt sh ) {
265   vassert(sh >= 0 && sh < 32);
266   if (sh == 0)
267      return x;
268   else
269      return (x << (32-sh)) | (x >> sh);
270}
271
272/* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
273   form, and if so return the components. */
274static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
275{
276   UInt i;
277   for (i = 0; i < 16; i++) {
278      if (0 == (u & 0xFFFFFF00)) {
279         *u8 = u;
280         *u4 = i;
281         return True;
282      }
283      u = ROR32(u, 30);
284   }
285   vassert(i == 16);
286   return False;
287}
288
/* Make an integer register-to-register move, dst := src.  Both
   registers must be of class HRcInt32 (asserted).  The instruction
   is only constructed and returned; it is not added to any code
   list here. */
static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
{
   vassert(hregClass(src) == HRcInt32);
   vassert(hregClass(dst) == HRcInt32);
   return ARMInstr_Mov(dst, ARMRI84_R(src));
}
296
297/* Set the VFP unit's rounding mode to default (round to nearest). */
298static void set_VFP_rounding_default ( ISelEnv* env )
299{
300   /* mov rTmp, #DEFAULT_FPSCR
301      fmxr fpscr, rTmp
302   */
303   HReg rTmp = newVRegI(env);
304   addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
305   addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
306}
307
308/* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
309   expression denoting a value in the range 0 .. 3, indicating a round
310   mode encoded as per type IRRoundingMode.  Set FPSCR to have the
311   same rounding.
312*/
313static
314void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
315{
316   /* This isn't simple, because 'mode' carries an IR rounding
317      encoding, and we need to translate that to an ARMvfp one:
318      The IR encoding:
319         00  to nearest (the default)
320         10  to +infinity
321         01  to -infinity
322         11  to zero
323      The ARMvfp encoding:
324         00  to nearest
325         01  to +infinity
326         10  to -infinity
327         11  to zero
328      Easy enough to do; just swap the two bits.
329   */
330   HReg irrm = iselIntExpr_R(env, mode);
331   HReg tL   = newVRegI(env);
332   HReg tR   = newVRegI(env);
333   HReg t3   = newVRegI(env);
334   /* tL = irrm << 1;
335      tR = irrm >> 1;  if we're lucky, these will issue together
336      tL &= 2;
337      tR &= 1;         ditto
338      t3 = tL | tR;
339      t3 <<= 22;
340      fmxr fpscr, t3
341   */
342   addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
343   addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
344   addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
345   addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
346   addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
347   addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
348   addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
349}
350
351
352/*---------------------------------------------------------*/
353/*--- ISEL: Function call helpers                       ---*/
354/*---------------------------------------------------------*/
355
356/* Used only in doHelperCall.  See big comment in doHelperCall re
357   handling of register-parameter args.  This function figures out
358   whether evaluation of an expression might require use of a fixed
359   register.  If in doubt return True (safe but suboptimal).
360*/
361static
362Bool mightRequireFixedRegs ( IRExpr* e )
363{
364   switch (e->tag) {
365   case Iex_RdTmp: case Iex_Const: case Iex_Get:
366      return False;
367   default:
368      return True;
369   }
370}
371
372
373/* Do a complete function call.  guard is a Ity_Bit expression
374   indicating whether or not the call happens.  If guard==NULL, the
375   call is unconditional.  Returns True iff it managed to handle this
376   combination of arg/return types, else returns False. */
377
378static
379Bool doHelperCall ( ISelEnv* env,
380                    Bool passBBP,
381                    IRExpr* guard, IRCallee* cee, IRExpr** args )
382{
383   ARMCondCode cc;
384   HReg        argregs[ARM_N_ARGREGS];
385   HReg        tmpregs[ARM_N_ARGREGS];
386   Bool        go_fast;
387   Int         n_args, i, nextArgReg;
388   ULong       target;
389
390   vassert(ARM_N_ARGREGS == 4);
391
392   /* Marshal args for a call and do the call.
393
394      If passBBP is True, r8 (the baseblock pointer) is to be passed
395      as the first arg.
396
397      This function only deals with a tiny set of possibilities, which
398      cover all helpers in practice.  The restrictions are that only
399      arguments in registers are supported, hence only ARM_N_REGPARMS
400      x 32 integer bits in total can be passed.  In fact the only
401      supported arg types are I32 and I64.
402
403      Generating code which is both efficient and correct when
404      parameters are to be passed in registers is difficult, for the
405      reasons elaborated in detail in comments attached to
406      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
407      of the method described in those comments.
408
409      The problem is split into two cases: the fast scheme and the
410      slow scheme.  In the fast scheme, arguments are computed
411      directly into the target (real) registers.  This is only safe
412      when we can be sure that computation of each argument will not
413      trash any real registers set by computation of any other
414      argument.
415
416      In the slow scheme, all args are first computed into vregs, and
417      once they are all done, they are moved to the relevant real
418      regs.  This always gives correct code, but it also gives a bunch
419      of vreg-to-rreg moves which are usually redundant but are hard
420      for the register allocator to get rid of.
421
422      To decide which scheme to use, all argument expressions are
423      first examined.  If they are all so simple that it is clear they
424      will be evaluated without use of any fixed registers, use the
425      fast scheme, else use the slow scheme.  Note also that only
426      unconditional calls may use the fast scheme, since having to
427      compute a condition expression could itself trash real
428      registers.
429
430      Note this requires being able to examine an expression and
431      determine whether or not evaluation of it might use a fixed
432      register.  That requires knowledge of how the rest of this insn
433      selector works.  Currently just the following 3 are regarded as
434      safe -- hopefully they cover the majority of arguments in
435      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
436   */
437
438   /* Note that the cee->regparms field is meaningless on ARM hosts
439      (since there is only one calling convention) and so we always
440      ignore it. */
441
442   n_args = 0;
443   for (i = 0; args[i]; i++)
444      n_args++;
445
446   argregs[0] = hregARM_R0();
447   argregs[1] = hregARM_R1();
448   argregs[2] = hregARM_R2();
449   argregs[3] = hregARM_R3();
450
451   tmpregs[0] = tmpregs[1] = tmpregs[2] =
452   tmpregs[3] = INVALID_HREG;
453
454   /* First decide which scheme (slow or fast) is to be used.  First
455      assume the fast scheme, and select slow if any contraindications
456      (wow) appear. */
457
458   go_fast = True;
459
460   if (guard) {
461      if (guard->tag == Iex_Const
462          && guard->Iex.Const.con->tag == Ico_U1
463          && guard->Iex.Const.con->Ico.U1 == True) {
464         /* unconditional */
465      } else {
466         /* Not manifestly unconditional -- be conservative. */
467         go_fast = False;
468      }
469   }
470
471   if (go_fast) {
472      for (i = 0; i < n_args; i++) {
473         if (mightRequireFixedRegs(args[i])) {
474            go_fast = False;
475            break;
476         }
477      }
478   }
479   /* At this point the scheme to use has been established.  Generate
480      code to get the arg values into the argument rregs.  If we run
481      out of arg regs, give up. */
482
483   if (go_fast) {
484
485      /* FAST SCHEME */
486      nextArgReg = 0;
487      if (passBBP) {
488         addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
489                                     hregARM_R8() ));
490         nextArgReg++;
491      }
492
493      for (i = 0; i < n_args; i++) {
494         IRType aTy = typeOfIRExpr(env->type_env, args[i]);
495         if (nextArgReg >= ARM_N_ARGREGS)
496            return False; /* out of argregs */
497         if (aTy == Ity_I32) {
498            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
499                                        iselIntExpr_R(env, args[i]) ));
500            nextArgReg++;
501         }
502         else if (aTy == Ity_I64) {
503            /* 64-bit args must be passed in an a reg-pair of the form
504               n:n+1, where n is even.  Hence either r0:r1 or r2:r3.
505               On a little-endian host, the less significant word is
506               passed in the lower-numbered register. */
507            if (nextArgReg & 1) {
508               if (nextArgReg >= ARM_N_ARGREGS)
509                  return False; /* out of argregs */
510               addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
511               nextArgReg++;
512            }
513            if (nextArgReg >= ARM_N_ARGREGS)
514               return False; /* out of argregs */
515            HReg raHi, raLo;
516            iselInt64Expr(&raHi, &raLo, env, args[i]);
517            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
518            nextArgReg++;
519            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
520            nextArgReg++;
521         }
522         else
523            return False; /* unhandled arg type */
524      }
525
526      /* Fast scheme only applies for unconditional calls.  Hence: */
527      cc = ARMcc_AL;
528
529   } else {
530
531      /* SLOW SCHEME; move via temporaries */
532      nextArgReg = 0;
533
534      if (passBBP) {
535         /* This is pretty stupid; better to move directly to r0
536            after the rest of the args are done. */
537         tmpregs[nextArgReg] = newVRegI(env);
538         addInstr(env, mk_iMOVds_RR( tmpregs[nextArgReg],
539                                     hregARM_R8() ));
540         nextArgReg++;
541      }
542
543      for (i = 0; i < n_args; i++) {
544         IRType aTy = typeOfIRExpr(env->type_env, args[i]);
545         if (nextArgReg >= ARM_N_ARGREGS)
546            return False; /* out of argregs */
547         if (aTy == Ity_I32) {
548            tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
549            nextArgReg++;
550         }
551         else if (aTy == Ity_I64) {
552            /* Same comment applies as in the Fast-scheme case. */
553            if (nextArgReg & 1)
554               nextArgReg++;
555            if (nextArgReg + 1 >= ARM_N_ARGREGS)
556               return False; /* out of argregs */
557            HReg raHi, raLo;
558            iselInt64Expr(&raHi, &raLo, env, args[i]);
559            tmpregs[nextArgReg] = raLo;
560            nextArgReg++;
561            tmpregs[nextArgReg] = raHi;
562            nextArgReg++;
563         }
564      }
565
566      /* Now we can compute the condition.  We can't do it earlier
567         because the argument computations could trash the condition
568         codes.  Be a bit clever to handle the common case where the
569         guard is 1:Bit. */
570      cc = ARMcc_AL;
571      if (guard) {
572         if (guard->tag == Iex_Const
573             && guard->Iex.Const.con->tag == Ico_U1
574             && guard->Iex.Const.con->Ico.U1 == True) {
575            /* unconditional -- do nothing */
576         } else {
577            cc = iselCondCode( env, guard );
578         }
579      }
580
581      /* Move the args to their final destinations. */
582      for (i = 0; i < nextArgReg; i++) {
583         if (tmpregs[i] == INVALID_HREG) { // Skip invalid regs
584            addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
585            continue;
586         }
587         /* None of these insns, including any spill code that might
588            be generated, may alter the condition codes. */
589         addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
590      }
591
592   }
593
594   /* Should be assured by checks above */
595   vassert(nextArgReg <= ARM_N_ARGREGS);
596
597   target = (HWord)Ptr_to_ULong(cee->addr);
598
599   /* nextArgReg doles out argument registers.  Since these are
600      assigned in the order r0, r1, r2, r3, its numeric value at this
601      point, which must be between 0 and 4 inclusive, is going to be
602      equal to the number of arg regs in use for the call.  Hence bake
603      that number into the call (we'll need to know it when doing
604      register allocation, to know what regs the call reads.)
605
606      There is a bit of a twist -- harmless but worth recording.
607      Suppose the arg types are (Ity_I32, Ity_I64).  Then we will have
608      the first arg in r0 and the second in r3:r2, but r1 isn't used.
609      We nevertheless have nextArgReg==4 and bake that into the call
610      instruction.  This will mean the register allocator wil believe
611      this insn reads r1 when in fact it doesn't.  But that's
612      harmless; it just artificially extends the live range of r1
613      unnecessarily.  The best fix would be to put into the
614      instruction, a bitmask indicating which of r0/1/2/3 carry live
615      values.  But that's too much hassle. */
616
617   /* Finally, the call itself. */
618   addInstr(env, ARMInstr_Call( cc, target, nextArgReg ));
619
620   return True; /* success */
621}
622
623
624/*---------------------------------------------------------*/
625/*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
626/*---------------------------------------------------------*/
627
628/* Select insns for an integer-typed expression, and add them to the
629   code list.  Return a reg holding the result.  This reg will be a
630   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
631   want to modify it, ask for a new vreg, copy it in there, and modify
632   the copy.  The register allocator will do its best to map both
633   vregs to the same real register, so the copies will often disappear
634   later in the game.
635
636   This should handle expressions of 32, 16 and 8-bit type.  All
637   results are returned in a 32-bit register.  For 16- and 8-bit
638   expressions, the upper 16/24 bits are arbitrary, so you should mask
639   or sign extend partial values if necessary.
640*/
641
642/* --------------------- AMode1 --------------------- */
643
644/* Return an AMode1 which computes the value of the specified
645   expression, possibly also adding insns to the code list as a
646   result.  The expression may only be a 32-bit one.
647*/
648
649static Bool sane_AMode1 ( ARMAMode1* am )
650{
651   switch (am->tag) {
652      case ARMam1_RI:
653         return
654            toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
655                    && (hregIsVirtual(am->ARMam1.RI.reg)
656                        || am->ARMam1.RI.reg == hregARM_R8())
657                    && am->ARMam1.RI.simm13 >= -4095
658                    && am->ARMam1.RI.simm13 <= 4095 );
659      case ARMam1_RRS:
660         return
661            toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
662                    && hregIsVirtual(am->ARMam1.RRS.base)
663                    && hregClass(am->ARMam1.RRS.index) == HRcInt32
664                    && hregIsVirtual(am->ARMam1.RRS.index)
665                    && am->ARMam1.RRS.shift >= 0
666                    && am->ARMam1.RRS.shift <= 3 );
667      default:
668         vpanic("sane_AMode: unknown ARM AMode1 tag");
669   }
670}
671
/* Checked entry point for AMode1 selection: runs the worker on 'e'
   and asserts that the resulting amode passes sane_AMode1 before
   returning it. */
static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
{
   ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
   vassert(sane_AMode1(am));
   return am;
}
678
679static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
680{
681   IRType ty = typeOfIRExpr(env->type_env,e);
682   vassert(ty == Ity_I32);
683
684   /* FIXME: add RRS matching */
685
686   /* {Add32,Sub32}(expr,simm13) */
687   if (e->tag == Iex_Binop
688       && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
689       && e->Iex.Binop.arg2->tag == Iex_Const
690       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
691      Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
692      if (simm >= -4095 && simm <= 4095) {
693         HReg reg;
694         if (e->Iex.Binop.op == Iop_Sub32)
695            simm = -simm;
696         reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
697         return ARMAMode1_RI(reg, simm);
698      }
699   }
700
701   /* Doesn't match anything in particular.  Generate it into
702      a register and use that. */
703   {
704      HReg reg = iselIntExpr_R(env, e);
705      return ARMAMode1_RI(reg, 0);
706   }
707
708}
709
710
711/* --------------------- AMode2 --------------------- */
712
713/* Return an AMode2 which computes the value of the specified
714   expression, possibly also adding insns to the code list as a
715   result.  The expression may only be a 32-bit one.
716*/
717
718static Bool sane_AMode2 ( ARMAMode2* am )
719{
720   switch (am->tag) {
721      case ARMam2_RI:
722         return
723            toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
724                    && hregIsVirtual(am->ARMam2.RI.reg)
725                    && am->ARMam2.RI.simm9 >= -255
726                    && am->ARMam2.RI.simm9 <= 255 );
727      case ARMam2_RR:
728         return
729            toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
730                    && hregIsVirtual(am->ARMam2.RR.base)
731                    && hregClass(am->ARMam2.RR.index) == HRcInt32
732                    && hregIsVirtual(am->ARMam2.RR.index) );
733      default:
734         vpanic("sane_AMode: unknown ARM AMode2 tag");
735   }
736}
737
/* Checked entry point for AMode2 selection: runs the worker on 'e'
   and asserts that the resulting amode passes sane_AMode2 before
   returning it. */
static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
{
   ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
   vassert(sane_AMode2(am));
   return am;
}
744
745static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
746{
747   IRType ty = typeOfIRExpr(env->type_env,e);
748   vassert(ty == Ity_I32);
749
750   /* FIXME: add RR matching */
751
752   /* {Add32,Sub32}(expr,simm8) */
753   if (e->tag == Iex_Binop
754       && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
755       && e->Iex.Binop.arg2->tag == Iex_Const
756       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
757      Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
758      if (simm >= -255 && simm <= 255) {
759         HReg reg;
760         if (e->Iex.Binop.op == Iop_Sub32)
761            simm = -simm;
762         reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
763         return ARMAMode2_RI(reg, simm);
764      }
765   }
766
767   /* Doesn't match anything in particular.  Generate it into
768      a register and use that. */
769   {
770      HReg reg = iselIntExpr_R(env, e);
771      return ARMAMode2_RI(reg, 0);
772   }
773
774}
775
776
777/* --------------------- AModeV --------------------- */
778
779/* Return an AModeV which computes the value of the specified
780   expression, possibly also adding insns to the code list as a
781   result.  The expression may only be a 32-bit one.
782*/
783
784static Bool sane_AModeV ( ARMAModeV* am )
785{
786  return toBool( hregClass(am->reg) == HRcInt32
787                 && hregIsVirtual(am->reg)
788                 && am->simm11 >= -1020 && am->simm11 <= 1020
789                 && 0 == (am->simm11 & 3) );
790}
791
/* Checked entry point for AModeV selection: runs the worker on 'e'
   and asserts that the resulting amode passes sane_AModeV before
   returning it. */
static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
{
   ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
   vassert(sane_AModeV(am));
   return am;
}
798
799static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
800{
801   IRType ty = typeOfIRExpr(env->type_env,e);
802   vassert(ty == Ity_I32);
803
804   /* {Add32,Sub32}(expr, simm8 << 2) */
805   if (e->tag == Iex_Binop
806       && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
807       && e->Iex.Binop.arg2->tag == Iex_Const
808       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
809      Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
810      if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
811         HReg reg;
812         if (e->Iex.Binop.op == Iop_Sub32)
813            simm = -simm;
814         reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
815         return mkARMAModeV(reg, simm);
816      }
817   }
818
819   /* Doesn't match anything in particular.  Generate it into
820      a register and use that. */
821   {
822      HReg reg = iselIntExpr_R(env, e);
823      return mkARMAModeV(reg, 0);
824   }
825
826}
827
828/* -------------------- AModeN -------------------- */
829
/* Entry point for AModeN selection.  Unlike the other amode entry
   points in this file, it performs no sanity check on the result; it
   simply forwards to the worker. */
static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
{
   return iselIntExpr_AModeN_wrk(env, e);
}
834
/* Worker for iselIntExpr_AModeN: always evaluates 'e' into an
   integer register and wraps that register in a register-form
   AModeN. */
static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
{
   HReg reg = iselIntExpr_R(env, e);
   return mkARMAModeN_R(reg);
}
840
841
842/* --------------------- RI84 --------------------- */
843
844/* Select instructions to generate 'e' into a RI84.  If mayInv is
845   true, then the caller will also accept an I84 form that denotes
846   'not e'.  In this case didInv may not be NULL, and *didInv is set
847   to True.  This complication is so as to allow generation of an RI84
848   which is suitable for use in either an AND or BIC instruction,
849   without knowing (before this call) which one.
850*/
851static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
852                                   ISelEnv* env, IRExpr* e )
853{
854   ARMRI84* ri;
855   if (mayInv)
856      vassert(didInv != NULL);
857   ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
858   /* sanity checks ... */
859   switch (ri->tag) {
860      case ARMri84_I84:
861         return ri;
862      case ARMri84_R:
863         vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
864         vassert(hregIsVirtual(ri->ARMri84.R.reg));
865         return ri;
866      default:
867         vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
868   }
869}
870
871/* DO NOT CALL THIS DIRECTLY ! */
872static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
873                                       ISelEnv* env, IRExpr* e )
874{
875   IRType ty = typeOfIRExpr(env->type_env,e);
876   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
877
878   if (didInv) *didInv = False;
879
880   /* special case: immediate */
881   if (e->tag == Iex_Const) {
882      UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
883      switch (e->Iex.Const.con->tag) {
884         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
885         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
886         case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
887         default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
888      }
889      if (fitsIn8x4(&u8, &u4, u)) {
890         return ARMRI84_I84( (UShort)u8, (UShort)u4 );
891      }
892      if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
893         vassert(didInv);
894         *didInv = True;
895         return ARMRI84_I84( (UShort)u8, (UShort)u4 );
896      }
897      /* else fail, fall through to default case */
898   }
899
900   /* default case: calculate into a register and return that */
901   {
902      HReg r = iselIntExpr_R ( env, e );
903      return ARMRI84_R(r);
904   }
905}
906
907
908/* --------------------- RI5 --------------------- */
909
910/* Select instructions to generate 'e' into a RI5. */
911
912static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
913{
914   ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
915   /* sanity checks ... */
916   switch (ri->tag) {
917      case ARMri5_I5:
918         return ri;
919      case ARMri5_R:
920         vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
921         vassert(hregIsVirtual(ri->ARMri5.R.reg));
922         return ri;
923      default:
924         vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
925   }
926}
927
928/* DO NOT CALL THIS DIRECTLY ! */
929static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
930{
931   IRType ty = typeOfIRExpr(env->type_env,e);
932   vassert(ty == Ity_I32 || ty == Ity_I8);
933
934   /* special case: immediate */
935   if (e->tag == Iex_Const) {
936      UInt u; /* both invalid */
937      switch (e->Iex.Const.con->tag) {
938         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
939         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
940         case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
941         default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
942      }
943      if (u >= 1 && u <= 31) {
944         return ARMRI5_I5(u);
945      }
946      /* else fail, fall through to default case */
947   }
948
949   /* default case: calculate into a register and return that */
950   {
951      HReg r = iselIntExpr_R ( env, e );
952      return ARMRI5_R(r);
953   }
954}
955
956
957/* ------------------- CondCode ------------------- */
958
/* Generate code to evaluate a bit-typed expression, returning the
   condition code which would correspond when the expression would
   notionally have returned 1. */
962
963static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
964{
965   ARMCondCode cc = iselCondCode_wrk(env,e);
966   vassert(cc != ARMcc_NV);
967   return cc;
968}
969
970static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
971{
972   vassert(e);
973   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
974
975   /* var */
976   if (e->tag == Iex_RdTmp) {
977      HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
978      /* CmpOrTst doesn't modify rTmp; so this is OK. */
979      ARMRI84* one  = ARMRI84_I84(1,0);
980      addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
981      return ARMcc_NE;
982   }
983
984   /* Not1(e) */
985   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
986      /* Generate code for the arg, and negate the test condition */
987      return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
988   }
989
990   /* --- patterns rooted at: 32to1 --- */
991
992   if (e->tag == Iex_Unop
993       && e->Iex.Unop.op == Iop_32to1) {
994      HReg     rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
995      ARMRI84* one  = ARMRI84_I84(1,0);
996      addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
997      return ARMcc_NE;
998   }
999
1000   /* --- patterns rooted at: CmpNEZ8 --- */
1001
1002   if (e->tag == Iex_Unop
1003       && e->Iex.Unop.op == Iop_CmpNEZ8) {
1004      HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1005      ARMRI84* xFF  = ARMRI84_I84(0xFF,0);
1006      addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
1007      return ARMcc_NE;
1008   }
1009
1010   /* --- patterns rooted at: CmpNEZ32 --- */
1011
1012   if (e->tag == Iex_Unop
1013       && e->Iex.Unop.op == Iop_CmpNEZ32) {
1014      HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1015      ARMRI84* zero = ARMRI84_I84(0,0);
1016      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
1017      return ARMcc_NE;
1018   }
1019
1020   /* --- patterns rooted at: CmpNEZ64 --- */
1021
1022   if (e->tag == Iex_Unop
1023       && e->Iex.Unop.op == Iop_CmpNEZ64) {
1024      HReg     tHi, tLo;
1025      HReg     tmp  = newVRegI(env);
1026      ARMRI84* zero = ARMRI84_I84(0,0);
1027      iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
1028      addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
1029      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
1030      return ARMcc_NE;
1031   }
1032
1033   /* --- Cmp*32*(x,y) --- */
1034   if (e->tag == Iex_Binop
1035       && (e->Iex.Binop.op == Iop_CmpEQ32
1036           || e->Iex.Binop.op == Iop_CmpNE32
1037           || e->Iex.Binop.op == Iop_CmpLT32S
1038           || e->Iex.Binop.op == Iop_CmpLT32U
1039           || e->Iex.Binop.op == Iop_CmpLE32S
1040           || e->Iex.Binop.op == Iop_CmpLE32U)) {
1041      HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1042      ARMRI84* argR = iselIntExpr_RI84(NULL,False,
1043                                       env, e->Iex.Binop.arg2);
1044      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
1045      switch (e->Iex.Binop.op) {
1046         case Iop_CmpEQ32:  return ARMcc_EQ;
1047         case Iop_CmpNE32:  return ARMcc_NE;
1048         case Iop_CmpLT32S: return ARMcc_LT;
1049         case Iop_CmpLT32U: return ARMcc_LO;
1050         case Iop_CmpLE32S: return ARMcc_LE;
1051         case Iop_CmpLE32U: return ARMcc_LS;
1052         default: vpanic("iselCondCode(arm): CmpXX32");
1053      }
1054   }
1055
1056   /* --- CasCmpEQ* --- */
1057   /* Ist_Cas has a dummy argument to compare with, so comparison is
1058      always true. */
1059   if (e->tag == Iex_Binop
1060       && (e->Iex.Binop.op == Iop_CasCmpEQ32
1061           || e->Iex.Binop.op == Iop_CasCmpEQ16
1062           || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1063      return ARMcc_AL;
1064   }
1065
1066   ppIRExpr(e);
1067   vpanic("iselCondCode");
1068}
1069
1070
1071/* --------------------- Reg --------------------- */
1072
1073static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1074{
1075   HReg r = iselIntExpr_R_wrk(env, e);
1076   /* sanity checks ... */
1077#  if 0
1078   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1079#  endif
1080   vassert(hregClass(r) == HRcInt32);
1081   vassert(hregIsVirtual(r));
1082   return r;
1083}
1084
1085/* DO NOT CALL THIS DIRECTLY ! */
1086static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1087{
1088   IRType ty = typeOfIRExpr(env->type_env,e);
1089   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1090//   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1091
1092   switch (e->tag) {
1093
1094   /* --------- TEMP --------- */
1095   case Iex_RdTmp: {
1096      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1097   }
1098
1099   /* --------- LOAD --------- */
1100   case Iex_Load: {
1101      HReg dst  = newVRegI(env);
1102
1103      if (e->Iex.Load.end != Iend_LE)
1104         goto irreducible;
1105
1106      if (ty == Ity_I32) {
1107         ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1108         addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, dst, amode));
1109         return dst;
1110      }
1111      if (ty == Ity_I16) {
1112         ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
1113         addInstr(env, ARMInstr_LdSt16(True/*isLoad*/, False/*!signedLoad*/,
1114                                       dst, amode));
1115         return dst;
1116      }
1117      if (ty == Ity_I8) {
1118         ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1119         addInstr(env, ARMInstr_LdSt8U(True/*isLoad*/, dst, amode));
1120         return dst;
1121      }
1122
1123//zz      if (ty == Ity_I16) {
1124//zz         addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1125//zz         return dst;
1126//zz      }
1127//zz      if (ty == Ity_I8) {
1128//zz         addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1129//zz         return dst;
1130//zz      }
1131      break;
1132   }
1133
1134//zz   /* --------- TERNARY OP --------- */
1135//zz   case Iex_Triop: {
1136//zz      IRTriop *triop = e->Iex.Triop.details;
1137//zz      /* C3210 flags following FPU partial remainder (fprem), both
1138//zz         IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1139//zz      if (triop->op == Iop_PRemC3210F64
1140//zz          || triop->op == Iop_PRem1C3210F64) {
1141//zz         HReg junk = newVRegF(env);
1142//zz         HReg dst  = newVRegI(env);
1143//zz         HReg srcL = iselDblExpr(env, triop->arg2);
1144//zz         HReg srcR = iselDblExpr(env, triop->arg3);
1145//zz         /* XXXROUNDINGFIXME */
1146//zz         /* set roundingmode here */
1147//zz         addInstr(env, X86Instr_FpBinary(
1148//zz                           e->Iex.Binop.op==Iop_PRemC3210F64
1149//zz                              ? Xfp_PREM : Xfp_PREM1,
1150//zz                           srcL,srcR,junk
1151//zz                 ));
1152//zz         /* The previous pseudo-insn will have left the FPU's C3210
1153//zz            flags set correctly.  So bag them. */
1154//zz         addInstr(env, X86Instr_FpStSW_AX());
1155//zz         addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1156//zz         addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1157//zz         return dst;
1158//zz      }
1159//zz
1160//zz      break;
1161//zz   }
1162
1163   /* --------- BINARY OP --------- */
1164   case Iex_Binop: {
1165
1166      ARMAluOp   aop = 0; /* invalid */
1167      ARMShiftOp sop = 0; /* invalid */
1168
1169      /* ADD/SUB/AND/OR/XOR */
1170      switch (e->Iex.Binop.op) {
1171         case Iop_And32: {
1172            Bool     didInv = False;
1173            HReg     dst    = newVRegI(env);
1174            HReg     argL   = iselIntExpr_R(env, e->Iex.Binop.arg1);
1175            ARMRI84* argR   = iselIntExpr_RI84(&didInv, True/*mayInv*/,
1176                                               env, e->Iex.Binop.arg2);
1177            addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
1178                                       dst, argL, argR));
1179            return dst;
1180         }
1181         case Iop_Or32:  aop = ARMalu_OR;  goto std_binop;
1182         case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
1183         case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
1184         case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
1185         std_binop: {
1186            HReg     dst  = newVRegI(env);
1187            HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1188            ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
1189                                             env, e->Iex.Binop.arg2);
1190            addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
1191            return dst;
1192         }
1193         default: break;
1194      }
1195
1196      /* SDIV/UDIV */
1197      if (e->Iex.Binop.op == Iop_DivU32 || e->Iex.Binop.op == Iop_DivS32) {
1198         HReg     dst  = newVRegI(env);
1199         HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1200         HReg     argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1201
1202         addInstr(env,
1203                  ARMInstr_Div(e->Iex.Binop.op == Iop_DivU32 ?
1204                                  ARMdiv_U : ARMdiv_S,
1205                               dst, argL, argR));
1206         return dst;
1207      }
1208
1209      /* SHL/SHR/SAR */
1210      switch (e->Iex.Binop.op) {
1211         case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
1212         case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
1213         case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
1214         sh_binop: {
1215            HReg    dst  = newVRegI(env);
1216            HReg    argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1217            ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
1218            addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
1219            vassert(ty == Ity_I32); /* else the IR is ill-typed */
1220            return dst;
1221         }
1222         default: break;
1223      }
1224
1225      /* MUL */
1226      if (e->Iex.Binop.op == Iop_Mul32) {
1227         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1228         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1229         HReg dst  = newVRegI(env);
1230         addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1231         addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1232         addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
1233         addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1234         return dst;
1235      }
1236
1237      /* Handle misc other ops. */
1238
1239      if (e->Iex.Binop.op == Iop_Max32U) {
1240         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1241         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1242         HReg dst  = newVRegI(env);
1243         addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
1244                                         ARMRI84_R(argR)));
1245         addInstr(env, mk_iMOVds_RR(dst, argL));
1246         addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
1247         return dst;
1248      }
1249
1250      if (e->Iex.Binop.op == Iop_CmpF64) {
1251         HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
1252         HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
1253         HReg dst = newVRegI(env);
1254         /* Do the compare (FCMPD) and set NZCV in FPSCR.  Then also do
1255            FMSTAT, so we can examine the results directly. */
1256         addInstr(env, ARMInstr_VCmpD(dL, dR));
1257         /* Create in dst, the IRCmpF64Result encoded result. */
1258         addInstr(env, ARMInstr_Imm32(dst, 0));
1259         addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
1260         addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
1261         addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
1262         addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
1263         return dst;
1264      }
1265
1266      if (e->Iex.Binop.op == Iop_F64toI32S
1267          || e->Iex.Binop.op == Iop_F64toI32U) {
1268         /* Wretched uglyness all round, due to having to deal
1269            with rounding modes.  Oh well. */
1270         /* FIXME: if arg1 is a constant indicating round-to-zero,
1271            then we could skip all this arsing around with FPSCR and
1272            simply emit FTO{S,U}IZD. */
1273         Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
1274         HReg valD  = iselDblExpr(env, e->Iex.Binop.arg2);
1275         set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
1276         /* FTO{S,U}ID valF, valD */
1277         HReg valF = newVRegF(env);
1278         addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
1279                                       valF, valD));
1280         set_VFP_rounding_default(env);
1281         /* VMOV dst, valF */
1282         HReg dst = newVRegI(env);
1283         addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
1284         return dst;
1285      }
1286
1287      if (e->Iex.Binop.op == Iop_GetElem8x8
1288          || e->Iex.Binop.op == Iop_GetElem16x4
1289          || e->Iex.Binop.op == Iop_GetElem32x2) {
1290         HReg res = newVRegI(env);
1291         HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
1292         UInt index, size;
1293         if (e->Iex.Binop.arg2->tag != Iex_Const ||
1294             typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1295            vpanic("ARM target supports GetElem with constant "
1296                   "second argument only\n");
1297         }
1298         index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1299         switch (e->Iex.Binop.op) {
1300            case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1301            case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1302            case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1303            default: vassert(0);
1304         }
1305         addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1306                                        mkARMNRS(ARMNRS_Reg, res, 0),
1307                                        mkARMNRS(ARMNRS_Scalar, arg, index),
1308                                        size, False));
1309         return res;
1310      }
1311
1312      if (e->Iex.Binop.op == Iop_GetElem8x16
1313          || e->Iex.Binop.op == Iop_GetElem16x8
1314          || e->Iex.Binop.op == Iop_GetElem32x4) {
1315         HReg res = newVRegI(env);
1316         HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
1317         UInt index, size;
1318         if (e->Iex.Binop.arg2->tag != Iex_Const ||
1319             typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1320            vpanic("ARM target supports GetElem with constant "
1321                   "second argument only\n");
1322         }
1323         index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1324         switch (e->Iex.Binop.op) {
1325            case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1326            case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1327            case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1328            default: vassert(0);
1329         }
1330         addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1331                                        mkARMNRS(ARMNRS_Reg, res, 0),
1332                                        mkARMNRS(ARMNRS_Scalar, arg, index),
1333                                        size, True));
1334         return res;
1335      }
1336
1337      /* All cases involving host-side helper calls. */
1338      void* fn = NULL;
1339      switch (e->Iex.Binop.op) {
1340         case Iop_Add16x2:
1341            fn = &h_generic_calc_Add16x2; break;
1342         case Iop_Sub16x2:
1343            fn = &h_generic_calc_Sub16x2; break;
1344         case Iop_HAdd16Ux2:
1345            fn = &h_generic_calc_HAdd16Ux2; break;
1346         case Iop_HAdd16Sx2:
1347            fn = &h_generic_calc_HAdd16Sx2; break;
1348         case Iop_HSub16Ux2:
1349            fn = &h_generic_calc_HSub16Ux2; break;
1350         case Iop_HSub16Sx2:
1351            fn = &h_generic_calc_HSub16Sx2; break;
1352         case Iop_QAdd16Sx2:
1353            fn = &h_generic_calc_QAdd16Sx2; break;
1354         case Iop_QSub16Sx2:
1355            fn = &h_generic_calc_QSub16Sx2; break;
1356         case Iop_Add8x4:
1357            fn = &h_generic_calc_Add8x4; break;
1358         case Iop_Sub8x4:
1359            fn = &h_generic_calc_Sub8x4; break;
1360         case Iop_HAdd8Ux4:
1361            fn = &h_generic_calc_HAdd8Ux4; break;
1362         case Iop_HAdd8Sx4:
1363            fn = &h_generic_calc_HAdd8Sx4; break;
1364         case Iop_HSub8Ux4:
1365            fn = &h_generic_calc_HSub8Ux4; break;
1366         case Iop_HSub8Sx4:
1367            fn = &h_generic_calc_HSub8Sx4; break;
1368         case Iop_QAdd8Sx4:
1369            fn = &h_generic_calc_QAdd8Sx4; break;
1370         case Iop_QAdd8Ux4:
1371            fn = &h_generic_calc_QAdd8Ux4; break;
1372         case Iop_QSub8Sx4:
1373            fn = &h_generic_calc_QSub8Sx4; break;
1374         case Iop_QSub8Ux4:
1375            fn = &h_generic_calc_QSub8Ux4; break;
1376         case Iop_Sad8Ux4:
1377            fn = &h_generic_calc_Sad8Ux4; break;
1378         case Iop_QAdd32S:
1379            fn = &h_generic_calc_QAdd32S; break;
1380         case Iop_QSub32S:
1381            fn = &h_generic_calc_QSub32S; break;
1382         case Iop_QSub16Ux2:
1383            fn = &h_generic_calc_QSub16Ux2; break;
1384         default:
1385            break;
1386      }
1387
1388      if (fn) {
1389         HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1390         HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1391         HReg res  = newVRegI(env);
1392         addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
1393         addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
1394         addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 2 ));
1395         addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1396         return res;
1397      }
1398
1399      break;
1400   }
1401
1402   /* --------- UNARY OP --------- */
1403   case Iex_Unop: {
1404
1405//zz      /* 1Uto8(32to1(expr32)) */
1406//zz      if (e->Iex.Unop.op == Iop_1Uto8) {
1407//zz         DECLARE_PATTERN(p_32to1_then_1Uto8);
1408//zz         DEFINE_PATTERN(p_32to1_then_1Uto8,
1409//zz                        unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1410//zz         if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1411//zz            IRExpr* expr32 = mi.bindee[0];
1412//zz            HReg dst = newVRegI(env);
1413//zz            HReg src = iselIntExpr_R(env, expr32);
1414//zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
1415//zz            addInstr(env, X86Instr_Alu32R(Xalu_AND,
1416//zz                                          X86RMI_Imm(1), dst));
1417//zz            return dst;
1418//zz         }
1419//zz      }
1420//zz
1421//zz      /* 8Uto32(LDle(expr32)) */
1422//zz      if (e->Iex.Unop.op == Iop_8Uto32) {
1423//zz         DECLARE_PATTERN(p_LDle8_then_8Uto32);
1424//zz         DEFINE_PATTERN(p_LDle8_then_8Uto32,
1425//zz                        unop(Iop_8Uto32,
1426//zz                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1427//zz         if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1428//zz            HReg dst = newVRegI(env);
1429//zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1430//zz            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1431//zz            return dst;
1432//zz         }
1433//zz      }
1434//zz
1435//zz      /* 8Sto32(LDle(expr32)) */
1436//zz      if (e->Iex.Unop.op == Iop_8Sto32) {
1437//zz         DECLARE_PATTERN(p_LDle8_then_8Sto32);
1438//zz         DEFINE_PATTERN(p_LDle8_then_8Sto32,
1439//zz                        unop(Iop_8Sto32,
1440//zz                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1441//zz         if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1442//zz            HReg dst = newVRegI(env);
1443//zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1444//zz            addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1445//zz            return dst;
1446//zz         }
1447//zz      }
1448//zz
1449//zz      /* 16Uto32(LDle(expr32)) */
1450//zz      if (e->Iex.Unop.op == Iop_16Uto32) {
1451//zz         DECLARE_PATTERN(p_LDle16_then_16Uto32);
1452//zz         DEFINE_PATTERN(p_LDle16_then_16Uto32,
1453//zz                        unop(Iop_16Uto32,
1454//zz                             IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1455//zz         if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1456//zz            HReg dst = newVRegI(env);
1457//zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1458//zz            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1459//zz            return dst;
1460//zz         }
1461//zz      }
1462//zz
1463//zz      /* 8Uto32(GET:I8) */
1464//zz      if (e->Iex.Unop.op == Iop_8Uto32) {
1465//zz         if (e->Iex.Unop.arg->tag == Iex_Get) {
1466//zz            HReg      dst;
1467//zz            X86AMode* amode;
1468//zz            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1469//zz            dst = newVRegI(env);
1470//zz            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1471//zz                                hregX86_EBP());
1472//zz            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1473//zz            return dst;
1474//zz         }
1475//zz      }
1476//zz
1477//zz      /* 16to32(GET:I16) */
1478//zz      if (e->Iex.Unop.op == Iop_16Uto32) {
1479//zz         if (e->Iex.Unop.arg->tag == Iex_Get) {
1480//zz            HReg      dst;
1481//zz            X86AMode* amode;
1482//zz            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1483//zz            dst = newVRegI(env);
1484//zz            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1485//zz                                hregX86_EBP());
1486//zz            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1487//zz            return dst;
1488//zz         }
1489//zz      }
1490
1491      switch (e->Iex.Unop.op) {
1492         case Iop_8Uto32: {
1493            HReg dst = newVRegI(env);
1494            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1495            addInstr(env, ARMInstr_Alu(ARMalu_AND,
1496                                       dst, src, ARMRI84_I84(0xFF,0)));
1497            return dst;
1498         }
1499//zz         case Iop_8Uto16:
1500//zz         case Iop_8Uto32:
1501//zz         case Iop_16Uto32: {
1502//zz            HReg dst = newVRegI(env);
1503//zz            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1504//zz            UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1505//zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
1506//zz            addInstr(env, X86Instr_Alu32R(Xalu_AND,
1507//zz                                          X86RMI_Imm(mask), dst));
1508//zz            return dst;
1509//zz         }
1510//zz         case Iop_8Sto16:
1511//zz         case Iop_8Sto32:
1512         case Iop_16Uto32: {
1513            HReg dst = newVRegI(env);
1514            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1515            ARMRI5* amt = ARMRI5_I5(16);
1516            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1517            addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
1518            return dst;
1519         }
1520         case Iop_8Sto32:
1521         case Iop_16Sto32: {
1522            HReg dst = newVRegI(env);
1523            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1524            ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
1525            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1526            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1527            return dst;
1528         }
1529//zz         case Iop_Not8:
1530//zz         case Iop_Not16:
1531         case Iop_Not32: {
1532            HReg dst = newVRegI(env);
1533            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1534            addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
1535            return dst;
1536         }
1537         case Iop_64HIto32: {
1538            HReg rHi, rLo;
1539            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1540            return rHi; /* and abandon rLo .. poor wee thing :-) */
1541         }
1542         case Iop_64to32: {
1543            HReg rHi, rLo;
1544            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1545            return rLo; /* similar stupid comment to the above ... */
1546         }
1547         case Iop_64to8: {
1548            HReg rHi, rLo;
1549            if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1550               HReg tHi = newVRegI(env);
1551               HReg tLo = newVRegI(env);
1552               HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
1553               addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1554               rHi = tHi;
1555               rLo = tLo;
1556            } else {
1557               iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1558            }
1559            return rLo;
1560         }
1561//zz         case Iop_16HIto8:
1562//zz         case Iop_32HIto16: {
1563//zz            HReg dst  = newVRegI(env);
1564//zz            HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
1565//zz            Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1566//zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
1567//zz            addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1568//zz            return dst;
1569//zz         }
1570         case Iop_1Uto32:
1571         case Iop_1Uto8: {
1572            HReg        dst  = newVRegI(env);
1573            ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1574            addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1575            addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1576            return dst;
1577         }
1578
1579         case Iop_1Sto32: {
1580            HReg        dst  = newVRegI(env);
1581            ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1582            ARMRI5*     amt  = ARMRI5_I5(31);
1583            /* This is really rough.  We could do much better here;
1584               perhaps mvn{cond} dst, #0 as the second insn?
1585               (same applies to 1Sto64) */
1586            addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1587            addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1588            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1589            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1590            return dst;
1591         }
1592
1593
1594//zz         case Iop_1Sto8:
1595//zz         case Iop_1Sto16:
1596//zz         case Iop_1Sto32: {
1597//zz            /* could do better than this, but for now ... */
1598//zz            HReg dst         = newVRegI(env);
1599//zz            X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1600//zz            addInstr(env, X86Instr_Set32(cond,dst));
1601//zz            addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1602//zz            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1603//zz            return dst;
1604//zz         }
1605//zz         case Iop_Ctz32: {
1606//zz            /* Count trailing zeroes, implemented by x86 'bsfl' */
1607//zz            HReg dst = newVRegI(env);
1608//zz            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1609//zz            addInstr(env, X86Instr_Bsfr32(True,src,dst));
1610//zz            return dst;
1611//zz         }
1612         case Iop_Clz32: {
1613            /* Count leading zeroes; easy on ARM. */
1614            HReg dst = newVRegI(env);
1615            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1616            addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
1617            return dst;
1618         }
1619
1620         case Iop_CmpwNEZ32: {
1621            HReg dst = newVRegI(env);
1622            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1623            addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1624            addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1625            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
1626            return dst;
1627         }
1628
1629         case Iop_Left32: {
1630            HReg dst = newVRegI(env);
1631            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1632            addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1633            addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1634            return dst;
1635         }
1636
1637//zz         case Iop_V128to32: {
1638//zz            HReg      dst  = newVRegI(env);
1639//zz            HReg      vec  = iselVecExpr(env, e->Iex.Unop.arg);
1640//zz            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1641//zz            sub_from_esp(env, 16);
1642//zz            addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1643//zz            addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1644//zz            add_to_esp(env, 16);
1645//zz            return dst;
1646//zz         }
1647//zz
1648         case Iop_ReinterpF32asI32: {
1649            HReg dst = newVRegI(env);
1650            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1651            addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
1652            return dst;
1653         }
1654
1655//zz
1656//zz         case Iop_16to8:
1657         case Iop_32to8:
1658         case Iop_32to16:
1659            /* These are no-ops. */
1660            return iselIntExpr_R(env, e->Iex.Unop.arg);
1661
1662         default:
1663            break;
1664      }
1665
1666      /* All Unop cases involving host-side helper calls. */
1667      void* fn = NULL;
1668      switch (e->Iex.Unop.op) {
1669         case Iop_CmpNEZ16x2:
1670            fn = &h_generic_calc_CmpNEZ16x2; break;
1671         case Iop_CmpNEZ8x4:
1672            fn = &h_generic_calc_CmpNEZ8x4; break;
1673         default:
1674            break;
1675      }
1676
1677      if (fn) {
1678         HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1679         HReg res = newVRegI(env);
1680         addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
1681         addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 1 ));
1682         addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1683         return res;
1684      }
1685
1686      break;
1687   }
1688
1689   /* --------- GET --------- */
1690   case Iex_Get: {
1691      if (ty == Ity_I32
1692          && 0 == (e->Iex.Get.offset & 3)
1693          && e->Iex.Get.offset < 4096-4) {
1694         HReg dst = newVRegI(env);
1695         addInstr(env, ARMInstr_LdSt32(
1696                          True/*isLoad*/,
1697                          dst,
1698                          ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
1699         return dst;
1700      }
1701//zz      if (ty == Ity_I8 || ty == Ity_I16) {
1702//zz         HReg dst = newVRegI(env);
1703//zz         addInstr(env, X86Instr_LoadEX(
1704//zz                          toUChar(ty==Ity_I8 ? 1 : 2),
1705//zz                          False,
1706//zz                          X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1707//zz                          dst));
1708//zz         return dst;
1709//zz      }
1710      break;
1711   }
1712
1713//zz   case Iex_GetI: {
1714//zz      X86AMode* am
1715//zz         = genGuestArrayOffset(
1716//zz              env, e->Iex.GetI.descr,
1717//zz                   e->Iex.GetI.ix, e->Iex.GetI.bias );
1718//zz      HReg dst = newVRegI(env);
1719//zz      if (ty == Ity_I8) {
1720//zz         addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1721//zz         return dst;
1722//zz      }
1723//zz      if (ty == Ity_I32) {
1724//zz         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1725//zz         return dst;
1726//zz      }
1727//zz      break;
1728//zz   }
1729
1730   /* --------- CCALL --------- */
1731   case Iex_CCall: {
1732      HReg    dst = newVRegI(env);
1733      vassert(ty == e->Iex.CCall.retty);
1734
1735      /* be very restrictive for now.  Only 32/64-bit ints allowed
1736         for args, and 32 bits for return type. */
1737      if (e->Iex.CCall.retty != Ity_I32)
1738         goto irreducible;
1739
1740      /* Marshal args, do the call, clear stack. */
1741      Bool ok = doHelperCall( env, False,
1742                              NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
1743      if (ok) {
1744         addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1745         return dst;
1746      }
1747      /* else fall through; will hit the irreducible: label */
1748   }
1749
1750   /* --------- LITERAL --------- */
1751   /* 32 literals */
1752   case Iex_Const: {
1753      UInt u   = 0;
1754      HReg dst = newVRegI(env);
1755      switch (e->Iex.Const.con->tag) {
1756         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1757         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1758         case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
1759         default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
1760      }
1761      addInstr(env, ARMInstr_Imm32(dst, u));
1762      return dst;
1763   }
1764
1765   /* --------- MULTIPLEX --------- */
1766   case Iex_Mux0X: {
1767      IRExpr* cond = e->Iex.Mux0X.cond;
1768
1769      /* Mux0X( 32to8(1Uto32(ccexpr)), expr0, exprX ) */
1770      if (ty == Ity_I32
1771          && cond->tag == Iex_Unop
1772          && cond->Iex.Unop.op == Iop_32to8
1773          && cond->Iex.Unop.arg->tag == Iex_Unop
1774          && cond->Iex.Unop.arg->Iex.Unop.op == Iop_1Uto32) {
1775         ARMCondCode cc;
1776         HReg     rX  = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1777         ARMRI84* r0  = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
1778         HReg     dst = newVRegI(env);
1779         addInstr(env, mk_iMOVds_RR(dst, rX));
1780         cc = iselCondCode(env, cond->Iex.Unop.arg->Iex.Unop.arg);
1781         addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
1782         return dst;
1783      }
1784
1785      /* Mux0X(cond, expr0, exprX) (general case) */
1786      if (ty == Ity_I32) {
1787         HReg     r8;
1788         HReg     rX  = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1789         ARMRI84* r0  = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
1790         HReg     dst = newVRegI(env);
1791         addInstr(env, mk_iMOVds_RR(dst, rX));
1792         r8 = iselIntExpr_R(env, cond);
1793         addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
1794                                         ARMRI84_I84(0xFF,0)));
1795         addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, r0));
1796         return dst;
1797      }
1798      break;
1799   }
1800
1801   default:
1802   break;
1803   } /* switch (e->tag) */
1804
1805   /* We get here if no pattern matched. */
1806  irreducible:
1807   ppIRExpr(e);
1808   vpanic("iselIntExpr_R: cannot reduce tree");
1809}
1810
1811
1812/* -------------------- 64-bit -------------------- */
1813
1814/* Compute a 64-bit value into a register pair, which is returned as
1815   the first two parameters.  As with iselIntExpr_R, these may be
1816   either real or virtual regs; in any case they must not be changed
1817   by subsequent code emitted by the caller.  */
1818
1819static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1820{
1821   iselInt64Expr_wrk(rHi, rLo, env, e);
1822#  if 0
1823   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1824#  endif
1825   vassert(hregClass(*rHi) == HRcInt32);
1826   vassert(hregIsVirtual(*rHi));
1827   vassert(hregClass(*rLo) == HRcInt32);
1828   vassert(hregIsVirtual(*rLo));
1829}
1830
1831/* DO NOT CALL THIS DIRECTLY ! */
1832static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1833{
1834   vassert(e);
1835   vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
1836
1837   /* 64-bit literal */
1838   if (e->tag == Iex_Const) {
1839      ULong   w64 = e->Iex.Const.con->Ico.U64;
1840      UInt    wHi = toUInt(w64 >> 32);
1841      UInt    wLo = toUInt(w64);
1842      HReg    tHi = newVRegI(env);
1843      HReg    tLo = newVRegI(env);
1844      vassert(e->Iex.Const.con->tag == Ico_U64);
1845      addInstr(env, ARMInstr_Imm32(tHi, wHi));
1846      addInstr(env, ARMInstr_Imm32(tLo, wLo));
1847      *rHi = tHi;
1848      *rLo = tLo;
1849      return;
1850   }
1851
1852   /* read 64-bit IRTemp */
1853   if (e->tag == Iex_RdTmp) {
1854      if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
1855         HReg tHi = newVRegI(env);
1856         HReg tLo = newVRegI(env);
1857         HReg tmp = iselNeon64Expr(env, e);
1858         addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1859         *rHi = tHi;
1860         *rLo = tLo;
1861      } else {
1862         lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
1863      }
1864      return;
1865   }
1866
1867   /* 64-bit load */
1868   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
1869      HReg      tLo, tHi, rA;
1870      vassert(e->Iex.Load.ty == Ity_I64);
1871      rA  = iselIntExpr_R(env, e->Iex.Load.addr);
1872      tHi = newVRegI(env);
1873      tLo = newVRegI(env);
1874      addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, ARMAMode1_RI(rA, 4)));
1875      addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, ARMAMode1_RI(rA, 0)));
1876      *rHi = tHi;
1877      *rLo = tLo;
1878      return;
1879   }
1880
1881   /* 64-bit GET */
1882   if (e->tag == Iex_Get) {
1883      ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
1884      ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
1885      HReg tHi = newVRegI(env);
1886      HReg tLo = newVRegI(env);
1887      addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, am4));
1888      addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, am0));
1889      *rHi = tHi;
1890      *rLo = tLo;
1891      return;
1892   }
1893
1894   /* --------- BINARY ops --------- */
1895   if (e->tag == Iex_Binop) {
1896      switch (e->Iex.Binop.op) {
1897
1898         /* 32 x 32 -> 64 multiply */
1899         case Iop_MullS32:
1900         case Iop_MullU32: {
1901            HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1902            HReg     argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1903            HReg     tHi  = newVRegI(env);
1904            HReg     tLo  = newVRegI(env);
1905            ARMMulDivOp mop  = e->Iex.Binop.op == Iop_MullS32
1906                               ? ARMmul_SX : ARMmul_ZX;
1907            addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1908            addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1909            addInstr(env, ARMInstr_Mul(mop));
1910            addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
1911            addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
1912            *rHi = tHi;
1913            *rLo = tLo;
1914            return;
1915         }
1916
1917         case Iop_Or64: {
1918            HReg xLo, xHi, yLo, yHi;
1919            HReg tHi = newVRegI(env);
1920            HReg tLo = newVRegI(env);
1921            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
1922            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
1923            addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
1924            addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
1925            *rHi = tHi;
1926            *rLo = tLo;
1927            return;
1928         }
1929
1930         case Iop_Add64: {
1931            HReg xLo, xHi, yLo, yHi;
1932            HReg tHi = newVRegI(env);
1933            HReg tLo = newVRegI(env);
1934            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
1935            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
1936            addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
1937            addInstr(env, ARMInstr_Alu(ARMalu_ADC,  tHi, xHi, ARMRI84_R(yHi)));
1938            *rHi = tHi;
1939            *rLo = tLo;
1940            return;
1941         }
1942
1943         /* 32HLto64(e1,e2) */
1944         case Iop_32HLto64: {
1945            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
1946            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
1947            return;
1948         }
1949
1950         default:
1951            break;
1952      }
1953   }
1954
1955   /* --------- UNARY ops --------- */
1956   if (e->tag == Iex_Unop) {
1957      switch (e->Iex.Unop.op) {
1958
1959         /* ReinterpF64asI64 */
1960         case Iop_ReinterpF64asI64: {
1961            HReg dstHi = newVRegI(env);
1962            HReg dstLo = newVRegI(env);
1963            HReg src   = iselDblExpr(env, e->Iex.Unop.arg);
1964            addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
1965            *rHi = dstHi;
1966            *rLo = dstLo;
1967            return;
1968         }
1969
1970         /* Left64(e) */
1971         case Iop_Left64: {
1972            HReg yLo, yHi;
1973            HReg tHi  = newVRegI(env);
1974            HReg tLo  = newVRegI(env);
1975            HReg zero = newVRegI(env);
1976            /* yHi:yLo = arg */
1977            iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
1978            /* zero = 0 */
1979            addInstr(env, ARMInstr_Imm32(zero, 0));
1980            /* tLo = 0 - yLo, and set carry */
1981            addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
1982                                       tLo, zero, ARMRI84_R(yLo)));
1983            /* tHi = 0 - yHi - carry */
1984            addInstr(env, ARMInstr_Alu(ARMalu_SBC,
1985                                       tHi, zero, ARMRI84_R(yHi)));
1986            /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
1987               back in, so as to give the final result
1988               tHi:tLo = arg | -arg. */
1989            addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
1990            addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
1991            *rHi = tHi;
1992            *rLo = tLo;
1993            return;
1994         }
1995
1996         /* CmpwNEZ64(e) */
1997         case Iop_CmpwNEZ64: {
1998            HReg srcLo, srcHi;
1999            HReg tmp1 = newVRegI(env);
2000            HReg tmp2 = newVRegI(env);
2001            /* srcHi:srcLo = arg */
2002            iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2003            /* tmp1 = srcHi | srcLo */
2004            addInstr(env, ARMInstr_Alu(ARMalu_OR,
2005                                       tmp1, srcHi, ARMRI84_R(srcLo)));
2006            /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2007            addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
2008            addInstr(env, ARMInstr_Alu(ARMalu_OR,
2009                                       tmp2, tmp2, ARMRI84_R(tmp1)));
2010            addInstr(env, ARMInstr_Shift(ARMsh_SAR,
2011                                         tmp2, tmp2, ARMRI5_I5(31)));
2012            *rHi = tmp2;
2013            *rLo = tmp2;
2014            return;
2015         }
2016
2017         case Iop_1Sto64: {
2018            HReg        dst  = newVRegI(env);
2019            ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2020            ARMRI5*     amt  = ARMRI5_I5(31);
2021            /* This is really rough.  We could do much better here;
2022               perhaps mvn{cond} dst, #0 as the second insn?
2023               (same applies to 1Sto32) */
2024            addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2025            addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2026            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
2027            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
2028            *rHi = dst;
2029            *rLo = dst;
2030            return;
2031         }
2032
2033         default:
2034            break;
2035      }
2036   } /* if (e->tag == Iex_Unop) */
2037
2038   /* --------- MULTIPLEX --------- */
2039   if (e->tag == Iex_Mux0X) {
2040      IRType ty8;
2041      HReg   r8, rXhi, rXlo, r0hi, r0lo, dstHi, dstLo;
2042      ty8 = typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond);
2043      vassert(ty8 == Ity_I8);
2044      iselInt64Expr(&rXhi, &rXlo, env, e->Iex.Mux0X.exprX);
2045      iselInt64Expr(&r0hi, &r0lo, env, e->Iex.Mux0X.expr0);
2046      dstHi = newVRegI(env);
2047      dstLo = newVRegI(env);
2048      addInstr(env, mk_iMOVds_RR(dstHi, rXhi));
2049      addInstr(env, mk_iMOVds_RR(dstLo, rXlo));
2050      r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
2051      addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
2052                                      ARMRI84_I84(0xFF,0)));
2053      addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstHi, ARMRI84_R(r0hi)));
2054      addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstLo, ARMRI84_R(r0lo)));
2055      *rHi = dstHi;
2056      *rLo = dstLo;
2057      return;
2058   }
2059
2060   /* It is convenient sometimes to call iselInt64Expr even when we
2061      have NEON support (e.g. in do_helper_call we need 64-bit
2062      arguments as 2 x 32 regs). */
2063   if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
2064      HReg tHi = newVRegI(env);
2065      HReg tLo = newVRegI(env);
2066      HReg tmp = iselNeon64Expr(env, e);
2067      addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2068      *rHi = tHi;
2069      *rLo = tLo;
2070      return ;
2071   }
2072
2073   ppIRExpr(e);
2074   vpanic("iselInt64Expr");
2075}
2076
2077
2078/*---------------------------------------------------------*/
2079/*--- ISEL: Vector (NEON) expressions (64 or 128 bit)   ---*/
2080/*---------------------------------------------------------*/
2081
2082static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
2083{
2084   HReg r = iselNeon64Expr_wrk( env, e );
2085   vassert(hregClass(r) == HRcFlt64);
2086   vassert(hregIsVirtual(r));
2087   return r;
2088}
2089
2090/* DO NOT CALL THIS DIRECTLY */
2091static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
2092{
2093   IRType ty = typeOfIRExpr(env->type_env, e);
2094   MatchInfo mi;
2095   vassert(e);
2096   vassert(ty == Ity_I64);
2097
2098   if (e->tag == Iex_RdTmp) {
2099      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2100   }
2101
2102   if (e->tag == Iex_Const) {
2103      HReg rLo, rHi;
2104      HReg res = newVRegD(env);
2105      iselInt64Expr(&rHi, &rLo, env, e);
2106      addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2107      return res;
2108   }
2109
2110   /* 64-bit load */
2111   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2112      HReg res = newVRegD(env);
2113      ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2114      vassert(ty == Ity_I64);
2115      addInstr(env, ARMInstr_NLdStD(True, res, am));
2116      return res;
2117   }
2118
2119   /* 64-bit GET */
2120   if (e->tag == Iex_Get) {
2121      HReg addr = newVRegI(env);
2122      HReg res = newVRegD(env);
2123      vassert(ty == Ity_I64);
2124      addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2125      addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2126      return res;
2127   }
2128
2129   /* --------- BINARY ops --------- */
2130   if (e->tag == Iex_Binop) {
2131      switch (e->Iex.Binop.op) {
2132
2133         /* 32 x 32 -> 64 multiply */
2134         case Iop_MullS32:
2135         case Iop_MullU32: {
2136            HReg rLo, rHi;
2137            HReg res = newVRegD(env);
2138            iselInt64Expr(&rHi, &rLo, env, e);
2139            addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2140            return res;
2141         }
2142
2143         case Iop_And64: {
2144            HReg res = newVRegD(env);
2145            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2146            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2147            addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2148                                           res, argL, argR, 4, False));
2149            return res;
2150         }
2151         case Iop_Or64: {
2152            HReg res = newVRegD(env);
2153            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2154            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2155            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2156                                           res, argL, argR, 4, False));
2157            return res;
2158         }
2159         case Iop_Xor64: {
2160            HReg res = newVRegD(env);
2161            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2162            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2163            addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2164                                           res, argL, argR, 4, False));
2165            return res;
2166         }
2167
2168         /* 32HLto64(e1,e2) */
2169         case Iop_32HLto64: {
2170            HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2171            HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2172            HReg res = newVRegD(env);
2173            addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2174            return res;
2175         }
2176
2177         case Iop_Add8x8:
2178         case Iop_Add16x4:
2179         case Iop_Add32x2:
2180         case Iop_Add64: {
2181            HReg res = newVRegD(env);
2182            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2183            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2184            UInt size;
2185            switch (e->Iex.Binop.op) {
2186               case Iop_Add8x8: size = 0; break;
2187               case Iop_Add16x4: size = 1; break;
2188               case Iop_Add32x2: size = 2; break;
2189               case Iop_Add64: size = 3; break;
2190               default: vassert(0);
2191            }
2192            addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2193                                           res, argL, argR, size, False));
2194            return res;
2195         }
2196         case Iop_Add32Fx2: {
2197            HReg res = newVRegD(env);
2198            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2199            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2200            UInt size = 0;
2201            addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2202                                           res, argL, argR, size, False));
2203            return res;
2204         }
2205         case Iop_Recps32Fx2: {
2206            HReg res = newVRegD(env);
2207            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2208            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2209            UInt size = 0;
2210            addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2211                                           res, argL, argR, size, False));
2212            return res;
2213         }
2214         case Iop_Rsqrts32Fx2: {
2215            HReg res = newVRegD(env);
2216            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2217            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2218            UInt size = 0;
2219            addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2220                                           res, argL, argR, size, False));
2221            return res;
2222         }
2223         case Iop_InterleaveOddLanes8x8:
2224         case Iop_InterleaveOddLanes16x4:
2225         case Iop_InterleaveLO32x2:
2226         case Iop_InterleaveEvenLanes8x8:
2227         case Iop_InterleaveEvenLanes16x4:
2228         case Iop_InterleaveHI32x2: {
2229            HReg tmp = newVRegD(env);
2230            HReg res = newVRegD(env);
2231            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2232            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2233            UInt size;
2234            UInt is_lo;
2235            switch (e->Iex.Binop.op) {
2236               case Iop_InterleaveOddLanes8x8: is_lo = 1; size = 0; break;
2237               case Iop_InterleaveEvenLanes8x8: is_lo = 0; size = 0; break;
2238               case Iop_InterleaveOddLanes16x4: is_lo = 1; size = 1; break;
2239               case Iop_InterleaveEvenLanes16x4: is_lo = 0; size = 1; break;
2240               case Iop_InterleaveLO32x2: is_lo = 1; size = 2; break;
2241               case Iop_InterleaveHI32x2: is_lo = 0; size = 2; break;
2242               default: vassert(0);
2243            }
2244            if (is_lo) {
2245               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2246                                             tmp, argL, 4, False));
2247               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2248                                             res, argR, 4, False));
2249               addInstr(env, ARMInstr_NDual(ARMneon_TRN,
2250                                            res, tmp, size, False));
2251            } else {
2252               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2253                                             tmp, argR, 4, False));
2254               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2255                                             res, argL, 4, False));
2256               addInstr(env, ARMInstr_NDual(ARMneon_TRN,
2257                                            tmp, res, size, False));
2258            }
2259            return res;
2260         }
2261         case Iop_InterleaveHI8x8:
2262         case Iop_InterleaveHI16x4:
2263         case Iop_InterleaveLO8x8:
2264         case Iop_InterleaveLO16x4: {
2265            HReg tmp = newVRegD(env);
2266            HReg res = newVRegD(env);
2267            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2268            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2269            UInt size;
2270            UInt is_lo;
2271            switch (e->Iex.Binop.op) {
2272               case Iop_InterleaveHI8x8: is_lo = 1; size = 0; break;
2273               case Iop_InterleaveLO8x8: is_lo = 0; size = 0; break;
2274               case Iop_InterleaveHI16x4: is_lo = 1; size = 1; break;
2275               case Iop_InterleaveLO16x4: is_lo = 0; size = 1; break;
2276               default: vassert(0);
2277            }
2278            if (is_lo) {
2279               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2280                                             tmp, argL, 4, False));
2281               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2282                                             res, argR, 4, False));
2283               addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
2284                                            res, tmp, size, False));
2285            } else {
2286               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2287                                             tmp, argR, 4, False));
2288               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2289                                             res, argL, 4, False));
2290               addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
2291                                            tmp, res, size, False));
2292            }
2293            return res;
2294         }
2295         case Iop_CatOddLanes8x8:
2296         case Iop_CatOddLanes16x4:
2297         case Iop_CatEvenLanes8x8:
2298         case Iop_CatEvenLanes16x4: {
2299            HReg tmp = newVRegD(env);
2300            HReg res = newVRegD(env);
2301            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2302            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2303            UInt size;
2304            UInt is_lo;
2305            switch (e->Iex.Binop.op) {
2306               case Iop_CatOddLanes8x8: is_lo = 1; size = 0; break;
2307               case Iop_CatEvenLanes8x8: is_lo = 0; size = 0; break;
2308               case Iop_CatOddLanes16x4: is_lo = 1; size = 1; break;
2309               case Iop_CatEvenLanes16x4: is_lo = 0; size = 1; break;
2310               default: vassert(0);
2311            }
2312            if (is_lo) {
2313               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2314                                             tmp, argL, 4, False));
2315               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2316                                             res, argR, 4, False));
2317               addInstr(env, ARMInstr_NDual(ARMneon_UZP,
2318                                            res, tmp, size, False));
2319            } else {
2320               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2321                                             tmp, argR, 4, False));
2322               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2323                                             res, argL, 4, False));
2324               addInstr(env, ARMInstr_NDual(ARMneon_UZP,
2325                                            tmp, res, size, False));
2326            }
2327            return res;
2328         }
2329         case Iop_QAdd8Ux8:
2330         case Iop_QAdd16Ux4:
2331         case Iop_QAdd32Ux2:
2332         case Iop_QAdd64Ux1: {
2333            HReg res = newVRegD(env);
2334            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2335            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2336            UInt size;
2337            switch (e->Iex.Binop.op) {
2338               case Iop_QAdd8Ux8: size = 0; break;
2339               case Iop_QAdd16Ux4: size = 1; break;
2340               case Iop_QAdd32Ux2: size = 2; break;
2341               case Iop_QAdd64Ux1: size = 3; break;
2342               default: vassert(0);
2343            }
2344            addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2345                                           res, argL, argR, size, False));
2346            return res;
2347         }
2348         case Iop_QAdd8Sx8:
2349         case Iop_QAdd16Sx4:
2350         case Iop_QAdd32Sx2:
2351         case Iop_QAdd64Sx1: {
2352            HReg res = newVRegD(env);
2353            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2354            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2355            UInt size;
2356            switch (e->Iex.Binop.op) {
2357               case Iop_QAdd8Sx8: size = 0; break;
2358               case Iop_QAdd16Sx4: size = 1; break;
2359               case Iop_QAdd32Sx2: size = 2; break;
2360               case Iop_QAdd64Sx1: size = 3; break;
2361               default: vassert(0);
2362            }
2363            addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2364                                           res, argL, argR, size, False));
2365            return res;
2366         }
2367         case Iop_Sub8x8:
2368         case Iop_Sub16x4:
2369         case Iop_Sub32x2:
2370         case Iop_Sub64: {
2371            HReg res = newVRegD(env);
2372            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2373            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2374            UInt size;
2375            switch (e->Iex.Binop.op) {
2376               case Iop_Sub8x8: size = 0; break;
2377               case Iop_Sub16x4: size = 1; break;
2378               case Iop_Sub32x2: size = 2; break;
2379               case Iop_Sub64: size = 3; break;
2380               default: vassert(0);
2381            }
2382            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2383                                           res, argL, argR, size, False));
2384            return res;
2385         }
2386         case Iop_Sub32Fx2: {
2387            HReg res = newVRegD(env);
2388            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2389            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2390            UInt size = 0;
2391            addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2392                                           res, argL, argR, size, False));
2393            return res;
2394         }
2395         case Iop_QSub8Ux8:
2396         case Iop_QSub16Ux4:
2397         case Iop_QSub32Ux2:
2398         case Iop_QSub64Ux1: {
2399            HReg res = newVRegD(env);
2400            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2401            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2402            UInt size;
2403            switch (e->Iex.Binop.op) {
2404               case Iop_QSub8Ux8: size = 0; break;
2405               case Iop_QSub16Ux4: size = 1; break;
2406               case Iop_QSub32Ux2: size = 2; break;
2407               case Iop_QSub64Ux1: size = 3; break;
2408               default: vassert(0);
2409            }
2410            addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
2411                                           res, argL, argR, size, False));
2412            return res;
2413         }
2414         case Iop_QSub8Sx8:
2415         case Iop_QSub16Sx4:
2416         case Iop_QSub32Sx2:
2417         case Iop_QSub64Sx1: {
2418            HReg res = newVRegD(env);
2419            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2420            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2421            UInt size;
2422            switch (e->Iex.Binop.op) {
2423               case Iop_QSub8Sx8: size = 0; break;
2424               case Iop_QSub16Sx4: size = 1; break;
2425               case Iop_QSub32Sx2: size = 2; break;
2426               case Iop_QSub64Sx1: size = 3; break;
2427               default: vassert(0);
2428            }
2429            addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
2430                                           res, argL, argR, size, False));
2431            return res;
2432         }
2433         case Iop_Max8Ux8:
2434         case Iop_Max16Ux4:
2435         case Iop_Max32Ux2: {
2436            HReg res = newVRegD(env);
2437            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2438            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2439            UInt size;
2440            switch (e->Iex.Binop.op) {
2441               case Iop_Max8Ux8: size = 0; break;
2442               case Iop_Max16Ux4: size = 1; break;
2443               case Iop_Max32Ux2: size = 2; break;
2444               default: vassert(0);
2445            }
2446            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
2447                                           res, argL, argR, size, False));
2448            return res;
2449         }
2450         case Iop_Max8Sx8:
2451         case Iop_Max16Sx4:
2452         case Iop_Max32Sx2: {
2453            HReg res = newVRegD(env);
2454            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2455            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2456            UInt size;
2457            switch (e->Iex.Binop.op) {
2458               case Iop_Max8Sx8: size = 0; break;
2459               case Iop_Max16Sx4: size = 1; break;
2460               case Iop_Max32Sx2: size = 2; break;
2461               default: vassert(0);
2462            }
2463            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
2464                                           res, argL, argR, size, False));
2465            return res;
2466         }
2467         case Iop_Min8Ux8:
2468         case Iop_Min16Ux4:
2469         case Iop_Min32Ux2: {
2470            HReg res = newVRegD(env);
2471            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2472            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2473            UInt size;
2474            switch (e->Iex.Binop.op) {
2475               case Iop_Min8Ux8: size = 0; break;
2476               case Iop_Min16Ux4: size = 1; break;
2477               case Iop_Min32Ux2: size = 2; break;
2478               default: vassert(0);
2479            }
2480            addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
2481                                           res, argL, argR, size, False));
2482            return res;
2483         }
2484         case Iop_Min8Sx8:
2485         case Iop_Min16Sx4:
2486         case Iop_Min32Sx2: {
2487            HReg res = newVRegD(env);
2488            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2489            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2490            UInt size;
2491            switch (e->Iex.Binop.op) {
2492               case Iop_Min8Sx8: size = 0; break;
2493               case Iop_Min16Sx4: size = 1; break;
2494               case Iop_Min32Sx2: size = 2; break;
2495               default: vassert(0);
2496            }
2497            addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
2498                                           res, argL, argR, size, False));
2499            return res;
2500         }
2501         case Iop_Sar8x8:
2502         case Iop_Sar16x4:
2503         case Iop_Sar32x2: {
2504            HReg res = newVRegD(env);
2505            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2506            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2507            HReg argR2 = newVRegD(env);
2508            HReg zero = newVRegD(env);
2509            UInt size;
2510            switch (e->Iex.Binop.op) {
2511               case Iop_Sar8x8: size = 0; break;
2512               case Iop_Sar16x4: size = 1; break;
2513               case Iop_Sar32x2: size = 2; break;
2514               case Iop_Sar64: size = 3; break;
2515               default: vassert(0);
2516            }
2517            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2518            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2519                                           argR2, zero, argR, size, False));
2520            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2521                                          res, argL, argR2, size, False));
2522            return res;
2523         }
2524         case Iop_Sal8x8:
2525         case Iop_Sal16x4:
2526         case Iop_Sal32x2:
2527         case Iop_Sal64x1: {
2528            HReg res = newVRegD(env);
2529            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2530            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2531            UInt size;
2532            switch (e->Iex.Binop.op) {
2533               case Iop_Sal8x8: size = 0; break;
2534               case Iop_Sal16x4: size = 1; break;
2535               case Iop_Sal32x2: size = 2; break;
2536               case Iop_Sal64x1: size = 3; break;
2537               default: vassert(0);
2538            }
2539            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2540                                          res, argL, argR, size, False));
2541            return res;
2542         }
2543         case Iop_Shr8x8:
2544         case Iop_Shr16x4:
2545         case Iop_Shr32x2: {
2546            HReg res = newVRegD(env);
2547            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2548            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2549            HReg argR2 = newVRegD(env);
2550            HReg zero = newVRegD(env);
2551            UInt size;
2552            switch (e->Iex.Binop.op) {
2553               case Iop_Shr8x8: size = 0; break;
2554               case Iop_Shr16x4: size = 1; break;
2555               case Iop_Shr32x2: size = 2; break;
2556               default: vassert(0);
2557            }
2558            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2559            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2560                                           argR2, zero, argR, size, False));
2561            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2562                                          res, argL, argR2, size, False));
2563            return res;
2564         }
2565         case Iop_Shl8x8:
2566         case Iop_Shl16x4:
2567         case Iop_Shl32x2: {
2568            HReg res = newVRegD(env);
2569            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2570            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2571            UInt size;
2572            switch (e->Iex.Binop.op) {
2573               case Iop_Shl8x8: size = 0; break;
2574               case Iop_Shl16x4: size = 1; break;
2575               case Iop_Shl32x2: size = 2; break;
2576               default: vassert(0);
2577            }
2578            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2579                                          res, argL, argR, size, False));
2580            return res;
2581         }
2582         case Iop_QShl8x8:
2583         case Iop_QShl16x4:
2584         case Iop_QShl32x2:
2585         case Iop_QShl64x1: {
2586            HReg res = newVRegD(env);
2587            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2588            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2589            UInt size;
2590            switch (e->Iex.Binop.op) {
2591               case Iop_QShl8x8: size = 0; break;
2592               case Iop_QShl16x4: size = 1; break;
2593               case Iop_QShl32x2: size = 2; break;
2594               case Iop_QShl64x1: size = 3; break;
2595               default: vassert(0);
2596            }
2597            addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
2598                                          res, argL, argR, size, False));
2599            return res;
2600         }
2601         case Iop_QSal8x8:
2602         case Iop_QSal16x4:
2603         case Iop_QSal32x2:
2604         case Iop_QSal64x1: {
2605            HReg res = newVRegD(env);
2606            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2607            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2608            UInt size;
2609            switch (e->Iex.Binop.op) {
2610               case Iop_QSal8x8: size = 0; break;
2611               case Iop_QSal16x4: size = 1; break;
2612               case Iop_QSal32x2: size = 2; break;
2613               case Iop_QSal64x1: size = 3; break;
2614               default: vassert(0);
2615            }
2616            addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
2617                                          res, argL, argR, size, False));
2618            return res;
2619         }
2620         case Iop_QShlN8x8:
2621         case Iop_QShlN16x4:
2622         case Iop_QShlN32x2:
2623         case Iop_QShlN64x1: {
2624            HReg res = newVRegD(env);
2625            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2626            UInt size, imm;
2627            if (e->Iex.Binop.arg2->tag != Iex_Const ||
2628                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2629               vpanic("ARM taget supports Iop_QShlNAxB with constant "
2630                      "second argument only\n");
2631            }
2632            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2633            switch (e->Iex.Binop.op) {
2634               case Iop_QShlN8x8: size = 8 | imm; break;
2635               case Iop_QShlN16x4: size = 16 | imm; break;
2636               case Iop_QShlN32x2: size = 32 | imm; break;
2637               case Iop_QShlN64x1: size = 64 | imm; break;
2638               default: vassert(0);
2639            }
2640            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
2641                                          res, argL, size, False));
2642            return res;
2643         }
2644         case Iop_QShlN8Sx8:
2645         case Iop_QShlN16Sx4:
2646         case Iop_QShlN32Sx2:
2647         case Iop_QShlN64Sx1: {
2648            HReg res = newVRegD(env);
2649            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2650            UInt size, imm;
2651            if (e->Iex.Binop.arg2->tag != Iex_Const ||
2652                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2653               vpanic("ARM taget supports Iop_QShlNAxB with constant "
2654                      "second argument only\n");
2655            }
2656            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2657            switch (e->Iex.Binop.op) {
2658               case Iop_QShlN8Sx8: size = 8 | imm; break;
2659               case Iop_QShlN16Sx4: size = 16 | imm; break;
2660               case Iop_QShlN32Sx2: size = 32 | imm; break;
2661               case Iop_QShlN64Sx1: size = 64 | imm; break;
2662               default: vassert(0);
2663            }
2664            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
2665                                          res, argL, size, False));
2666            return res;
2667         }
2668         case Iop_QSalN8x8:
2669         case Iop_QSalN16x4:
2670         case Iop_QSalN32x2:
2671         case Iop_QSalN64x1: {
2672            HReg res = newVRegD(env);
2673            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2674            UInt size, imm;
2675            if (e->Iex.Binop.arg2->tag != Iex_Const ||
2676                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2677               vpanic("ARM taget supports Iop_QShlNAxB with constant "
2678                      "second argument only\n");
2679            }
2680            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2681            switch (e->Iex.Binop.op) {
2682               case Iop_QSalN8x8: size = 8 | imm; break;
2683               case Iop_QSalN16x4: size = 16 | imm; break;
2684               case Iop_QSalN32x2: size = 32 | imm; break;
2685               case Iop_QSalN64x1: size = 64 | imm; break;
2686               default: vassert(0);
2687            }
2688            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
2689                                          res, argL, size, False));
2690            return res;
2691         }
2692         case Iop_ShrN8x8:
2693         case Iop_ShrN16x4:
2694         case Iop_ShrN32x2:
2695         case Iop_Shr64: {
2696            HReg res = newVRegD(env);
2697            HReg tmp = newVRegD(env);
2698            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2699            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2700            HReg argR2 = newVRegI(env);
2701            UInt size;
2702            switch (e->Iex.Binop.op) {
2703               case Iop_ShrN8x8: size = 0; break;
2704               case Iop_ShrN16x4: size = 1; break;
2705               case Iop_ShrN32x2: size = 2; break;
2706               case Iop_Shr64: size = 3; break;
2707               default: vassert(0);
2708            }
2709            addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2710            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2711            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2712                                          res, argL, tmp, size, False));
2713            return res;
2714         }
2715         case Iop_ShlN8x8:
2716         case Iop_ShlN16x4:
2717         case Iop_ShlN32x2:
2718         case Iop_Shl64: {
2719            HReg res = newVRegD(env);
2720            HReg tmp = newVRegD(env);
2721            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2722            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2723            UInt size;
2724            switch (e->Iex.Binop.op) {
2725               case Iop_ShlN8x8: size = 0; break;
2726               case Iop_ShlN16x4: size = 1; break;
2727               case Iop_ShlN32x2: size = 2; break;
2728               case Iop_Shl64: size = 3; break;
2729               default: vassert(0);
2730            }
2731            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, False));
2732            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2733                                          res, argL, tmp, size, False));
2734            return res;
2735         }
2736         case Iop_SarN8x8:
2737         case Iop_SarN16x4:
2738         case Iop_SarN32x2:
2739         case Iop_Sar64: {
2740            HReg res = newVRegD(env);
2741            HReg tmp = newVRegD(env);
2742            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2743            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2744            HReg argR2 = newVRegI(env);
2745            UInt size;
2746            switch (e->Iex.Binop.op) {
2747               case Iop_SarN8x8: size = 0; break;
2748               case Iop_SarN16x4: size = 1; break;
2749               case Iop_SarN32x2: size = 2; break;
2750               case Iop_Sar64: size = 3; break;
2751               default: vassert(0);
2752            }
2753            addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2754            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2755            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2756                                          res, argL, tmp, size, False));
2757            return res;
2758         }
2759         case Iop_CmpGT8Ux8:
2760         case Iop_CmpGT16Ux4:
2761         case Iop_CmpGT32Ux2: {
2762            HReg res = newVRegD(env);
2763            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2764            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2765            UInt size;
2766            switch (e->Iex.Binop.op) {
2767               case Iop_CmpGT8Ux8: size = 0; break;
2768               case Iop_CmpGT16Ux4: size = 1; break;
2769               case Iop_CmpGT32Ux2: size = 2; break;
2770               default: vassert(0);
2771            }
2772            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
2773                                           res, argL, argR, size, False));
2774            return res;
2775         }
2776         case Iop_CmpGT8Sx8:
2777         case Iop_CmpGT16Sx4:
2778         case Iop_CmpGT32Sx2: {
2779            HReg res = newVRegD(env);
2780            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2781            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2782            UInt size;
2783            switch (e->Iex.Binop.op) {
2784               case Iop_CmpGT8Sx8: size = 0; break;
2785               case Iop_CmpGT16Sx4: size = 1; break;
2786               case Iop_CmpGT32Sx2: size = 2; break;
2787               default: vassert(0);
2788            }
2789            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
2790                                           res, argL, argR, size, False));
2791            return res;
2792         }
2793         case Iop_CmpEQ8x8:
2794         case Iop_CmpEQ16x4:
2795         case Iop_CmpEQ32x2: {
2796            HReg res = newVRegD(env);
2797            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2798            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2799            UInt size;
2800            switch (e->Iex.Binop.op) {
2801               case Iop_CmpEQ8x8: size = 0; break;
2802               case Iop_CmpEQ16x4: size = 1; break;
2803               case Iop_CmpEQ32x2: size = 2; break;
2804               default: vassert(0);
2805            }
2806            addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
2807                                           res, argL, argR, size, False));
2808            return res;
2809         }
2810         case Iop_Mul8x8:
2811         case Iop_Mul16x4:
2812         case Iop_Mul32x2: {
2813            HReg res = newVRegD(env);
2814            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2815            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2816            UInt size = 0;
2817            switch(e->Iex.Binop.op) {
2818               case Iop_Mul8x8: size = 0; break;
2819               case Iop_Mul16x4: size = 1; break;
2820               case Iop_Mul32x2: size = 2; break;
2821               default: vassert(0);
2822            }
2823            addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
2824                                           res, argL, argR, size, False));
2825            return res;
2826         }
2827         case Iop_Mul32Fx2: {
2828            HReg res = newVRegD(env);
2829            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2830            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2831            UInt size = 0;
2832            addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
2833                                           res, argL, argR, size, False));
2834            return res;
2835         }
2836         case Iop_QDMulHi16Sx4:
2837         case Iop_QDMulHi32Sx2: {
2838            HReg res = newVRegD(env);
2839            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2840            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2841            UInt size = 0;
2842            switch(e->Iex.Binop.op) {
2843               case Iop_QDMulHi16Sx4: size = 1; break;
2844               case Iop_QDMulHi32Sx2: size = 2; break;
2845               default: vassert(0);
2846            }
2847            addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
2848                                           res, argL, argR, size, False));
2849            return res;
2850         }
2851
2852         case Iop_QRDMulHi16Sx4:
2853         case Iop_QRDMulHi32Sx2: {
2854            HReg res = newVRegD(env);
2855            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2856            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2857            UInt size = 0;
2858            switch(e->Iex.Binop.op) {
2859               case Iop_QRDMulHi16Sx4: size = 1; break;
2860               case Iop_QRDMulHi32Sx2: size = 2; break;
2861               default: vassert(0);
2862            }
2863            addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
2864                                           res, argL, argR, size, False));
2865            return res;
2866         }
2867
2868         case Iop_PwAdd8x8:
2869         case Iop_PwAdd16x4:
2870         case Iop_PwAdd32x2: {
2871            HReg res = newVRegD(env);
2872            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2873            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2874            UInt size = 0;
2875            switch(e->Iex.Binop.op) {
2876               case Iop_PwAdd8x8: size = 0; break;
2877               case Iop_PwAdd16x4: size = 1; break;
2878               case Iop_PwAdd32x2: size = 2; break;
2879               default: vassert(0);
2880            }
2881            addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
2882                                           res, argL, argR, size, False));
2883            return res;
2884         }
2885         case Iop_PwAdd32Fx2: {
2886            HReg res = newVRegD(env);
2887            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2888            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2889            UInt size = 0;
2890            addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
2891                                           res, argL, argR, size, False));
2892            return res;
2893         }
2894         case Iop_PwMin8Ux8:
2895         case Iop_PwMin16Ux4:
2896         case Iop_PwMin32Ux2: {
2897            HReg res = newVRegD(env);
2898            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2899            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2900            UInt size = 0;
2901            switch(e->Iex.Binop.op) {
2902               case Iop_PwMin8Ux8: size = 0; break;
2903               case Iop_PwMin16Ux4: size = 1; break;
2904               case Iop_PwMin32Ux2: size = 2; break;
2905               default: vassert(0);
2906            }
2907            addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
2908                                           res, argL, argR, size, False));
2909            return res;
2910         }
2911         case Iop_PwMin8Sx8:
2912         case Iop_PwMin16Sx4:
2913         case Iop_PwMin32Sx2: {
2914            HReg res = newVRegD(env);
2915            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2916            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2917            UInt size = 0;
2918            switch(e->Iex.Binop.op) {
2919               case Iop_PwMin8Sx8: size = 0; break;
2920               case Iop_PwMin16Sx4: size = 1; break;
2921               case Iop_PwMin32Sx2: size = 2; break;
2922               default: vassert(0);
2923            }
2924            addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
2925                                           res, argL, argR, size, False));
2926            return res;
2927         }
2928         case Iop_PwMax8Ux8:
2929         case Iop_PwMax16Ux4:
2930         case Iop_PwMax32Ux2: {
2931            HReg res = newVRegD(env);
2932            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2933            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2934            UInt size = 0;
2935            switch(e->Iex.Binop.op) {
2936               case Iop_PwMax8Ux8: size = 0; break;
2937               case Iop_PwMax16Ux4: size = 1; break;
2938               case Iop_PwMax32Ux2: size = 2; break;
2939               default: vassert(0);
2940            }
2941            addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
2942                                           res, argL, argR, size, False));
2943            return res;
2944         }
2945         case Iop_PwMax8Sx8:
2946         case Iop_PwMax16Sx4:
2947         case Iop_PwMax32Sx2: {
2948            HReg res = newVRegD(env);
2949            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2950            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2951            UInt size = 0;
2952            switch(e->Iex.Binop.op) {
2953               case Iop_PwMax8Sx8: size = 0; break;
2954               case Iop_PwMax16Sx4: size = 1; break;
2955               case Iop_PwMax32Sx2: size = 2; break;
2956               default: vassert(0);
2957            }
2958            addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
2959                                           res, argL, argR, size, False));
2960            return res;
2961         }
2962         case Iop_Perm8x8: {
2963            HReg res = newVRegD(env);
2964            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2965            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2966            addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
2967                                           res, argL, argR, 0, False));
2968            return res;
2969         }
2970         case Iop_PolynomialMul8x8: {
2971            HReg res = newVRegD(env);
2972            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2973            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2974            UInt size = 0;
2975            addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
2976                                           res, argL, argR, size, False));
2977            return res;
2978         }
2979         case Iop_Max32Fx2: {
2980            HReg res = newVRegD(env);
2981            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2982            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2983            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
2984                                           res, argL, argR, 2, False));
2985            return res;
2986         }
2987         case Iop_Min32Fx2: {
2988            HReg res = newVRegD(env);
2989            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2990            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2991            addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
2992                                           res, argL, argR, 2, False));
2993            return res;
2994         }
2995         case Iop_PwMax32Fx2: {
2996            HReg res = newVRegD(env);
2997            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2998            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2999            addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
3000                                           res, argL, argR, 2, False));
3001            return res;
3002         }
3003         case Iop_PwMin32Fx2: {
3004            HReg res = newVRegD(env);
3005            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3006            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3007            addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
3008                                           res, argL, argR, 2, False));
3009            return res;
3010         }
3011         case Iop_CmpGT32Fx2: {
3012            HReg res = newVRegD(env);
3013            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3014            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3015            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
3016                                           res, argL, argR, 2, False));
3017            return res;
3018         }
3019         case Iop_CmpGE32Fx2: {
3020            HReg res = newVRegD(env);
3021            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3022            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3023            addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
3024                                           res, argL, argR, 2, False));
3025            return res;
3026         }
3027         case Iop_CmpEQ32Fx2: {
3028            HReg res = newVRegD(env);
3029            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3030            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3031            addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3032                                           res, argL, argR, 2, False));
3033            return res;
3034         }
3035         case Iop_F32ToFixed32Ux2_RZ:
3036         case Iop_F32ToFixed32Sx2_RZ:
3037         case Iop_Fixed32UToF32x2_RN:
3038         case Iop_Fixed32SToF32x2_RN: {
3039            HReg res = newVRegD(env);
3040            HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3041            ARMNeonUnOp op;
3042            UInt imm6;
3043            if (e->Iex.Binop.arg2->tag != Iex_Const ||
3044               typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3045                  vpanic("ARM supports FP <-> Fixed conversion with constant "
3046                         "second argument less than 33 only\n");
3047            }
3048            imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3049            vassert(imm6 <= 32 && imm6 > 0);
3050            imm6 = 64 - imm6;
3051            switch(e->Iex.Binop.op) {
3052               case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3053               case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3054               case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3055               case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3056               default: vassert(0);
3057            }
3058            addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3059            return res;
3060         }
3061         /*
3062         FIXME: is this here or not?
3063         case Iop_VDup8x8:
3064         case Iop_VDup16x4:
3065         case Iop_VDup32x2: {
3066            HReg res = newVRegD(env);
3067            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3068            UInt index;
3069            UInt imm4;
3070            UInt size = 0;
3071            if (e->Iex.Binop.arg2->tag != Iex_Const ||
3072               typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3073                  vpanic("ARM supports Iop_VDup with constant "
3074                         "second argument less than 16 only\n");
3075            }
3076            index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3077            switch(e->Iex.Binop.op) {
3078               case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3079               case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3080               case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3081               default: vassert(0);
3082            }
3083            if (imm4 >= 16) {
3084               vpanic("ARM supports Iop_VDup with constant "
3085                      "second argument less than 16 only\n");
3086            }
3087            addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3088                                          res, argL, imm4, False));
3089            return res;
3090         }
3091         */
3092         default:
3093            break;
3094      }
3095   }
3096
3097   /* --------- UNARY ops --------- */
3098   if (e->tag == Iex_Unop) {
3099      switch (e->Iex.Unop.op) {
3100
3101         /* ReinterpF64asI64 */
3102         case Iop_ReinterpF64asI64:
3103         /* Left64(e) */
3104         case Iop_Left64:
3105         /* CmpwNEZ64(e) */
3106         //case Iop_CmpwNEZ64:
3107         case Iop_1Sto64: {
3108            HReg rLo, rHi;
3109            HReg res = newVRegD(env);
3110            iselInt64Expr(&rHi, &rLo, env, e);
3111            addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3112            return res;
3113         }
3114         case Iop_Not64: {
3115            DECLARE_PATTERN(p_veqz_8x8);
3116            DECLARE_PATTERN(p_veqz_16x4);
3117            DECLARE_PATTERN(p_veqz_32x2);
3118            DECLARE_PATTERN(p_vcge_8sx8);
3119            DECLARE_PATTERN(p_vcge_16sx4);
3120            DECLARE_PATTERN(p_vcge_32sx2);
3121            DECLARE_PATTERN(p_vcge_8ux8);
3122            DECLARE_PATTERN(p_vcge_16ux4);
3123            DECLARE_PATTERN(p_vcge_32ux2);
3124            DEFINE_PATTERN(p_veqz_8x8,
3125                  unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3126            DEFINE_PATTERN(p_veqz_16x4,
3127                  unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3128            DEFINE_PATTERN(p_veqz_32x2,
3129                  unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3130            DEFINE_PATTERN(p_vcge_8sx8,
3131                  unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3132            DEFINE_PATTERN(p_vcge_16sx4,
3133                  unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3134            DEFINE_PATTERN(p_vcge_32sx2,
3135                  unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3136            DEFINE_PATTERN(p_vcge_8ux8,
3137                  unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3138            DEFINE_PATTERN(p_vcge_16ux4,
3139                  unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3140            DEFINE_PATTERN(p_vcge_32ux2,
3141                  unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3142            if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3143               HReg res = newVRegD(env);
3144               HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3145               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3146               return res;
3147            } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3148               HReg res = newVRegD(env);
3149               HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3150               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3151               return res;
3152            } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3153               HReg res = newVRegD(env);
3154               HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3155               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3156               return res;
3157            } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3158               HReg res = newVRegD(env);
3159               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3160               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3161               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3162                                              res, argL, argR, 0, False));
3163               return res;
3164            } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3165               HReg res = newVRegD(env);
3166               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3167               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3168               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3169                                              res, argL, argR, 1, False));
3170               return res;
3171            } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3172               HReg res = newVRegD(env);
3173               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3174               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3175               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3176                                              res, argL, argR, 2, False));
3177               return res;
3178            } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3179               HReg res = newVRegD(env);
3180               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3181               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3182               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3183                                              res, argL, argR, 0, False));
3184               return res;
3185            } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3186               HReg res = newVRegD(env);
3187               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3188               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3189               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3190                                              res, argL, argR, 1, False));
3191               return res;
3192            } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3193               HReg res = newVRegD(env);
3194               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3195               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3196               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3197                                              res, argL, argR, 2, False));
3198               return res;
3199            } else {
3200               HReg res = newVRegD(env);
3201               HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3202               addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3203               return res;
3204            }
3205         }
3206         case Iop_Dup8x8:
3207         case Iop_Dup16x4:
3208         case Iop_Dup32x2: {
3209            HReg res, arg;
3210            UInt size;
3211            DECLARE_PATTERN(p_vdup_8x8);
3212            DECLARE_PATTERN(p_vdup_16x4);
3213            DECLARE_PATTERN(p_vdup_32x2);
3214            DEFINE_PATTERN(p_vdup_8x8,
3215                  unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3216            DEFINE_PATTERN(p_vdup_16x4,
3217                  unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3218            DEFINE_PATTERN(p_vdup_32x2,
3219                  unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3220            if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3221               UInt index;
3222               UInt imm4;
3223               if (mi.bindee[1]->tag == Iex_Const &&
3224                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3225                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3226                  imm4 = (index << 1) + 1;
3227                  if (index < 8) {
3228                     res = newVRegD(env);
3229                     arg = iselNeon64Expr(env, mi.bindee[0]);
3230                     addInstr(env, ARMInstr_NUnaryS(
3231                                      ARMneon_VDUP,
3232                                      mkARMNRS(ARMNRS_Reg, res, 0),
3233                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3234                                      imm4, False
3235                             ));
3236                     return res;
3237                  }
3238               }
3239            } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3240               UInt index;
3241               UInt imm4;
3242               if (mi.bindee[1]->tag == Iex_Const &&
3243                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3244                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3245                  imm4 = (index << 2) + 2;
3246                  if (index < 4) {
3247                     res = newVRegD(env);
3248                     arg = iselNeon64Expr(env, mi.bindee[0]);
3249                     addInstr(env, ARMInstr_NUnaryS(
3250                                      ARMneon_VDUP,
3251                                      mkARMNRS(ARMNRS_Reg, res, 0),
3252                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3253                                      imm4, False
3254                             ));
3255                     return res;
3256                  }
3257               }
3258            } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3259               UInt index;
3260               UInt imm4;
3261               if (mi.bindee[1]->tag == Iex_Const &&
3262                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3263                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3264                  imm4 = (index << 3) + 4;
3265                  if (index < 2) {
3266                     res = newVRegD(env);
3267                     arg = iselNeon64Expr(env, mi.bindee[0]);
3268                     addInstr(env, ARMInstr_NUnaryS(
3269                                      ARMneon_VDUP,
3270                                      mkARMNRS(ARMNRS_Reg, res, 0),
3271                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3272                                      imm4, False
3273                             ));
3274                     return res;
3275                  }
3276               }
3277            }
3278            arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3279            res = newVRegD(env);
3280            switch (e->Iex.Unop.op) {
3281               case Iop_Dup8x8: size = 0; break;
3282               case Iop_Dup16x4: size = 1; break;
3283               case Iop_Dup32x2: size = 2; break;
3284               default: vassert(0);
3285            }
3286            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3287            return res;
3288         }
3289         case Iop_Abs8x8:
3290         case Iop_Abs16x4:
3291         case Iop_Abs32x2: {
3292            HReg res = newVRegD(env);
3293            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3294            UInt size = 0;
3295            switch(e->Iex.Binop.op) {
3296               case Iop_Abs8x8: size = 0; break;
3297               case Iop_Abs16x4: size = 1; break;
3298               case Iop_Abs32x2: size = 2; break;
3299               default: vassert(0);
3300            }
3301            addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3302            return res;
3303         }
3304         case Iop_Reverse64_8x8:
3305         case Iop_Reverse64_16x4:
3306         case Iop_Reverse64_32x2: {
3307            HReg res = newVRegD(env);
3308            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3309            UInt size = 0;
3310            switch(e->Iex.Binop.op) {
3311               case Iop_Reverse64_8x8: size = 0; break;
3312               case Iop_Reverse64_16x4: size = 1; break;
3313               case Iop_Reverse64_32x2: size = 2; break;
3314               default: vassert(0);
3315            }
3316            addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3317                                          res, arg, size, False));
3318            return res;
3319         }
3320         case Iop_Reverse32_8x8:
3321         case Iop_Reverse32_16x4: {
3322            HReg res = newVRegD(env);
3323            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3324            UInt size = 0;
3325            switch(e->Iex.Binop.op) {
3326               case Iop_Reverse32_8x8: size = 0; break;
3327               case Iop_Reverse32_16x4: size = 1; break;
3328               default: vassert(0);
3329            }
3330            addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3331                                          res, arg, size, False));
3332            return res;
3333         }
3334         case Iop_Reverse16_8x8: {
3335            HReg res = newVRegD(env);
3336            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3337            UInt size = 0;
3338            addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3339                                          res, arg, size, False));
3340            return res;
3341         }
3342         case Iop_CmpwNEZ64: {
3343            HReg x_lsh = newVRegD(env);
3344            HReg x_rsh = newVRegD(env);
3345            HReg lsh_amt = newVRegD(env);
3346            HReg rsh_amt = newVRegD(env);
3347            HReg zero = newVRegD(env);
3348            HReg tmp = newVRegD(env);
3349            HReg tmp2 = newVRegD(env);
3350            HReg res = newVRegD(env);
3351            HReg x = newVRegD(env);
3352            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3353            addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3354            addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3355            addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3356            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3357            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3358                                           rsh_amt, zero, lsh_amt, 2, False));
3359            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3360                                          x_lsh, x, lsh_amt, 3, False));
3361            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3362                                          x_rsh, x, rsh_amt, 3, False));
3363            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3364                                           tmp, x_lsh, x_rsh, 0, False));
3365            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3366                                           res, tmp, x, 0, False));
3367            return res;
3368         }
3369         case Iop_CmpNEZ8x8:
3370         case Iop_CmpNEZ16x4:
3371         case Iop_CmpNEZ32x2: {
3372            HReg res = newVRegD(env);
3373            HReg tmp = newVRegD(env);
3374            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3375            UInt size;
3376            switch (e->Iex.Unop.op) {
3377               case Iop_CmpNEZ8x8: size = 0; break;
3378               case Iop_CmpNEZ16x4: size = 1; break;
3379               case Iop_CmpNEZ32x2: size = 2; break;
3380               default: vassert(0);
3381            }
3382            addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
3383            addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
3384            return res;
3385         }
3386         case Iop_NarrowUn16to8x8:
3387         case Iop_NarrowUn32to16x4:
3388         case Iop_NarrowUn64to32x2: {
3389            HReg res = newVRegD(env);
3390            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3391            UInt size = 0;
3392            switch(e->Iex.Binop.op) {
3393               case Iop_NarrowUn16to8x8:  size = 0; break;
3394               case Iop_NarrowUn32to16x4: size = 1; break;
3395               case Iop_NarrowUn64to32x2: size = 2; break;
3396               default: vassert(0);
3397            }
3398            addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
3399                                          res, arg, size, False));
3400            return res;
3401         }
3402         case Iop_QNarrowUn16Sto8Sx8:
3403         case Iop_QNarrowUn32Sto16Sx4:
3404         case Iop_QNarrowUn64Sto32Sx2: {
3405            HReg res = newVRegD(env);
3406            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3407            UInt size = 0;
3408            switch(e->Iex.Binop.op) {
3409               case Iop_QNarrowUn16Sto8Sx8:  size = 0; break;
3410               case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
3411               case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
3412               default: vassert(0);
3413            }
3414            addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
3415                                          res, arg, size, False));
3416            return res;
3417         }
3418         case Iop_QNarrowUn16Sto8Ux8:
3419         case Iop_QNarrowUn32Sto16Ux4:
3420         case Iop_QNarrowUn64Sto32Ux2: {
3421            HReg res = newVRegD(env);
3422            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3423            UInt size = 0;
3424            switch(e->Iex.Binop.op) {
3425               case Iop_QNarrowUn16Sto8Ux8:  size = 0; break;
3426               case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
3427               case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
3428               default: vassert(0);
3429            }
3430            addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
3431                                          res, arg, size, False));
3432            return res;
3433         }
3434         case Iop_QNarrowUn16Uto8Ux8:
3435         case Iop_QNarrowUn32Uto16Ux4:
3436         case Iop_QNarrowUn64Uto32Ux2: {
3437            HReg res = newVRegD(env);
3438            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3439            UInt size = 0;
3440            switch(e->Iex.Binop.op) {
3441               case Iop_QNarrowUn16Uto8Ux8:  size = 0; break;
3442               case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
3443               case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
3444               default: vassert(0);
3445            }
3446            addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
3447                                          res, arg, size, False));
3448            return res;
3449         }
3450         case Iop_PwAddL8Sx8:
3451         case Iop_PwAddL16Sx4:
3452         case Iop_PwAddL32Sx2: {
3453            HReg res = newVRegD(env);
3454            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3455            UInt size = 0;
3456            switch(e->Iex.Binop.op) {
3457               case Iop_PwAddL8Sx8: size = 0; break;
3458               case Iop_PwAddL16Sx4: size = 1; break;
3459               case Iop_PwAddL32Sx2: size = 2; break;
3460               default: vassert(0);
3461            }
3462            addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
3463                                          res, arg, size, False));
3464            return res;
3465         }
3466         case Iop_PwAddL8Ux8:
3467         case Iop_PwAddL16Ux4:
3468         case Iop_PwAddL32Ux2: {
3469            HReg res = newVRegD(env);
3470            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3471            UInt size = 0;
3472            switch(e->Iex.Binop.op) {
3473               case Iop_PwAddL8Ux8: size = 0; break;
3474               case Iop_PwAddL16Ux4: size = 1; break;
3475               case Iop_PwAddL32Ux2: size = 2; break;
3476               default: vassert(0);
3477            }
3478            addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
3479                                          res, arg, size, False));
3480            return res;
3481         }
3482         case Iop_Cnt8x8: {
3483            HReg res = newVRegD(env);
3484            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3485            UInt size = 0;
3486            addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
3487                                          res, arg, size, False));
3488            return res;
3489         }
3490         case Iop_Clz8Sx8:
3491         case Iop_Clz16Sx4:
3492         case Iop_Clz32Sx2: {
3493            HReg res = newVRegD(env);
3494            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3495            UInt size = 0;
3496            switch(e->Iex.Binop.op) {
3497               case Iop_Clz8Sx8: size = 0; break;
3498               case Iop_Clz16Sx4: size = 1; break;
3499               case Iop_Clz32Sx2: size = 2; break;
3500               default: vassert(0);
3501            }
3502            addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
3503                                          res, arg, size, False));
3504            return res;
3505         }
3506         case Iop_Cls8Sx8:
3507         case Iop_Cls16Sx4:
3508         case Iop_Cls32Sx2: {
3509            HReg res = newVRegD(env);
3510            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3511            UInt size = 0;
3512            switch(e->Iex.Binop.op) {
3513               case Iop_Cls8Sx8: size = 0; break;
3514               case Iop_Cls16Sx4: size = 1; break;
3515               case Iop_Cls32Sx2: size = 2; break;
3516               default: vassert(0);
3517            }
3518            addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
3519                                          res, arg, size, False));
3520            return res;
3521         }
3522         case Iop_FtoI32Sx2_RZ: {
3523            HReg res = newVRegD(env);
3524            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3525            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
3526                                          res, arg, 2, False));
3527            return res;
3528         }
3529         case Iop_FtoI32Ux2_RZ: {
3530            HReg res = newVRegD(env);
3531            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3532            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
3533                                          res, arg, 2, False));
3534            return res;
3535         }
3536         case Iop_I32StoFx2: {
3537            HReg res = newVRegD(env);
3538            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3539            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
3540                                          res, arg, 2, False));
3541            return res;
3542         }
3543         case Iop_I32UtoFx2: {
3544            HReg res = newVRegD(env);
3545            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3546            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
3547                                          res, arg, 2, False));
3548            return res;
3549         }
3550         case Iop_F32toF16x4: {
3551            HReg res = newVRegD(env);
3552            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3553            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
3554                                          res, arg, 2, False));
3555            return res;
3556         }
3557         case Iop_Recip32Fx2: {
3558            HReg res = newVRegD(env);
3559            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3560            addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
3561                                          res, argL, 0, False));
3562            return res;
3563         }
3564         case Iop_Recip32x2: {
3565            HReg res = newVRegD(env);
3566            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3567            addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
3568                                          res, argL, 0, False));
3569            return res;
3570         }
3571         case Iop_Abs32Fx2: {
3572            DECLARE_PATTERN(p_vabd_32fx2);
3573            DEFINE_PATTERN(p_vabd_32fx2,
3574                           unop(Iop_Abs32Fx2,
3575                                binop(Iop_Sub32Fx2,
3576                                      bind(0),
3577                                      bind(1))));
3578            if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
3579               HReg res = newVRegD(env);
3580               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3581               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3582               addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
3583                                              res, argL, argR, 0, False));
3584               return res;
3585            } else {
3586               HReg res = newVRegD(env);
3587               HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3588               addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
3589                                             res, arg, 0, False));
3590               return res;
3591            }
3592         }
3593         case Iop_Rsqrte32Fx2: {
3594            HReg res = newVRegD(env);
3595            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3596            addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
3597                                          res, arg, 0, False));
3598            return res;
3599         }
3600         case Iop_Rsqrte32x2: {
3601            HReg res = newVRegD(env);
3602            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3603            addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
3604                                          res, arg, 0, False));
3605            return res;
3606         }
3607         case Iop_Neg32Fx2: {
3608            HReg res = newVRegD(env);
3609            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3610            addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
3611                                          res, arg, 0, False));
3612            return res;
3613         }
3614         default:
3615            break;
3616      }
3617   } /* if (e->tag == Iex_Unop) */
3618
3619   if (e->tag == Iex_Triop) {
3620      IRTriop *triop = e->Iex.Triop.details;
3621
3622      switch (triop->op) {
3623         case Iop_Extract64: {
3624            HReg res = newVRegD(env);
3625            HReg argL = iselNeon64Expr(env, triop->arg1);
3626            HReg argR = iselNeon64Expr(env, triop->arg2);
3627            UInt imm4;
3628            if (triop->arg3->tag != Iex_Const ||
3629                typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
3630               vpanic("ARM target supports Iop_Extract64 with constant "
3631                      "third argument less than 16 only\n");
3632            }
3633            imm4 = triop->arg3->Iex.Const.con->Ico.U8;
3634            if (imm4 >= 8) {
3635               vpanic("ARM target supports Iop_Extract64 with constant "
3636                      "third argument less than 16 only\n");
3637            }
3638            addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
3639                                           res, argL, argR, imm4, False));
3640            return res;
3641         }
3642         case Iop_SetElem8x8:
3643         case Iop_SetElem16x4:
3644         case Iop_SetElem32x2: {
3645            HReg res = newVRegD(env);
3646            HReg dreg = iselNeon64Expr(env, triop->arg1);
3647            HReg arg = iselIntExpr_R(env, triop->arg3);
3648            UInt index, size;
3649            if (triop->arg2->tag != Iex_Const ||
3650                typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
3651               vpanic("ARM target supports SetElem with constant "
3652                      "second argument only\n");
3653            }
3654            index = triop->arg2->Iex.Const.con->Ico.U8;
3655            switch (triop->op) {
3656               case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
3657               case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
3658               case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
3659               default: vassert(0);
3660            }
3661            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
3662            addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
3663                                           mkARMNRS(ARMNRS_Scalar, res, index),
3664                                           mkARMNRS(ARMNRS_Reg, arg, 0),
3665                                           size, False));
3666            return res;
3667         }
3668         default:
3669            break;
3670      }
3671   }
3672
3673   /* --------- MULTIPLEX --------- */
3674   if (e->tag == Iex_Mux0X) {
3675      HReg rLo, rHi;
3676      HReg res = newVRegD(env);
3677      iselInt64Expr(&rHi, &rLo, env, e);
3678      addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3679      return res;
3680   }
3681
3682   ppIRExpr(e);
3683   vpanic("iselNeon64Expr");
3684}
3685
3686static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e )
3687{
3688   HReg r = iselNeonExpr_wrk( env, e );
3689   vassert(hregClass(r) == HRcVec128);
3690   vassert(hregIsVirtual(r));
3691   return r;
3692}
3693
3694/* DO NOT CALL THIS DIRECTLY */
3695static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e )
3696{
3697   IRType ty = typeOfIRExpr(env->type_env, e);
3698   MatchInfo mi;
3699   vassert(e);
3700   vassert(ty == Ity_V128);
3701
3702   if (e->tag == Iex_RdTmp) {
3703      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3704   }
3705
3706   if (e->tag == Iex_Const) {
      /* At the moment there should be no 128-bit constants in the IR
         generated for ARM during disassembly. They are represented as
         Iop_64HLtoV128 binary operations and are handled among the
         binary ops. */
      /* But zero can be created by Valgrind's internal optimizer. */
3711      if (e->Iex.Const.con->Ico.V128 == 0) {
3712         HReg res = newVRegV(env);
3713         addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(0, 0)));
3714         return res;
3715      }
3716      ppIRExpr(e);
3717      vpanic("128-bit constant is not implemented");
3718   }
3719
3720   if (e->tag == Iex_Load) {
3721      HReg res = newVRegV(env);
3722      ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
3723      vassert(ty == Ity_V128);
3724      addInstr(env, ARMInstr_NLdStQ(True, res, am));
3725      return res;
3726   }
3727
3728   if (e->tag == Iex_Get) {
3729      HReg addr = newVRegI(env);
3730      HReg res = newVRegV(env);
3731      vassert(ty == Ity_V128);
3732      addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
3733      addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
3734      return res;
3735   }
3736
3737   if (e->tag == Iex_Unop) {
3738      switch (e->Iex.Unop.op) {
3739         case Iop_NotV128: {
3740            DECLARE_PATTERN(p_veqz_8x16);
3741            DECLARE_PATTERN(p_veqz_16x8);
3742            DECLARE_PATTERN(p_veqz_32x4);
3743            DECLARE_PATTERN(p_vcge_8sx16);
3744            DECLARE_PATTERN(p_vcge_16sx8);
3745            DECLARE_PATTERN(p_vcge_32sx4);
3746            DECLARE_PATTERN(p_vcge_8ux16);
3747            DECLARE_PATTERN(p_vcge_16ux8);
3748            DECLARE_PATTERN(p_vcge_32ux4);
3749            DEFINE_PATTERN(p_veqz_8x16,
3750                  unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
3751            DEFINE_PATTERN(p_veqz_16x8,
3752                  unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
3753            DEFINE_PATTERN(p_veqz_32x4,
3754                  unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
3755            DEFINE_PATTERN(p_vcge_8sx16,
3756                  unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
3757            DEFINE_PATTERN(p_vcge_16sx8,
3758                  unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
3759            DEFINE_PATTERN(p_vcge_32sx4,
3760                  unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
3761            DEFINE_PATTERN(p_vcge_8ux16,
3762                  unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
3763            DEFINE_PATTERN(p_vcge_16ux8,
3764                  unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
3765            DEFINE_PATTERN(p_vcge_32ux4,
3766                  unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
3767            if (matchIRExpr(&mi, p_veqz_8x16, e)) {
3768               HReg res = newVRegV(env);
3769               HReg arg = iselNeonExpr(env, mi.bindee[0]);
3770               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
3771               return res;
3772            } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
3773               HReg res = newVRegV(env);
3774               HReg arg = iselNeonExpr(env, mi.bindee[0]);
3775               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
3776               return res;
3777            } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
3778               HReg res = newVRegV(env);
3779               HReg arg = iselNeonExpr(env, mi.bindee[0]);
3780               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
3781               return res;
3782            } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
3783               HReg res = newVRegV(env);
3784               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3785               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3786               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3787                                              res, argL, argR, 0, True));
3788               return res;
3789            } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
3790               HReg res = newVRegV(env);
3791               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3792               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3793               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3794                                              res, argL, argR, 1, True));
3795               return res;
3796            } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
3797               HReg res = newVRegV(env);
3798               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3799               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3800               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3801                                              res, argL, argR, 2, True));
3802               return res;
3803            } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
3804               HReg res = newVRegV(env);
3805               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3806               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3807               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3808                                              res, argL, argR, 0, True));
3809               return res;
3810            } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
3811               HReg res = newVRegV(env);
3812               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3813               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3814               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3815                                              res, argL, argR, 1, True));
3816               return res;
3817            } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
3818               HReg res = newVRegV(env);
3819               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3820               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3821               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3822                                              res, argL, argR, 2, True));
3823               return res;
3824            } else {
3825               HReg res = newVRegV(env);
3826               HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3827               addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
3828               return res;
3829            }
3830         }
3831         case Iop_Dup8x16:
3832         case Iop_Dup16x8:
3833         case Iop_Dup32x4: {
3834            HReg res, arg;
3835            UInt size;
3836            DECLARE_PATTERN(p_vdup_8x16);
3837            DECLARE_PATTERN(p_vdup_16x8);
3838            DECLARE_PATTERN(p_vdup_32x4);
3839            DEFINE_PATTERN(p_vdup_8x16,
3840                  unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
3841            DEFINE_PATTERN(p_vdup_16x8,
3842                  unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
3843            DEFINE_PATTERN(p_vdup_32x4,
3844                  unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
3845            if (matchIRExpr(&mi, p_vdup_8x16, e)) {
3846               UInt index;
3847               UInt imm4;
3848               if (mi.bindee[1]->tag == Iex_Const &&
3849                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3850                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3851                  imm4 = (index << 1) + 1;
3852                  if (index < 8) {
3853                     res = newVRegV(env);
3854                     arg = iselNeon64Expr(env, mi.bindee[0]);
3855                     addInstr(env, ARMInstr_NUnaryS(
3856                                      ARMneon_VDUP,
3857                                      mkARMNRS(ARMNRS_Reg, res, 0),
3858                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3859                                      imm4, True
3860                             ));
3861                     return res;
3862                  }
3863               }
3864            } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
3865               UInt index;
3866               UInt imm4;
3867               if (mi.bindee[1]->tag == Iex_Const &&
3868                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3869                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3870                  imm4 = (index << 2) + 2;
3871                  if (index < 4) {
3872                     res = newVRegV(env);
3873                     arg = iselNeon64Expr(env, mi.bindee[0]);
3874                     addInstr(env, ARMInstr_NUnaryS(
3875                                      ARMneon_VDUP,
3876                                      mkARMNRS(ARMNRS_Reg, res, 0),
3877                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3878                                      imm4, True
3879                             ));
3880                     return res;
3881                  }
3882               }
3883            } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
3884               UInt index;
3885               UInt imm4;
3886               if (mi.bindee[1]->tag == Iex_Const &&
3887                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3888                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3889                  imm4 = (index << 3) + 4;
3890                  if (index < 2) {
3891                     res = newVRegV(env);
3892                     arg = iselNeon64Expr(env, mi.bindee[0]);
3893                     addInstr(env, ARMInstr_NUnaryS(
3894                                      ARMneon_VDUP,
3895                                      mkARMNRS(ARMNRS_Reg, res, 0),
3896                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3897                                      imm4, True
3898                             ));
3899                     return res;
3900                  }
3901               }
3902            }
3903            arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3904            res = newVRegV(env);
3905            switch (e->Iex.Unop.op) {
3906               case Iop_Dup8x16: size = 0; break;
3907               case Iop_Dup16x8: size = 1; break;
3908               case Iop_Dup32x4: size = 2; break;
3909               default: vassert(0);
3910            }
3911            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
3912            return res;
3913         }
3914         case Iop_Abs8x16:
3915         case Iop_Abs16x8:
3916         case Iop_Abs32x4: {
3917            HReg res = newVRegV(env);
3918            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3919            UInt size = 0;
3920            switch(e->Iex.Binop.op) {
3921               case Iop_Abs8x16: size = 0; break;
3922               case Iop_Abs16x8: size = 1; break;
3923               case Iop_Abs32x4: size = 2; break;
3924               default: vassert(0);
3925            }
3926            addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
3927            return res;
3928         }
3929         case Iop_Reverse64_8x16:
3930         case Iop_Reverse64_16x8:
3931         case Iop_Reverse64_32x4: {
3932            HReg res = newVRegV(env);
3933            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3934            UInt size = 0;
3935            switch(e->Iex.Binop.op) {
3936               case Iop_Reverse64_8x16: size = 0; break;
3937               case Iop_Reverse64_16x8: size = 1; break;
3938               case Iop_Reverse64_32x4: size = 2; break;
3939               default: vassert(0);
3940            }
3941            addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3942                                          res, arg, size, True));
3943            return res;
3944         }
3945         case Iop_Reverse32_8x16:
3946         case Iop_Reverse32_16x8: {
3947            HReg res = newVRegV(env);
3948            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3949            UInt size = 0;
3950            switch(e->Iex.Binop.op) {
3951               case Iop_Reverse32_8x16: size = 0; break;
3952               case Iop_Reverse32_16x8: size = 1; break;
3953               default: vassert(0);
3954            }
3955            addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3956                                          res, arg, size, True));
3957            return res;
3958         }
3959         case Iop_Reverse16_8x16: {
3960            HReg res = newVRegV(env);
3961            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3962            UInt size = 0;
3963            addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3964                                          res, arg, size, True));
3965            return res;
3966         }
3967         case Iop_CmpNEZ64x2: {
3968            HReg x_lsh = newVRegV(env);
3969            HReg x_rsh = newVRegV(env);
3970            HReg lsh_amt = newVRegV(env);
3971            HReg rsh_amt = newVRegV(env);
3972            HReg zero = newVRegV(env);
3973            HReg tmp = newVRegV(env);
3974            HReg tmp2 = newVRegV(env);
3975            HReg res = newVRegV(env);
3976            HReg x = newVRegV(env);
3977            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3978            addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
3979            addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
3980            addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3981            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3982            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3983                                           rsh_amt, zero, lsh_amt, 2, True));
3984            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3985                                          x_lsh, x, lsh_amt, 3, True));
3986            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3987                                          x_rsh, x, rsh_amt, 3, True));
3988            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3989                                           tmp, x_lsh, x_rsh, 0, True));
3990            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3991                                           res, tmp, x, 0, True));
3992            return res;
3993         }
3994         case Iop_CmpNEZ8x16:
3995         case Iop_CmpNEZ16x8:
3996         case Iop_CmpNEZ32x4: {
3997            HReg res = newVRegV(env);
3998            HReg tmp = newVRegV(env);
3999            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4000            UInt size;
4001            switch (e->Iex.Unop.op) {
4002               case Iop_CmpNEZ8x16: size = 0; break;
4003               case Iop_CmpNEZ16x8: size = 1; break;
4004               case Iop_CmpNEZ32x4: size = 2; break;
4005               default: vassert(0);
4006            }
4007            addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
4008            addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
4009            return res;
4010         }
4011         case Iop_Widen8Uto16x8:
4012         case Iop_Widen16Uto32x4:
4013         case Iop_Widen32Uto64x2: {
4014            HReg res = newVRegV(env);
4015            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4016            UInt size;
4017            switch (e->Iex.Unop.op) {
4018               case Iop_Widen8Uto16x8:  size = 0; break;
4019               case Iop_Widen16Uto32x4: size = 1; break;
4020               case Iop_Widen32Uto64x2: size = 2; break;
4021               default: vassert(0);
4022            }
4023            addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
4024                                          res, arg, size, True));
4025            return res;
4026         }
4027         case Iop_Widen8Sto16x8:
4028         case Iop_Widen16Sto32x4:
4029         case Iop_Widen32Sto64x2: {
4030            HReg res = newVRegV(env);
4031            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4032            UInt size;
4033            switch (e->Iex.Unop.op) {
4034               case Iop_Widen8Sto16x8:  size = 0; break;
4035               case Iop_Widen16Sto32x4: size = 1; break;
4036               case Iop_Widen32Sto64x2: size = 2; break;
4037               default: vassert(0);
4038            }
4039            addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4040                                          res, arg, size, True));
4041            return res;
4042         }
4043         case Iop_PwAddL8Sx16:
4044         case Iop_PwAddL16Sx8:
4045         case Iop_PwAddL32Sx4: {
4046            HReg res = newVRegV(env);
4047            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4048            UInt size = 0;
4049            switch(e->Iex.Binop.op) {
4050               case Iop_PwAddL8Sx16: size = 0; break;
4051               case Iop_PwAddL16Sx8: size = 1; break;
4052               case Iop_PwAddL32Sx4: size = 2; break;
4053               default: vassert(0);
4054            }
4055            addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4056                                          res, arg, size, True));
4057            return res;
4058         }
4059         case Iop_PwAddL8Ux16:
4060         case Iop_PwAddL16Ux8:
4061         case Iop_PwAddL32Ux4: {
4062            HReg res = newVRegV(env);
4063            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4064            UInt size = 0;
4065            switch(e->Iex.Binop.op) {
4066               case Iop_PwAddL8Ux16: size = 0; break;
4067               case Iop_PwAddL16Ux8: size = 1; break;
4068               case Iop_PwAddL32Ux4: size = 2; break;
4069               default: vassert(0);
4070            }
4071            addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4072                                          res, arg, size, True));
4073            return res;
4074         }
4075         case Iop_Cnt8x16: {
4076            HReg res = newVRegV(env);
4077            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4078            UInt size = 0;
4079            addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4080            return res;
4081         }
4082         case Iop_Clz8Sx16:
4083         case Iop_Clz16Sx8:
4084         case Iop_Clz32Sx4: {
4085            HReg res = newVRegV(env);
4086            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4087            UInt size = 0;
4088            switch(e->Iex.Binop.op) {
4089               case Iop_Clz8Sx16: size = 0; break;
4090               case Iop_Clz16Sx8: size = 1; break;
4091               case Iop_Clz32Sx4: size = 2; break;
4092               default: vassert(0);
4093            }
4094            addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4095            return res;
4096         }
4097         case Iop_Cls8Sx16:
4098         case Iop_Cls16Sx8:
4099         case Iop_Cls32Sx4: {
4100            HReg res = newVRegV(env);
4101            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4102            UInt size = 0;
4103            switch(e->Iex.Binop.op) {
4104               case Iop_Cls8Sx16: size = 0; break;
4105               case Iop_Cls16Sx8: size = 1; break;
4106               case Iop_Cls32Sx4: size = 2; break;
4107               default: vassert(0);
4108            }
4109            addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4110            return res;
4111         }
4112         case Iop_FtoI32Sx4_RZ: {
4113            HReg res = newVRegV(env);
4114            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4115            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4116                                          res, arg, 2, True));
4117            return res;
4118         }
4119         case Iop_FtoI32Ux4_RZ: {
4120            HReg res = newVRegV(env);
4121            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4122            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4123                                          res, arg, 2, True));
4124            return res;
4125         }
4126         case Iop_I32StoFx4: {
4127            HReg res = newVRegV(env);
4128            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4129            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4130                                          res, arg, 2, True));
4131            return res;
4132         }
4133         case Iop_I32UtoFx4: {
4134            HReg res = newVRegV(env);
4135            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4136            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4137                                          res, arg, 2, True));
4138            return res;
4139         }
4140         case Iop_F16toF32x4: {
4141            HReg res = newVRegV(env);
4142            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4143            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4144                                          res, arg, 2, True));
4145            return res;
4146         }
4147         case Iop_Recip32Fx4: {
4148            HReg res = newVRegV(env);
4149            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4150            addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4151                                          res, argL, 0, True));
4152            return res;
4153         }
4154         case Iop_Recip32x4: {
4155            HReg res = newVRegV(env);
4156            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4157            addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4158                                          res, argL, 0, True));
4159            return res;
4160         }
4161         case Iop_Abs32Fx4: {
4162            DECLARE_PATTERN(p_vabd_32fx4);
4163            DEFINE_PATTERN(p_vabd_32fx4,
4164                           unop(Iop_Abs32Fx4,
4165                                binop(Iop_Sub32Fx4,
4166                                      bind(0),
4167                                      bind(1))));
4168            if (matchIRExpr(&mi, p_vabd_32fx4, e)) {
4169               HReg res = newVRegV(env);
4170               HReg argL = iselNeonExpr(env, mi.bindee[0]);
4171               HReg argR = iselNeonExpr(env, mi.bindee[1]);
4172               addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
4173                                              res, argL, argR, 0, True));
4174               return res;
4175            } else {
4176               HReg res = newVRegV(env);
4177               HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4178               addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4179                                             res, argL, 0, True));
4180               return res;
4181            }
4182         }
4183         case Iop_Rsqrte32Fx4: {
4184            HReg res = newVRegV(env);
4185            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4186            addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4187                                          res, argL, 0, True));
4188            return res;
4189         }
4190         case Iop_Rsqrte32x4: {
4191            HReg res = newVRegV(env);
4192            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4193            addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4194                                          res, argL, 0, True));
4195            return res;
4196         }
4197         case Iop_Neg32Fx4: {
4198            HReg res = newVRegV(env);
4199            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4200            addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4201                                          res, arg, 0, True));
4202            return res;
4203         }
4204         /* ... */
4205         default:
4206            break;
4207      }
4208   }
4209
4210   if (e->tag == Iex_Binop) {
4211      switch (e->Iex.Binop.op) {
4212         case Iop_64HLtoV128:
4213            /* Try to match into single "VMOV reg, imm" instruction */
4214            if (e->Iex.Binop.arg1->tag == Iex_Const &&
4215                e->Iex.Binop.arg2->tag == Iex_Const &&
4216                typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
4217                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
4218                e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
4219                           e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
4220               ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
4221               ARMNImm *imm = Imm64_to_ARMNImm(imm64);
4222               if (imm) {
4223                  HReg res = newVRegV(env);
4224                  addInstr(env, ARMInstr_NeonImm(res, imm));
4225                  return res;
4226               }
4227               if ((imm64 >> 32) == 0LL &&
4228                   (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
4229                  HReg tmp1 = newVRegV(env);
4230                  HReg tmp2 = newVRegV(env);
4231                  HReg res = newVRegV(env);
4232                  if (imm->type < 10) {
4233                     addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
4234                     addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4235                     addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4236                                                    res, tmp1, tmp2, 4, True));
4237                     return res;
4238                  }
4239               }
4240               if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
4241                   (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
4242                  HReg tmp1 = newVRegV(env);
4243                  HReg tmp2 = newVRegV(env);
4244                  HReg res = newVRegV(env);
4245                  if (imm->type < 10) {
4246                     addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
4247                     addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4248                     addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4249                                                    res, tmp1, tmp2, 4, True));
4250                     return res;
4251                  }
4252               }
4253            }
4254            /* Does not match "VMOV Reg, Imm" form.  We'll have to do
4255               it the slow way. */
4256            {
4257               /* local scope */
4258               /* Done via the stack for ease of use. */
4259               /* FIXME: assumes little endian host */
4260               HReg       w3, w2, w1, w0;
4261               HReg       res  = newVRegV(env);
4262               ARMAMode1* sp_0  = ARMAMode1_RI(hregARM_R13(), 0);
4263               ARMAMode1* sp_4  = ARMAMode1_RI(hregARM_R13(), 4);
4264               ARMAMode1* sp_8  = ARMAMode1_RI(hregARM_R13(), 8);
4265               ARMAMode1* sp_12 = ARMAMode1_RI(hregARM_R13(), 12);
4266               ARMRI84*   c_16  = ARMRI84_I84(16,0);
4267               /* Make space for SP */
4268               addInstr(env, ARMInstr_Alu(ARMalu_SUB, hregARM_R13(),
4269                                                      hregARM_R13(), c_16));
4270
4271               /* Store the less significant 64 bits */
4272               iselInt64Expr(&w1, &w0, env, e->Iex.Binop.arg2);
4273               addInstr(env, ARMInstr_LdSt32(False/*store*/, w0, sp_0));
4274               addInstr(env, ARMInstr_LdSt32(False/*store*/, w1, sp_4));
4275
4276               /* Store the more significant 64 bits */
4277               iselInt64Expr(&w3, &w2, env, e->Iex.Binop.arg1);
4278               addInstr(env, ARMInstr_LdSt32(False/*store*/, w2, sp_8));
4279               addInstr(env, ARMInstr_LdSt32(False/*store*/, w3, sp_12));
4280
4281                /* Load result back from stack. */
4282                addInstr(env, ARMInstr_NLdStQ(True/*load*/, res,
4283                                              mkARMAModeN_R(hregARM_R13())));
4284
4285                /* Restore SP */
4286                addInstr(env, ARMInstr_Alu(ARMalu_ADD, hregARM_R13(),
4287                                           hregARM_R13(), c_16));
4288                return res;
4289            } /* local scope */
4290            goto neon_expr_bad;
4291         case Iop_AndV128: {
4292            HReg res = newVRegV(env);
4293            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4294            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4295            addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4296                                           res, argL, argR, 4, True));
4297            return res;
4298         }
4299         case Iop_OrV128: {
4300            HReg res = newVRegV(env);
4301            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4302            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4303            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4304                                           res, argL, argR, 4, True));
4305            return res;
4306         }
4307         case Iop_XorV128: {
4308            HReg res = newVRegV(env);
4309            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4310            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4311            addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4312                                           res, argL, argR, 4, True));
4313            return res;
4314         }
4315         case Iop_Add8x16:
4316         case Iop_Add16x8:
4317         case Iop_Add32x4:
4318         case Iop_Add64x2: {
4319            /*
4320            FIXME: remove this if not used
4321            DECLARE_PATTERN(p_vrhadd_32sx4);
4322            ULong one = (1LL << 32) | 1LL;
4323            DEFINE_PATTERN(p_vrhadd_32sx4,
4324                  binop(Iop_Add32x4,
4325                        binop(Iop_Add32x4,
4326                              binop(Iop_SarN32x4,
4327                                    bind(0),
4328                                    mkU8(1)),
4329                              binop(Iop_SarN32x4,
4330                                    bind(1),
4331                                    mkU8(1))),
4332                        binop(Iop_SarN32x4,
4333                              binop(Iop_Add32x4,
4334                                    binop(Iop_Add32x4,
4335                                          binop(Iop_AndV128,
4336                                                bind(0),
4337                                                mkU128(one)),
4338                                          binop(Iop_AndV128,
4339                                                bind(1),
4340                                                mkU128(one))),
4341                                    mkU128(one)),
4342                              mkU8(1))));
4343            */
4344            HReg res = newVRegV(env);
4345            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4346            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4347            UInt size;
4348            switch (e->Iex.Binop.op) {
4349               case Iop_Add8x16: size = 0; break;
4350               case Iop_Add16x8: size = 1; break;
4351               case Iop_Add32x4: size = 2; break;
4352               case Iop_Add64x2: size = 3; break;
4353               default:
4354                  ppIROp(e->Iex.Binop.op);
4355                  vpanic("Illegal element size in VADD");
4356            }
4357            addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
4358                                           res, argL, argR, size, True));
4359            return res;
4360         }
4361         case Iop_Add32Fx4: {
4362            HReg res = newVRegV(env);
4363            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4364            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4365            UInt size = 0;
4366            addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
4367                                           res, argL, argR, size, True));
4368            return res;
4369         }
4370         case Iop_Recps32Fx4: {
4371            HReg res = newVRegV(env);
4372            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4373            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4374            UInt size = 0;
4375            addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
4376                                           res, argL, argR, size, True));
4377            return res;
4378         }
4379         case Iop_Rsqrts32Fx4: {
4380            HReg res = newVRegV(env);
4381            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4382            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4383            UInt size = 0;
4384            addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
4385                                           res, argL, argR, size, True));
4386            return res;
4387         }
4388         case Iop_InterleaveEvenLanes8x16:
4389         case Iop_InterleaveEvenLanes16x8:
4390         case Iop_InterleaveEvenLanes32x4:
4391         case Iop_InterleaveOddLanes8x16:
4392         case Iop_InterleaveOddLanes16x8:
4393         case Iop_InterleaveOddLanes32x4: {
4394            HReg tmp = newVRegV(env);
4395            HReg res = newVRegV(env);
4396            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4397            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4398            UInt size;
4399            UInt is_lo;
4400            switch (e->Iex.Binop.op) {
4401               case Iop_InterleaveEvenLanes8x16: is_lo = 0; size = 0; break;
4402               case Iop_InterleaveOddLanes8x16: is_lo = 1; size = 0; break;
4403               case Iop_InterleaveEvenLanes16x8: is_lo = 0; size = 1; break;
4404               case Iop_InterleaveOddLanes16x8: is_lo = 1; size = 1; break;
4405               case Iop_InterleaveEvenLanes32x4: is_lo = 0; size = 2; break;
4406               case Iop_InterleaveOddLanes32x4: is_lo = 1; size = 2; break;
4407               default:
4408                  ppIROp(e->Iex.Binop.op);
4409                  vpanic("Illegal element size in VTRN");
4410            }
4411            if (is_lo) {
4412               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4413                                             tmp, argL, 4, True));
4414               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4415                                             res, argR, 4, True));
4416               addInstr(env, ARMInstr_NDual(ARMneon_TRN,
4417                                            res, tmp, size, True));
4418            } else {
4419               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4420                                             tmp, argR, 4, True));
4421               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4422                                             res, argL, 4, True));
4423               addInstr(env, ARMInstr_NDual(ARMneon_TRN,
4424                                            tmp, res, size, True));
4425            }
4426            return res;
4427         }
4428         case Iop_InterleaveHI8x16:
4429         case Iop_InterleaveHI16x8:
4430         case Iop_InterleaveHI32x4:
4431         case Iop_InterleaveLO8x16:
4432         case Iop_InterleaveLO16x8:
4433         case Iop_InterleaveLO32x4: {
4434            HReg tmp = newVRegV(env);
4435            HReg res = newVRegV(env);
4436            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4437            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4438            UInt size;
4439            UInt is_lo;
4440            switch (e->Iex.Binop.op) {
4441               case Iop_InterleaveHI8x16: is_lo = 1; size = 0; break;
4442               case Iop_InterleaveLO8x16: is_lo = 0; size = 0; break;
4443               case Iop_InterleaveHI16x8: is_lo = 1; size = 1; break;
4444               case Iop_InterleaveLO16x8: is_lo = 0; size = 1; break;
4445               case Iop_InterleaveHI32x4: is_lo = 1; size = 2; break;
4446               case Iop_InterleaveLO32x4: is_lo = 0; size = 2; break;
4447               default:
4448                  ppIROp(e->Iex.Binop.op);
4449                  vpanic("Illegal element size in VZIP");
4450            }
4451            if (is_lo) {
4452               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4453                                             tmp, argL, 4, True));
4454               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4455                                             res, argR, 4, True));
4456               addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
4457                                            res, tmp, size, True));
4458            } else {
4459               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4460                                             tmp, argR, 4, True));
4461               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4462                                             res, argL, 4, True));
4463               addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
4464                                            tmp, res, size, True));
4465            }
4466            return res;
4467         }
4468         case Iop_CatOddLanes8x16:
4469         case Iop_CatOddLanes16x8:
4470         case Iop_CatOddLanes32x4:
4471         case Iop_CatEvenLanes8x16:
4472         case Iop_CatEvenLanes16x8:
4473         case Iop_CatEvenLanes32x4: {
4474            HReg tmp = newVRegV(env);
4475            HReg res = newVRegV(env);
4476            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4477            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4478            UInt size;
4479            UInt is_lo;
4480            switch (e->Iex.Binop.op) {
4481               case Iop_CatOddLanes8x16: is_lo = 1; size = 0; break;
4482               case Iop_CatEvenLanes8x16: is_lo = 0; size = 0; break;
4483               case Iop_CatOddLanes16x8: is_lo = 1; size = 1; break;
4484               case Iop_CatEvenLanes16x8: is_lo = 0; size = 1; break;
4485               case Iop_CatOddLanes32x4: is_lo = 1; size = 2; break;
4486               case Iop_CatEvenLanes32x4: is_lo = 0; size = 2; break;
4487               default:
4488                  ppIROp(e->Iex.Binop.op);
4489                  vpanic("Illegal element size in VUZP");
4490            }
4491            if (is_lo) {
4492               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4493                                             tmp, argL, 4, True));
4494               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4495                                             res, argR, 4, True));
4496               addInstr(env, ARMInstr_NDual(ARMneon_UZP,
4497                                            res, tmp, size, True));
4498            } else {
4499               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4500                                             tmp, argR, 4, True));
4501               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4502                                             res, argL, 4, True));
4503               addInstr(env, ARMInstr_NDual(ARMneon_UZP,
4504                                            tmp, res, size, True));
4505            }
4506            return res;
4507         }
4508         case Iop_QAdd8Ux16:
4509         case Iop_QAdd16Ux8:
4510         case Iop_QAdd32Ux4:
4511         case Iop_QAdd64Ux2: {
4512            HReg res = newVRegV(env);
4513            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4514            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4515            UInt size;
4516            switch (e->Iex.Binop.op) {
4517               case Iop_QAdd8Ux16: size = 0; break;
4518               case Iop_QAdd16Ux8: size = 1; break;
4519               case Iop_QAdd32Ux4: size = 2; break;
4520               case Iop_QAdd64Ux2: size = 3; break;
4521               default:
4522                  ppIROp(e->Iex.Binop.op);
4523                  vpanic("Illegal element size in VQADDU");
4524            }
4525            addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
4526                                           res, argL, argR, size, True));
4527            return res;
4528         }
4529         case Iop_QAdd8Sx16:
4530         case Iop_QAdd16Sx8:
4531         case Iop_QAdd32Sx4:
4532         case Iop_QAdd64Sx2: {
4533            HReg res = newVRegV(env);
4534            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4535            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4536            UInt size;
4537            switch (e->Iex.Binop.op) {
4538               case Iop_QAdd8Sx16: size = 0; break;
4539               case Iop_QAdd16Sx8: size = 1; break;
4540               case Iop_QAdd32Sx4: size = 2; break;
4541               case Iop_QAdd64Sx2: size = 3; break;
4542               default:
4543                  ppIROp(e->Iex.Binop.op);
4544                  vpanic("Illegal element size in VQADDS");
4545            }
4546            addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
4547                                           res, argL, argR, size, True));
4548            return res;
4549         }
4550         case Iop_Sub8x16:
4551         case Iop_Sub16x8:
4552         case Iop_Sub32x4:
4553         case Iop_Sub64x2: {
4554            HReg res = newVRegV(env);
4555            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4556            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4557            UInt size;
4558            switch (e->Iex.Binop.op) {
4559               case Iop_Sub8x16: size = 0; break;
4560               case Iop_Sub16x8: size = 1; break;
4561               case Iop_Sub32x4: size = 2; break;
4562               case Iop_Sub64x2: size = 3; break;
4563               default:
4564                  ppIROp(e->Iex.Binop.op);
4565                  vpanic("Illegal element size in VSUB");
4566            }
4567            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4568                                           res, argL, argR, size, True));
4569            return res;
4570         }
4571         case Iop_Sub32Fx4: {
4572            HReg res = newVRegV(env);
4573            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4574            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4575            UInt size = 0;
4576            addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
4577                                           res, argL, argR, size, True));
4578            return res;
4579         }
4580         case Iop_QSub8Ux16:
4581         case Iop_QSub16Ux8:
4582         case Iop_QSub32Ux4:
4583         case Iop_QSub64Ux2: {
4584            HReg res = newVRegV(env);
4585            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4586            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4587            UInt size;
4588            switch (e->Iex.Binop.op) {
4589               case Iop_QSub8Ux16: size = 0; break;
4590               case Iop_QSub16Ux8: size = 1; break;
4591               case Iop_QSub32Ux4: size = 2; break;
4592               case Iop_QSub64Ux2: size = 3; break;
4593               default:
4594                  ppIROp(e->Iex.Binop.op);
4595                  vpanic("Illegal element size in VQSUBU");
4596            }
4597            addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
4598                                           res, argL, argR, size, True));
4599            return res;
4600         }
4601         case Iop_QSub8Sx16:
4602         case Iop_QSub16Sx8:
4603         case Iop_QSub32Sx4:
4604         case Iop_QSub64Sx2: {
4605            HReg res = newVRegV(env);
4606            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4607            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4608            UInt size;
4609            switch (e->Iex.Binop.op) {
4610               case Iop_QSub8Sx16: size = 0; break;
4611               case Iop_QSub16Sx8: size = 1; break;
4612               case Iop_QSub32Sx4: size = 2; break;
4613               case Iop_QSub64Sx2: size = 3; break;
4614               default:
4615                  ppIROp(e->Iex.Binop.op);
4616                  vpanic("Illegal element size in VQSUBS");
4617            }
4618            addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
4619                                           res, argL, argR, size, True));
4620            return res;
4621         }
4622         case Iop_Max8Ux16:
4623         case Iop_Max16Ux8:
4624         case Iop_Max32Ux4: {
4625            HReg res = newVRegV(env);
4626            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4627            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4628            UInt size;
4629            switch (e->Iex.Binop.op) {
4630               case Iop_Max8Ux16: size = 0; break;
4631               case Iop_Max16Ux8: size = 1; break;
4632               case Iop_Max32Ux4: size = 2; break;
4633               default: vpanic("Illegal element size in VMAXU");
4634            }
4635            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
4636                                           res, argL, argR, size, True));
4637            return res;
4638         }
4639         case Iop_Max8Sx16:
4640         case Iop_Max16Sx8:
4641         case Iop_Max32Sx4: {
4642            HReg res = newVRegV(env);
4643            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4644            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4645            UInt size;
4646            switch (e->Iex.Binop.op) {
4647               case Iop_Max8Sx16: size = 0; break;
4648               case Iop_Max16Sx8: size = 1; break;
4649               case Iop_Max32Sx4: size = 2; break;
4650               default: vpanic("Illegal element size in VMAXU");
4651            }
4652            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
4653                                           res, argL, argR, size, True));
4654            return res;
4655         }
4656         case Iop_Min8Ux16:
4657         case Iop_Min16Ux8:
4658         case Iop_Min32Ux4: {
4659            HReg res = newVRegV(env);
4660            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4661            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4662            UInt size;
4663            switch (e->Iex.Binop.op) {
4664               case Iop_Min8Ux16: size = 0; break;
4665               case Iop_Min16Ux8: size = 1; break;
4666               case Iop_Min32Ux4: size = 2; break;
4667               default: vpanic("Illegal element size in VMAXU");
4668            }
4669            addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
4670                                           res, argL, argR, size, True));
4671            return res;
4672         }
4673         case Iop_Min8Sx16:
4674         case Iop_Min16Sx8:
4675         case Iop_Min32Sx4: {
4676            HReg res = newVRegV(env);
4677            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4678            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4679            UInt size;
4680            switch (e->Iex.Binop.op) {
4681               case Iop_Min8Sx16: size = 0; break;
4682               case Iop_Min16Sx8: size = 1; break;
4683               case Iop_Min32Sx4: size = 2; break;
4684               default: vpanic("Illegal element size in VMAXU");
4685            }
4686            addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
4687                                           res, argL, argR, size, True));
4688            return res;
4689         }
4690         case Iop_Sar8x16:
4691         case Iop_Sar16x8:
4692         case Iop_Sar32x4:
4693         case Iop_Sar64x2: {
4694            HReg res = newVRegV(env);
4695            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4696            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4697            HReg argR2 = newVRegV(env);
4698            HReg zero = newVRegV(env);
4699            UInt size;
4700            switch (e->Iex.Binop.op) {
4701               case Iop_Sar8x16: size = 0; break;
4702               case Iop_Sar16x8: size = 1; break;
4703               case Iop_Sar32x4: size = 2; break;
4704               case Iop_Sar64x2: size = 3; break;
4705               default: vassert(0);
4706            }
4707            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4708            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4709                                           argR2, zero, argR, size, True));
4710            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4711                                          res, argL, argR2, size, True));
4712            return res;
4713         }
4714         case Iop_Sal8x16:
4715         case Iop_Sal16x8:
4716         case Iop_Sal32x4:
4717         case Iop_Sal64x2: {
4718            HReg res = newVRegV(env);
4719            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4720            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4721            UInt size;
4722            switch (e->Iex.Binop.op) {
4723               case Iop_Sal8x16: size = 0; break;
4724               case Iop_Sal16x8: size = 1; break;
4725               case Iop_Sal32x4: size = 2; break;
4726               case Iop_Sal64x2: size = 3; break;
4727               default: vassert(0);
4728            }
4729            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4730                                          res, argL, argR, size, True));
4731            return res;
4732         }
4733         case Iop_Shr8x16:
4734         case Iop_Shr16x8:
4735         case Iop_Shr32x4:
4736         case Iop_Shr64x2: {
4737            HReg res = newVRegV(env);
4738            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4739            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4740            HReg argR2 = newVRegV(env);
4741            HReg zero = newVRegV(env);
4742            UInt size;
4743            switch (e->Iex.Binop.op) {
4744               case Iop_Shr8x16: size = 0; break;
4745               case Iop_Shr16x8: size = 1; break;
4746               case Iop_Shr32x4: size = 2; break;
4747               case Iop_Shr64x2: size = 3; break;
4748               default: vassert(0);
4749            }
4750            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
4751            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4752                                           argR2, zero, argR, size, True));
4753            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4754                                          res, argL, argR2, size, True));
4755            return res;
4756         }
4757         case Iop_Shl8x16:
4758         case Iop_Shl16x8:
4759         case Iop_Shl32x4:
4760         case Iop_Shl64x2: {
4761            HReg res = newVRegV(env);
4762            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4763            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4764            UInt size;
4765            switch (e->Iex.Binop.op) {
4766               case Iop_Shl8x16: size = 0; break;
4767               case Iop_Shl16x8: size = 1; break;
4768               case Iop_Shl32x4: size = 2; break;
4769               case Iop_Shl64x2: size = 3; break;
4770               default: vassert(0);
4771            }
4772            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4773                                          res, argL, argR, size, True));
4774            return res;
4775         }
4776         case Iop_QShl8x16:
4777         case Iop_QShl16x8:
4778         case Iop_QShl32x4:
4779         case Iop_QShl64x2: {
4780            HReg res = newVRegV(env);
4781            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4782            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4783            UInt size;
4784            switch (e->Iex.Binop.op) {
4785               case Iop_QShl8x16: size = 0; break;
4786               case Iop_QShl16x8: size = 1; break;
4787               case Iop_QShl32x4: size = 2; break;
4788               case Iop_QShl64x2: size = 3; break;
4789               default: vassert(0);
4790            }
4791            addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
4792                                          res, argL, argR, size, True));
4793            return res;
4794         }
4795         case Iop_QSal8x16:
4796         case Iop_QSal16x8:
4797         case Iop_QSal32x4:
4798         case Iop_QSal64x2: {
4799            HReg res = newVRegV(env);
4800            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4801            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4802            UInt size;
4803            switch (e->Iex.Binop.op) {
4804               case Iop_QSal8x16: size = 0; break;
4805               case Iop_QSal16x8: size = 1; break;
4806               case Iop_QSal32x4: size = 2; break;
4807               case Iop_QSal64x2: size = 3; break;
4808               default: vassert(0);
4809            }
4810            addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
4811                                          res, argL, argR, size, True));
4812            return res;
4813         }
4814         case Iop_QShlN8x16:
4815         case Iop_QShlN16x8:
4816         case Iop_QShlN32x4:
4817         case Iop_QShlN64x2: {
4818            HReg res = newVRegV(env);
4819            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4820            UInt size, imm;
4821            if (e->Iex.Binop.arg2->tag != Iex_Const ||
4822                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4823               vpanic("ARM taget supports Iop_QShlNAxB with constant "
4824                      "second argument only\n");
4825            }
4826            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4827            switch (e->Iex.Binop.op) {
4828               case Iop_QShlN8x16: size = 8 | imm; break;
4829               case Iop_QShlN16x8: size = 16 | imm; break;
4830               case Iop_QShlN32x4: size = 32 | imm; break;
4831               case Iop_QShlN64x2: size = 64 | imm; break;
4832               default: vassert(0);
4833            }
4834            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
4835                                          res, argL, size, True));
4836            return res;
4837         }
4838         case Iop_QShlN8Sx16:
4839         case Iop_QShlN16Sx8:
4840         case Iop_QShlN32Sx4:
4841         case Iop_QShlN64Sx2: {
4842            HReg res = newVRegV(env);
4843            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4844            UInt size, imm;
4845            if (e->Iex.Binop.arg2->tag != Iex_Const ||
4846                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4847               vpanic("ARM taget supports Iop_QShlNASxB with constant "
4848                      "second argument only\n");
4849            }
4850            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4851            switch (e->Iex.Binop.op) {
4852               case Iop_QShlN8Sx16: size = 8 | imm; break;
4853               case Iop_QShlN16Sx8: size = 16 | imm; break;
4854               case Iop_QShlN32Sx4: size = 32 | imm; break;
4855               case Iop_QShlN64Sx2: size = 64 | imm; break;
4856               default: vassert(0);
4857            }
4858            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
4859                                          res, argL, size, True));
4860            return res;
4861         }
4862         case Iop_QSalN8x16:
4863         case Iop_QSalN16x8:
4864         case Iop_QSalN32x4:
4865         case Iop_QSalN64x2: {
4866            HReg res = newVRegV(env);
4867            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4868            UInt size, imm;
4869            if (e->Iex.Binop.arg2->tag != Iex_Const ||
4870                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4871               vpanic("ARM taget supports Iop_QShlNAxB with constant "
4872                      "second argument only\n");
4873            }
4874            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4875            switch (e->Iex.Binop.op) {
4876               case Iop_QSalN8x16: size = 8 | imm; break;
4877               case Iop_QSalN16x8: size = 16 | imm; break;
4878               case Iop_QSalN32x4: size = 32 | imm; break;
4879               case Iop_QSalN64x2: size = 64 | imm; break;
4880               default: vassert(0);
4881            }
4882            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
4883                                          res, argL, size, True));
4884            return res;
4885         }
4886         case Iop_ShrN8x16:
4887         case Iop_ShrN16x8:
4888         case Iop_ShrN32x4:
4889         case Iop_ShrN64x2: {
4890            HReg res = newVRegV(env);
4891            HReg tmp = newVRegV(env);
4892            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4893            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4894            HReg argR2 = newVRegI(env);
4895            UInt size;
4896            switch (e->Iex.Binop.op) {
4897               case Iop_ShrN8x16: size = 0; break;
4898               case Iop_ShrN16x8: size = 1; break;
4899               case Iop_ShrN32x4: size = 2; break;
4900               case Iop_ShrN64x2: size = 3; break;
4901               default: vassert(0);
4902            }
4903            addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4904            addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
4905                                          tmp, argR2, 0, True));
4906            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4907                                          res, argL, tmp, size, True));
4908            return res;
4909         }
4910         case Iop_ShlN8x16:
4911         case Iop_ShlN16x8:
4912         case Iop_ShlN32x4:
4913         case Iop_ShlN64x2: {
4914            HReg res = newVRegV(env);
4915            HReg tmp = newVRegV(env);
4916            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4917            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4918            UInt size;
4919            switch (e->Iex.Binop.op) {
4920               case Iop_ShlN8x16: size = 0; break;
4921               case Iop_ShlN16x8: size = 1; break;
4922               case Iop_ShlN32x4: size = 2; break;
4923               case Iop_ShlN64x2: size = 3; break;
4924               default: vassert(0);
4925            }
4926            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
4927            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4928                                          res, argL, tmp, size, True));
4929            return res;
4930         }
4931         case Iop_SarN8x16:
4932         case Iop_SarN16x8:
4933         case Iop_SarN32x4:
4934         case Iop_SarN64x2: {
4935            HReg res = newVRegV(env);
4936            HReg tmp = newVRegV(env);
4937            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4938            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4939            HReg argR2 = newVRegI(env);
4940            UInt size;
4941            switch (e->Iex.Binop.op) {
4942               case Iop_SarN8x16: size = 0; break;
4943               case Iop_SarN16x8: size = 1; break;
4944               case Iop_SarN32x4: size = 2; break;
4945               case Iop_SarN64x2: size = 3; break;
4946               default: vassert(0);
4947            }
4948            addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4949            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
4950            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4951                                          res, argL, tmp, size, True));
4952            return res;
4953         }
4954         case Iop_CmpGT8Ux16:
4955         case Iop_CmpGT16Ux8:
4956         case Iop_CmpGT32Ux4: {
4957            HReg res = newVRegV(env);
4958            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4959            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4960            UInt size;
4961            switch (e->Iex.Binop.op) {
4962               case Iop_CmpGT8Ux16: size = 0; break;
4963               case Iop_CmpGT16Ux8: size = 1; break;
4964               case Iop_CmpGT32Ux4: size = 2; break;
4965               default: vassert(0);
4966            }
4967            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
4968                                           res, argL, argR, size, True));
4969            return res;
4970         }
4971         case Iop_CmpGT8Sx16:
4972         case Iop_CmpGT16Sx8:
4973         case Iop_CmpGT32Sx4: {
4974            HReg res = newVRegV(env);
4975            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4976            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4977            UInt size;
4978            switch (e->Iex.Binop.op) {
4979               case Iop_CmpGT8Sx16: size = 0; break;
4980               case Iop_CmpGT16Sx8: size = 1; break;
4981               case Iop_CmpGT32Sx4: size = 2; break;
4982               default: vassert(0);
4983            }
4984            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
4985                                           res, argL, argR, size, True));
4986            return res;
4987         }
4988         case Iop_CmpEQ8x16:
4989         case Iop_CmpEQ16x8:
4990         case Iop_CmpEQ32x4: {
4991            HReg res = newVRegV(env);
4992            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4993            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4994            UInt size;
4995            switch (e->Iex.Binop.op) {
4996               case Iop_CmpEQ8x16: size = 0; break;
4997               case Iop_CmpEQ16x8: size = 1; break;
4998               case Iop_CmpEQ32x4: size = 2; break;
4999               default: vassert(0);
5000            }
5001            addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
5002                                           res, argL, argR, size, True));
5003            return res;
5004         }
5005         case Iop_Mul8x16:
5006         case Iop_Mul16x8:
5007         case Iop_Mul32x4: {
5008            HReg res = newVRegV(env);
5009            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5010            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5011            UInt size = 0;
5012            switch(e->Iex.Binop.op) {
5013               case Iop_Mul8x16: size = 0; break;
5014               case Iop_Mul16x8: size = 1; break;
5015               case Iop_Mul32x4: size = 2; break;
5016               default: vassert(0);
5017            }
5018            addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
5019                                           res, argL, argR, size, True));
5020            return res;
5021         }
5022         case Iop_Mul32Fx4: {
5023            HReg res = newVRegV(env);
5024            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5025            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5026            UInt size = 0;
5027            addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
5028                                           res, argL, argR, size, True));
5029            return res;
5030         }
5031         case Iop_Mull8Ux8:
5032         case Iop_Mull16Ux4:
5033         case Iop_Mull32Ux2: {
5034            HReg res = newVRegV(env);
5035            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5036            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5037            UInt size = 0;
5038            switch(e->Iex.Binop.op) {
5039               case Iop_Mull8Ux8: size = 0; break;
5040               case Iop_Mull16Ux4: size = 1; break;
5041               case Iop_Mull32Ux2: size = 2; break;
5042               default: vassert(0);
5043            }
5044            addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
5045                                           res, argL, argR, size, True));
5046            return res;
5047         }
5048
5049         case Iop_Mull8Sx8:
5050         case Iop_Mull16Sx4:
5051         case Iop_Mull32Sx2: {
5052            HReg res = newVRegV(env);
5053            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5054            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5055            UInt size = 0;
5056            switch(e->Iex.Binop.op) {
5057               case Iop_Mull8Sx8: size = 0; break;
5058               case Iop_Mull16Sx4: size = 1; break;
5059               case Iop_Mull32Sx2: size = 2; break;
5060               default: vassert(0);
5061            }
5062            addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
5063                                           res, argL, argR, size, True));
5064            return res;
5065         }
5066
5067         case Iop_QDMulHi16Sx8:
5068         case Iop_QDMulHi32Sx4: {
5069            HReg res = newVRegV(env);
5070            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5071            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5072            UInt size = 0;
5073            switch(e->Iex.Binop.op) {
5074               case Iop_QDMulHi16Sx8: size = 1; break;
5075               case Iop_QDMulHi32Sx4: size = 2; break;
5076               default: vassert(0);
5077            }
5078            addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5079                                           res, argL, argR, size, True));
5080            return res;
5081         }
5082
5083         case Iop_QRDMulHi16Sx8:
5084         case Iop_QRDMulHi32Sx4: {
5085            HReg res = newVRegV(env);
5086            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5087            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5088            UInt size = 0;
5089            switch(e->Iex.Binop.op) {
5090               case Iop_QRDMulHi16Sx8: size = 1; break;
5091               case Iop_QRDMulHi32Sx4: size = 2; break;
5092               default: vassert(0);
5093            }
5094            addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5095                                           res, argL, argR, size, True));
5096            return res;
5097         }
5098
5099         case Iop_QDMulLong16Sx4:
5100         case Iop_QDMulLong32Sx2: {
5101            HReg res = newVRegV(env);
5102            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5103            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5104            UInt size = 0;
5105            switch(e->Iex.Binop.op) {
5106               case Iop_QDMulLong16Sx4: size = 1; break;
5107               case Iop_QDMulLong32Sx2: size = 2; break;
5108               default: vassert(0);
5109            }
5110            addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5111                                           res, argL, argR, size, True));
5112            return res;
5113         }
5114         case Iop_PolynomialMul8x16: {
5115            HReg res = newVRegV(env);
5116            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5117            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5118            UInt size = 0;
5119            addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5120                                           res, argL, argR, size, True));
5121            return res;
5122         }
5123         case Iop_Max32Fx4: {
5124            HReg res = newVRegV(env);
5125            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5126            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5127            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5128                                           res, argL, argR, 2, True));
5129            return res;
5130         }
5131         case Iop_Min32Fx4: {
5132            HReg res = newVRegV(env);
5133            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5134            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5135            addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5136                                           res, argL, argR, 2, True));
5137            return res;
5138         }
5139         case Iop_PwMax32Fx4: {
5140            HReg res = newVRegV(env);
5141            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5142            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5143            addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5144                                           res, argL, argR, 2, True));
5145            return res;
5146         }
5147         case Iop_PwMin32Fx4: {
5148            HReg res = newVRegV(env);
5149            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5150            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5151            addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5152                                           res, argL, argR, 2, True));
5153            return res;
5154         }
5155         case Iop_CmpGT32Fx4: {
5156            HReg res = newVRegV(env);
5157            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5158            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5159            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5160                                           res, argL, argR, 2, True));
5161            return res;
5162         }
5163         case Iop_CmpGE32Fx4: {
5164            HReg res = newVRegV(env);
5165            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5166            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5167            addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5168                                           res, argL, argR, 2, True));
5169            return res;
5170         }
5171         case Iop_CmpEQ32Fx4: {
5172            HReg res = newVRegV(env);
5173            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5174            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5175            addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5176                                           res, argL, argR, 2, True));
5177            return res;
5178         }
5179
5180         case Iop_PolynomialMull8x8: {
5181            HReg res = newVRegV(env);
5182            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5183            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5184            UInt size = 0;
5185            addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5186                                           res, argL, argR, size, True));
5187            return res;
5188         }
5189         case Iop_F32ToFixed32Ux4_RZ:
5190         case Iop_F32ToFixed32Sx4_RZ:
5191         case Iop_Fixed32UToF32x4_RN:
5192         case Iop_Fixed32SToF32x4_RN: {
5193            HReg res = newVRegV(env);
5194            HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5195            ARMNeonUnOp op;
5196            UInt imm6;
5197            if (e->Iex.Binop.arg2->tag != Iex_Const ||
5198               typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5199                  vpanic("ARM supports FP <-> Fixed conversion with constant "
5200                         "second argument less than 33 only\n");
5201            }
5202            imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5203            vassert(imm6 <= 32 && imm6 > 0);
5204            imm6 = 64 - imm6;
5205            switch(e->Iex.Binop.op) {
5206               case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5207               case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5208               case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5209               case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5210               default: vassert(0);
5211            }
5212            addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5213            return res;
5214         }
5215         /*
5216         FIXME remove if not used
5217         case Iop_VDup8x16:
5218         case Iop_VDup16x8:
5219         case Iop_VDup32x4: {
5220            HReg res = newVRegV(env);
5221            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5222            UInt imm4;
5223            UInt index;
5224            if (e->Iex.Binop.arg2->tag != Iex_Const ||
5225               typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5226                  vpanic("ARM supports Iop_VDup with constant "
5227                         "second argument less than 16 only\n");
5228            }
5229            index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5230            switch(e->Iex.Binop.op) {
5231               case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5232               case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5233               case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5234               default: vassert(0);
5235            }
5236            if (imm4 >= 16) {
5237               vpanic("ARM supports Iop_VDup with constant "
5238                      "second argument less than 16 only\n");
5239            }
5240            addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5241                                          res, argL, imm4, True));
5242            return res;
5243         }
5244         */
5245         case Iop_PwAdd8x16:
5246         case Iop_PwAdd16x8:
5247         case Iop_PwAdd32x4: {
5248            HReg res = newVRegV(env);
5249            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5250            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5251            UInt size = 0;
5252            switch(e->Iex.Binop.op) {
5253               case Iop_PwAdd8x16: size = 0; break;
5254               case Iop_PwAdd16x8: size = 1; break;
5255               case Iop_PwAdd32x4: size = 2; break;
5256               default: vassert(0);
5257            }
5258            addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5259                                           res, argL, argR, size, True));
5260            return res;
5261         }
5262         /* ... */
5263         default:
5264            break;
5265      }
5266   }
5267
5268   if (e->tag == Iex_Triop) {
5269      IRTriop *triop = e->Iex.Triop.details;
5270
5271      switch (triop->op) {
5272         case Iop_ExtractV128: {
5273            HReg res = newVRegV(env);
5274            HReg argL = iselNeonExpr(env, triop->arg1);
5275            HReg argR = iselNeonExpr(env, triop->arg2);
5276            UInt imm4;
5277            if (triop->arg3->tag != Iex_Const ||
5278                typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
5279               vpanic("ARM target supports Iop_ExtractV128 with constant "
5280                      "third argument less than 16 only\n");
5281            }
5282            imm4 = triop->arg3->Iex.Const.con->Ico.U8;
5283            if (imm4 >= 16) {
5284               vpanic("ARM target supports Iop_ExtractV128 with constant "
5285                      "third argument less than 16 only\n");
5286            }
5287            addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5288                                           res, argL, argR, imm4, True));
5289            return res;
5290         }
5291         default:
5292            break;
5293      }
5294   }
5295
5296   if (e->tag == Iex_Mux0X) {
5297      HReg r8;
5298      HReg rX  = iselNeonExpr(env, e->Iex.Mux0X.exprX);
5299      HReg r0  = iselNeonExpr(env, e->Iex.Mux0X.expr0);
5300      HReg dst = newVRegV(env);
5301      addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, rX, 4, True));
5302      r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5303      addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5304                                      ARMRI84_I84(0xFF,0)));
5305      addInstr(env, ARMInstr_NCMovQ(ARMcc_EQ, dst, r0));
5306      return dst;
5307   }
5308
5309  neon_expr_bad:
5310   ppIRExpr(e);
5311   vpanic("iselNeonExpr_wrk");
5312}
5313
5314/*---------------------------------------------------------*/
5315/*--- ISEL: Floating point expressions (64 bit)         ---*/
5316/*---------------------------------------------------------*/
5317
5318/* Compute a 64-bit floating point value into a register, the identity
5319   of which is returned.  As with iselIntExpr_R, the reg may be either
5320   real or virtual; in any case it must not be changed by subsequent
5321   code emitted by the caller.  */
5322
5323static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5324{
5325   HReg r = iselDblExpr_wrk( env, e );
5326#  if 0
5327   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5328#  endif
5329   vassert(hregClass(r) == HRcFlt64);
5330   vassert(hregIsVirtual(r));
5331   return r;
5332}
5333
5334/* DO NOT CALL THIS DIRECTLY */
5335static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
5336{
5337   IRType ty = typeOfIRExpr(env->type_env,e);
5338   vassert(e);
5339   vassert(ty == Ity_F64);
5340
5341   if (e->tag == Iex_RdTmp) {
5342      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5343   }
5344
5345   if (e->tag == Iex_Const) {
5346      /* Just handle the zero case. */
5347      IRConst* con = e->Iex.Const.con;
5348      if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
5349         HReg z32 = newVRegI(env);
5350         HReg dst = newVRegD(env);
5351         addInstr(env, ARMInstr_Imm32(z32, 0));
5352         addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
5353         return dst;
5354      }
5355   }
5356
5357   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5358      ARMAModeV* am;
5359      HReg res = newVRegD(env);
5360      vassert(e->Iex.Load.ty == Ity_F64);
5361      am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5362      addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5363      return res;
5364   }
5365
5366   if (e->tag == Iex_Get) {
5367      // XXX This won't work if offset > 1020 or is not 0 % 4.
5368      // In which case we'll have to generate more longwinded code.
5369      ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5370      HReg       res = newVRegD(env);
5371      addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5372      return res;
5373   }
5374
5375   if (e->tag == Iex_Unop) {
5376      switch (e->Iex.Unop.op) {
5377         case Iop_ReinterpI64asF64: {
5378            if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5379               return iselNeon64Expr(env, e->Iex.Unop.arg);
5380            } else {
5381               HReg srcHi, srcLo;
5382               HReg dst = newVRegD(env);
5383               iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
5384               addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
5385               return dst;
5386            }
5387         }
5388         case Iop_NegF64: {
5389            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5390            HReg dst = newVRegD(env);
5391            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
5392            return dst;
5393         }
5394         case Iop_AbsF64: {
5395            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5396            HReg dst = newVRegD(env);
5397            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
5398            return dst;
5399         }
5400         case Iop_F32toF64: {
5401            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5402            HReg dst = newVRegD(env);
5403            addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
5404            return dst;
5405         }
5406         case Iop_I32UtoF64:
5407         case Iop_I32StoF64: {
5408            HReg src   = iselIntExpr_R(env, e->Iex.Unop.arg);
5409            HReg f32   = newVRegF(env);
5410            HReg dst   = newVRegD(env);
5411            Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
5412            /* VMOV f32, src */
5413            addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
5414            /* FSITOD dst, f32 */
5415            addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
5416                                          dst, f32));
5417            return dst;
5418         }
5419         default:
5420            break;
5421      }
5422   }
5423
5424   if (e->tag == Iex_Binop) {
5425      switch (e->Iex.Binop.op) {
5426         case Iop_SqrtF64: {
5427            /* first arg is rounding mode; we ignore it. */
5428            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
5429            HReg dst = newVRegD(env);
5430            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
5431            return dst;
5432         }
5433         default:
5434            break;
5435      }
5436   }
5437
5438   if (e->tag == Iex_Triop) {
5439      IRTriop *triop = e->Iex.Triop.details;
5440
5441      switch (triop->op) {
5442         case Iop_DivF64:
5443         case Iop_MulF64:
5444         case Iop_AddF64:
5445         case Iop_SubF64: {
5446            ARMVfpOp op = 0; /*INVALID*/
5447            HReg argL = iselDblExpr(env, triop->arg2);
5448            HReg argR = iselDblExpr(env, triop->arg3);
5449            HReg dst  = newVRegD(env);
5450            switch (triop->op) {
5451               case Iop_DivF64: op = ARMvfp_DIV; break;
5452               case Iop_MulF64: op = ARMvfp_MUL; break;
5453               case Iop_AddF64: op = ARMvfp_ADD; break;
5454               case Iop_SubF64: op = ARMvfp_SUB; break;
5455               default: vassert(0);
5456            }
5457            addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
5458            return dst;
5459         }
5460         default:
5461            break;
5462      }
5463   }
5464
5465   if (e->tag == Iex_Mux0X) {
5466      if (ty == Ity_F64
5467          && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
5468         HReg r8;
5469         HReg rX  = iselDblExpr(env, e->Iex.Mux0X.exprX);
5470         HReg r0  = iselDblExpr(env, e->Iex.Mux0X.expr0);
5471         HReg dst = newVRegD(env);
5472         addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, rX));
5473         r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5474         addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5475                                         ARMRI84_I84(0xFF,0)));
5476         addInstr(env, ARMInstr_VCMovD(ARMcc_EQ, dst, r0));
5477         return dst;
5478      }
5479   }
5480
5481   ppIRExpr(e);
5482   vpanic("iselDblExpr_wrk");
5483}
5484
5485
5486/*---------------------------------------------------------*/
5487/*--- ISEL: Floating point expressions (32 bit)         ---*/
5488/*---------------------------------------------------------*/
5489
/* Compute a 32-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg may be either
   real or virtual; in any case it must not be changed by subsequent
   code emitted by the caller.  */
5494
5495static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
5496{
5497   HReg r = iselFltExpr_wrk( env, e );
5498#  if 0
5499   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5500#  endif
5501   vassert(hregClass(r) == HRcFlt32);
5502   vassert(hregIsVirtual(r));
5503   return r;
5504}
5505
5506/* DO NOT CALL THIS DIRECTLY */
5507static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
5508{
5509   IRType ty = typeOfIRExpr(env->type_env,e);
5510   vassert(e);
5511   vassert(ty == Ity_F32);
5512
5513   if (e->tag == Iex_RdTmp) {
5514      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5515   }
5516
5517   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5518      ARMAModeV* am;
5519      HReg res = newVRegF(env);
5520      vassert(e->Iex.Load.ty == Ity_F32);
5521      am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5522      addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5523      return res;
5524   }
5525
5526   if (e->tag == Iex_Get) {
5527      // XXX This won't work if offset > 1020 or is not 0 % 4.
5528      // In which case we'll have to generate more longwinded code.
5529      ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5530      HReg       res = newVRegF(env);
5531      addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5532      return res;
5533   }
5534
5535   if (e->tag == Iex_Unop) {
5536      switch (e->Iex.Unop.op) {
5537         case Iop_ReinterpI32asF32: {
5538            HReg dst = newVRegF(env);
5539            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5540            addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
5541            return dst;
5542         }
5543         case Iop_NegF32: {
5544            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5545            HReg dst = newVRegF(env);
5546            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
5547            return dst;
5548         }
5549         case Iop_AbsF32: {
5550            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5551            HReg dst = newVRegF(env);
5552            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
5553            return dst;
5554         }
5555         default:
5556            break;
5557      }
5558   }
5559
5560   if (e->tag == Iex_Binop) {
5561      switch (e->Iex.Binop.op) {
5562         case Iop_SqrtF32: {
5563            /* first arg is rounding mode; we ignore it. */
5564            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
5565            HReg dst = newVRegF(env);
5566            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
5567            return dst;
5568         }
5569         case Iop_F64toF32: {
5570            HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
5571            set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5572            HReg valS = newVRegF(env);
5573            /* FCVTSD valS, valD */
5574            addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
5575            set_VFP_rounding_default(env);
5576            return valS;
5577         }
5578         default:
5579            break;
5580      }
5581   }
5582
5583   if (e->tag == Iex_Triop) {
5584      IRTriop *triop = e->Iex.Triop.details;
5585
5586      switch (triop->op) {
5587         case Iop_DivF32:
5588         case Iop_MulF32:
5589         case Iop_AddF32:
5590         case Iop_SubF32: {
5591            ARMVfpOp op = 0; /*INVALID*/
5592            HReg argL = iselFltExpr(env, triop->arg2);
5593            HReg argR = iselFltExpr(env, triop->arg3);
5594            HReg dst  = newVRegF(env);
5595            switch (triop->op) {
5596               case Iop_DivF32: op = ARMvfp_DIV; break;
5597               case Iop_MulF32: op = ARMvfp_MUL; break;
5598               case Iop_AddF32: op = ARMvfp_ADD; break;
5599               case Iop_SubF32: op = ARMvfp_SUB; break;
5600               default: vassert(0);
5601            }
5602            addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
5603            return dst;
5604         }
5605         default:
5606            break;
5607      }
5608   }
5609
5610   if (e->tag == Iex_Mux0X) {
5611      if (ty == Ity_F32
5612          && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
5613         HReg r8;
5614         HReg rX  = iselFltExpr(env, e->Iex.Mux0X.exprX);
5615         HReg r0  = iselFltExpr(env, e->Iex.Mux0X.expr0);
5616         HReg dst = newVRegF(env);
5617         addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, rX));
5618         r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5619         addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5620                                         ARMRI84_I84(0xFF,0)));
5621         addInstr(env, ARMInstr_VCMovS(ARMcc_EQ, dst, r0));
5622         return dst;
5623      }
5624   }
5625
5626   ppIRExpr(e);
5627   vpanic("iselFltExpr_wrk");
5628}
5629
5630
5631/*---------------------------------------------------------*/
5632/*--- ISEL: Statements                                  ---*/
5633/*---------------------------------------------------------*/
5634
5635static void iselStmt ( ISelEnv* env, IRStmt* stmt )
5636{
5637   if (vex_traceflags & VEX_TRACE_VCODE) {
5638      vex_printf("\n-- ");
5639      ppIRStmt(stmt);
5640      vex_printf("\n");
5641   }
5642   switch (stmt->tag) {
5643
5644   /* --------- STORE --------- */
5645   /* little-endian write to memory */
5646   case Ist_Store: {
5647      IRType    tya  = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
5648      IRType    tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
5649      IREndness end  = stmt->Ist.Store.end;
5650
5651      if (tya != Ity_I32 || end != Iend_LE)
5652         goto stmt_fail;
5653
5654      if (tyd == Ity_I32) {
5655         HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5656         ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5657         addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
5658         return;
5659      }
5660      if (tyd == Ity_I16) {
5661         HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5662         ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
5663         addInstr(env, ARMInstr_LdSt16(False/*!isLoad*/,
5664                                       False/*!isSignedLoad*/, rD, am));
5665         return;
5666      }
5667      if (tyd == Ity_I8) {
5668         HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5669         ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5670         addInstr(env, ARMInstr_LdSt8U(False/*!isLoad*/, rD, am));
5671         return;
5672      }
5673      if (tyd == Ity_I64) {
5674         if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5675            HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
5676            ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5677            addInstr(env, ARMInstr_NLdStD(False, dD, am));
5678         } else {
5679            HReg rDhi, rDlo, rA;
5680            iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
5681            rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
5682            addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDhi,
5683                                          ARMAMode1_RI(rA,4)));
5684            addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDlo,
5685                                          ARMAMode1_RI(rA,0)));
5686         }
5687         return;
5688      }
5689      if (tyd == Ity_F64) {
5690         HReg       dD = iselDblExpr(env, stmt->Ist.Store.data);
5691         ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5692         addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
5693         return;
5694      }
5695      if (tyd == Ity_F32) {
5696         HReg       fD = iselFltExpr(env, stmt->Ist.Store.data);
5697         ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5698         addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
5699         return;
5700      }
5701      if (tyd == Ity_V128) {
5702         HReg       qD = iselNeonExpr(env, stmt->Ist.Store.data);
5703         ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5704         addInstr(env, ARMInstr_NLdStQ(False, qD, am));
5705         return;
5706      }
5707
5708      break;
5709   }
5710
5711   /* --------- PUT --------- */
5712   /* write guest state, fixed offset */
5713   case Ist_Put: {
5714       IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
5715
5716       if (tyd == Ity_I32) {
5717           HReg       rD = iselIntExpr_R(env, stmt->Ist.Put.data);
5718           ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
5719           addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
5720           return;
5721       }
5722       if (tyd == Ity_I64) {
5723          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5724             HReg addr = newVRegI(env);
5725             HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
5726             addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5727                                                stmt->Ist.Put.offset));
5728             addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
5729          } else {
5730             HReg rDhi, rDlo;
5731             ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
5732                                           stmt->Ist.Put.offset + 0);
5733             ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
5734                                           stmt->Ist.Put.offset + 4);
5735             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
5736             addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDhi, am4));
5737             addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDlo, am0));
5738          }
5739          return;
5740       }
5741       if (tyd == Ity_F64) {
5742          // XXX This won't work if offset > 1020 or is not 0 % 4.
5743          // In which case we'll have to generate more longwinded code.
5744          ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5745          HReg       rD = iselDblExpr(env, stmt->Ist.Put.data);
5746          addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
5747          return;
5748       }
5749       if (tyd == Ity_F32) {
5750          // XXX This won't work if offset > 1020 or is not 0 % 4.
5751          // In which case we'll have to generate more longwinded code.
5752          ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5753          HReg       rD = iselFltExpr(env, stmt->Ist.Put.data);
5754          addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
5755          return;
5756       }
5757       if (tyd == Ity_V128) {
5758          HReg addr = newVRegI(env);
5759          HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
5760          addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5761                                       stmt->Ist.Put.offset));
5762          addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
5763          return;
5764       }
5765       break;
5766   }
5767
5768//zz   /* --------- Indexed PUT --------- */
5769//zz   /* write guest state, run-time offset */
5770//zz   case Ist_PutI: {
5771//zz      ARMAMode2* am2
5772//zz           = genGuestArrayOffset(
5773//zz               env, stmt->Ist.PutI.descr,
5774//zz               stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
5775//zz
5776//zz       IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
5777//zz
5778//zz       if (tyd == Ity_I8) {
5779//zz           HReg reg = iselIntExpr_R(env, stmt->Ist.PutI.data);
5780//zz           addInstr(env, ARMInstr_StoreB(reg, am2));
5781//zz           return;
5782//zz       }
5783//zz// CAB: Ity_I32, Ity_I16 ?
5784//zz       break;
5785//zz   }
5786
5787   /* --------- TMP --------- */
5788   /* assign value to temporary */
5789   case Ist_WrTmp: {
5790      IRTemp tmp = stmt->Ist.WrTmp.tmp;
5791      IRType ty = typeOfIRTemp(env->type_env, tmp);
5792
5793      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
5794         ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
5795                                          env, stmt->Ist.WrTmp.data);
5796         HReg     dst  = lookupIRTemp(env, tmp);
5797         addInstr(env, ARMInstr_Mov(dst,ri84));
5798         return;
5799      }
5800      if (ty == Ity_I1) {
5801         HReg        dst  = lookupIRTemp(env, tmp);
5802         ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
5803         addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
5804         addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
5805         return;
5806      }
5807      if (ty == Ity_I64) {
5808         if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5809            HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
5810            HReg dst = lookupIRTemp(env, tmp);
5811            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
5812         } else {
5813            HReg rHi, rLo, dstHi, dstLo;
5814            iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
5815            lookupIRTemp64( &dstHi, &dstLo, env, tmp);
5816            addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
5817            addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
5818         }
5819         return;
5820      }
5821      if (ty == Ity_F64) {
5822         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
5823         HReg dst = lookupIRTemp(env, tmp);
5824         addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
5825         return;
5826      }
5827      if (ty == Ity_F32) {
5828         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
5829         HReg dst = lookupIRTemp(env, tmp);
5830         addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
5831         return;
5832      }
5833      if (ty == Ity_V128) {
5834         HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
5835         HReg dst = lookupIRTemp(env, tmp);
5836         addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
5837         return;
5838      }
5839      break;
5840   }
5841
5842   /* --------- Call to DIRTY helper --------- */
5843   /* call complex ("dirty") helper function */
5844   case Ist_Dirty: {
5845      IRType   retty;
5846      IRDirty* d = stmt->Ist.Dirty.details;
5847      Bool     passBBP = False;
5848
5849      if (d->nFxState == 0)
5850         vassert(!d->needsBBP);
5851
5852      passBBP = toBool(d->nFxState > 0 && d->needsBBP);
5853
5854      /* Marshal args, do the call, clear stack. */
5855      Bool ok = doHelperCall( env, passBBP, d->guard, d->cee, d->args );
5856      if (!ok)
5857         break; /* will go to stmt_fail: */
5858
5859      /* Now figure out what to do with the returned value, if any. */
5860      if (d->tmp == IRTemp_INVALID)
5861         /* No return value.  Nothing to do. */
5862         return;
5863
5864      retty = typeOfIRTemp(env->type_env, d->tmp);
5865
5866      if (retty == Ity_I64) {
5867         if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5868            HReg tmp = lookupIRTemp(env, d->tmp);
5869            addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
5870                                                     hregARM_R0()));
5871         } else {
5872            HReg dstHi, dstLo;
5873            /* The returned value is in r1:r0.  Park it in the
5874               register-pair associated with tmp. */
5875            lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
5876            addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
5877            addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
5878         }
5879         return;
5880      }
5881      if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
5882         /* The returned value is in r0.  Park it in the register
5883            associated with tmp. */
5884         HReg dst = lookupIRTemp(env, d->tmp);
5885         addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
5886         return;
5887      }
5888
5889      break;
5890   }
5891
5892   /* --------- Load Linked and Store Conditional --------- */
5893   case Ist_LLSC: {
5894      if (stmt->Ist.LLSC.storedata == NULL) {
5895         /* LL */
5896         IRTemp res = stmt->Ist.LLSC.result;
5897         IRType ty  = typeOfIRTemp(env->type_env, res);
5898         if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
5899            Int  szB   = 0;
5900            HReg r_dst = lookupIRTemp(env, res);
5901            HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5902            switch (ty) {
5903               case Ity_I8:  szB = 1; break;
5904               case Ity_I16: szB = 2; break;
5905               case Ity_I32: szB = 4; break;
5906               default:      vassert(0);
5907            }
5908            addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
5909            addInstr(env, ARMInstr_LdrEX(szB));
5910            addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
5911            return;
5912         }
5913         if (ty == Ity_I64) {
5914            HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5915            addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
5916            addInstr(env, ARMInstr_LdrEX(8));
5917            /* Result is in r3:r2.  On a non-NEON capable CPU, we must
5918               move it into a result register pair.  On a NEON capable
5919               CPU, the result register will be a 64 bit NEON
5920               register, so we must move it there instead. */
5921            if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
5922               HReg dst = lookupIRTemp(env, res);
5923               addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
5924                                                        hregARM_R2()));
5925            } else {
5926               HReg r_dst_hi, r_dst_lo;
5927               lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
5928               addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
5929               addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
5930            }
5931            return;
5932         }
5933         /*NOTREACHED*/
5934         vassert(0);
5935      } else {
5936         /* SC */
5937         IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
5938         if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
5939            Int  szB = 0;
5940            HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
5941            HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5942            switch (tyd) {
5943               case Ity_I8:  szB = 1; break;
5944               case Ity_I16: szB = 2; break;
5945               case Ity_I32: szB = 4; break;
5946               default:      vassert(0);
5947            }
5948            addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
5949            addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
5950            addInstr(env, ARMInstr_StrEX(szB));
5951         } else {
5952            vassert(tyd == Ity_I64);
5953            /* This is really ugly.  There is no is/is-not NEON
5954               decision akin to the case for LL, because iselInt64Expr
5955               fudges this for us, and always gets the result into two
5956               GPRs even if this means moving it from a NEON
5957               register. */
5958            HReg rDhi, rDlo;
5959            iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
5960            HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5961            addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
5962            addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
5963            addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
5964            addInstr(env, ARMInstr_StrEX(8));
5965         }
5966         /* now r0 is 1 if failed, 0 if success.  Change to IR
5967            conventions (0 is fail, 1 is success).  Also transfer
5968            result to r_res. */
5969         IRTemp   res   = stmt->Ist.LLSC.result;
5970         IRType   ty    = typeOfIRTemp(env->type_env, res);
5971         HReg     r_res = lookupIRTemp(env, res);
5972         ARMRI84* one   = ARMRI84_I84(1,0);
5973         vassert(ty == Ity_I1);
5974         addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
5975         /* And be conservative -- mask off all but the lowest bit */
5976         addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
5977         return;
5978      }
5979      break;
5980   }
5981
5982   /* --------- MEM FENCE --------- */
5983   case Ist_MBE:
5984      switch (stmt->Ist.MBE.event) {
5985         case Imbe_Fence:
5986            addInstr(env, ARMInstr_MFence());
5987            return;
5988         case Imbe_CancelReservation:
5989            addInstr(env, ARMInstr_CLREX());
5990            return;
5991         default:
5992            break;
5993      }
5994      break;
5995
5996   /* --------- INSTR MARK --------- */
5997   /* Doesn't generate any executable code ... */
5998   case Ist_IMark:
5999       return;
6000
6001   /* --------- NO-OP --------- */
6002   case Ist_NoOp:
6003       return;
6004
6005   /* --------- EXIT --------- */
6006   case Ist_Exit: {
6007      if (stmt->Ist.Exit.dst->tag != Ico_U32)
6008         vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
6009
6010      ARMCondCode cc     = iselCondCode(env, stmt->Ist.Exit.guard);
6011      ARMAMode1*  amR15T = ARMAMode1_RI(hregARM_R8(),
6012                                        stmt->Ist.Exit.offsIP);
6013
6014      /* Case: boring transfer to known address */
6015      if (stmt->Ist.Exit.jk == Ijk_Boring
6016          || stmt->Ist.Exit.jk == Ijk_Call
6017          || stmt->Ist.Exit.jk == Ijk_Ret) {
6018         if (env->chainingAllowed) {
6019            /* .. almost always true .. */
6020            /* Skip the event check at the dst if this is a forwards
6021               edge. */
6022            Bool toFastEP
6023               = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
6024            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
6025            addInstr(env, ARMInstr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
6026                                           amR15T, cc, toFastEP));
6027         } else {
6028            /* .. very occasionally .. */
6029            /* We can't use chaining, so ask for an assisted transfer,
6030               as that's the only alternative that is allowable. */
6031            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6032            addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, Ijk_Boring));
6033         }
6034         return;
6035      }
6036
6037      /* Case: assisted transfer to arbitrary address */
6038      switch (stmt->Ist.Exit.jk) {
6039         /* Keep this list in sync with that in iselNext below */
6040         case Ijk_ClientReq:
6041         case Ijk_NoDecode:
6042         case Ijk_NoRedir:
6043         case Ijk_Sys_syscall:
6044         case Ijk_TInval:
6045         {
6046            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
6047            addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
6048                                             stmt->Ist.Exit.jk));
6049            return;
6050         }
6051         default:
6052            break;
6053      }
6054
6055      /* Do we ever expect to see any other kind? */
6056      goto stmt_fail;
6057   }
6058
6059   default: break;
6060   }
6061  stmt_fail:
6062   ppIRStmt(stmt);
6063   vpanic("iselStmt");
6064}
6065
6066
6067/*---------------------------------------------------------*/
6068/*--- ISEL: Basic block terminators (Nexts)             ---*/
6069/*---------------------------------------------------------*/
6070
6071static void iselNext ( ISelEnv* env,
6072                       IRExpr* next, IRJumpKind jk, Int offsIP )
6073{
6074   if (vex_traceflags & VEX_TRACE_VCODE) {
6075      vex_printf( "\n-- PUT(%d) = ", offsIP);
6076      ppIRExpr( next );
6077      vex_printf( "; exit-");
6078      ppIRJumpKind(jk);
6079      vex_printf( "\n");
6080   }
6081
6082   /* Case: boring transfer to known address */
6083   if (next->tag == Iex_Const) {
6084      IRConst* cdst = next->Iex.Const.con;
6085      vassert(cdst->tag == Ico_U32);
6086      if (jk == Ijk_Boring || jk == Ijk_Call) {
6087         /* Boring transfer to known address */
6088         ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6089         if (env->chainingAllowed) {
6090            /* .. almost always true .. */
6091            /* Skip the event check at the dst if this is a forwards
6092               edge. */
6093            Bool toFastEP
6094               = ((Addr64)cdst->Ico.U32) > env->max_ga;
6095            if (0) vex_printf("%s", toFastEP ? "X" : ".");
6096            addInstr(env, ARMInstr_XDirect(cdst->Ico.U32,
6097                                           amR15T, ARMcc_AL,
6098                                           toFastEP));
6099         } else {
6100            /* .. very occasionally .. */
6101            /* We can't use chaining, so ask for an assisted transfer,
6102               as that's the only alternative that is allowable. */
6103            HReg r = iselIntExpr_R(env, next);
6104            addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
6105                                             Ijk_Boring));
6106         }
6107         return;
6108      }
6109   }
6110
6111   /* Case: call/return (==boring) transfer to any address */
6112   switch (jk) {
6113      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
6114         HReg       r      = iselIntExpr_R(env, next);
6115         ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6116         if (env->chainingAllowed) {
6117            addInstr(env, ARMInstr_XIndir(r, amR15T, ARMcc_AL));
6118         } else {
6119            addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
6120                                                Ijk_Boring));
6121         }
6122         return;
6123      }
6124      default:
6125         break;
6126   }
6127
6128   /* Case: assisted transfer to arbitrary address */
6129   switch (jk) {
6130      /* Keep this list in sync with that for Ist_Exit above */
6131      case Ijk_ClientReq:
6132      case Ijk_NoDecode:
6133      case Ijk_NoRedir:
6134      case Ijk_Sys_syscall:
6135      {
6136         HReg       r      = iselIntExpr_R(env, next);
6137         ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
6138         addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, jk));
6139         return;
6140      }
6141      default:
6142         break;
6143   }
6144
6145   vex_printf( "\n-- PUT(%d) = ", offsIP);
6146   ppIRExpr( next );
6147   vex_printf( "; exit-");
6148   ppIRJumpKind(jk);
6149   vex_printf( "\n");
6150   vassert(0); // are we expecting any other kind?
6151}
6152
6153
6154/*---------------------------------------------------------*/
6155/*--- Insn selector top-level                           ---*/
6156/*---------------------------------------------------------*/
6157
6158/* Translate an entire SB to arm code. */
6159
6160HInstrArray* iselSB_ARM ( IRSB* bb,
6161                          VexArch      arch_host,
6162                          VexArchInfo* archinfo_host,
6163                          VexAbiInfo*  vbi/*UNUSED*/,
6164                          Int offs_Host_EvC_Counter,
6165                          Int offs_Host_EvC_FailAddr,
6166                          Bool chainingAllowed,
6167                          Bool addProfInc,
6168                          Addr64 max_ga )
6169{
6170   Int       i, j;
6171   HReg      hreg, hregHI;
6172   ISelEnv*  env;
6173   UInt      hwcaps_host = archinfo_host->hwcaps;
6174   ARMAMode1 *amCounter, *amFailAddr;
6175
6176   /* sanity ... */
6177   vassert(arch_host == VexArchARM);
6178
6179   /* hwcaps should not change from one ISEL call to another. */
6180   arm_hwcaps = hwcaps_host; // JRS 2012 Mar 31: FIXME (RM)
6181
6182   /* Make up an initial environment to use. */
6183   env = LibVEX_Alloc(sizeof(ISelEnv));
6184   env->vreg_ctr = 0;
6185
6186   /* Set up output code array. */
6187   env->code = newHInstrArray();
6188
6189   /* Copy BB's type env. */
6190   env->type_env = bb->tyenv;
6191
6192   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
6193      change as we go along. */
6194   env->n_vregmap = bb->tyenv->types_used;
6195   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
6196   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
6197
6198   /* and finally ... */
6199   env->chainingAllowed = chainingAllowed;
6200   env->hwcaps          = hwcaps_host;
6201   env->max_ga          = max_ga;
6202
6203   /* For each IR temporary, allocate a suitably-kinded virtual
6204      register. */
6205   j = 0;
6206   for (i = 0; i < env->n_vregmap; i++) {
6207      hregHI = hreg = INVALID_HREG;
6208      switch (bb->tyenv->types[i]) {
6209         case Ity_I1:
6210         case Ity_I8:
6211         case Ity_I16:
6212         case Ity_I32:  hreg   = mkHReg(j++, HRcInt32, True); break;
6213         case Ity_I64:
6214            if (hwcaps_host & VEX_HWCAPS_ARM_NEON) {
6215               hreg = mkHReg(j++, HRcFlt64, True);
6216            } else {
6217               hregHI = mkHReg(j++, HRcInt32, True);
6218               hreg   = mkHReg(j++, HRcInt32, True);
6219            }
6220            break;
6221         case Ity_F32:  hreg   = mkHReg(j++, HRcFlt32, True); break;
6222         case Ity_F64:  hreg   = mkHReg(j++, HRcFlt64, True); break;
6223         case Ity_V128: hreg   = mkHReg(j++, HRcVec128, True); break;
6224         default: ppIRType(bb->tyenv->types[i]);
6225                  vpanic("iselBB: IRTemp type");
6226      }
6227      env->vregmap[i]   = hreg;
6228      env->vregmapHI[i] = hregHI;
6229   }
6230   env->vreg_ctr = j;
6231
6232   /* The very first instruction must be an event check. */
6233   amCounter  = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter);
6234   amFailAddr = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr);
6235   addInstr(env, ARMInstr_EvCheck(amCounter, amFailAddr));
6236
6237   /* Possibly a block counter increment (for profiling).  At this
6238      point we don't know the address of the counter, so just pretend
6239      it is zero.  It will have to be patched later, but before this
6240      translation is used, by a call to LibVEX_patchProfCtr. */
6241   if (addProfInc) {
6242      addInstr(env, ARMInstr_ProfInc());
6243   }
6244
6245   /* Ok, finally we can iterate over the statements. */
6246   for (i = 0; i < bb->stmts_used; i++)
6247      iselStmt(env, bb->stmts[i]);
6248
6249   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
6250
6251   /* record the number of vregs we used. */
6252   env->code->n_vregs = env->vreg_ctr;
6253   return env->code;
6254}
6255
6256
6257/*---------------------------------------------------------------*/
6258/*--- end                                     host_arm_isel.c ---*/
6259/*---------------------------------------------------------------*/
6260