host_arm_isel.c revision 8f943afc22a6a683b78271836c8ddc462b4824a9
1
2/*---------------------------------------------------------------*/
3/*--- begin                                   host_arm_isel.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2011 OpenWorks LLP
11      info@open-works.net
12
13   NEON support is
14   Copyright (C) 2010-2011 Samsung Electronics
15   contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16              and Kirill Batuzov <batuzovk@ispras.ru>
17
18   This program is free software; you can redistribute it and/or
19   modify it under the terms of the GNU General Public License as
20   published by the Free Software Foundation; either version 2 of the
21   License, or (at your option) any later version.
22
23   This program is distributed in the hope that it will be useful, but
24   WITHOUT ANY WARRANTY; without even the implied warranty of
25   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
26   General Public License for more details.
27
28   You should have received a copy of the GNU General Public License
29   along with this program; if not, write to the Free Software
30   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31   02110-1301, USA.
32
33   The GNU General Public License is contained in the file COPYING.
34*/
35
36#include "libvex_basictypes.h"
37#include "libvex_ir.h"
38#include "libvex.h"
39#include "ir_match.h"
40
41#include "main_util.h"
42#include "main_globals.h"
43#include "host_generic_regs.h"
44#include "host_generic_simd64.h"  // for 32-bit SIMD helpers
45#include "host_arm_defs.h"
46
47
48/*---------------------------------------------------------*/
49/*--- ARMvfp control word stuff                         ---*/
50/*---------------------------------------------------------*/
51
52/* Vex-generated code expects to run with the FPU set as follows: all
53   exceptions masked, round-to-nearest, non-vector mode, with the NZCV
54   flags cleared, and FZ (flush to zero) disabled.  Curiously enough,
55   this corresponds to a FPSCR value of zero.
56
57   fpscr should therefore be zero on entry to Vex-generated code, and
58   should be unchanged at exit.  (Or at least the bottom 28 bits
59   should be zero).
60*/
61
62#define DEFAULT_FPSCR 0
63
64
65/*---------------------------------------------------------*/
66/*--- ISelEnv                                           ---*/
67/*---------------------------------------------------------*/
68
69/* This carries around:
70
71   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
72     might encounter.  This is computed before insn selection starts,
73     and does not change.
74
75   - A mapping from IRTemp to HReg.  This tells the insn selector
76     which virtual register(s) are associated with each IRTemp
77     temporary.  This is computed before insn selection starts, and
78     does not change.  We expect this mapping to map precisely the
79     same set of IRTemps as the type mapping does.
80
81        - vregmap   holds the primary register for the IRTemp.
82        - vregmapHI is only used for 64-bit integer-typed
83             IRTemps.  It holds the identity of a second
84             32-bit virtual HReg, which holds the high half
85             of the value.
86
87   - The name of the vreg in which we stash a copy of the link reg, so
88     helper functions don't kill it.
89
90   - The code array, that is, the insns selected so far.
91
92   - A counter, for generating new virtual registers.
93
94   - The host hardware capabilities word.  This is set at the start
95     and does not change.
96
97   Note, this is all host-independent.  */
98
99typedef
100   struct {
101      IRTypeEnv*   type_env;
102
103      HReg*        vregmap;
104      HReg*        vregmapHI;
105      Int          n_vregmap;
106
107      HReg         savedLR;
108
109      HInstrArray* code;
110
111      Int          vreg_ctr;
112
113      UInt         hwcaps;
114   }
115   ISelEnv;
116
117static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
118{
119   vassert(tmp >= 0);
120   vassert(tmp < env->n_vregmap);
121   return env->vregmap[tmp];
122}
123
124static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
125{
126   vassert(tmp >= 0);
127   vassert(tmp < env->n_vregmap);
128   vassert(env->vregmapHI[tmp] != INVALID_HREG);
129   *vrLO = env->vregmap[tmp];
130   *vrHI = env->vregmapHI[tmp];
131}
132
133static void addInstr ( ISelEnv* env, ARMInstr* instr )
134{
135   addHInstr(env->code, instr);
136   if (vex_traceflags & VEX_TRACE_VCODE) {
137      ppARMInstr(instr);
138      vex_printf("\n");
139   }
140#if 0
141   if (instr->tag == ARMin_NUnary || instr->tag == ARMin_NBinary
142         || instr->tag == ARMin_NUnaryS || instr->tag == ARMin_NBinaryS
143         || instr->tag == ARMin_NDual || instr->tag == ARMin_NShift) {
144      ppARMInstr(instr);
145      vex_printf("\n");
146   }
147#endif
148}
149
150static HReg newVRegI ( ISelEnv* env )
151{
152   HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
153   env->vreg_ctr++;
154   return reg;
155}
156
157static HReg newVRegD ( ISelEnv* env )
158{
159   HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
160   env->vreg_ctr++;
161   return reg;
162}
163
164static HReg newVRegF ( ISelEnv* env )
165{
166   HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
167   env->vreg_ctr++;
168   return reg;
169}
170
171static HReg newVRegV ( ISelEnv* env )
172{
173   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
174   env->vreg_ctr++;
175   return reg;
176}
177
178/* These are duplicated in guest_arm_toIR.c */
179static IRExpr* unop ( IROp op, IRExpr* a )
180{
181   return IRExpr_Unop(op, a);
182}
183
184static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
185{
186   return IRExpr_Binop(op, a1, a2);
187}
188
189static IRExpr* bind ( Int binder )
190{
191   return IRExpr_Binder(binder);
192}
193
194
195/*---------------------------------------------------------*/
196/*--- ISEL: Forward declarations                        ---*/
197/*---------------------------------------------------------*/
198
/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk do the real work, but are not to be called directly.
   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
   sanity-checks the result -- typically that any returned registers
   are virtual and of the expected class.  Always go through iselXXX;
   never call the _wrk version directly.
*/
205static ARMAMode1*  iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
206static ARMAMode1*  iselIntExpr_AMode1     ( ISelEnv* env, IRExpr* e );
207
208static ARMAMode2*  iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
209static ARMAMode2*  iselIntExpr_AMode2     ( ISelEnv* env, IRExpr* e );
210
211static ARMAModeV*  iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
212static ARMAModeV*  iselIntExpr_AModeV     ( ISelEnv* env, IRExpr* e );
213
214static ARMAModeN*  iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
215static ARMAModeN*  iselIntExpr_AModeN     ( ISelEnv* env, IRExpr* e );
216
217static ARMRI84*    iselIntExpr_RI84_wrk
218        ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
219static ARMRI84*    iselIntExpr_RI84
220        ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
221
222static ARMRI5*     iselIntExpr_RI5_wrk    ( ISelEnv* env, IRExpr* e );
223static ARMRI5*     iselIntExpr_RI5        ( ISelEnv* env, IRExpr* e );
224
225static ARMCondCode iselCondCode_wrk       ( ISelEnv* env, IRExpr* e );
226static ARMCondCode iselCondCode           ( ISelEnv* env, IRExpr* e );
227
228static HReg        iselIntExpr_R_wrk      ( ISelEnv* env, IRExpr* e );
229static HReg        iselIntExpr_R          ( ISelEnv* env, IRExpr* e );
230
231static void        iselInt64Expr_wrk      ( HReg* rHi, HReg* rLo,
232                                            ISelEnv* env, IRExpr* e );
233static void        iselInt64Expr          ( HReg* rHi, HReg* rLo,
234                                            ISelEnv* env, IRExpr* e );
235
236static HReg        iselDblExpr_wrk        ( ISelEnv* env, IRExpr* e );
237static HReg        iselDblExpr            ( ISelEnv* env, IRExpr* e );
238
239static HReg        iselFltExpr_wrk        ( ISelEnv* env, IRExpr* e );
240static HReg        iselFltExpr            ( ISelEnv* env, IRExpr* e );
241
242static HReg        iselNeon64Expr_wrk     ( ISelEnv* env, IRExpr* e );
243static HReg        iselNeon64Expr         ( ISelEnv* env, IRExpr* e );
244
245static HReg        iselNeonExpr_wrk       ( ISelEnv* env, IRExpr* e );
246static HReg        iselNeonExpr           ( ISelEnv* env, IRExpr* e );
247
248/*---------------------------------------------------------*/
249/*--- ISEL: Misc helpers                                ---*/
250/*---------------------------------------------------------*/
251
252static UInt ROR32 ( UInt x, UInt sh ) {
253   vassert(sh >= 0 && sh < 32);
254   if (sh == 0)
255      return x;
256   else
257      return (x << (32-sh)) | (x >> sh);
258}
259
260/* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
261   form, and if so return the components. */
262static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
263{
264   UInt i;
265   for (i = 0; i < 16; i++) {
266      if (0 == (u & 0xFFFFFF00)) {
267         *u8 = u;
268         *u4 = i;
269         return True;
270      }
271      u = ROR32(u, 30);
272   }
273   vassert(i == 16);
274   return False;
275}
276
277/* Make a int reg-reg move. */
278static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
279{
280   vassert(hregClass(src) == HRcInt32);
281   vassert(hregClass(dst) == HRcInt32);
282   return ARMInstr_Mov(dst, ARMRI84_R(src));
283}
284
285/* Set the VFP unit's rounding mode to default (round to nearest). */
286static void set_VFP_rounding_default ( ISelEnv* env )
287{
288   /* mov rTmp, #DEFAULT_FPSCR
289      fmxr fpscr, rTmp
290   */
291   HReg rTmp = newVRegI(env);
292   addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
293   addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
294}
295
296/* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
297   expression denoting a value in the range 0 .. 3, indicating a round
298   mode encoded as per type IRRoundingMode.  Set FPSCR to have the
299   same rounding.
300*/
301static
302void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
303{
304   /* This isn't simple, because 'mode' carries an IR rounding
305      encoding, and we need to translate that to an ARMvfp one:
306      The IR encoding:
307         00  to nearest (the default)
308         10  to +infinity
309         01  to -infinity
310         11  to zero
311      The ARMvfp encoding:
312         00  to nearest
313         01  to +infinity
314         10  to -infinity
315         11  to zero
316      Easy enough to do; just swap the two bits.
317   */
318   HReg irrm = iselIntExpr_R(env, mode);
319   HReg tL   = newVRegI(env);
320   HReg tR   = newVRegI(env);
321   HReg t3   = newVRegI(env);
322   /* tL = irrm << 1;
323      tR = irrm >> 1;  if we're lucky, these will issue together
324      tL &= 2;
325      tR &= 1;         ditto
326      t3 = tL | tR;
327      t3 <<= 22;
328      fmxr fpscr, t3
329   */
330   addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
331   addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
332   addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
333   addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
334   addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
335   addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
336   addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
337}
338
339
340/*---------------------------------------------------------*/
341/*--- ISEL: Function call helpers                       ---*/
342/*---------------------------------------------------------*/
343
344/* Used only in doHelperCall.  See big comment in doHelperCall re
345   handling of register-parameter args.  This function figures out
346   whether evaluation of an expression might require use of a fixed
347   register.  If in doubt return True (safe but suboptimal).
348*/
349static
350Bool mightRequireFixedRegs ( IRExpr* e )
351{
352   switch (e->tag) {
353   case Iex_RdTmp: case Iex_Const: case Iex_Get:
354      return False;
355   default:
356      return True;
357   }
358}
359
360
361/* Do a complete function call.  guard is a Ity_Bit expression
362   indicating whether or not the call happens.  If guard==NULL, the
363   call is unconditional.  Returns True iff it managed to handle this
364   combination of arg/return types, else returns False. */
365
366static
367Bool doHelperCall ( ISelEnv* env,
368                    Bool passBBP,
369                    IRExpr* guard, IRCallee* cee, IRExpr** args )
370{
371   ARMCondCode cc;
372   HReg        argregs[ARM_N_ARGREGS];
373   HReg        tmpregs[ARM_N_ARGREGS];
374   Bool        go_fast;
375   Int         n_args, i, nextArgReg;
376   ULong       target;
377
378   vassert(ARM_N_ARGREGS == 4);
379
380   /* Marshal args for a call and do the call.
381
382      If passBBP is True, r8 (the baseblock pointer) is to be passed
383      as the first arg.
384
385      This function only deals with a tiny set of possibilities, which
386      cover all helpers in practice.  The restrictions are that only
387      arguments in registers are supported, hence only ARM_N_REGPARMS
388      x 32 integer bits in total can be passed.  In fact the only
389      supported arg types are I32 and I64.
390
391      Generating code which is both efficient and correct when
392      parameters are to be passed in registers is difficult, for the
393      reasons elaborated in detail in comments attached to
394      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
395      of the method described in those comments.
396
397      The problem is split into two cases: the fast scheme and the
398      slow scheme.  In the fast scheme, arguments are computed
399      directly into the target (real) registers.  This is only safe
400      when we can be sure that computation of each argument will not
401      trash any real registers set by computation of any other
402      argument.
403
404      In the slow scheme, all args are first computed into vregs, and
405      once they are all done, they are moved to the relevant real
406      regs.  This always gives correct code, but it also gives a bunch
407      of vreg-to-rreg moves which are usually redundant but are hard
408      for the register allocator to get rid of.
409
410      To decide which scheme to use, all argument expressions are
411      first examined.  If they are all so simple that it is clear they
412      will be evaluated without use of any fixed registers, use the
413      fast scheme, else use the slow scheme.  Note also that only
414      unconditional calls may use the fast scheme, since having to
415      compute a condition expression could itself trash real
416      registers.
417
418      Note this requires being able to examine an expression and
419      determine whether or not evaluation of it might use a fixed
420      register.  That requires knowledge of how the rest of this insn
421      selector works.  Currently just the following 3 are regarded as
422      safe -- hopefully they cover the majority of arguments in
423      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
424   */
425
426   /* Note that the cee->regparms field is meaningless on ARM hosts
427      (since there is only one calling convention) and so we always
428      ignore it. */
429
430   n_args = 0;
431   for (i = 0; args[i]; i++)
432      n_args++;
433
434   argregs[0] = hregARM_R0();
435   argregs[1] = hregARM_R1();
436   argregs[2] = hregARM_R2();
437   argregs[3] = hregARM_R3();
438
439   tmpregs[0] = tmpregs[1] = tmpregs[2] =
440   tmpregs[3] = INVALID_HREG;
441
442   /* First decide which scheme (slow or fast) is to be used.  First
443      assume the fast scheme, and select slow if any contraindications
444      (wow) appear. */
445
446   go_fast = True;
447
448   if (guard) {
449      if (guard->tag == Iex_Const
450          && guard->Iex.Const.con->tag == Ico_U1
451          && guard->Iex.Const.con->Ico.U1 == True) {
452         /* unconditional */
453      } else {
454         /* Not manifestly unconditional -- be conservative. */
455         go_fast = False;
456      }
457   }
458
459   if (go_fast) {
460      for (i = 0; i < n_args; i++) {
461         if (mightRequireFixedRegs(args[i])) {
462            go_fast = False;
463            break;
464         }
465      }
466   }
467   /* At this point the scheme to use has been established.  Generate
468      code to get the arg values into the argument rregs.  If we run
469      out of arg regs, give up. */
470
471   if (go_fast) {
472
473      /* FAST SCHEME */
474      nextArgReg = 0;
475      if (passBBP) {
476         addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
477                                     hregARM_R8() ));
478         nextArgReg++;
479      }
480
481      for (i = 0; i < n_args; i++) {
482         IRType aTy = typeOfIRExpr(env->type_env, args[i]);
483         if (nextArgReg >= ARM_N_ARGREGS)
484            return False; /* out of argregs */
485         if (aTy == Ity_I32) {
486            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
487                                        iselIntExpr_R(env, args[i]) ));
488            nextArgReg++;
489         }
490         else if (aTy == Ity_I64) {
491            /* 64-bit args must be passed in an a reg-pair of the form
492               n:n+1, where n is even.  Hence either r0:r1 or r2:r3.
493               On a little-endian host, the less significant word is
494               passed in the lower-numbered register. */
495            if (nextArgReg & 1) {
496               if (nextArgReg >= ARM_N_ARGREGS)
497                  return False; /* out of argregs */
498               addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
499               nextArgReg++;
500            }
501            if (nextArgReg >= ARM_N_ARGREGS)
502               return False; /* out of argregs */
503            HReg raHi, raLo;
504            iselInt64Expr(&raHi, &raLo, env, args[i]);
505            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
506            nextArgReg++;
507            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
508            nextArgReg++;
509         }
510         else
511            return False; /* unhandled arg type */
512      }
513
514      /* Fast scheme only applies for unconditional calls.  Hence: */
515      cc = ARMcc_AL;
516
517   } else {
518
519      /* SLOW SCHEME; move via temporaries */
520      nextArgReg = 0;
521
522      if (passBBP) {
523         /* This is pretty stupid; better to move directly to r0
524            after the rest of the args are done. */
525         tmpregs[nextArgReg] = newVRegI(env);
526         addInstr(env, mk_iMOVds_RR( tmpregs[nextArgReg],
527                                     hregARM_R8() ));
528         nextArgReg++;
529      }
530
531      for (i = 0; i < n_args; i++) {
532         IRType aTy = typeOfIRExpr(env->type_env, args[i]);
533         if (nextArgReg >= ARM_N_ARGREGS)
534            return False; /* out of argregs */
535         if (aTy == Ity_I32) {
536            tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
537            nextArgReg++;
538         }
539         else if (aTy == Ity_I64) {
540            /* Same comment applies as in the Fast-scheme case. */
541            if (nextArgReg & 1)
542               nextArgReg++;
543            if (nextArgReg + 1 >= ARM_N_ARGREGS)
544               return False; /* out of argregs */
545            HReg raHi, raLo;
546            iselInt64Expr(&raHi, &raLo, env, args[i]);
547            tmpregs[nextArgReg] = raLo;
548            nextArgReg++;
549            tmpregs[nextArgReg] = raHi;
550            nextArgReg++;
551         }
552      }
553
554      /* Now we can compute the condition.  We can't do it earlier
555         because the argument computations could trash the condition
556         codes.  Be a bit clever to handle the common case where the
557         guard is 1:Bit. */
558      cc = ARMcc_AL;
559      if (guard) {
560         if (guard->tag == Iex_Const
561             && guard->Iex.Const.con->tag == Ico_U1
562             && guard->Iex.Const.con->Ico.U1 == True) {
563            /* unconditional -- do nothing */
564         } else {
565            cc = iselCondCode( env, guard );
566         }
567      }
568
569      /* Move the args to their final destinations. */
570      for (i = 0; i < nextArgReg; i++) {
571         if (tmpregs[i] == INVALID_HREG) { // Skip invalid regs
572            addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
573            continue;
574         }
575         /* None of these insns, including any spill code that might
576            be generated, may alter the condition codes. */
577         addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
578      }
579
580   }
581
582   /* Should be assured by checks above */
583   vassert(nextArgReg <= ARM_N_ARGREGS);
584
585   target = (HWord)Ptr_to_ULong(cee->addr);
586
587   /* nextArgReg doles out argument registers.  Since these are
588      assigned in the order r0, r1, r2, r3, its numeric value at this
589      point, which must be between 0 and 4 inclusive, is going to be
590      equal to the number of arg regs in use for the call.  Hence bake
591      that number into the call (we'll need to know it when doing
592      register allocation, to know what regs the call reads.)
593
594      There is a bit of a twist -- harmless but worth recording.
595      Suppose the arg types are (Ity_I32, Ity_I64).  Then we will have
596      the first arg in r0 and the second in r3:r2, but r1 isn't used.
597      We nevertheless have nextArgReg==4 and bake that into the call
598      instruction.  This will mean the register allocator wil believe
599      this insn reads r1 when in fact it doesn't.  But that's
600      harmless; it just artificially extends the live range of r1
601      unnecessarily.  The best fix would be to put into the
602      instruction, a bitmask indicating which of r0/1/2/3 carry live
603      values.  But that's too much hassle. */
604
605   /* Finally, the call itself. */
606   addInstr(env, ARMInstr_Call( cc, target, nextArgReg ));
607
608   return True; /* success */
609}
610
611
612/*---------------------------------------------------------*/
613/*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
614/*---------------------------------------------------------*/
615
616/* Select insns for an integer-typed expression, and add them to the
617   code list.  Return a reg holding the result.  This reg will be a
618   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
619   want to modify it, ask for a new vreg, copy it in there, and modify
620   the copy.  The register allocator will do its best to map both
621   vregs to the same real register, so the copies will often disappear
622   later in the game.
623
624   This should handle expressions of 32, 16 and 8-bit type.  All
625   results are returned in a 32-bit register.  For 16- and 8-bit
626   expressions, the upper 16/24 bits are arbitrary, so you should mask
627   or sign extend partial values if necessary.
628*/
629
630/* --------------------- AMode1 --------------------- */
631
632/* Return an AMode1 which computes the value of the specified
633   expression, possibly also adding insns to the code list as a
634   result.  The expression may only be a 32-bit one.
635*/
636
637static Bool sane_AMode1 ( ARMAMode1* am )
638{
639   switch (am->tag) {
640      case ARMam1_RI:
641         return
642            toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
643                    && (hregIsVirtual(am->ARMam1.RI.reg)
644                        || am->ARMam1.RI.reg == hregARM_R8())
645                    && am->ARMam1.RI.simm13 >= -4095
646                    && am->ARMam1.RI.simm13 <= 4095 );
647      case ARMam1_RRS:
648         return
649            toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
650                    && hregIsVirtual(am->ARMam1.RRS.base)
651                    && hregClass(am->ARMam1.RRS.index) == HRcInt32
652                    && hregIsVirtual(am->ARMam1.RRS.index)
653                    && am->ARMam1.RRS.shift >= 0
654                    && am->ARMam1.RRS.shift <= 3 );
655      default:
656         vpanic("sane_AMode: unknown ARM AMode1 tag");
657   }
658}
659
660static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
661{
662   ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
663   vassert(sane_AMode1(am));
664   return am;
665}
666
667static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
668{
669   IRType ty = typeOfIRExpr(env->type_env,e);
670   vassert(ty == Ity_I32);
671
672   /* FIXME: add RRS matching */
673
674   /* {Add32,Sub32}(expr,simm13) */
675   if (e->tag == Iex_Binop
676       && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
677       && e->Iex.Binop.arg2->tag == Iex_Const
678       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
679      Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
680      if (simm >= -4095 && simm <= 4095) {
681         HReg reg;
682         if (e->Iex.Binop.op == Iop_Sub32)
683            simm = -simm;
684         reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
685         return ARMAMode1_RI(reg, simm);
686      }
687   }
688
689   /* Doesn't match anything in particular.  Generate it into
690      a register and use that. */
691   {
692      HReg reg = iselIntExpr_R(env, e);
693      return ARMAMode1_RI(reg, 0);
694   }
695
696}
697
698
699/* --------------------- AMode2 --------------------- */
700
701/* Return an AMode2 which computes the value of the specified
702   expression, possibly also adding insns to the code list as a
703   result.  The expression may only be a 32-bit one.
704*/
705
706static Bool sane_AMode2 ( ARMAMode2* am )
707{
708   switch (am->tag) {
709      case ARMam2_RI:
710         return
711            toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
712                    && hregIsVirtual(am->ARMam2.RI.reg)
713                    && am->ARMam2.RI.simm9 >= -255
714                    && am->ARMam2.RI.simm9 <= 255 );
715      case ARMam2_RR:
716         return
717            toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
718                    && hregIsVirtual(am->ARMam2.RR.base)
719                    && hregClass(am->ARMam2.RR.index) == HRcInt32
720                    && hregIsVirtual(am->ARMam2.RR.index) );
721      default:
722         vpanic("sane_AMode: unknown ARM AMode2 tag");
723   }
724}
725
726static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
727{
728   ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
729   vassert(sane_AMode2(am));
730   return am;
731}
732
733static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
734{
735   IRType ty = typeOfIRExpr(env->type_env,e);
736   vassert(ty == Ity_I32);
737
738   /* FIXME: add RR matching */
739
740   /* {Add32,Sub32}(expr,simm8) */
741   if (e->tag == Iex_Binop
742       && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
743       && e->Iex.Binop.arg2->tag == Iex_Const
744       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
745      Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
746      if (simm >= -255 && simm <= 255) {
747         HReg reg;
748         if (e->Iex.Binop.op == Iop_Sub32)
749            simm = -simm;
750         reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
751         return ARMAMode2_RI(reg, simm);
752      }
753   }
754
755   /* Doesn't match anything in particular.  Generate it into
756      a register and use that. */
757   {
758      HReg reg = iselIntExpr_R(env, e);
759      return ARMAMode2_RI(reg, 0);
760   }
761
762}
763
764
765/* --------------------- AModeV --------------------- */
766
767/* Return an AModeV which computes the value of the specified
768   expression, possibly also adding insns to the code list as a
769   result.  The expression may only be a 32-bit one.
770*/
771
772static Bool sane_AModeV ( ARMAModeV* am )
773{
774  return toBool( hregClass(am->reg) == HRcInt32
775                 && hregIsVirtual(am->reg)
776                 && am->simm11 >= -1020 && am->simm11 <= 1020
777                 && 0 == (am->simm11 & 3) );
778}
779
780static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
781{
782   ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
783   vassert(sane_AModeV(am));
784   return am;
785}
786
787static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
788{
789   IRType ty = typeOfIRExpr(env->type_env,e);
790   vassert(ty == Ity_I32);
791
792   /* {Add32,Sub32}(expr, simm8 << 2) */
793   if (e->tag == Iex_Binop
794       && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
795       && e->Iex.Binop.arg2->tag == Iex_Const
796       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
797      Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
798      if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
799         HReg reg;
800         if (e->Iex.Binop.op == Iop_Sub32)
801            simm = -simm;
802         reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
803         return mkARMAModeV(reg, simm);
804      }
805   }
806
807   /* Doesn't match anything in particular.  Generate it into
808      a register and use that. */
809   {
810      HReg reg = iselIntExpr_R(env, e);
811      return mkARMAModeV(reg, 0);
812   }
813
814}
815
816/* -------------------- AModeN -------------------- */
817
818static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
819{
820   return iselIntExpr_AModeN_wrk(env, e);
821}
822
823static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
824{
825   HReg reg = iselIntExpr_R(env, e);
826   return mkARMAModeN_R(reg);
827}
828
829
830/* --------------------- RI84 --------------------- */
831
/* Select instructions to generate 'e' into a RI84.  If mayInv is
   true, then the caller will also accept an I84 form that denotes
   'not e'.  In that case didInv must not be NULL.  Whenever didInv
   is non-NULL, *didInv is set to True if the returned I84 denotes
   'not e', and to False otherwise.  This complication is so as to
   allow generation of an RI84 which is suitable for use in either an
   AND or BIC instruction, without knowing (before this call) which
   one.
*/
839static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
840                                   ISelEnv* env, IRExpr* e )
841{
842   ARMRI84* ri;
843   if (mayInv)
844      vassert(didInv != NULL);
845   ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
846   /* sanity checks ... */
847   switch (ri->tag) {
848      case ARMri84_I84:
849         return ri;
850      case ARMri84_R:
851         vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
852         vassert(hregIsVirtual(ri->ARMri84.R.reg));
853         return ri;
854      default:
855         vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
856   }
857}
858
859/* DO NOT CALL THIS DIRECTLY ! */
860static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
861                                       ISelEnv* env, IRExpr* e )
862{
863   IRType ty = typeOfIRExpr(env->type_env,e);
864   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
865
866   if (didInv) *didInv = False;
867
868   /* special case: immediate */
869   if (e->tag == Iex_Const) {
870      UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
871      switch (e->Iex.Const.con->tag) {
872         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
873         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
874         case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
875         default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
876      }
877      if (fitsIn8x4(&u8, &u4, u)) {
878         return ARMRI84_I84( (UShort)u8, (UShort)u4 );
879      }
880      if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
881         vassert(didInv);
882         *didInv = True;
883         return ARMRI84_I84( (UShort)u8, (UShort)u4 );
884      }
885      /* else fail, fall through to default case */
886   }
887
888   /* default case: calculate into a register and return that */
889   {
890      HReg r = iselIntExpr_R ( env, e );
891      return ARMRI84_R(r);
892   }
893}
894
895
896/* --------------------- RI5 --------------------- */
897
898/* Select instructions to generate 'e' into a RI5. */
899
900static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
901{
902   ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
903   /* sanity checks ... */
904   switch (ri->tag) {
905      case ARMri5_I5:
906         return ri;
907      case ARMri5_R:
908         vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
909         vassert(hregIsVirtual(ri->ARMri5.R.reg));
910         return ri;
911      default:
912         vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
913   }
914}
915
916/* DO NOT CALL THIS DIRECTLY ! */
917static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
918{
919   IRType ty = typeOfIRExpr(env->type_env,e);
920   vassert(ty == Ity_I32 || ty == Ity_I8);
921
922   /* special case: immediate */
923   if (e->tag == Iex_Const) {
924      UInt u; /* both invalid */
925      switch (e->Iex.Const.con->tag) {
926         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
927         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
928         case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
929         default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
930      }
931      if (u >= 1 && u <= 31) {
932         return ARMRI5_I5(u);
933      }
934      /* else fail, fall through to default case */
935   }
936
937   /* default case: calculate into a register and return that */
938   {
939      HReg r = iselIntExpr_R ( env, e );
940      return ARMRI5_R(r);
941   }
942}
943
944
945/* ------------------- CondCode ------------------- */
946
947/* Generate code to evaluated a bit-typed expression, returning the
948   condition code which would correspond when the expression would
949   notionally have returned 1. */
950
951static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
952{
953   ARMCondCode cc = iselCondCode_wrk(env,e);
954   vassert(cc != ARMcc_NV);
955   return cc;
956}
957
958static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
959{
960   vassert(e);
961   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
962
963   /* var */
964   if (e->tag == Iex_RdTmp) {
965      HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
966      /* CmpOrTst doesn't modify rTmp; so this is OK. */
967      ARMRI84* one  = ARMRI84_I84(1,0);
968      addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
969      return ARMcc_NE;
970   }
971
972   /* Not1(e) */
973   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
974      /* Generate code for the arg, and negate the test condition */
975      return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
976   }
977
978   /* --- patterns rooted at: 32to1 --- */
979
980   if (e->tag == Iex_Unop
981       && e->Iex.Unop.op == Iop_32to1) {
982      HReg     rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
983      ARMRI84* one  = ARMRI84_I84(1,0);
984      addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
985      return ARMcc_NE;
986   }
987
988   /* --- patterns rooted at: CmpNEZ8 --- */
989
990   if (e->tag == Iex_Unop
991       && e->Iex.Unop.op == Iop_CmpNEZ8) {
992      HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
993      ARMRI84* xFF  = ARMRI84_I84(0xFF,0);
994      addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
995      return ARMcc_NE;
996   }
997
998   /* --- patterns rooted at: CmpNEZ32 --- */
999
1000   if (e->tag == Iex_Unop
1001       && e->Iex.Unop.op == Iop_CmpNEZ32) {
1002      HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1003      ARMRI84* zero = ARMRI84_I84(0,0);
1004      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
1005      return ARMcc_NE;
1006   }
1007
1008   /* --- patterns rooted at: CmpNEZ64 --- */
1009
1010   if (e->tag == Iex_Unop
1011       && e->Iex.Unop.op == Iop_CmpNEZ64) {
1012      HReg     tHi, tLo;
1013      HReg     tmp  = newVRegI(env);
1014      ARMRI84* zero = ARMRI84_I84(0,0);
1015      iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
1016      addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
1017      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
1018      return ARMcc_NE;
1019   }
1020
1021   /* --- Cmp*32*(x,y) --- */
1022   if (e->tag == Iex_Binop
1023       && (e->Iex.Binop.op == Iop_CmpEQ32
1024           || e->Iex.Binop.op == Iop_CmpNE32
1025           || e->Iex.Binop.op == Iop_CmpLT32S
1026           || e->Iex.Binop.op == Iop_CmpLT32U
1027           || e->Iex.Binop.op == Iop_CmpLE32S
1028           || e->Iex.Binop.op == Iop_CmpLE32U)) {
1029      HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1030      ARMRI84* argR = iselIntExpr_RI84(NULL,False,
1031                                       env, e->Iex.Binop.arg2);
1032      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
1033      switch (e->Iex.Binop.op) {
1034         case Iop_CmpEQ32:  return ARMcc_EQ;
1035         case Iop_CmpNE32:  return ARMcc_NE;
1036         case Iop_CmpLT32S: return ARMcc_LT;
1037         case Iop_CmpLT32U: return ARMcc_LO;
1038         case Iop_CmpLE32S: return ARMcc_LE;
1039         case Iop_CmpLE32U: return ARMcc_LS;
1040         default: vpanic("iselCondCode(arm): CmpXX32");
1041      }
1042   }
1043
1044   /* --- CasCmpEQ* --- */
1045   /* Ist_Cas has a dummy argument to compare with, so comparison is
1046      always true. */
1047   if (e->tag == Iex_Binop
1048       && (e->Iex.Binop.op == Iop_CasCmpEQ32
1049           || e->Iex.Binop.op == Iop_CasCmpEQ16
1050           || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1051      return ARMcc_AL;
1052   }
1053
1054   ppIRExpr(e);
1055   vpanic("iselCondCode");
1056}
1057
1058
1059/* --------------------- Reg --------------------- */
1060
1061static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1062{
1063   HReg r = iselIntExpr_R_wrk(env, e);
1064   /* sanity checks ... */
1065#  if 0
1066   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1067#  endif
1068   vassert(hregClass(r) == HRcInt32);
1069   vassert(hregIsVirtual(r));
1070   return r;
1071}
1072
1073/* DO NOT CALL THIS DIRECTLY ! */
1074static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1075{
1076   IRType ty = typeOfIRExpr(env->type_env,e);
1077   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1078//   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1079
1080   switch (e->tag) {
1081
1082   /* --------- TEMP --------- */
1083   case Iex_RdTmp: {
1084      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1085   }
1086
1087   /* --------- LOAD --------- */
1088   case Iex_Load: {
1089      HReg dst  = newVRegI(env);
1090
1091      if (e->Iex.Load.end != Iend_LE)
1092         goto irreducible;
1093
1094      if (ty == Ity_I32) {
1095         ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1096         addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, dst, amode));
1097         return dst;
1098      }
1099      if (ty == Ity_I16) {
1100         ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
1101         addInstr(env, ARMInstr_LdSt16(True/*isLoad*/, False/*!signedLoad*/,
1102                                       dst, amode));
1103         return dst;
1104      }
1105      if (ty == Ity_I8) {
1106         ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1107         addInstr(env, ARMInstr_LdSt8U(True/*isLoad*/, dst, amode));
1108         return dst;
1109      }
1110
1111//zz      if (ty == Ity_I16) {
1112//zz         addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1113//zz         return dst;
1114//zz      }
1115//zz      if (ty == Ity_I8) {
1116//zz         addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1117//zz         return dst;
1118//zz      }
1119      break;
1120   }
1121
1122//zz   /* --------- TERNARY OP --------- */
1123//zz   case Iex_Triop: {
1124//zz      /* C3210 flags following FPU partial remainder (fprem), both
1125//zz         IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1126//zz      if (e->Iex.Triop.op == Iop_PRemC3210F64
1127//zz          || e->Iex.Triop.op == Iop_PRem1C3210F64) {
1128//zz         HReg junk = newVRegF(env);
1129//zz         HReg dst  = newVRegI(env);
1130//zz         HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2);
1131//zz         HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3);
1132//zz         /* XXXROUNDINGFIXME */
1133//zz         /* set roundingmode here */
1134//zz         addInstr(env, X86Instr_FpBinary(
1135//zz                           e->Iex.Binop.op==Iop_PRemC3210F64
1136//zz                              ? Xfp_PREM : Xfp_PREM1,
1137//zz                           srcL,srcR,junk
1138//zz                 ));
1139//zz         /* The previous pseudo-insn will have left the FPU's C3210
1140//zz            flags set correctly.  So bag them. */
1141//zz         addInstr(env, X86Instr_FpStSW_AX());
1142//zz         addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1143//zz         addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1144//zz         return dst;
1145//zz      }
1146//zz
1147//zz      break;
1148//zz   }
1149
1150   /* --------- BINARY OP --------- */
1151   case Iex_Binop: {
1152
1153      ARMAluOp   aop = 0; /* invalid */
1154      ARMShiftOp sop = 0; /* invalid */
1155
1156      /* ADD/SUB/AND/OR/XOR */
1157      switch (e->Iex.Binop.op) {
1158         case Iop_And32: {
1159            Bool     didInv = False;
1160            HReg     dst    = newVRegI(env);
1161            HReg     argL   = iselIntExpr_R(env, e->Iex.Binop.arg1);
1162            ARMRI84* argR   = iselIntExpr_RI84(&didInv, True/*mayInv*/,
1163                                               env, e->Iex.Binop.arg2);
1164            addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
1165                                       dst, argL, argR));
1166            return dst;
1167         }
1168         case Iop_Or32:  aop = ARMalu_OR;  goto std_binop;
1169         case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
1170         case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
1171         case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
1172         std_binop: {
1173            HReg     dst  = newVRegI(env);
1174            HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1175            ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
1176                                             env, e->Iex.Binop.arg2);
1177            addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
1178            return dst;
1179         }
1180         default: break;
1181      }
1182
1183      /* SHL/SHR/SAR */
1184      switch (e->Iex.Binop.op) {
1185         case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
1186         case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
1187         case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
1188         sh_binop: {
1189            HReg    dst  = newVRegI(env);
1190            HReg    argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1191            ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
1192            addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
1193            vassert(ty == Ity_I32); /* else the IR is ill-typed */
1194            return dst;
1195         }
1196         default: break;
1197      }
1198
1199      /* MUL */
1200      if (e->Iex.Binop.op == Iop_Mul32) {
1201         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1202         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1203         HReg dst  = newVRegI(env);
1204         addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1205         addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1206         addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
1207         addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1208         return dst;
1209      }
1210
1211      /* Handle misc other ops. */
1212
1213      if (e->Iex.Binop.op == Iop_Max32U) {
1214         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1215         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1216         HReg dst  = newVRegI(env);
1217         addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
1218                                         ARMRI84_R(argR)));
1219         addInstr(env, mk_iMOVds_RR(dst, argL));
1220         addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
1221         return dst;
1222      }
1223
1224      if (e->Iex.Binop.op == Iop_CmpF64) {
1225         HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
1226         HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
1227         HReg dst = newVRegI(env);
1228         /* Do the compare (FCMPD) and set NZCV in FPSCR.  Then also do
1229            FMSTAT, so we can examine the results directly. */
1230         addInstr(env, ARMInstr_VCmpD(dL, dR));
1231         /* Create in dst, the IRCmpF64Result encoded result. */
1232         addInstr(env, ARMInstr_Imm32(dst, 0));
1233         addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
1234         addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
1235         addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
1236         addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
1237         return dst;
1238      }
1239
1240      if (e->Iex.Binop.op == Iop_F64toI32S
1241          || e->Iex.Binop.op == Iop_F64toI32U) {
1242         /* Wretched uglyness all round, due to having to deal
1243            with rounding modes.  Oh well. */
1244         /* FIXME: if arg1 is a constant indicating round-to-zero,
1245            then we could skip all this arsing around with FPSCR and
1246            simply emit FTO{S,U}IZD. */
1247         Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
1248         HReg valD  = iselDblExpr(env, e->Iex.Binop.arg2);
1249         set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
1250         /* FTO{S,U}ID valF, valD */
1251         HReg valF = newVRegF(env);
1252         addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
1253                                       valF, valD));
1254         set_VFP_rounding_default(env);
1255         /* VMOV dst, valF */
1256         HReg dst = newVRegI(env);
1257         addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
1258         return dst;
1259      }
1260
1261      if (e->Iex.Binop.op == Iop_GetElem8x8
1262          || e->Iex.Binop.op == Iop_GetElem16x4
1263          || e->Iex.Binop.op == Iop_GetElem32x2) {
1264         HReg res = newVRegI(env);
1265         HReg arg = iselNeon64Expr(env, e->Iex.Triop.arg1);
1266         UInt index, size;
1267         if (e->Iex.Binop.arg2->tag != Iex_Const ||
1268             typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1269            vpanic("ARM target supports GetElem with constant "
1270                   "second argument only\n");
1271         }
1272         index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1273         switch (e->Iex.Binop.op) {
1274            case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1275            case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1276            case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1277            default: vassert(0);
1278         }
1279         addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1280                                        mkARMNRS(ARMNRS_Reg, res, 0),
1281                                        mkARMNRS(ARMNRS_Scalar, arg, index),
1282                                        size, False));
1283         return res;
1284      }
1285
1286      if (e->Iex.Binop.op == Iop_GetElem8x16
1287          || e->Iex.Binop.op == Iop_GetElem16x8
1288          || e->Iex.Binop.op == Iop_GetElem32x4) {
1289         HReg res = newVRegI(env);
1290         HReg arg = iselNeonExpr(env, e->Iex.Triop.arg1);
1291         UInt index, size;
1292         if (e->Iex.Binop.arg2->tag != Iex_Const ||
1293             typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1294            vpanic("ARM target supports GetElem with constant "
1295                   "second argument only\n");
1296         }
1297         index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1298         switch (e->Iex.Binop.op) {
1299            case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1300            case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1301            case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1302            default: vassert(0);
1303         }
1304         addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1305                                        mkARMNRS(ARMNRS_Reg, res, 0),
1306                                        mkARMNRS(ARMNRS_Scalar, arg, index),
1307                                        size, True));
1308         return res;
1309      }
1310
1311      /* All cases involving host-side helper calls. */
1312      void* fn = NULL;
1313      switch (e->Iex.Binop.op) {
1314         case Iop_Add16x2:
1315            fn = &h_generic_calc_Add16x2; break;
1316         case Iop_Sub16x2:
1317            fn = &h_generic_calc_Sub16x2; break;
1318         case Iop_HAdd16Ux2:
1319            fn = &h_generic_calc_HAdd16Ux2; break;
1320         case Iop_HAdd16Sx2:
1321            fn = &h_generic_calc_HAdd16Sx2; break;
1322         case Iop_HSub16Ux2:
1323            fn = &h_generic_calc_HSub16Ux2; break;
1324         case Iop_HSub16Sx2:
1325            fn = &h_generic_calc_HSub16Sx2; break;
1326         case Iop_QAdd16Sx2:
1327            fn = &h_generic_calc_QAdd16Sx2; break;
1328         case Iop_QSub16Sx2:
1329            fn = &h_generic_calc_QSub16Sx2; break;
1330         case Iop_Add8x4:
1331            fn = &h_generic_calc_Add8x4; break;
1332         case Iop_Sub8x4:
1333            fn = &h_generic_calc_Sub8x4; break;
1334         case Iop_HAdd8Ux4:
1335            fn = &h_generic_calc_HAdd8Ux4; break;
1336         case Iop_HAdd8Sx4:
1337            fn = &h_generic_calc_HAdd8Sx4; break;
1338         case Iop_HSub8Ux4:
1339            fn = &h_generic_calc_HSub8Ux4; break;
1340         case Iop_HSub8Sx4:
1341            fn = &h_generic_calc_HSub8Sx4; break;
1342         case Iop_QAdd8Sx4:
1343            fn = &h_generic_calc_QAdd8Sx4; break;
1344         case Iop_QAdd8Ux4:
1345            fn = &h_generic_calc_QAdd8Ux4; break;
1346         case Iop_QSub8Sx4:
1347            fn = &h_generic_calc_QSub8Sx4; break;
1348         case Iop_QSub8Ux4:
1349            fn = &h_generic_calc_QSub8Ux4; break;
1350         case Iop_Sad8Ux4:
1351            fn = &h_generic_calc_Sad8Ux4; break;
1352         default:
1353            break;
1354      }
1355
1356      if (fn) {
1357         HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1358         HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1359         HReg res  = newVRegI(env);
1360         addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
1361         addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
1362         addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 2 ));
1363         addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1364         return res;
1365      }
1366
1367      break;
1368   }
1369
1370   /* --------- UNARY OP --------- */
1371   case Iex_Unop: {
1372
1373//zz      /* 1Uto8(32to1(expr32)) */
1374//zz      if (e->Iex.Unop.op == Iop_1Uto8) {
1375//zz         DECLARE_PATTERN(p_32to1_then_1Uto8);
1376//zz         DEFINE_PATTERN(p_32to1_then_1Uto8,
1377//zz                        unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1378//zz         if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1379//zz            IRExpr* expr32 = mi.bindee[0];
1380//zz            HReg dst = newVRegI(env);
1381//zz            HReg src = iselIntExpr_R(env, expr32);
1382//zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
1383//zz            addInstr(env, X86Instr_Alu32R(Xalu_AND,
1384//zz                                          X86RMI_Imm(1), dst));
1385//zz            return dst;
1386//zz         }
1387//zz      }
1388//zz
1389//zz      /* 8Uto32(LDle(expr32)) */
1390//zz      if (e->Iex.Unop.op == Iop_8Uto32) {
1391//zz         DECLARE_PATTERN(p_LDle8_then_8Uto32);
1392//zz         DEFINE_PATTERN(p_LDle8_then_8Uto32,
1393//zz                        unop(Iop_8Uto32,
1394//zz                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1395//zz         if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1396//zz            HReg dst = newVRegI(env);
1397//zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1398//zz            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1399//zz            return dst;
1400//zz         }
1401//zz      }
1402//zz
1403//zz      /* 8Sto32(LDle(expr32)) */
1404//zz      if (e->Iex.Unop.op == Iop_8Sto32) {
1405//zz         DECLARE_PATTERN(p_LDle8_then_8Sto32);
1406//zz         DEFINE_PATTERN(p_LDle8_then_8Sto32,
1407//zz                        unop(Iop_8Sto32,
1408//zz                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1409//zz         if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1410//zz            HReg dst = newVRegI(env);
1411//zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1412//zz            addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1413//zz            return dst;
1414//zz         }
1415//zz      }
1416//zz
1417//zz      /* 16Uto32(LDle(expr32)) */
1418//zz      if (e->Iex.Unop.op == Iop_16Uto32) {
1419//zz         DECLARE_PATTERN(p_LDle16_then_16Uto32);
1420//zz         DEFINE_PATTERN(p_LDle16_then_16Uto32,
1421//zz                        unop(Iop_16Uto32,
1422//zz                             IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1423//zz         if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1424//zz            HReg dst = newVRegI(env);
1425//zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1426//zz            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1427//zz            return dst;
1428//zz         }
1429//zz      }
1430//zz
1431//zz      /* 8Uto32(GET:I8) */
1432//zz      if (e->Iex.Unop.op == Iop_8Uto32) {
1433//zz         if (e->Iex.Unop.arg->tag == Iex_Get) {
1434//zz            HReg      dst;
1435//zz            X86AMode* amode;
1436//zz            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1437//zz            dst = newVRegI(env);
1438//zz            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1439//zz                                hregX86_EBP());
1440//zz            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1441//zz            return dst;
1442//zz         }
1443//zz      }
1444//zz
1445//zz      /* 16to32(GET:I16) */
1446//zz      if (e->Iex.Unop.op == Iop_16Uto32) {
1447//zz         if (e->Iex.Unop.arg->tag == Iex_Get) {
1448//zz            HReg      dst;
1449//zz            X86AMode* amode;
1450//zz            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1451//zz            dst = newVRegI(env);
1452//zz            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1453//zz                                hregX86_EBP());
1454//zz            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1455//zz            return dst;
1456//zz         }
1457//zz      }
1458
1459      switch (e->Iex.Unop.op) {
1460         case Iop_8Uto32: {
1461            HReg dst = newVRegI(env);
1462            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1463            addInstr(env, ARMInstr_Alu(ARMalu_AND,
1464                                       dst, src, ARMRI84_I84(0xFF,0)));
1465            return dst;
1466         }
1467//zz         case Iop_8Uto16:
1468//zz         case Iop_8Uto32:
1469//zz         case Iop_16Uto32: {
1470//zz            HReg dst = newVRegI(env);
1471//zz            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1472//zz            UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1473//zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
1474//zz            addInstr(env, X86Instr_Alu32R(Xalu_AND,
1475//zz                                          X86RMI_Imm(mask), dst));
1476//zz            return dst;
1477//zz         }
1478//zz         case Iop_8Sto16:
1479//zz         case Iop_8Sto32:
1480         case Iop_16Uto32: {
1481            HReg dst = newVRegI(env);
1482            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1483            ARMRI5* amt = ARMRI5_I5(16);
1484            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1485            addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
1486            return dst;
1487         }
1488         case Iop_8Sto32:
1489         case Iop_16Sto32: {
1490            HReg dst = newVRegI(env);
1491            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1492            ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
1493            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1494            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1495            return dst;
1496         }
1497//zz         case Iop_Not8:
1498//zz         case Iop_Not16:
1499         case Iop_Not32: {
1500            HReg dst = newVRegI(env);
1501            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1502            addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
1503            return dst;
1504         }
1505         case Iop_64HIto32: {
1506            HReg rHi, rLo;
1507            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1508            return rHi; /* and abandon rLo .. poor wee thing :-) */
1509         }
1510         case Iop_64to32: {
1511            HReg rHi, rLo;
1512            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1513            return rLo; /* similar stupid comment to the above ... */
1514         }
1515         case Iop_64to8: {
1516            HReg rHi, rLo;
1517            if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
1518               HReg tHi = newVRegI(env);
1519               HReg tLo = newVRegI(env);
1520               HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
1521               addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1522               rHi = tHi;
1523               rLo = tLo;
1524            } else {
1525               iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1526            }
1527            return rLo;
1528         }
1529//zz         case Iop_16HIto8:
1530//zz         case Iop_32HIto16: {
1531//zz            HReg dst  = newVRegI(env);
1532//zz            HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
1533//zz            Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1534//zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
1535//zz            addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1536//zz            return dst;
1537//zz         }
1538         case Iop_1Uto32:
1539         case Iop_1Uto8: {
1540            HReg        dst  = newVRegI(env);
1541            ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1542            addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1543            addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1544            return dst;
1545         }
1546
1547         case Iop_1Sto32: {
1548            HReg        dst  = newVRegI(env);
1549            ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1550            ARMRI5*     amt  = ARMRI5_I5(31);
1551            /* This is really rough.  We could do much better here;
1552               perhaps mvn{cond} dst, #0 as the second insn?
1553               (same applies to 1Sto64) */
1554            addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1555            addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1556            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1557            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1558            return dst;
1559         }
1560
1561
1562//zz         case Iop_1Sto8:
1563//zz         case Iop_1Sto16:
1564//zz         case Iop_1Sto32: {
1565//zz            /* could do better than this, but for now ... */
1566//zz            HReg dst         = newVRegI(env);
1567//zz            X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1568//zz            addInstr(env, X86Instr_Set32(cond,dst));
1569//zz            addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1570//zz            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1571//zz            return dst;
1572//zz         }
1573//zz         case Iop_Ctz32: {
1574//zz            /* Count trailing zeroes, implemented by x86 'bsfl' */
1575//zz            HReg dst = newVRegI(env);
1576//zz            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1577//zz            addInstr(env, X86Instr_Bsfr32(True,src,dst));
1578//zz            return dst;
1579//zz         }
1580         case Iop_Clz32: {
1581            /* Count leading zeroes; easy on ARM. */
1582            HReg dst = newVRegI(env);
1583            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1584            addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
1585            return dst;
1586         }
1587
1588         case Iop_CmpwNEZ32: {
1589            HReg dst = newVRegI(env);
1590            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1591            addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1592            addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1593            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
1594            return dst;
1595         }
1596
1597         case Iop_Left32: {
1598            HReg dst = newVRegI(env);
1599            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1600            addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1601            addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1602            return dst;
1603         }
1604
1605//zz         case Iop_V128to32: {
1606//zz            HReg      dst  = newVRegI(env);
1607//zz            HReg      vec  = iselVecExpr(env, e->Iex.Unop.arg);
1608//zz            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1609//zz            sub_from_esp(env, 16);
1610//zz            addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1611//zz            addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1612//zz            add_to_esp(env, 16);
1613//zz            return dst;
1614//zz         }
1615//zz
1616         case Iop_ReinterpF32asI32: {
1617            HReg dst = newVRegI(env);
1618            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1619            addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
1620            return dst;
1621         }
1622
1623//zz
1624//zz         case Iop_16to8:
1625         case Iop_32to8:
1626         case Iop_32to16:
1627            /* These are no-ops. */
1628            return iselIntExpr_R(env, e->Iex.Unop.arg);
1629
1630         default:
1631            break;
1632      }
1633
1634      /* All Unop cases involving host-side helper calls. */
1635      void* fn = NULL;
1636      switch (e->Iex.Unop.op) {
1637         case Iop_CmpNEZ16x2:
1638            fn = &h_generic_calc_CmpNEZ16x2; break;
1639         case Iop_CmpNEZ8x4:
1640            fn = &h_generic_calc_CmpNEZ8x4; break;
1641         default:
1642            break;
1643      }
1644
1645      if (fn) {
1646         HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1647         HReg res = newVRegI(env);
1648         addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
1649         addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 1 ));
1650         addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1651         return res;
1652      }
1653
1654      break;
1655   }
1656
1657   /* --------- GET --------- */
1658   case Iex_Get: {
1659      if (ty == Ity_I32
1660          && 0 == (e->Iex.Get.offset & 3)
1661          && e->Iex.Get.offset < 4096-4) {
1662         HReg dst = newVRegI(env);
1663         addInstr(env, ARMInstr_LdSt32(
1664                          True/*isLoad*/,
1665                          dst,
1666                          ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
1667         return dst;
1668      }
1669//zz      if (ty == Ity_I8 || ty == Ity_I16) {
1670//zz         HReg dst = newVRegI(env);
1671//zz         addInstr(env, X86Instr_LoadEX(
1672//zz                          toUChar(ty==Ity_I8 ? 1 : 2),
1673//zz                          False,
1674//zz                          X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1675//zz                          dst));
1676//zz         return dst;
1677//zz      }
1678      break;
1679   }
1680
1681//zz   case Iex_GetI: {
1682//zz      X86AMode* am
1683//zz         = genGuestArrayOffset(
1684//zz              env, e->Iex.GetI.descr,
1685//zz                   e->Iex.GetI.ix, e->Iex.GetI.bias );
1686//zz      HReg dst = newVRegI(env);
1687//zz      if (ty == Ity_I8) {
1688//zz         addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1689//zz         return dst;
1690//zz      }
1691//zz      if (ty == Ity_I32) {
1692//zz         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1693//zz         return dst;
1694//zz      }
1695//zz      break;
1696//zz   }
1697
1698   /* --------- CCALL --------- */
1699   case Iex_CCall: {
1700      HReg    dst = newVRegI(env);
1701      vassert(ty == e->Iex.CCall.retty);
1702
1703      /* be very restrictive for now.  Only 32/64-bit ints allowed
1704         for args, and 32 bits for return type. */
1705      if (e->Iex.CCall.retty != Ity_I32)
1706         goto irreducible;
1707
1708      /* Marshal args, do the call, clear stack. */
1709      Bool ok = doHelperCall( env, False,
1710                              NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
1711      if (ok) {
1712         addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1713         return dst;
1714      }
1715      /* else fall through; will hit the irreducible: label */
1716   }
1717
1718   /* --------- LITERAL --------- */
1719   /* 32 literals */
1720   case Iex_Const: {
1721      UInt u   = 0;
1722      HReg dst = newVRegI(env);
1723      switch (e->Iex.Const.con->tag) {
1724         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1725         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1726         case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
1727         default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
1728      }
1729      addInstr(env, ARMInstr_Imm32(dst, u));
1730      return dst;
1731   }
1732
1733   /* --------- MULTIPLEX --------- */
1734   case Iex_Mux0X: {
1735      IRExpr* cond = e->Iex.Mux0X.cond;
1736
1737      /* Mux0X( 32to8(1Uto32(ccexpr)), expr0, exprX ) */
1738      if (ty == Ity_I32
1739          && cond->tag == Iex_Unop
1740          && cond->Iex.Unop.op == Iop_32to8
1741          && cond->Iex.Unop.arg->tag == Iex_Unop
1742          && cond->Iex.Unop.arg->Iex.Unop.op == Iop_1Uto32) {
1743         ARMCondCode cc;
1744         HReg     rX  = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1745         ARMRI84* r0  = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
1746         HReg     dst = newVRegI(env);
1747         addInstr(env, mk_iMOVds_RR(dst, rX));
1748         cc = iselCondCode(env, cond->Iex.Unop.arg->Iex.Unop.arg);
1749         addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
1750         return dst;
1751      }
1752
1753      /* Mux0X(cond, expr0, exprX) (general case) */
1754      if (ty == Ity_I32) {
1755         HReg     r8;
1756         HReg     rX  = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1757         ARMRI84* r0  = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
1758         HReg     dst = newVRegI(env);
1759         addInstr(env, mk_iMOVds_RR(dst, rX));
1760         r8 = iselIntExpr_R(env, cond);
1761         addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
1762                                         ARMRI84_I84(0xFF,0)));
1763         addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, r0));
1764         return dst;
1765      }
1766      break;
1767   }
1768
1769   default:
1770   break;
1771   } /* switch (e->tag) */
1772
1773   /* We get here if no pattern matched. */
1774  irreducible:
1775   ppIRExpr(e);
1776   vpanic("iselIntExpr_R: cannot reduce tree");
1777}
1778
1779
1780/* -------------------- 64-bit -------------------- */
1781
1782/* Compute a 64-bit value into a register pair, which is returned
1783   through the first two parameters: *rHi gets the register holding
1784   the upper 32 bits and *rLo the one holding the lower 32 bits.
1785   Both are checked below to be virtual registers of class HRcInt32.
       As with iselIntExpr_R, they must not be changed by subsequent
       code emitted by the caller.  */
1786
1787static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1788{
       /* All the selection work is done by the _wrk function; this
          wrapper only sanity-checks what it hands back. */
1789   iselInt64Expr_wrk(rHi, rLo, env, e);
1790#  if 0
       /* Disabled debugging aid: prints the expression just handled. */
1791   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1792#  endif
       /* Postconditions: each half is a virtual 32-bit integer reg. */
1793   vassert(hregClass(*rHi) == HRcInt32);
1794   vassert(hregIsVirtual(*rHi));
1795   vassert(hregClass(*rLo) == HRcInt32);
1796   vassert(hregIsVirtual(*rLo));
1797}
1798
1799/* DO NOT CALL THIS DIRECTLY !
       Worker for iselInt64Expr.  Emits instructions into env that
       compute the Ity_I64 expression e, and writes the registers
       holding the upper and lower 32 bits to *rHi and *rLo.

       The expression forms are tried in this order: constant, RdTmp,
       little-endian load, Get, selected binary ops, selected unary
       ops, Mux0X.  Anything else is handed to iselNeon64Expr when the
       NEON hwcap bit is set; otherwise the expression is printed and
       vpanic is called.  Every successful path returns explicitly. */
1800static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1801{
1802   vassert(e);
1803   vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
1804
1805   /* 64-bit literal: each 32-bit half is materialised with its own
          Imm32 into a fresh vreg. */
1806   if (e->tag == Iex_Const) {
          /* NOTE(review): the U64 field is read before the constant's
             tag is asserted below; the assert still runs before any
             instruction is emitted. */
1807      ULong   w64 = e->Iex.Const.con->Ico.U64;
1808      UInt    wHi = toUInt(w64 >> 32);
1809      UInt    wLo = toUInt(w64);
1810      HReg    tHi = newVRegI(env);
1811      HReg    tLo = newVRegI(env);
1812      vassert(e->Iex.Const.con->tag == Ico_U64);
1813      addInstr(env, ARMInstr_Imm32(tHi, wHi));
1814      addInstr(env, ARMInstr_Imm32(tLo, wLo));
1815      *rHi = tHi;
1816      *rLo = tLo;
1817      return;
1818   }
1819
1820   /* read 64-bit IRTemp */
1821   if (e->tag == Iex_RdTmp) {
1822      if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
             /* With NEON, the temp is fetched via iselNeon64Expr as a
                single register and then split into two fresh integer
                vregs with VXferD (first argument False, i.e. not
                "toD"). */
1823         HReg tHi = newVRegI(env);
1824         HReg tLo = newVRegI(env);
1825         HReg tmp = iselNeon64Expr(env, e);
1826         addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1827         *rHi = tHi;
1828         *rLo = tLo;
1829      } else {
             /* Without NEON, the temp is looked up directly as a
                register pair. */
1830         lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
1831      }
1832      return;
1833   }
1834
1835   /* 64-bit load */
       /* Only little-endian loads are handled here; any other load
          falls through to the later cases.  The low word is read from
          offset 0 of the address and the high word from offset 4. */
1836   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
1837      HReg      tLo, tHi, rA;
1838      vassert(e->Iex.Load.ty == Ity_I64);
1839      rA  = iselIntExpr_R(env, e->Iex.Load.addr);
1840      tHi = newVRegI(env);
1841      tLo = newVRegI(env);
1842      addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, ARMAMode1_RI(rA, 4)));
1843      addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, ARMAMode1_RI(rA, 0)));
1844      *rHi = tHi;
1845      *rLo = tLo;
1846      return;
1847   }
1848
1849   /* 64-bit GET */
       /* Two 32-bit loads relative to R8: the low word at the Get
          offset and the high word 4 bytes above it.
          NOTE(review): the offset is not range-checked here; confirm
          that offset+4 always fits what ARMAMode1_RI accepts. */
1850   if (e->tag == Iex_Get) {
1851      ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
1852      ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
1853      HReg tHi = newVRegI(env);
1854      HReg tLo = newVRegI(env);
1855      addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, am4));
1856      addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, am0));
1857      *rHi = tHi;
1858      *rLo = tLo;
1859      return;
1860   }
1861
1862   /* --------- BINARY ops --------- */
1863   if (e->tag == Iex_Binop) {
1864      switch (e->Iex.Binop.op) {
1865
1866         /* 32 x 32 -> 64 multiply */
             /* ARMInstr_Mul takes no register operands: the arguments
                are first moved into R2 and R3, and the result is
                copied out of R1 (high half) and R0 (low half).  The
                signed op selects ARMmul_SX, the unsigned one
                ARMmul_ZX. */
1867         case Iop_MullS32:
1868         case Iop_MullU32: {
1869            HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1870            HReg     argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1871            HReg     tHi  = newVRegI(env);
1872            HReg     tLo  = newVRegI(env);
1873            ARMMulOp mop  = e->Iex.Binop.op == Iop_MullS32
1874                               ? ARMmul_SX : ARMmul_ZX;
1875            addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1876            addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1877            addInstr(env, ARMInstr_Mul(mop));
1878            addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
1879            addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
1880            *rHi = tHi;
1881            *rLo = tLo;
1882            return;
1883         }
1884
             /* 64-bit OR: the two halves are ORed independently. */
1885         case Iop_Or64: {
1886            HReg xLo, xHi, yLo, yHi;
1887            HReg tHi = newVRegI(env);
1888            HReg tLo = newVRegI(env);
1889            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
1890            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
1891            addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
1892            addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
1893            *rHi = tHi;
1894            *rLo = tLo;
1895            return;
1896         }
1897
             /* 64-bit add: the low halves are added first with ADDS,
                then the high halves with ADC, so the emission order
                of these two instructions must be kept. */
1898         case Iop_Add64: {
1899            HReg xLo, xHi, yLo, yHi;
1900            HReg tHi = newVRegI(env);
1901            HReg tLo = newVRegI(env);
1902            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
1903            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
1904            addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
1905            addInstr(env, ARMInstr_Alu(ARMalu_ADC,  tHi, xHi, ARMRI84_R(yHi)));
1906            *rHi = tHi;
1907            *rLo = tLo;
1908            return;
1909         }
1910
1911         /* 32HLto64(e1,e2) */
             /* No instructions of its own: arg1 becomes the high half
                and arg2 the low half, using whatever registers
                iselIntExpr_R returns for them. */
1912         case Iop_32HLto64: {
1913            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
1914            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
1915            return;
1916         }
1917
             /* Other binary ops are not handled here; fall out to the
                cases below. */
1918         default:
1919            break;
1920      }
1921   }
1922
1923   /* --------- UNARY ops --------- */
1924   if (e->tag == Iex_Unop) {
1925      switch (e->Iex.Unop.op) {
1926
1927         /* ReinterpF64asI64 */
             /* The argument is selected by iselDblExpr and its bits
                are transferred unchanged into two integer vregs. */
1928         case Iop_ReinterpF64asI64: {
1929            HReg dstHi = newVRegI(env);
1930            HReg dstLo = newVRegI(env);
1931            HReg src   = iselDblExpr(env, e->Iex.Unop.arg);
1932            addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
1933            *rHi = dstHi;
1934            *rLo = dstLo;
1935            return;
1936         }
1937
1938         /* Left64(e) */
1939         case Iop_Left64: {
1940            HReg yLo, yHi;
1941            HReg tHi  = newVRegI(env);
1942            HReg tLo  = newVRegI(env);
1943            HReg zero = newVRegI(env);
1944            /* yHi:yLo = arg */
1945            iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
1946            /* zero = 0 */
1947            addInstr(env, ARMInstr_Imm32(zero, 0));
1948            /* tLo = 0 - yLo, and set carry */
1949            addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
1950                                       tLo, zero, ARMRI84_R(yLo)));
1951            /* tHi = 0 - yHi - carry */
1952            addInstr(env, ARMInstr_Alu(ARMalu_SBC,
1953                                       tHi, zero, ARMRI84_R(yHi)));
1954            /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
1955               back in, so as to give the final result
1956               tHi:tLo = arg | -arg. */
1957            addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
1958            addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
1959            *rHi = tHi;
1960            *rLo = tLo;
1961            return;
1962         }
1963
1964         /* CmpwNEZ64(e) */
             /* Both halves of the argument are ORed into one 32-bit
                value, which is then smeared by (x | -x) >>s 31.  The
                same vreg, tmp2, is returned as both the high and the
                low half of the result. */
1965         case Iop_CmpwNEZ64: {
1966            HReg srcLo, srcHi;
1967            HReg tmp1 = newVRegI(env);
1968            HReg tmp2 = newVRegI(env);
1969            /* srcHi:srcLo = arg */
1970            iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
1971            /* tmp1 = srcHi | srcLo */
1972            addInstr(env, ARMInstr_Alu(ARMalu_OR,
1973                                       tmp1, srcHi, ARMRI84_R(srcLo)));
1974            /* tmp2 = (tmp1 | -tmp1) >>s 31 */
1975            addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
1976            addInstr(env, ARMInstr_Alu(ARMalu_OR,
1977                                       tmp2, tmp2, ARMRI84_R(tmp1)));
1978            addInstr(env, ARMInstr_Shift(ARMsh_SAR,
1979                                         tmp2, tmp2, ARMRI5_I5(31)));
1980            *rHi = tmp2;
1981            *rLo = tmp2;
1982            return;
1983         }
1984
             /* 1Sto64: dst is set to 0, conditionally overwritten
                with 1, then shifted left and arithmetically right by
                31 to replicate that bit across the word.  The same
                vreg is returned for both halves. */
1985         case Iop_1Sto64: {
1986            HReg        dst  = newVRegI(env);
1987            ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1988            ARMRI5*     amt  = ARMRI5_I5(31);
1989            /* This is really rough.  We could do much better here;
1990               perhaps mvn{cond} dst, #0 as the second insn?
1991               (same applies to 1Sto32) */
1992            addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1993            addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1994            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1995            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1996            *rHi = dst;
1997            *rLo = dst;
1998            return;
1999         }
2000
             /* Other unary ops are not handled here; fall out to the
                cases below. */
2001         default:
2002            break;
2003      }
2004   } /* if (e->tag == Iex_Unop) */
2005
2006   /* --------- MULTIPLEX --------- */
       /* Both exprX and expr0 are computed unconditionally.  The
          result starts as a copy of exprX; the condition is then
          tested against the immediate 0xFF (CmpOrTst with !isCmp) and
          both halves are overwritten with expr0 under ARMcc_EQ --
          presumably meaning the low 8 bits of the condition are all
          zero. */
2007   if (e->tag == Iex_Mux0X) {
2008      IRType ty8;
2009      HReg   r8, rXhi, rXlo, r0hi, r0lo, dstHi, dstLo;
2010      ty8 = typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond);
2011      vassert(ty8 == Ity_I8);
2012      iselInt64Expr(&rXhi, &rXlo, env, e->Iex.Mux0X.exprX);
2013      iselInt64Expr(&r0hi, &r0lo, env, e->Iex.Mux0X.expr0);
2014      dstHi = newVRegI(env);
2015      dstLo = newVRegI(env);
2016      addInstr(env, mk_iMOVds_RR(dstHi, rXhi));
2017      addInstr(env, mk_iMOVds_RR(dstLo, rXlo));
2018      r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
2019      addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
2020                                      ARMRI84_I84(0xFF,0)));
2021      addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstHi, ARMRI84_R(r0hi)));
2022      addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstLo, ARMRI84_R(r0lo)));
2023      *rHi = dstHi;
2024      *rLo = dstLo;
2025      return;
2026   }
2027
2028   /* It is convenient sometimes to call iselInt64Expr even when we
2029      have NEON support (e.g. in doHelperCall we need 64-bit
2030      arguments as 2 x 32 regs).  So, as a last resort on NEON
          hosts, let iselNeon64Expr compute the value into a single
          register and split it into two integer vregs. */
2031   if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
2032      HReg tHi = newVRegI(env);
2033      HReg tLo = newVRegI(env);
2034      HReg tmp = iselNeon64Expr(env, e);
2035      addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2036      *rHi = tHi;
2037      *rLo = tLo;
2038      return ;
2039   }
2040
       /* Nothing matched: print the offending expression and give up. */
2041   ppIRExpr(e);
2042   vpanic("iselInt64Expr");
2043}
2044
2045
2046/*---------------------------------------------------------*/
2047/*--- ISEL: Vector (NEON) expressions (64 or 128 bit)   ---*/
2048/*---------------------------------------------------------*/
2049
/* Select instructions for the expression e and return the single
   register holding its value.  The worker asserts that e has type
   Ity_I64.  The returned register is checked here to be a virtual
   register of class HRcFlt64.  All the selection work is done by
   iselNeon64Expr_wrk, which should not be called directly. */
2050static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
2051{
2052   HReg r = iselNeon64Expr_wrk( env, e );
       /* Postconditions on the worker's result. */
2053   vassert(hregClass(r) == HRcFlt64);
2054   vassert(hregIsVirtual(r));
2055   return r;
2056}
2057
2058/* DO NOT CALL THIS DIRECTLY */
2059static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
2060{
2061   IRType ty = typeOfIRExpr(env->type_env, e);
2062   MatchInfo mi;
2063   vassert(e);
2064   vassert(ty == Ity_I64);
2065
2066   if (e->tag == Iex_RdTmp) {
2067      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2068   }
2069
2070   if (e->tag == Iex_Const) {
2071      HReg rLo, rHi;
2072      HReg res = newVRegD(env);
2073      iselInt64Expr(&rHi, &rLo, env, e);
2074      addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2075      return res;
2076   }
2077
2078   /* 64-bit load */
2079   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2080      HReg res = newVRegD(env);
2081      ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2082      vassert(ty == Ity_I64);
2083      addInstr(env, ARMInstr_NLdStD(True, res, am));
2084      return res;
2085   }
2086
2087   /* 64-bit GET */
2088   if (e->tag == Iex_Get) {
2089      HReg addr = newVRegI(env);
2090      HReg res = newVRegD(env);
2091      vassert(ty == Ity_I64);
2092      addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2093      addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2094      return res;
2095   }
2096
2097   /* --------- BINARY ops --------- */
2098   if (e->tag == Iex_Binop) {
2099      switch (e->Iex.Binop.op) {
2100
2101         /* 32 x 32 -> 64 multiply */
2102         case Iop_MullS32:
2103         case Iop_MullU32: {
2104            HReg rLo, rHi;
2105            HReg res = newVRegD(env);
2106            iselInt64Expr(&rHi, &rLo, env, e);
2107            addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2108            return res;
2109         }
2110
2111         case Iop_And64: {
2112            HReg res = newVRegD(env);
2113            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2114            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2115            addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2116                                           res, argL, argR, 4, False));
2117            return res;
2118         }
2119         case Iop_Or64: {
2120            HReg res = newVRegD(env);
2121            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2122            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2123            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2124                                           res, argL, argR, 4, False));
2125            return res;
2126         }
2127         case Iop_Xor64: {
2128            HReg res = newVRegD(env);
2129            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2130            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2131            addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2132                                           res, argL, argR, 4, False));
2133            return res;
2134         }
2135
2136         /* 32HLto64(e1,e2) */
2137         case Iop_32HLto64: {
2138            HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2139            HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2140            HReg res = newVRegD(env);
2141            addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2142            return res;
2143         }
2144
2145         case Iop_Add8x8:
2146         case Iop_Add16x4:
2147         case Iop_Add32x2:
2148         case Iop_Add64: {
2149            HReg res = newVRegD(env);
2150            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2151            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2152            UInt size;
2153            switch (e->Iex.Binop.op) {
2154               case Iop_Add8x8: size = 0; break;
2155               case Iop_Add16x4: size = 1; break;
2156               case Iop_Add32x2: size = 2; break;
2157               case Iop_Add64: size = 3; break;
2158               default: vassert(0);
2159            }
2160            addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2161                                           res, argL, argR, size, False));
2162            return res;
2163         }
2164         case Iop_Add32Fx2: {
2165            HReg res = newVRegD(env);
2166            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2167            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2168            UInt size = 0;
2169            addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2170                                           res, argL, argR, size, False));
2171            return res;
2172         }
2173         case Iop_Recps32Fx2: {
2174            HReg res = newVRegD(env);
2175            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2176            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2177            UInt size = 0;
2178            addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2179                                           res, argL, argR, size, False));
2180            return res;
2181         }
2182         case Iop_Rsqrts32Fx2: {
2183            HReg res = newVRegD(env);
2184            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2185            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2186            UInt size = 0;
2187            addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2188                                           res, argL, argR, size, False));
2189            return res;
2190         }
2191         case Iop_InterleaveOddLanes8x8:
2192         case Iop_InterleaveOddLanes16x4:
2193         case Iop_InterleaveLO32x2:
2194         case Iop_InterleaveEvenLanes8x8:
2195         case Iop_InterleaveEvenLanes16x4:
2196         case Iop_InterleaveHI32x2: {
2197            HReg tmp = newVRegD(env);
2198            HReg res = newVRegD(env);
2199            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2200            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2201            UInt size;
2202            UInt is_lo;
2203            switch (e->Iex.Binop.op) {
2204               case Iop_InterleaveOddLanes8x8: is_lo = 1; size = 0; break;
2205               case Iop_InterleaveEvenLanes8x8: is_lo = 0; size = 0; break;
2206               case Iop_InterleaveOddLanes16x4: is_lo = 1; size = 1; break;
2207               case Iop_InterleaveEvenLanes16x4: is_lo = 0; size = 1; break;
2208               case Iop_InterleaveLO32x2: is_lo = 1; size = 2; break;
2209               case Iop_InterleaveHI32x2: is_lo = 0; size = 2; break;
2210               default: vassert(0);
2211            }
2212            if (is_lo) {
2213               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2214                                             tmp, argL, 4, False));
2215               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2216                                             res, argR, 4, False));
2217               addInstr(env, ARMInstr_NDual(ARMneon_TRN,
2218                                            res, tmp, size, False));
2219            } else {
2220               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2221                                             tmp, argR, 4, False));
2222               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2223                                             res, argL, 4, False));
2224               addInstr(env, ARMInstr_NDual(ARMneon_TRN,
2225                                            tmp, res, size, False));
2226            }
2227            return res;
2228         }
2229         case Iop_InterleaveHI8x8:
2230         case Iop_InterleaveHI16x4:
2231         case Iop_InterleaveLO8x8:
2232         case Iop_InterleaveLO16x4: {
2233            HReg tmp = newVRegD(env);
2234            HReg res = newVRegD(env);
2235            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2236            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2237            UInt size;
2238            UInt is_lo;
2239            switch (e->Iex.Binop.op) {
2240               case Iop_InterleaveHI8x8: is_lo = 1; size = 0; break;
2241               case Iop_InterleaveLO8x8: is_lo = 0; size = 0; break;
2242               case Iop_InterleaveHI16x4: is_lo = 1; size = 1; break;
2243               case Iop_InterleaveLO16x4: is_lo = 0; size = 1; break;
2244               default: vassert(0);
2245            }
2246            if (is_lo) {
2247               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2248                                             tmp, argL, 4, False));
2249               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2250                                             res, argR, 4, False));
2251               addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
2252                                            res, tmp, size, False));
2253            } else {
2254               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2255                                             tmp, argR, 4, False));
2256               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2257                                             res, argL, 4, False));
2258               addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
2259                                            tmp, res, size, False));
2260            }
2261            return res;
2262         }
2263         case Iop_CatOddLanes8x8:
2264         case Iop_CatOddLanes16x4:
2265         case Iop_CatEvenLanes8x8:
2266         case Iop_CatEvenLanes16x4: {
2267            HReg tmp = newVRegD(env);
2268            HReg res = newVRegD(env);
2269            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2270            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2271            UInt size;
2272            UInt is_lo;
2273            switch (e->Iex.Binop.op) {
2274               case Iop_CatOddLanes8x8: is_lo = 1; size = 0; break;
2275               case Iop_CatEvenLanes8x8: is_lo = 0; size = 0; break;
2276               case Iop_CatOddLanes16x4: is_lo = 1; size = 1; break;
2277               case Iop_CatEvenLanes16x4: is_lo = 0; size = 1; break;
2278               default: vassert(0);
2279            }
2280            if (is_lo) {
2281               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2282                                             tmp, argL, 4, False));
2283               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2284                                             res, argR, 4, False));
2285               addInstr(env, ARMInstr_NDual(ARMneon_UZP,
2286                                            res, tmp, size, False));
2287            } else {
2288               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2289                                             tmp, argR, 4, False));
2290               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2291                                             res, argL, 4, False));
2292               addInstr(env, ARMInstr_NDual(ARMneon_UZP,
2293                                            tmp, res, size, False));
2294            }
2295            return res;
2296         }
2297         case Iop_QAdd8Ux8:
2298         case Iop_QAdd16Ux4:
2299         case Iop_QAdd32Ux2:
2300         case Iop_QAdd64Ux1: {
2301            HReg res = newVRegD(env);
2302            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2303            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2304            UInt size;
2305            switch (e->Iex.Binop.op) {
2306               case Iop_QAdd8Ux8: size = 0; break;
2307               case Iop_QAdd16Ux4: size = 1; break;
2308               case Iop_QAdd32Ux2: size = 2; break;
2309               case Iop_QAdd64Ux1: size = 3; break;
2310               default: vassert(0);
2311            }
2312            addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2313                                           res, argL, argR, size, False));
2314            return res;
2315         }
2316         case Iop_QAdd8Sx8:
2317         case Iop_QAdd16Sx4:
2318         case Iop_QAdd32Sx2:
2319         case Iop_QAdd64Sx1: {
2320            HReg res = newVRegD(env);
2321            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2322            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2323            UInt size;
2324            switch (e->Iex.Binop.op) {
2325               case Iop_QAdd8Sx8: size = 0; break;
2326               case Iop_QAdd16Sx4: size = 1; break;
2327               case Iop_QAdd32Sx2: size = 2; break;
2328               case Iop_QAdd64Sx1: size = 3; break;
2329               default: vassert(0);
2330            }
2331            addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2332                                           res, argL, argR, size, False));
2333            return res;
2334         }
2335         case Iop_Sub8x8:
2336         case Iop_Sub16x4:
2337         case Iop_Sub32x2:
2338         case Iop_Sub64: {
2339            HReg res = newVRegD(env);
2340            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2341            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2342            UInt size;
2343            switch (e->Iex.Binop.op) {
2344               case Iop_Sub8x8: size = 0; break;
2345               case Iop_Sub16x4: size = 1; break;
2346               case Iop_Sub32x2: size = 2; break;
2347               case Iop_Sub64: size = 3; break;
2348               default: vassert(0);
2349            }
2350            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2351                                           res, argL, argR, size, False));
2352            return res;
2353         }
2354         case Iop_Sub32Fx2: {
2355            HReg res = newVRegD(env);
2356            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2357            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2358            UInt size = 0;
2359            addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2360                                           res, argL, argR, size, False));
2361            return res;
2362         }
2363         case Iop_QSub8Ux8:
2364         case Iop_QSub16Ux4:
2365         case Iop_QSub32Ux2:
2366         case Iop_QSub64Ux1: {
2367            HReg res = newVRegD(env);
2368            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2369            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2370            UInt size;
2371            switch (e->Iex.Binop.op) {
2372               case Iop_QSub8Ux8: size = 0; break;
2373               case Iop_QSub16Ux4: size = 1; break;
2374               case Iop_QSub32Ux2: size = 2; break;
2375               case Iop_QSub64Ux1: size = 3; break;
2376               default: vassert(0);
2377            }
2378            addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
2379                                           res, argL, argR, size, False));
2380            return res;
2381         }
2382         case Iop_QSub8Sx8:
2383         case Iop_QSub16Sx4:
2384         case Iop_QSub32Sx2:
2385         case Iop_QSub64Sx1: {
2386            HReg res = newVRegD(env);
2387            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2388            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2389            UInt size;
2390            switch (e->Iex.Binop.op) {
2391               case Iop_QSub8Sx8: size = 0; break;
2392               case Iop_QSub16Sx4: size = 1; break;
2393               case Iop_QSub32Sx2: size = 2; break;
2394               case Iop_QSub64Sx1: size = 3; break;
2395               default: vassert(0);
2396            }
2397            addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
2398                                           res, argL, argR, size, False));
2399            return res;
2400         }
2401         case Iop_Max8Ux8:
2402         case Iop_Max16Ux4:
2403         case Iop_Max32Ux2: {
2404            HReg res = newVRegD(env);
2405            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2406            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2407            UInt size;
2408            switch (e->Iex.Binop.op) {
2409               case Iop_Max8Ux8: size = 0; break;
2410               case Iop_Max16Ux4: size = 1; break;
2411               case Iop_Max32Ux2: size = 2; break;
2412               default: vassert(0);
2413            }
2414            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
2415                                           res, argL, argR, size, False));
2416            return res;
2417         }
2418         case Iop_Max8Sx8:
2419         case Iop_Max16Sx4:
2420         case Iop_Max32Sx2: {
2421            HReg res = newVRegD(env);
2422            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2423            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2424            UInt size;
2425            switch (e->Iex.Binop.op) {
2426               case Iop_Max8Sx8: size = 0; break;
2427               case Iop_Max16Sx4: size = 1; break;
2428               case Iop_Max32Sx2: size = 2; break;
2429               default: vassert(0);
2430            }
2431            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
2432                                           res, argL, argR, size, False));
2433            return res;
2434         }
2435         case Iop_Min8Ux8:
2436         case Iop_Min16Ux4:
2437         case Iop_Min32Ux2: {
2438            HReg res = newVRegD(env);
2439            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2440            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2441            UInt size;
2442            switch (e->Iex.Binop.op) {
2443               case Iop_Min8Ux8: size = 0; break;
2444               case Iop_Min16Ux4: size = 1; break;
2445               case Iop_Min32Ux2: size = 2; break;
2446               default: vassert(0);
2447            }
2448            addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
2449                                           res, argL, argR, size, False));
2450            return res;
2451         }
2452         case Iop_Min8Sx8:
2453         case Iop_Min16Sx4:
2454         case Iop_Min32Sx2: {
2455            HReg res = newVRegD(env);
2456            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2457            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2458            UInt size;
2459            switch (e->Iex.Binop.op) {
2460               case Iop_Min8Sx8: size = 0; break;
2461               case Iop_Min16Sx4: size = 1; break;
2462               case Iop_Min32Sx2: size = 2; break;
2463               default: vassert(0);
2464            }
2465            addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
2466                                           res, argL, argR, size, False));
2467            return res;
2468         }
2469         case Iop_Sar8x8:
2470         case Iop_Sar16x4:
2471         case Iop_Sar32x2: {
2472            HReg res = newVRegD(env);
2473            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2474            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2475            HReg argR2 = newVRegD(env);
2476            HReg zero = newVRegD(env);
2477            UInt size;
2478            switch (e->Iex.Binop.op) {
2479               case Iop_Sar8x8: size = 0; break;
2480               case Iop_Sar16x4: size = 1; break;
2481               case Iop_Sar32x2: size = 2; break;
2482               case Iop_Sar64: size = 3; break;
2483               default: vassert(0);
2484            }
2485            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2486            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2487                                           argR2, zero, argR, size, False));
2488            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2489                                          res, argL, argR2, size, False));
2490            return res;
2491         }
2492         case Iop_Sal8x8:
2493         case Iop_Sal16x4:
2494         case Iop_Sal32x2:
2495         case Iop_Sal64x1: {
2496            HReg res = newVRegD(env);
2497            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2498            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2499            UInt size;
2500            switch (e->Iex.Binop.op) {
2501               case Iop_Sal8x8: size = 0; break;
2502               case Iop_Sal16x4: size = 1; break;
2503               case Iop_Sal32x2: size = 2; break;
2504               case Iop_Sal64x1: size = 3; break;
2505               default: vassert(0);
2506            }
2507            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2508                                          res, argL, argR, size, False));
2509            return res;
2510         }
2511         case Iop_Shr8x8:
2512         case Iop_Shr16x4:
2513         case Iop_Shr32x2: {
2514            HReg res = newVRegD(env);
2515            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2516            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2517            HReg argR2 = newVRegD(env);
2518            HReg zero = newVRegD(env);
2519            UInt size;
2520            switch (e->Iex.Binop.op) {
2521               case Iop_Shr8x8: size = 0; break;
2522               case Iop_Shr16x4: size = 1; break;
2523               case Iop_Shr32x2: size = 2; break;
2524               default: vassert(0);
2525            }
2526            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2527            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2528                                           argR2, zero, argR, size, False));
2529            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2530                                          res, argL, argR2, size, False));
2531            return res;
2532         }
2533         case Iop_Shl8x8:
2534         case Iop_Shl16x4:
2535         case Iop_Shl32x2: {
2536            HReg res = newVRegD(env);
2537            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2538            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2539            UInt size;
2540            switch (e->Iex.Binop.op) {
2541               case Iop_Shl8x8: size = 0; break;
2542               case Iop_Shl16x4: size = 1; break;
2543               case Iop_Shl32x2: size = 2; break;
2544               default: vassert(0);
2545            }
2546            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2547                                          res, argL, argR, size, False));
2548            return res;
2549         }
2550         case Iop_QShl8x8:
2551         case Iop_QShl16x4:
2552         case Iop_QShl32x2:
2553         case Iop_QShl64x1: {
2554            HReg res = newVRegD(env);
2555            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2556            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2557            UInt size;
2558            switch (e->Iex.Binop.op) {
2559               case Iop_QShl8x8: size = 0; break;
2560               case Iop_QShl16x4: size = 1; break;
2561               case Iop_QShl32x2: size = 2; break;
2562               case Iop_QShl64x1: size = 3; break;
2563               default: vassert(0);
2564            }
2565            addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
2566                                          res, argL, argR, size, False));
2567            return res;
2568         }
2569         case Iop_QSal8x8:
2570         case Iop_QSal16x4:
2571         case Iop_QSal32x2:
2572         case Iop_QSal64x1: {
2573            HReg res = newVRegD(env);
2574            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2575            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2576            UInt size;
2577            switch (e->Iex.Binop.op) {
2578               case Iop_QSal8x8: size = 0; break;
2579               case Iop_QSal16x4: size = 1; break;
2580               case Iop_QSal32x2: size = 2; break;
2581               case Iop_QSal64x1: size = 3; break;
2582               default: vassert(0);
2583            }
2584            addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
2585                                          res, argL, argR, size, False));
2586            return res;
2587         }
2588         case Iop_QShlN8x8:
2589         case Iop_QShlN16x4:
2590         case Iop_QShlN32x2:
2591         case Iop_QShlN64x1: {
2592            HReg res = newVRegD(env);
2593            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2594            UInt size, imm;
2595            if (e->Iex.Binop.arg2->tag != Iex_Const ||
2596                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2597               vpanic("ARM taget supports Iop_QShlNAxB with constant "
2598                      "second argument only\n");
2599            }
2600            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2601            switch (e->Iex.Binop.op) {
2602               case Iop_QShlN8x8: size = 8 | imm; break;
2603               case Iop_QShlN16x4: size = 16 | imm; break;
2604               case Iop_QShlN32x2: size = 32 | imm; break;
2605               case Iop_QShlN64x1: size = 64 | imm; break;
2606               default: vassert(0);
2607            }
2608            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
2609                                          res, argL, size, False));
2610            return res;
2611         }
2612         case Iop_QShlN8Sx8:
2613         case Iop_QShlN16Sx4:
2614         case Iop_QShlN32Sx2:
2615         case Iop_QShlN64Sx1: {
2616            HReg res = newVRegD(env);
2617            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2618            UInt size, imm;
2619            if (e->Iex.Binop.arg2->tag != Iex_Const ||
2620                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2621               vpanic("ARM taget supports Iop_QShlNAxB with constant "
2622                      "second argument only\n");
2623            }
2624            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2625            switch (e->Iex.Binop.op) {
2626               case Iop_QShlN8Sx8: size = 8 | imm; break;
2627               case Iop_QShlN16Sx4: size = 16 | imm; break;
2628               case Iop_QShlN32Sx2: size = 32 | imm; break;
2629               case Iop_QShlN64Sx1: size = 64 | imm; break;
2630               default: vassert(0);
2631            }
2632            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
2633                                          res, argL, size, False));
2634            return res;
2635         }
2636         case Iop_QSalN8x8:
2637         case Iop_QSalN16x4:
2638         case Iop_QSalN32x2:
2639         case Iop_QSalN64x1: {
2640            HReg res = newVRegD(env);
2641            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2642            UInt size, imm;
2643            if (e->Iex.Binop.arg2->tag != Iex_Const ||
2644                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2645               vpanic("ARM taget supports Iop_QShlNAxB with constant "
2646                      "second argument only\n");
2647            }
2648            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2649            switch (e->Iex.Binop.op) {
2650               case Iop_QSalN8x8: size = 8 | imm; break;
2651               case Iop_QSalN16x4: size = 16 | imm; break;
2652               case Iop_QSalN32x2: size = 32 | imm; break;
2653               case Iop_QSalN64x1: size = 64 | imm; break;
2654               default: vassert(0);
2655            }
2656            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
2657                                          res, argL, size, False));
2658            return res;
2659         }
2660         case Iop_ShrN8x8:
2661         case Iop_ShrN16x4:
2662         case Iop_ShrN32x2:
2663         case Iop_Shr64: {
2664            HReg res = newVRegD(env);
2665            HReg tmp = newVRegD(env);
2666            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2667            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2668            HReg argR2 = newVRegI(env);
2669            UInt size;
2670            switch (e->Iex.Binop.op) {
2671               case Iop_ShrN8x8: size = 0; break;
2672               case Iop_ShrN16x4: size = 1; break;
2673               case Iop_ShrN32x2: size = 2; break;
2674               case Iop_Shr64: size = 3; break;
2675               default: vassert(0);
2676            }
2677            addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2678            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2679            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2680                                          res, argL, tmp, size, False));
2681            return res;
2682         }
2683         case Iop_ShlN8x8:
2684         case Iop_ShlN16x4:
2685         case Iop_ShlN32x2:
2686         case Iop_Shl64: {
2687            HReg res = newVRegD(env);
2688            HReg tmp = newVRegD(env);
2689            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2690            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2691            UInt size;
2692            switch (e->Iex.Binop.op) {
2693               case Iop_ShlN8x8: size = 0; break;
2694               case Iop_ShlN16x4: size = 1; break;
2695               case Iop_ShlN32x2: size = 2; break;
2696               case Iop_Shl64: size = 3; break;
2697               default: vassert(0);
2698            }
2699            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, False));
2700            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2701                                          res, argL, tmp, size, False));
2702            return res;
2703         }
2704         case Iop_SarN8x8:
2705         case Iop_SarN16x4:
2706         case Iop_SarN32x2:
2707         case Iop_Sar64: {
2708            HReg res = newVRegD(env);
2709            HReg tmp = newVRegD(env);
2710            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2711            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2712            HReg argR2 = newVRegI(env);
2713            UInt size;
2714            switch (e->Iex.Binop.op) {
2715               case Iop_SarN8x8: size = 0; break;
2716               case Iop_SarN16x4: size = 1; break;
2717               case Iop_SarN32x2: size = 2; break;
2718               case Iop_Sar64: size = 3; break;
2719               default: vassert(0);
2720            }
2721            addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2722            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2723            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2724                                          res, argL, tmp, size, False));
2725            return res;
2726         }
2727         case Iop_CmpGT8Ux8:
2728         case Iop_CmpGT16Ux4:
2729         case Iop_CmpGT32Ux2: {
2730            HReg res = newVRegD(env);
2731            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2732            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2733            UInt size;
2734            switch (e->Iex.Binop.op) {
2735               case Iop_CmpGT8Ux8: size = 0; break;
2736               case Iop_CmpGT16Ux4: size = 1; break;
2737               case Iop_CmpGT32Ux2: size = 2; break;
2738               default: vassert(0);
2739            }
2740            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
2741                                           res, argL, argR, size, False));
2742            return res;
2743         }
2744         case Iop_CmpGT8Sx8:
2745         case Iop_CmpGT16Sx4:
2746         case Iop_CmpGT32Sx2: {
2747            HReg res = newVRegD(env);
2748            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2749            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2750            UInt size;
2751            switch (e->Iex.Binop.op) {
2752               case Iop_CmpGT8Sx8: size = 0; break;
2753               case Iop_CmpGT16Sx4: size = 1; break;
2754               case Iop_CmpGT32Sx2: size = 2; break;
2755               default: vassert(0);
2756            }
2757            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
2758                                           res, argL, argR, size, False));
2759            return res;
2760         }
2761         case Iop_CmpEQ8x8:
2762         case Iop_CmpEQ16x4:
2763         case Iop_CmpEQ32x2: {
2764            HReg res = newVRegD(env);
2765            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2766            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2767            UInt size;
2768            switch (e->Iex.Binop.op) {
2769               case Iop_CmpEQ8x8: size = 0; break;
2770               case Iop_CmpEQ16x4: size = 1; break;
2771               case Iop_CmpEQ32x2: size = 2; break;
2772               default: vassert(0);
2773            }
2774            addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
2775                                           res, argL, argR, size, False));
2776            return res;
2777         }
2778         case Iop_Mul8x8:
2779         case Iop_Mul16x4:
2780         case Iop_Mul32x2: {
2781            HReg res = newVRegD(env);
2782            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2783            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2784            UInt size = 0;
2785            switch(e->Iex.Binop.op) {
2786               case Iop_Mul8x8: size = 0; break;
2787               case Iop_Mul16x4: size = 1; break;
2788               case Iop_Mul32x2: size = 2; break;
2789               default: vassert(0);
2790            }
2791            addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
2792                                           res, argL, argR, size, False));
2793            return res;
2794         }
2795         case Iop_Mul32Fx2: {
2796            HReg res = newVRegD(env);
2797            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2798            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2799            UInt size = 0;
2800            addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
2801                                           res, argL, argR, size, False));
2802            return res;
2803         }
2804         case Iop_QDMulHi16Sx4:
2805         case Iop_QDMulHi32Sx2: {
2806            HReg res = newVRegD(env);
2807            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2808            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2809            UInt size = 0;
2810            switch(e->Iex.Binop.op) {
2811               case Iop_QDMulHi16Sx4: size = 1; break;
2812               case Iop_QDMulHi32Sx2: size = 2; break;
2813               default: vassert(0);
2814            }
2815            addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
2816                                           res, argL, argR, size, False));
2817            return res;
2818         }
2819
2820         case Iop_QRDMulHi16Sx4:
2821         case Iop_QRDMulHi32Sx2: {
2822            HReg res = newVRegD(env);
2823            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2824            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2825            UInt size = 0;
2826            switch(e->Iex.Binop.op) {
2827               case Iop_QRDMulHi16Sx4: size = 1; break;
2828               case Iop_QRDMulHi32Sx2: size = 2; break;
2829               default: vassert(0);
2830            }
2831            addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
2832                                           res, argL, argR, size, False));
2833            return res;
2834         }
2835
2836         case Iop_PwAdd8x8:
2837         case Iop_PwAdd16x4:
2838         case Iop_PwAdd32x2: {
2839            HReg res = newVRegD(env);
2840            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2841            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2842            UInt size = 0;
2843            switch(e->Iex.Binop.op) {
2844               case Iop_PwAdd8x8: size = 0; break;
2845               case Iop_PwAdd16x4: size = 1; break;
2846               case Iop_PwAdd32x2: size = 2; break;
2847               default: vassert(0);
2848            }
2849            addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
2850                                           res, argL, argR, size, False));
2851            return res;
2852         }
2853         case Iop_PwAdd32Fx2: {
2854            HReg res = newVRegD(env);
2855            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2856            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2857            UInt size = 0;
2858            addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
2859                                           res, argL, argR, size, False));
2860            return res;
2861         }
2862         case Iop_PwMin8Ux8:
2863         case Iop_PwMin16Ux4:
2864         case Iop_PwMin32Ux2: {
2865            HReg res = newVRegD(env);
2866            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2867            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2868            UInt size = 0;
2869            switch(e->Iex.Binop.op) {
2870               case Iop_PwMin8Ux8: size = 0; break;
2871               case Iop_PwMin16Ux4: size = 1; break;
2872               case Iop_PwMin32Ux2: size = 2; break;
2873               default: vassert(0);
2874            }
2875            addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
2876                                           res, argL, argR, size, False));
2877            return res;
2878         }
2879         case Iop_PwMin8Sx8:
2880         case Iop_PwMin16Sx4:
2881         case Iop_PwMin32Sx2: {
2882            HReg res = newVRegD(env);
2883            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2884            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2885            UInt size = 0;
2886            switch(e->Iex.Binop.op) {
2887               case Iop_PwMin8Sx8: size = 0; break;
2888               case Iop_PwMin16Sx4: size = 1; break;
2889               case Iop_PwMin32Sx2: size = 2; break;
2890               default: vassert(0);
2891            }
2892            addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
2893                                           res, argL, argR, size, False));
2894            return res;
2895         }
2896         case Iop_PwMax8Ux8:
2897         case Iop_PwMax16Ux4:
2898         case Iop_PwMax32Ux2: {
2899            HReg res = newVRegD(env);
2900            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2901            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2902            UInt size = 0;
2903            switch(e->Iex.Binop.op) {
2904               case Iop_PwMax8Ux8: size = 0; break;
2905               case Iop_PwMax16Ux4: size = 1; break;
2906               case Iop_PwMax32Ux2: size = 2; break;
2907               default: vassert(0);
2908            }
2909            addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
2910                                           res, argL, argR, size, False));
2911            return res;
2912         }
2913         case Iop_PwMax8Sx8:
2914         case Iop_PwMax16Sx4:
2915         case Iop_PwMax32Sx2: {
2916            HReg res = newVRegD(env);
2917            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2918            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2919            UInt size = 0;
2920            switch(e->Iex.Binop.op) {
2921               case Iop_PwMax8Sx8: size = 0; break;
2922               case Iop_PwMax16Sx4: size = 1; break;
2923               case Iop_PwMax32Sx2: size = 2; break;
2924               default: vassert(0);
2925            }
2926            addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
2927                                           res, argL, argR, size, False));
2928            return res;
2929         }
2930         case Iop_Perm8x8: {
2931            HReg res = newVRegD(env);
2932            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2933            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2934            addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
2935                                           res, argL, argR, 0, False));
2936            return res;
2937         }
2938         case Iop_PolynomialMul8x8: {
2939            HReg res = newVRegD(env);
2940            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2941            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2942            UInt size = 0;
2943            addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
2944                                           res, argL, argR, size, False));
2945            return res;
2946         }
2947         case Iop_Max32Fx2: {
2948            HReg res = newVRegD(env);
2949            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2950            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2951            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
2952                                           res, argL, argR, 2, False));
2953            return res;
2954         }
2955         case Iop_Min32Fx2: {
2956            HReg res = newVRegD(env);
2957            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2958            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2959            addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
2960                                           res, argL, argR, 2, False));
2961            return res;
2962         }
2963         case Iop_PwMax32Fx2: {
2964            HReg res = newVRegD(env);
2965            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2966            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2967            addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
2968                                           res, argL, argR, 2, False));
2969            return res;
2970         }
2971         case Iop_PwMin32Fx2: {
2972            HReg res = newVRegD(env);
2973            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2974            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2975            addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
2976                                           res, argL, argR, 2, False));
2977            return res;
2978         }
2979         case Iop_CmpGT32Fx2: {
2980            HReg res = newVRegD(env);
2981            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2982            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2983            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
2984                                           res, argL, argR, 2, False));
2985            return res;
2986         }
2987         case Iop_CmpGE32Fx2: {
2988            HReg res = newVRegD(env);
2989            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2990            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2991            addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
2992                                           res, argL, argR, 2, False));
2993            return res;
2994         }
2995         case Iop_CmpEQ32Fx2: {
2996            HReg res = newVRegD(env);
2997            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2998            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2999            addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3000                                           res, argL, argR, 2, False));
3001            return res;
3002         }
3003         case Iop_F32ToFixed32Ux2_RZ:
3004         case Iop_F32ToFixed32Sx2_RZ:
3005         case Iop_Fixed32UToF32x2_RN:
3006         case Iop_Fixed32SToF32x2_RN: {
3007            HReg res = newVRegD(env);
3008            HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3009            ARMNeonUnOp op;
3010            UInt imm6;
3011            if (e->Iex.Binop.arg2->tag != Iex_Const ||
3012               typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3013                  vpanic("ARM supports FP <-> Fixed conversion with constant "
3014                         "second argument less than 33 only\n");
3015            }
3016            imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3017            vassert(imm6 <= 32 && imm6 > 0);
3018            imm6 = 64 - imm6;
3019            switch(e->Iex.Binop.op) {
3020               case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3021               case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3022               case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3023               case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3024               default: vassert(0);
3025            }
3026            addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3027            return res;
3028         }
3029         /*
3030         FIXME: is this here or not?
3031         case Iop_VDup8x8:
3032         case Iop_VDup16x4:
3033         case Iop_VDup32x2: {
3034            HReg res = newVRegD(env);
3035            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3036            UInt index;
3037            UInt imm4;
3038            UInt size = 0;
3039            if (e->Iex.Binop.arg2->tag != Iex_Const ||
3040               typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3041                  vpanic("ARM supports Iop_VDup with constant "
3042                         "second argument less than 16 only\n");
3043            }
3044            index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3045            switch(e->Iex.Binop.op) {
3046               case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3047               case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3048               case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3049               default: vassert(0);
3050            }
3051            if (imm4 >= 16) {
3052               vpanic("ARM supports Iop_VDup with constant "
3053                      "second argument less than 16 only\n");
3054            }
3055            addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3056                                          res, argL, imm4, False));
3057            return res;
3058         }
3059         */
3060         default:
3061            break;
3062      }
3063   }
3064
3065   /* --------- UNARY ops --------- */
3066   if (e->tag == Iex_Unop) {
3067      switch (e->Iex.Unop.op) {
3068
3069         /* ReinterpF64asI64 */
3070         case Iop_ReinterpF64asI64:
3071         /* Left64(e) */
3072         case Iop_Left64:
3073         /* CmpwNEZ64(e) */
3074         //case Iop_CmpwNEZ64:
3075         case Iop_1Sto64: {
3076            HReg rLo, rHi;
3077            HReg res = newVRegD(env);
3078            iselInt64Expr(&rHi, &rLo, env, e);
3079            addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3080            return res;
3081         }
3082         case Iop_Not64: {
3083            DECLARE_PATTERN(p_veqz_8x8);
3084            DECLARE_PATTERN(p_veqz_16x4);
3085            DECLARE_PATTERN(p_veqz_32x2);
3086            DECLARE_PATTERN(p_vcge_8sx8);
3087            DECLARE_PATTERN(p_vcge_16sx4);
3088            DECLARE_PATTERN(p_vcge_32sx2);
3089            DECLARE_PATTERN(p_vcge_8ux8);
3090            DECLARE_PATTERN(p_vcge_16ux4);
3091            DECLARE_PATTERN(p_vcge_32ux2);
3092            DEFINE_PATTERN(p_veqz_8x8,
3093                  unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3094            DEFINE_PATTERN(p_veqz_16x4,
3095                  unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3096            DEFINE_PATTERN(p_veqz_32x2,
3097                  unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3098            DEFINE_PATTERN(p_vcge_8sx8,
3099                  unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3100            DEFINE_PATTERN(p_vcge_16sx4,
3101                  unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3102            DEFINE_PATTERN(p_vcge_32sx2,
3103                  unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3104            DEFINE_PATTERN(p_vcge_8ux8,
3105                  unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3106            DEFINE_PATTERN(p_vcge_16ux4,
3107                  unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3108            DEFINE_PATTERN(p_vcge_32ux2,
3109                  unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3110            if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3111               HReg res = newVRegD(env);
3112               HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3113               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3114               return res;
3115            } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3116               HReg res = newVRegD(env);
3117               HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3118               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3119               return res;
3120            } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3121               HReg res = newVRegD(env);
3122               HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3123               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3124               return res;
3125            } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3126               HReg res = newVRegD(env);
3127               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3128               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3129               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3130                                              res, argL, argR, 0, False));
3131               return res;
3132            } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3133               HReg res = newVRegD(env);
3134               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3135               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3136               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3137                                              res, argL, argR, 1, False));
3138               return res;
3139            } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3140               HReg res = newVRegD(env);
3141               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3142               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3143               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3144                                              res, argL, argR, 2, False));
3145               return res;
3146            } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3147               HReg res = newVRegD(env);
3148               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3149               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3150               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3151                                              res, argL, argR, 0, False));
3152               return res;
3153            } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3154               HReg res = newVRegD(env);
3155               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3156               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3157               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3158                                              res, argL, argR, 1, False));
3159               return res;
3160            } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3161               HReg res = newVRegD(env);
3162               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3163               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3164               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3165                                              res, argL, argR, 2, False));
3166               return res;
3167            } else {
3168               HReg res = newVRegD(env);
3169               HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3170               addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3171               return res;
3172            }
3173         }
3174         case Iop_Dup8x8:
3175         case Iop_Dup16x4:
3176         case Iop_Dup32x2: {
3177            HReg res, arg;
3178            UInt size;
3179            DECLARE_PATTERN(p_vdup_8x8);
3180            DECLARE_PATTERN(p_vdup_16x4);
3181            DECLARE_PATTERN(p_vdup_32x2);
3182            DEFINE_PATTERN(p_vdup_8x8,
3183                  unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3184            DEFINE_PATTERN(p_vdup_16x4,
3185                  unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3186            DEFINE_PATTERN(p_vdup_32x2,
3187                  unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3188            if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3189               UInt index;
3190               UInt imm4;
3191               if (mi.bindee[1]->tag == Iex_Const &&
3192                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3193                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3194                  imm4 = (index << 1) + 1;
3195                  if (index < 8) {
3196                     res = newVRegD(env);
3197                     arg = iselNeon64Expr(env, mi.bindee[0]);
3198                     addInstr(env, ARMInstr_NUnaryS(
3199                                      ARMneon_VDUP,
3200                                      mkARMNRS(ARMNRS_Reg, res, 0),
3201                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3202                                      imm4, False
3203                             ));
3204                     return res;
3205                  }
3206               }
3207            } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3208               UInt index;
3209               UInt imm4;
3210               if (mi.bindee[1]->tag == Iex_Const &&
3211                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3212                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3213                  imm4 = (index << 2) + 2;
3214                  if (index < 4) {
3215                     res = newVRegD(env);
3216                     arg = iselNeon64Expr(env, mi.bindee[0]);
3217                     addInstr(env, ARMInstr_NUnaryS(
3218                                      ARMneon_VDUP,
3219                                      mkARMNRS(ARMNRS_Reg, res, 0),
3220                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3221                                      imm4, False
3222                             ));
3223                     return res;
3224                  }
3225               }
3226            } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3227               UInt index;
3228               UInt imm4;
3229               if (mi.bindee[1]->tag == Iex_Const &&
3230                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3231                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3232                  imm4 = (index << 3) + 4;
3233                  if (index < 2) {
3234                     res = newVRegD(env);
3235                     arg = iselNeon64Expr(env, mi.bindee[0]);
3236                     addInstr(env, ARMInstr_NUnaryS(
3237                                      ARMneon_VDUP,
3238                                      mkARMNRS(ARMNRS_Reg, res, 0),
3239                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3240                                      imm4, False
3241                             ));
3242                     return res;
3243                  }
3244               }
3245            }
3246            arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3247            res = newVRegD(env);
3248            switch (e->Iex.Unop.op) {
3249               case Iop_Dup8x8: size = 0; break;
3250               case Iop_Dup16x4: size = 1; break;
3251               case Iop_Dup32x2: size = 2; break;
3252               default: vassert(0);
3253            }
3254            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3255            return res;
3256         }
3257         case Iop_Abs8x8:
3258         case Iop_Abs16x4:
3259         case Iop_Abs32x2: {
3260            HReg res = newVRegD(env);
3261            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3262            UInt size = 0;
3263            switch(e->Iex.Binop.op) {
3264               case Iop_Abs8x8: size = 0; break;
3265               case Iop_Abs16x4: size = 1; break;
3266               case Iop_Abs32x2: size = 2; break;
3267               default: vassert(0);
3268            }
3269            addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3270            return res;
3271         }
3272         case Iop_Reverse64_8x8:
3273         case Iop_Reverse64_16x4:
3274         case Iop_Reverse64_32x2: {
3275            HReg res = newVRegD(env);
3276            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3277            UInt size = 0;
3278            switch(e->Iex.Binop.op) {
3279               case Iop_Reverse64_8x8: size = 0; break;
3280               case Iop_Reverse64_16x4: size = 1; break;
3281               case Iop_Reverse64_32x2: size = 2; break;
3282               default: vassert(0);
3283            }
3284            addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3285                                          res, arg, size, False));
3286            return res;
3287         }
3288         case Iop_Reverse32_8x8:
3289         case Iop_Reverse32_16x4: {
3290            HReg res = newVRegD(env);
3291            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3292            UInt size = 0;
3293            switch(e->Iex.Binop.op) {
3294               case Iop_Reverse32_8x8: size = 0; break;
3295               case Iop_Reverse32_16x4: size = 1; break;
3296               default: vassert(0);
3297            }
3298            addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3299                                          res, arg, size, False));
3300            return res;
3301         }
3302         case Iop_Reverse16_8x8: {
3303            HReg res = newVRegD(env);
3304            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3305            UInt size = 0;
3306            addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3307                                          res, arg, size, False));
3308            return res;
3309         }
3310         case Iop_CmpwNEZ64: {
3311            HReg x_lsh = newVRegD(env);
3312            HReg x_rsh = newVRegD(env);
3313            HReg lsh_amt = newVRegD(env);
3314            HReg rsh_amt = newVRegD(env);
3315            HReg zero = newVRegD(env);
3316            HReg tmp = newVRegD(env);
3317            HReg tmp2 = newVRegD(env);
3318            HReg res = newVRegD(env);
3319            HReg x = newVRegD(env);
3320            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3321            addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3322            addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3323            addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3324            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3325            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3326                                           rsh_amt, zero, lsh_amt, 2, False));
3327            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3328                                          x_lsh, x, lsh_amt, 3, False));
3329            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3330                                          x_rsh, x, rsh_amt, 3, False));
3331            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3332                                           tmp, x_lsh, x_rsh, 0, False));
3333            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3334                                           res, tmp, x, 0, False));
3335            return res;
3336         }
3337         case Iop_CmpNEZ8x8:
3338         case Iop_CmpNEZ16x4:
3339         case Iop_CmpNEZ32x2: {
3340            HReg res = newVRegD(env);
3341            HReg tmp = newVRegD(env);
3342            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3343            UInt size;
3344            switch (e->Iex.Unop.op) {
3345               case Iop_CmpNEZ8x8: size = 0; break;
3346               case Iop_CmpNEZ16x4: size = 1; break;
3347               case Iop_CmpNEZ32x2: size = 2; break;
3348               default: vassert(0);
3349            }
3350            addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
3351            addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
3352            return res;
3353         }
3354         case Iop_NarrowUn16to8x8:
3355         case Iop_NarrowUn32to16x4:
3356         case Iop_NarrowUn64to32x2: {
3357            HReg res = newVRegD(env);
3358            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3359            UInt size = 0;
3360            switch(e->Iex.Binop.op) {
3361               case Iop_NarrowUn16to8x8:  size = 0; break;
3362               case Iop_NarrowUn32to16x4: size = 1; break;
3363               case Iop_NarrowUn64to32x2: size = 2; break;
3364               default: vassert(0);
3365            }
3366            addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
3367                                          res, arg, size, False));
3368            return res;
3369         }
3370         case Iop_QNarrowUn16Sto8Sx8:
3371         case Iop_QNarrowUn32Sto16Sx4:
3372         case Iop_QNarrowUn64Sto32Sx2: {
3373            HReg res = newVRegD(env);
3374            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3375            UInt size = 0;
3376            switch(e->Iex.Binop.op) {
3377               case Iop_QNarrowUn16Sto8Sx8:  size = 0; break;
3378               case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
3379               case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
3380               default: vassert(0);
3381            }
3382            addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
3383                                          res, arg, size, False));
3384            return res;
3385         }
3386         case Iop_QNarrowUn16Sto8Ux8:
3387         case Iop_QNarrowUn32Sto16Ux4:
3388         case Iop_QNarrowUn64Sto32Ux2: {
3389            HReg res = newVRegD(env);
3390            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3391            UInt size = 0;
3392            switch(e->Iex.Binop.op) {
3393               case Iop_QNarrowUn16Sto8Ux8:  size = 0; break;
3394               case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
3395               case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
3396               default: vassert(0);
3397            }
3398            addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
3399                                          res, arg, size, False));
3400            return res;
3401         }
3402         case Iop_QNarrowUn16Uto8Ux8:
3403         case Iop_QNarrowUn32Uto16Ux4:
3404         case Iop_QNarrowUn64Uto32Ux2: {
3405            HReg res = newVRegD(env);
3406            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3407            UInt size = 0;
3408            switch(e->Iex.Binop.op) {
3409               case Iop_QNarrowUn16Uto8Ux8:  size = 0; break;
3410               case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
3411               case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
3412               default: vassert(0);
3413            }
3414            addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
3415                                          res, arg, size, False));
3416            return res;
3417         }
3418         case Iop_PwAddL8Sx8:
3419         case Iop_PwAddL16Sx4:
3420         case Iop_PwAddL32Sx2: {
3421            HReg res = newVRegD(env);
3422            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3423            UInt size = 0;
3424            switch(e->Iex.Binop.op) {
3425               case Iop_PwAddL8Sx8: size = 0; break;
3426               case Iop_PwAddL16Sx4: size = 1; break;
3427               case Iop_PwAddL32Sx2: size = 2; break;
3428               default: vassert(0);
3429            }
3430            addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
3431                                          res, arg, size, False));
3432            return res;
3433         }
3434         case Iop_PwAddL8Ux8:
3435         case Iop_PwAddL16Ux4:
3436         case Iop_PwAddL32Ux2: {
3437            HReg res = newVRegD(env);
3438            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3439            UInt size = 0;
3440            switch(e->Iex.Binop.op) {
3441               case Iop_PwAddL8Ux8: size = 0; break;
3442               case Iop_PwAddL16Ux4: size = 1; break;
3443               case Iop_PwAddL32Ux2: size = 2; break;
3444               default: vassert(0);
3445            }
3446            addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
3447                                          res, arg, size, False));
3448            return res;
3449         }
3450         case Iop_Cnt8x8: {
3451            HReg res = newVRegD(env);
3452            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3453            UInt size = 0;
3454            addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
3455                                          res, arg, size, False));
3456            return res;
3457         }
3458         case Iop_Clz8Sx8:
3459         case Iop_Clz16Sx4:
3460         case Iop_Clz32Sx2: {
3461            HReg res = newVRegD(env);
3462            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3463            UInt size = 0;
3464            switch(e->Iex.Binop.op) {
3465               case Iop_Clz8Sx8: size = 0; break;
3466               case Iop_Clz16Sx4: size = 1; break;
3467               case Iop_Clz32Sx2: size = 2; break;
3468               default: vassert(0);
3469            }
3470            addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
3471                                          res, arg, size, False));
3472            return res;
3473         }
3474         case Iop_Cls8Sx8:
3475         case Iop_Cls16Sx4:
3476         case Iop_Cls32Sx2: {
3477            HReg res = newVRegD(env);
3478            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3479            UInt size = 0;
3480            switch(e->Iex.Binop.op) {
3481               case Iop_Cls8Sx8: size = 0; break;
3482               case Iop_Cls16Sx4: size = 1; break;
3483               case Iop_Cls32Sx2: size = 2; break;
3484               default: vassert(0);
3485            }
3486            addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
3487                                          res, arg, size, False));
3488            return res;
3489         }
3490         case Iop_FtoI32Sx2_RZ: {
3491            HReg res = newVRegD(env);
3492            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3493            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
3494                                          res, arg, 2, False));
3495            return res;
3496         }
3497         case Iop_FtoI32Ux2_RZ: {
3498            HReg res = newVRegD(env);
3499            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3500            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
3501                                          res, arg, 2, False));
3502            return res;
3503         }
3504         case Iop_I32StoFx2: {
3505            HReg res = newVRegD(env);
3506            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3507            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
3508                                          res, arg, 2, False));
3509            return res;
3510         }
3511         case Iop_I32UtoFx2: {
3512            HReg res = newVRegD(env);
3513            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3514            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
3515                                          res, arg, 2, False));
3516            return res;
3517         }
3518         case Iop_F32toF16x4: {
3519            HReg res = newVRegD(env);
3520            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3521            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
3522                                          res, arg, 2, False));
3523            return res;
3524         }
3525         case Iop_Recip32Fx2: {
3526            HReg res = newVRegD(env);
3527            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3528            addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
3529                                          res, argL, 0, False));
3530            return res;
3531         }
3532         case Iop_Recip32x2: {
3533            HReg res = newVRegD(env);
3534            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3535            addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
3536                                          res, argL, 0, False));
3537            return res;
3538         }
3539         case Iop_Abs32Fx2: {
3540            DECLARE_PATTERN(p_vabd_32fx2);
3541            DEFINE_PATTERN(p_vabd_32fx2,
3542                           unop(Iop_Abs32Fx2,
3543                                binop(Iop_Sub32Fx2,
3544                                      bind(0),
3545                                      bind(1))));
3546            if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
3547               HReg res = newVRegD(env);
3548               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3549               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3550               addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
3551                                              res, argL, argR, 0, False));
3552               return res;
3553            } else {
3554               HReg res = newVRegD(env);
3555               HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3556               addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
3557                                             res, arg, 0, False));
3558               return res;
3559            }
3560         }
3561         case Iop_Rsqrte32Fx2: {
3562            HReg res = newVRegD(env);
3563            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3564            addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
3565                                          res, arg, 0, False));
3566            return res;
3567         }
3568         case Iop_Rsqrte32x2: {
3569            HReg res = newVRegD(env);
3570            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3571            addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
3572                                          res, arg, 0, False));
3573            return res;
3574         }
3575         case Iop_Neg32Fx2: {
3576            HReg res = newVRegD(env);
3577            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3578            addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
3579                                          res, arg, 0, False));
3580            return res;
3581         }
3582         default:
3583            break;
3584      }
3585   } /* if (e->tag == Iex_Unop) */
3586
3587   if (e->tag == Iex_Triop) {
3588      switch (e->Iex.Triop.op) {
3589         case Iop_Extract64: {
3590            HReg res = newVRegD(env);
3591            HReg argL = iselNeon64Expr(env, e->Iex.Triop.arg1);
3592            HReg argR = iselNeon64Expr(env, e->Iex.Triop.arg2);
3593            UInt imm4;
3594            if (e->Iex.Triop.arg3->tag != Iex_Const ||
3595                typeOfIRExpr(env->type_env, e->Iex.Triop.arg3) != Ity_I8) {
3596               vpanic("ARM target supports Iop_Extract64 with constant "
3597                      "third argument less than 16 only\n");
3598            }
3599            imm4 = e->Iex.Triop.arg3->Iex.Const.con->Ico.U8;
3600            if (imm4 >= 8) {
3601               vpanic("ARM target supports Iop_Extract64 with constant "
3602                      "third argument less than 16 only\n");
3603            }
3604            addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
3605                                           res, argL, argR, imm4, False));
3606            return res;
3607         }
3608         case Iop_SetElem8x8:
3609         case Iop_SetElem16x4:
3610         case Iop_SetElem32x2: {
3611            HReg res = newVRegD(env);
3612            HReg dreg = iselNeon64Expr(env, e->Iex.Triop.arg1);
3613            HReg arg = iselIntExpr_R(env, e->Iex.Triop.arg3);
3614            UInt index, size;
3615            if (e->Iex.Triop.arg2->tag != Iex_Const ||
3616                typeOfIRExpr(env->type_env, e->Iex.Triop.arg2) != Ity_I8) {
3617               vpanic("ARM target supports SetElem with constant "
3618                      "second argument only\n");
3619            }
3620            index = e->Iex.Triop.arg2->Iex.Const.con->Ico.U8;
3621            switch (e->Iex.Triop.op) {
3622               case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
3623               case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
3624               case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
3625               default: vassert(0);
3626            }
3627            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
3628            addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
3629                                           mkARMNRS(ARMNRS_Scalar, res, index),
3630                                           mkARMNRS(ARMNRS_Reg, arg, 0),
3631                                           size, False));
3632            return res;
3633         }
3634         default:
3635            break;
3636      }
3637   }
3638
3639   /* --------- MULTIPLEX --------- */
3640   if (e->tag == Iex_Mux0X) {
3641      HReg rLo, rHi;
3642      HReg res = newVRegD(env);
3643      iselInt64Expr(&rHi, &rLo, env, e);
3644      addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3645      return res;
3646   }
3647
3648   ppIRExpr(e);
3649   vpanic("iselNeon64Expr");
3650}
3651
3652static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e )
3653{
3654   HReg r = iselNeonExpr_wrk( env, e );
3655   vassert(hregClass(r) == HRcVec128);
3656   vassert(hregIsVirtual(r));
3657   return r;
3658}
3659
3660/* DO NOT CALL THIS DIRECTLY */
3661static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e )
3662{
3663   IRType ty = typeOfIRExpr(env->type_env, e);
3664   MatchInfo mi;
3665   vassert(e);
3666   vassert(ty == Ity_V128);
3667
3668   if (e->tag == Iex_RdTmp) {
3669      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3670   }
3671
3672   if (e->tag == Iex_Const) {
3673      /* At the moment there should be no 128-bit constants in IR for ARM
3674         generated during disassemble. They are represented as Iop_64HLtoV128
3675         binary operation and are handled among binary ops. */
3676      /* But zero can be created by valgrind internal optimizer */
3677      if (e->Iex.Const.con->Ico.V128 == 0) {
3678         HReg res = newVRegV(env);
3679         addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(0, 0)));
3680         return res;
3681      }
3682      ppIRExpr(e);
3683      vpanic("128-bit constant is not implemented");
3684   }
3685
3686   if (e->tag == Iex_Load) {
3687      HReg res = newVRegV(env);
3688      ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
3689      vassert(ty == Ity_V128);
3690      addInstr(env, ARMInstr_NLdStQ(True, res, am));
3691      return res;
3692   }
3693
3694   if (e->tag == Iex_Get) {
3695      HReg addr = newVRegI(env);
3696      HReg res = newVRegV(env);
3697      vassert(ty == Ity_V128);
3698      addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
3699      addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
3700      return res;
3701   }
3702
3703   if (e->tag == Iex_Unop) {
3704      switch (e->Iex.Unop.op) {
3705         case Iop_NotV128: {
3706            DECLARE_PATTERN(p_veqz_8x16);
3707            DECLARE_PATTERN(p_veqz_16x8);
3708            DECLARE_PATTERN(p_veqz_32x4);
3709            DECLARE_PATTERN(p_vcge_8sx16);
3710            DECLARE_PATTERN(p_vcge_16sx8);
3711            DECLARE_PATTERN(p_vcge_32sx4);
3712            DECLARE_PATTERN(p_vcge_8ux16);
3713            DECLARE_PATTERN(p_vcge_16ux8);
3714            DECLARE_PATTERN(p_vcge_32ux4);
3715            DEFINE_PATTERN(p_veqz_8x16,
3716                  unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
3717            DEFINE_PATTERN(p_veqz_16x8,
3718                  unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
3719            DEFINE_PATTERN(p_veqz_32x4,
3720                  unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
3721            DEFINE_PATTERN(p_vcge_8sx16,
3722                  unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
3723            DEFINE_PATTERN(p_vcge_16sx8,
3724                  unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
3725            DEFINE_PATTERN(p_vcge_32sx4,
3726                  unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
3727            DEFINE_PATTERN(p_vcge_8ux16,
3728                  unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
3729            DEFINE_PATTERN(p_vcge_16ux8,
3730                  unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
3731            DEFINE_PATTERN(p_vcge_32ux4,
3732                  unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
3733            if (matchIRExpr(&mi, p_veqz_8x16, e)) {
3734               HReg res = newVRegV(env);
3735               HReg arg = iselNeonExpr(env, mi.bindee[0]);
3736               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
3737               return res;
3738            } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
3739               HReg res = newVRegV(env);
3740               HReg arg = iselNeonExpr(env, mi.bindee[0]);
3741               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
3742               return res;
3743            } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
3744               HReg res = newVRegV(env);
3745               HReg arg = iselNeonExpr(env, mi.bindee[0]);
3746               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
3747               return res;
3748            } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
3749               HReg res = newVRegV(env);
3750               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3751               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3752               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3753                                              res, argL, argR, 0, True));
3754               return res;
3755            } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
3756               HReg res = newVRegV(env);
3757               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3758               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3759               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3760                                              res, argL, argR, 1, True));
3761               return res;
3762            } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
3763               HReg res = newVRegV(env);
3764               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3765               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3766               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3767                                              res, argL, argR, 2, True));
3768               return res;
3769            } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
3770               HReg res = newVRegV(env);
3771               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3772               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3773               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3774                                              res, argL, argR, 0, True));
3775               return res;
3776            } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
3777               HReg res = newVRegV(env);
3778               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3779               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3780               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3781                                              res, argL, argR, 1, True));
3782               return res;
3783            } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
3784               HReg res = newVRegV(env);
3785               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3786               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3787               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3788                                              res, argL, argR, 2, True));
3789               return res;
3790            } else {
3791               HReg res = newVRegV(env);
3792               HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3793               addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
3794               return res;
3795            }
3796         }
3797         case Iop_Dup8x16:
3798         case Iop_Dup16x8:
3799         case Iop_Dup32x4: {
3800            HReg res, arg;
3801            UInt size;
3802            DECLARE_PATTERN(p_vdup_8x16);
3803            DECLARE_PATTERN(p_vdup_16x8);
3804            DECLARE_PATTERN(p_vdup_32x4);
3805            DEFINE_PATTERN(p_vdup_8x16,
3806                  unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
3807            DEFINE_PATTERN(p_vdup_16x8,
3808                  unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
3809            DEFINE_PATTERN(p_vdup_32x4,
3810                  unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
3811            if (matchIRExpr(&mi, p_vdup_8x16, e)) {
3812               UInt index;
3813               UInt imm4;
3814               if (mi.bindee[1]->tag == Iex_Const &&
3815                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3816                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3817                  imm4 = (index << 1) + 1;
3818                  if (index < 8) {
3819                     res = newVRegV(env);
3820                     arg = iselNeon64Expr(env, mi.bindee[0]);
3821                     addInstr(env, ARMInstr_NUnaryS(
3822                                      ARMneon_VDUP,
3823                                      mkARMNRS(ARMNRS_Reg, res, 0),
3824                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3825                                      imm4, True
3826                             ));
3827                     return res;
3828                  }
3829               }
3830            } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
3831               UInt index;
3832               UInt imm4;
3833               if (mi.bindee[1]->tag == Iex_Const &&
3834                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3835                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3836                  imm4 = (index << 2) + 2;
3837                  if (index < 4) {
3838                     res = newVRegV(env);
3839                     arg = iselNeon64Expr(env, mi.bindee[0]);
3840                     addInstr(env, ARMInstr_NUnaryS(
3841                                      ARMneon_VDUP,
3842                                      mkARMNRS(ARMNRS_Reg, res, 0),
3843                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3844                                      imm4, True
3845                             ));
3846                     return res;
3847                  }
3848               }
3849            } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
3850               UInt index;
3851               UInt imm4;
3852               if (mi.bindee[1]->tag == Iex_Const &&
3853                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3854                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3855                  imm4 = (index << 3) + 4;
3856                  if (index < 2) {
3857                     res = newVRegV(env);
3858                     arg = iselNeon64Expr(env, mi.bindee[0]);
3859                     addInstr(env, ARMInstr_NUnaryS(
3860                                      ARMneon_VDUP,
3861                                      mkARMNRS(ARMNRS_Reg, res, 0),
3862                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3863                                      imm4, True
3864                             ));
3865                     return res;
3866                  }
3867               }
3868            }
3869            arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3870            res = newVRegV(env);
3871            switch (e->Iex.Unop.op) {
3872               case Iop_Dup8x16: size = 0; break;
3873               case Iop_Dup16x8: size = 1; break;
3874               case Iop_Dup32x4: size = 2; break;
3875               default: vassert(0);
3876            }
3877            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
3878            return res;
3879         }
3880         case Iop_Abs8x16:
3881         case Iop_Abs16x8:
3882         case Iop_Abs32x4: { /* selected as a single ARMneon_ABS */
3883            HReg res = newVRegV(env);
3884            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3885            UInt size = 0; /* size code: 0, 1, 2 for 8x16, 16x8, 32x4 */
3886            switch(e->Iex.Unop.op) { /* unary expr: op lives in Iex.Unop, like arg */
3887               case Iop_Abs8x16: size = 0; break;
3888               case Iop_Abs16x8: size = 1; break;
3889               case Iop_Abs32x4: size = 2; break;
3890               default: vassert(0);
3891            }
3892            addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
3893            return res;
3894         }
3895         case Iop_Reverse64_8x16:
3896         case Iop_Reverse64_16x8:
3897         case Iop_Reverse64_32x4: { /* selected as a single ARMneon_REV64 */
3898            HReg res = newVRegV(env);
3899            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3900            UInt size = 0; /* size code: 0, 1, 2 for 8x16, 16x8, 32x4 */
3901            switch(e->Iex.Unop.op) { /* unary expr: op lives in Iex.Unop, like arg */
3902               case Iop_Reverse64_8x16: size = 0; break;
3903               case Iop_Reverse64_16x8: size = 1; break;
3904               case Iop_Reverse64_32x4: size = 2; break;
3905               default: vassert(0);
3906            }
3907            addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3908                                          res, arg, size, True));
3909            return res;
3910         }
3911         case Iop_Reverse32_8x16:
3912         case Iop_Reverse32_16x8: { /* selected as a single ARMneon_REV32 */
3913            HReg res = newVRegV(env);
3914            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3915            UInt size = 0; /* size code: 0, 1 for 8x16, 16x8 */
3916            switch(e->Iex.Unop.op) { /* unary expr: op lives in Iex.Unop, like arg */
3917               case Iop_Reverse32_8x16: size = 0; break;
3918               case Iop_Reverse32_16x8: size = 1; break;
3919               default: vassert(0);
3920            }
3921            addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3922                                          res, arg, size, True));
3923            return res;
3924         }
3925         case Iop_Reverse16_8x16: {
3926            HReg res = newVRegV(env);
3927            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3928            UInt size = 0;
3929            addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3930                                          res, arg, size, True));
3931            return res;
3932         }
3933         case Iop_CmpNEZ64x2: {
3934            HReg x_lsh = newVRegV(env);
3935            HReg x_rsh = newVRegV(env);
3936            HReg lsh_amt = newVRegV(env);
3937            HReg rsh_amt = newVRegV(env);
3938            HReg zero = newVRegV(env);
3939            HReg tmp = newVRegV(env);
3940            HReg tmp2 = newVRegV(env);
3941            HReg res = newVRegV(env);
3942            HReg x = newVRegV(env);
3943            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3944            addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
3945            addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
3946            addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3947            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3948            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3949                                           rsh_amt, zero, lsh_amt, 2, True));
3950            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3951                                          x_lsh, x, lsh_amt, 3, True));
3952            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3953                                          x_rsh, x, rsh_amt, 3, True));
3954            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3955                                           tmp, x_lsh, x_rsh, 0, True));
3956            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3957                                           res, tmp, x, 0, True));
3958            return res;
3959         }
3960         case Iop_CmpNEZ8x16:
3961         case Iop_CmpNEZ16x8:
3962         case Iop_CmpNEZ32x4: {
3963            HReg res = newVRegV(env);
3964            HReg tmp = newVRegV(env);
3965            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3966            UInt size;
3967            switch (e->Iex.Unop.op) {
3968               case Iop_CmpNEZ8x16: size = 0; break;
3969               case Iop_CmpNEZ16x8: size = 1; break;
3970               case Iop_CmpNEZ32x4: size = 2; break;
3971               default: vassert(0);
3972            }
3973            addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
3974            addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
3975            return res;
3976         }
3977         case Iop_Widen8Uto16x8:
3978         case Iop_Widen16Uto32x4:
3979         case Iop_Widen32Uto64x2: {
3980            HReg res = newVRegV(env);
3981            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3982            UInt size;
3983            switch (e->Iex.Unop.op) {
3984               case Iop_Widen8Uto16x8:  size = 0; break;
3985               case Iop_Widen16Uto32x4: size = 1; break;
3986               case Iop_Widen32Uto64x2: size = 2; break;
3987               default: vassert(0);
3988            }
3989            addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
3990                                          res, arg, size, True));
3991            return res;
3992         }
3993         case Iop_Widen8Sto16x8:
3994         case Iop_Widen16Sto32x4:
3995         case Iop_Widen32Sto64x2: {
3996            HReg res = newVRegV(env);
3997            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3998            UInt size;
3999            switch (e->Iex.Unop.op) {
4000               case Iop_Widen8Sto16x8:  size = 0; break;
4001               case Iop_Widen16Sto32x4: size = 1; break;
4002               case Iop_Widen32Sto64x2: size = 2; break;
4003               default: vassert(0);
4004            }
4005            addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4006                                          res, arg, size, True));
4007            return res;
4008         }
4009         case Iop_PwAddL8Sx16:
4010         case Iop_PwAddL16Sx8:
4011         case Iop_PwAddL32Sx4: { /* selected as a single ARMneon_PADDLS */
4012            HReg res = newVRegV(env);
4013            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4014            UInt size = 0; /* size code: 0, 1, 2 for 8Sx16, 16Sx8, 32Sx4 */
4015            switch(e->Iex.Unop.op) { /* unary expr: op lives in Iex.Unop, like arg */
4016               case Iop_PwAddL8Sx16: size = 0; break;
4017               case Iop_PwAddL16Sx8: size = 1; break;
4018               case Iop_PwAddL32Sx4: size = 2; break;
4019               default: vassert(0);
4020            }
4021            addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4022                                          res, arg, size, True));
4023            return res;
4024         }
4025         case Iop_PwAddL8Ux16:
4026         case Iop_PwAddL16Ux8:
4027         case Iop_PwAddL32Ux4: { /* selected as a single ARMneon_PADDLU */
4028            HReg res = newVRegV(env);
4029            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4030            UInt size = 0; /* size code: 0, 1, 2 for 8Ux16, 16Ux8, 32Ux4 */
4031            switch(e->Iex.Unop.op) { /* unary expr: op lives in Iex.Unop, like arg */
4032               case Iop_PwAddL8Ux16: size = 0; break;
4033               case Iop_PwAddL16Ux8: size = 1; break;
4034               case Iop_PwAddL32Ux4: size = 2; break;
4035               default: vassert(0);
4036            }
4037            addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4038                                          res, arg, size, True));
4039            return res;
4040         }
4041         case Iop_Cnt8x16: {
4042            HReg res = newVRegV(env);
4043            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4044            UInt size = 0;
4045            addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4046            return res;
4047         }
4048         case Iop_Clz8Sx16:
4049         case Iop_Clz16Sx8:
4050         case Iop_Clz32Sx4: { /* selected as a single ARMneon_CLZ */
4051            HReg res = newVRegV(env);
4052            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4053            UInt size = 0; /* size code: 0, 1, 2 for 8Sx16, 16Sx8, 32Sx4 */
4054            switch(e->Iex.Unop.op) { /* unary expr: op lives in Iex.Unop, like arg */
4055               case Iop_Clz8Sx16: size = 0; break;
4056               case Iop_Clz16Sx8: size = 1; break;
4057               case Iop_Clz32Sx4: size = 2; break;
4058               default: vassert(0);
4059            }
4060            addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4061            return res;
4062         }
4063         case Iop_Cls8Sx16:
4064         case Iop_Cls16Sx8:
4065         case Iop_Cls32Sx4: { /* selected as a single ARMneon_CLS */
4066            HReg res = newVRegV(env);
4067            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4068            UInt size = 0; /* size code: 0, 1, 2 for 8Sx16, 16Sx8, 32Sx4 */
4069            switch(e->Iex.Unop.op) { /* unary expr: op lives in Iex.Unop, like arg */
4070               case Iop_Cls8Sx16: size = 0; break;
4071               case Iop_Cls16Sx8: size = 1; break;
4072               case Iop_Cls32Sx4: size = 2; break;
4073               default: vassert(0);
4074            }
4075            addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4076            return res;
4077         }
4078         case Iop_FtoI32Sx4_RZ: {
4079            HReg res = newVRegV(env);
4080            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4081            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4082                                          res, arg, 2, True));
4083            return res;
4084         }
4085         case Iop_FtoI32Ux4_RZ: {
4086            HReg res = newVRegV(env);
4087            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4088            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4089                                          res, arg, 2, True));
4090            return res;
4091         }
4092         case Iop_I32StoFx4: {
4093            HReg res = newVRegV(env);
4094            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4095            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4096                                          res, arg, 2, True));
4097            return res;
4098         }
4099         case Iop_I32UtoFx4: {
4100            HReg res = newVRegV(env);
4101            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4102            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4103                                          res, arg, 2, True));
4104            return res;
4105         }
4106         case Iop_F16toF32x4: {
4107            HReg res = newVRegV(env);
4108            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4109            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4110                                          res, arg, 2, True));
4111            return res;
4112         }
4113         case Iop_Recip32Fx4: {
4114            HReg res = newVRegV(env);
4115            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4116            addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4117                                          res, argL, 0, True));
4118            return res;
4119         }
4120         case Iop_Recip32x4: {
4121            HReg res = newVRegV(env);
4122            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4123            addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4124                                          res, argL, 0, True));
4125            return res;
4126         }
4127         case Iop_Abs32Fx4: {
4128            DECLARE_PATTERN(p_vabd_32fx4);
4129            DEFINE_PATTERN(p_vabd_32fx4,
4130                           unop(Iop_Abs32Fx4,
4131                                binop(Iop_Sub32Fx4,
4132                                      bind(0),
4133                                      bind(1))));
4134            if (matchIRExpr(&mi, p_vabd_32fx4, e)) {
4135               HReg res = newVRegV(env);
4136               HReg argL = iselNeonExpr(env, mi.bindee[0]);
4137               HReg argR = iselNeonExpr(env, mi.bindee[1]);
4138               addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
4139                                              res, argL, argR, 0, True));
4140               return res;
4141            } else {
4142               HReg res = newVRegV(env);
4143               HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4144               addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4145                                             res, argL, 0, True));
4146               return res;
4147            }
4148         }
4149         case Iop_Rsqrte32Fx4: {
4150            HReg res = newVRegV(env);
4151            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4152            addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4153                                          res, argL, 0, True));
4154            return res;
4155         }
4156         case Iop_Rsqrte32x4: {
4157            HReg res = newVRegV(env);
4158            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4159            addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4160                                          res, argL, 0, True));
4161            return res;
4162         }
4163         case Iop_Neg32Fx4: {
4164            HReg res = newVRegV(env);
4165            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4166            addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4167                                          res, arg, 0, True));
4168            return res;
4169         }
4170         /* ... */
4171         default:
4172            break;
4173      }
4174   }
4175
4176   if (e->tag == Iex_Binop) {
4177      switch (e->Iex.Binop.op) {
4178         case Iop_64HLtoV128:
4179            /* Try to match into single "VMOV reg, imm" instruction */
4180            if (e->Iex.Binop.arg1->tag == Iex_Const &&
4181                e->Iex.Binop.arg2->tag == Iex_Const &&
4182                typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
4183                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
4184                e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
4185                           e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
4186               ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
4187               ARMNImm *imm = Imm64_to_ARMNImm(imm64);
4188               if (imm) {
4189                  HReg res = newVRegV(env);
4190                  addInstr(env, ARMInstr_NeonImm(res, imm));
4191                  return res;
4192               }
4193               if ((imm64 >> 32) == 0LL &&
4194                   (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
4195                  HReg tmp1 = newVRegV(env);
4196                  HReg tmp2 = newVRegV(env);
4197                  HReg res = newVRegV(env);
4198                  if (imm->type < 10) {
4199                     addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
4200                     addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4201                     addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4202                                                    res, tmp1, tmp2, 4, True));
4203                     return res;
4204                  }
4205               }
4206               if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
4207                   (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
4208                  HReg tmp1 = newVRegV(env);
4209                  HReg tmp2 = newVRegV(env);
4210                  HReg res = newVRegV(env);
4211                  if (imm->type < 10) {
4212                     addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
4213                     addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4214                     addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4215                                                    res, tmp1, tmp2, 4, True));
4216                     return res;
4217                  }
4218               }
4219            }
4220            /* Does not match "VMOV Reg, Imm" form.  We'll have to do
4221               it the slow way. */
4222            {
4223               /* local scope */
4224               /* Done via the stack for ease of use. */
4225               /* FIXME: assumes little endian host */
4226               HReg       w3, w2, w1, w0;
4227               HReg       res  = newVRegV(env);
4228               ARMAMode1* sp_0  = ARMAMode1_RI(hregARM_R13(), 0);
4229               ARMAMode1* sp_4  = ARMAMode1_RI(hregARM_R13(), 4);
4230               ARMAMode1* sp_8  = ARMAMode1_RI(hregARM_R13(), 8);
4231               ARMAMode1* sp_12 = ARMAMode1_RI(hregARM_R13(), 12);
4232               ARMRI84*   c_16  = ARMRI84_I84(16,0);
4233               /* Make room on the stack: move SP down by 16 bytes */
4234               addInstr(env, ARMInstr_Alu(ARMalu_SUB, hregARM_R13(),
4235                                                      hregARM_R13(), c_16));
4236
4237               /* Store the less significant 64 bits */
4238               iselInt64Expr(&w1, &w0, env, e->Iex.Binop.arg2);
4239               addInstr(env, ARMInstr_LdSt32(False/*store*/, w0, sp_0));
4240               addInstr(env, ARMInstr_LdSt32(False/*store*/, w1, sp_4));
4241
4242               /* Store the more significant 64 bits */
4243               iselInt64Expr(&w3, &w2, env, e->Iex.Binop.arg1);
4244               addInstr(env, ARMInstr_LdSt32(False/*store*/, w2, sp_8));
4245               addInstr(env, ARMInstr_LdSt32(False/*store*/, w3, sp_12));
4246
4247                /* Load result back from stack. */
4248                addInstr(env, ARMInstr_NLdStQ(True/*load*/, res,
4249                                              mkARMAModeN_R(hregARM_R13())));
4250
4251                /* Restore SP */
4252                addInstr(env, ARMInstr_Alu(ARMalu_ADD, hregARM_R13(),
4253                                           hregARM_R13(), c_16));
4254                return res;
4255            } /* local scope */
4256            goto neon_expr_bad;
4257         case Iop_AndV128: {
4258            HReg res = newVRegV(env);
4259            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4260            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4261            addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4262                                           res, argL, argR, 4, True));
4263            return res;
4264         }
4265         case Iop_OrV128: {
4266            HReg res = newVRegV(env);
4267            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4268            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4269            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4270                                           res, argL, argR, 4, True));
4271            return res;
4272         }
4273         case Iop_XorV128: {
4274            HReg res = newVRegV(env);
4275            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4276            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4277            addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4278                                           res, argL, argR, 4, True));
4279            return res;
4280         }
4281         case Iop_Add8x16:
4282         case Iop_Add16x8:
4283         case Iop_Add32x4:
4284         case Iop_Add64x2: {
4285            /*
4286            FIXME: remove this if not used
4287            DECLARE_PATTERN(p_vrhadd_32sx4);
4288            ULong one = (1LL << 32) | 1LL;
4289            DEFINE_PATTERN(p_vrhadd_32sx4,
4290                  binop(Iop_Add32x4,
4291                        binop(Iop_Add32x4,
4292                              binop(Iop_SarN32x4,
4293                                    bind(0),
4294                                    mkU8(1)),
4295                              binop(Iop_SarN32x4,
4296                                    bind(1),
4297                                    mkU8(1))),
4298                        binop(Iop_SarN32x4,
4299                              binop(Iop_Add32x4,
4300                                    binop(Iop_Add32x4,
4301                                          binop(Iop_AndV128,
4302                                                bind(0),
4303                                                mkU128(one)),
4304                                          binop(Iop_AndV128,
4305                                                bind(1),
4306                                                mkU128(one))),
4307                                    mkU128(one)),
4308                              mkU8(1))));
4309            */
4310            HReg res = newVRegV(env);
4311            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4312            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4313            UInt size;
4314            switch (e->Iex.Binop.op) {
4315               case Iop_Add8x16: size = 0; break;
4316               case Iop_Add16x8: size = 1; break;
4317               case Iop_Add32x4: size = 2; break;
4318               case Iop_Add64x2: size = 3; break;
4319               default:
4320                  ppIROp(e->Iex.Binop.op);
4321                  vpanic("Illegal element size in VADD");
4322            }
4323            addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
4324                                           res, argL, argR, size, True));
4325            return res;
4326         }
4327         case Iop_Add32Fx4: {
4328            HReg res = newVRegV(env);
4329            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4330            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4331            UInt size = 0;
4332            addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
4333                                           res, argL, argR, size, True));
4334            return res;
4335         }
4336         case Iop_Recps32Fx4: {
4337            HReg res = newVRegV(env);
4338            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4339            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4340            UInt size = 0;
4341            addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
4342                                           res, argL, argR, size, True));
4343            return res;
4344         }
4345         case Iop_Rsqrts32Fx4: {
4346            HReg res = newVRegV(env);
4347            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4348            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4349            UInt size = 0;
4350            addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
4351                                           res, argL, argR, size, True));
4352            return res;
4353         }
4354         case Iop_InterleaveEvenLanes8x16:
4355         case Iop_InterleaveEvenLanes16x8:
4356         case Iop_InterleaveEvenLanes32x4:
4357         case Iop_InterleaveOddLanes8x16:
4358         case Iop_InterleaveOddLanes16x8:
4359         case Iop_InterleaveOddLanes32x4: {
4360            HReg tmp = newVRegV(env);
4361            HReg res = newVRegV(env);
4362            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4363            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4364            UInt size;
4365            UInt is_lo;
4366            switch (e->Iex.Binop.op) {
4367               case Iop_InterleaveEvenLanes8x16: is_lo = 0; size = 0; break;
4368               case Iop_InterleaveOddLanes8x16: is_lo = 1; size = 0; break;
4369               case Iop_InterleaveEvenLanes16x8: is_lo = 0; size = 1; break;
4370               case Iop_InterleaveOddLanes16x8: is_lo = 1; size = 1; break;
4371               case Iop_InterleaveEvenLanes32x4: is_lo = 0; size = 2; break;
4372               case Iop_InterleaveOddLanes32x4: is_lo = 1; size = 2; break;
4373               default:
4374                  ppIROp(e->Iex.Binop.op);
4375                  vpanic("Illegal element size in VTRN");
4376            }
4377            if (is_lo) {
4378               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4379                                             tmp, argL, 4, True));
4380               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4381                                             res, argR, 4, True));
4382               addInstr(env, ARMInstr_NDual(ARMneon_TRN,
4383                                            res, tmp, size, True));
4384            } else {
4385               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4386                                             tmp, argR, 4, True));
4387               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4388                                             res, argL, 4, True));
4389               addInstr(env, ARMInstr_NDual(ARMneon_TRN,
4390                                            tmp, res, size, True));
4391            }
4392            return res;
4393         }
4394         case Iop_InterleaveHI8x16:
4395         case Iop_InterleaveHI16x8:
4396         case Iop_InterleaveHI32x4:
4397         case Iop_InterleaveLO8x16:
4398         case Iop_InterleaveLO16x8:
4399         case Iop_InterleaveLO32x4: {
4400            HReg tmp = newVRegV(env);
4401            HReg res = newVRegV(env);
4402            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4403            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4404            UInt size;
4405            UInt is_lo;
4406            switch (e->Iex.Binop.op) {
4407               case Iop_InterleaveHI8x16: is_lo = 1; size = 0; break;
4408               case Iop_InterleaveLO8x16: is_lo = 0; size = 0; break;
4409               case Iop_InterleaveHI16x8: is_lo = 1; size = 1; break;
4410               case Iop_InterleaveLO16x8: is_lo = 0; size = 1; break;
4411               case Iop_InterleaveHI32x4: is_lo = 1; size = 2; break;
4412               case Iop_InterleaveLO32x4: is_lo = 0; size = 2; break;
4413               default:
4414                  ppIROp(e->Iex.Binop.op);
4415                  vpanic("Illegal element size in VZIP");
4416            }
4417            if (is_lo) {
4418               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4419                                             tmp, argL, 4, True));
4420               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4421                                             res, argR, 4, True));
4422               addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
4423                                            res, tmp, size, True));
4424            } else {
4425               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4426                                             tmp, argR, 4, True));
4427               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4428                                             res, argL, 4, True));
4429               addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
4430                                            tmp, res, size, True));
4431            }
4432            return res;
4433         }
4434         case Iop_CatOddLanes8x16:
4435         case Iop_CatOddLanes16x8:
4436         case Iop_CatOddLanes32x4:
4437         case Iop_CatEvenLanes8x16:
4438         case Iop_CatEvenLanes16x8:
4439         case Iop_CatEvenLanes32x4: {
4440            HReg tmp = newVRegV(env);
4441            HReg res = newVRegV(env);
4442            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4443            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4444            UInt size;
4445            UInt is_lo;
4446            switch (e->Iex.Binop.op) {
4447               case Iop_CatOddLanes8x16: is_lo = 1; size = 0; break;
4448               case Iop_CatEvenLanes8x16: is_lo = 0; size = 0; break;
4449               case Iop_CatOddLanes16x8: is_lo = 1; size = 1; break;
4450               case Iop_CatEvenLanes16x8: is_lo = 0; size = 1; break;
4451               case Iop_CatOddLanes32x4: is_lo = 1; size = 2; break;
4452               case Iop_CatEvenLanes32x4: is_lo = 0; size = 2; break;
4453               default:
4454                  ppIROp(e->Iex.Binop.op);
4455                  vpanic("Illegal element size in VUZP");
4456            }
4457            if (is_lo) {
4458               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4459                                             tmp, argL, 4, True));
4460               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4461                                             res, argR, 4, True));
4462               addInstr(env, ARMInstr_NDual(ARMneon_UZP,
4463                                            res, tmp, size, True));
4464            } else {
4465               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4466                                             tmp, argR, 4, True));
4467               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4468                                             res, argL, 4, True));
4469               addInstr(env, ARMInstr_NDual(ARMneon_UZP,
4470                                            tmp, res, size, True));
4471            }
4472            return res;
4473         }
4474         case Iop_QAdd8Ux16:
4475         case Iop_QAdd16Ux8:
4476         case Iop_QAdd32Ux4:
4477         case Iop_QAdd64Ux2: {
4478            HReg res = newVRegV(env);
4479            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4480            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4481            UInt size;
4482            switch (e->Iex.Binop.op) {
4483               case Iop_QAdd8Ux16: size = 0; break;
4484               case Iop_QAdd16Ux8: size = 1; break;
4485               case Iop_QAdd32Ux4: size = 2; break;
4486               case Iop_QAdd64Ux2: size = 3; break;
4487               default:
4488                  ppIROp(e->Iex.Binop.op);
4489                  vpanic("Illegal element size in VQADDU");
4490            }
4491            addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
4492                                           res, argL, argR, size, True));
4493            return res;
4494         }
4495         case Iop_QAdd8Sx16:
4496         case Iop_QAdd16Sx8:
4497         case Iop_QAdd32Sx4:
4498         case Iop_QAdd64Sx2: {
4499            HReg res = newVRegV(env);
4500            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4501            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4502            UInt size;
4503            switch (e->Iex.Binop.op) {
4504               case Iop_QAdd8Sx16: size = 0; break;
4505               case Iop_QAdd16Sx8: size = 1; break;
4506               case Iop_QAdd32Sx4: size = 2; break;
4507               case Iop_QAdd64Sx2: size = 3; break;
4508               default:
4509                  ppIROp(e->Iex.Binop.op);
4510                  vpanic("Illegal element size in VQADDS");
4511            }
4512            addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
4513                                           res, argL, argR, size, True));
4514            return res;
4515         }
4516         case Iop_Sub8x16:
4517         case Iop_Sub16x8:
4518         case Iop_Sub32x4:
4519         case Iop_Sub64x2: {
4520            HReg res = newVRegV(env);
4521            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4522            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4523            UInt size;
4524            switch (e->Iex.Binop.op) {
4525               case Iop_Sub8x16: size = 0; break;
4526               case Iop_Sub16x8: size = 1; break;
4527               case Iop_Sub32x4: size = 2; break;
4528               case Iop_Sub64x2: size = 3; break;
4529               default:
4530                  ppIROp(e->Iex.Binop.op);
4531                  vpanic("Illegal element size in VSUB");
4532            }
4533            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4534                                           res, argL, argR, size, True));
4535            return res;
4536         }
4537         case Iop_Sub32Fx4: {
4538            HReg res = newVRegV(env);
4539            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4540            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4541            UInt size = 0;
4542            addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
4543                                           res, argL, argR, size, True));
4544            return res;
4545         }
4546         case Iop_QSub8Ux16:
4547         case Iop_QSub16Ux8:
4548         case Iop_QSub32Ux4:
4549         case Iop_QSub64Ux2: {
4550            HReg res = newVRegV(env);
4551            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4552            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4553            UInt size;
4554            switch (e->Iex.Binop.op) {
4555               case Iop_QSub8Ux16: size = 0; break;
4556               case Iop_QSub16Ux8: size = 1; break;
4557               case Iop_QSub32Ux4: size = 2; break;
4558               case Iop_QSub64Ux2: size = 3; break;
4559               default:
4560                  ppIROp(e->Iex.Binop.op);
4561                  vpanic("Illegal element size in VQSUBU");
4562            }
4563            addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
4564                                           res, argL, argR, size, True));
4565            return res;
4566         }
4567         case Iop_QSub8Sx16:
4568         case Iop_QSub16Sx8:
4569         case Iop_QSub32Sx4:
4570         case Iop_QSub64Sx2: {
4571            HReg res = newVRegV(env);
4572            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4573            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4574            UInt size;
4575            switch (e->Iex.Binop.op) {
4576               case Iop_QSub8Sx16: size = 0; break;
4577               case Iop_QSub16Sx8: size = 1; break;
4578               case Iop_QSub32Sx4: size = 2; break;
4579               case Iop_QSub64Sx2: size = 3; break;
4580               default:
4581                  ppIROp(e->Iex.Binop.op);
4582                  vpanic("Illegal element size in VQSUBS");
4583            }
4584            addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
4585                                           res, argL, argR, size, True));
4586            return res;
4587         }
4588         case Iop_Max8Ux16:
4589         case Iop_Max16Ux8:
4590         case Iop_Max32Ux4: {
4591            HReg res = newVRegV(env);
4592            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4593            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4594            UInt size;
4595            switch (e->Iex.Binop.op) {
4596               case Iop_Max8Ux16: size = 0; break;
4597               case Iop_Max16Ux8: size = 1; break;
4598               case Iop_Max32Ux4: size = 2; break;
4599               default: vpanic("Illegal element size in VMAXU");
4600            }
4601            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
4602                                           res, argL, argR, size, True));
4603            return res;
4604         }
4605         case Iop_Max8Sx16:
4606         case Iop_Max16Sx8:
4607         case Iop_Max32Sx4: { /* Max*S ops: one ARMneon_VMAXS on both operands */
4608            HReg res = newVRegV(env);
4609            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4610            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4611            UInt size; /* lane-size code: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit lanes */
4612            switch (e->Iex.Binop.op) {
4613               case Iop_Max8Sx16: size = 0; break;
4614               case Iop_Max16Sx8: size = 1; break;
4615               case Iop_Max32Sx4: size = 2; break;
4616               default: vpanic("Illegal element size in VMAXS"); /* names the op emitted here */
4617            }
4618            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
4619                                           res, argL, argR, size, True));
4620            return res; /* fresh vector vreg holding the result */
4621         }
4622         case Iop_Min8Ux16:
4623         case Iop_Min16Ux8:
4624         case Iop_Min32Ux4: { /* Min*U ops: one ARMneon_VMINU on both operands */
4625            HReg res = newVRegV(env);
4626            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4627            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4628            UInt size; /* lane-size code: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit lanes */
4629            switch (e->Iex.Binop.op) {
4630               case Iop_Min8Ux16: size = 0; break;
4631               case Iop_Min16Ux8: size = 1; break;
4632               case Iop_Min32Ux4: size = 2; break;
4633               default: vpanic("Illegal element size in VMINU"); /* names the op emitted here */
4634            }
4635            addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
4636                                           res, argL, argR, size, True));
4637            return res; /* fresh vector vreg holding the result */
4638         }
4639         case Iop_Min8Sx16:
4640         case Iop_Min16Sx8:
4641         case Iop_Min32Sx4: { /* Min*S ops: one ARMneon_VMINS on both operands */
4642            HReg res = newVRegV(env);
4643            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4644            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4645            UInt size; /* lane-size code: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit lanes */
4646            switch (e->Iex.Binop.op) {
4647               case Iop_Min8Sx16: size = 0; break;
4648               case Iop_Min16Sx8: size = 1; break;
4649               case Iop_Min32Sx4: size = 2; break;
4650               default: vpanic("Illegal element size in VMINS"); /* names the op emitted here */
4651            }
4652            addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
4653                                           res, argL, argR, size, True));
4654            return res; /* fresh vector vreg holding the result */
4655         }
4656         case Iop_Sar8x16:
4657         case Iop_Sar16x8:
4658         case Iop_Sar32x4:
4659         case Iop_Sar64x2: { /* Sar ops: emitted as ARMneon_VSAL with negated counts */
4660            HReg res = newVRegV(env);
4661            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4662            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4663            HReg argR2 = newVRegV(env); /* receives zero - argR */
4664            HReg zero = newVRegV(env);
4665            UInt size; /* lane-size code: 0 = 8, 1 = 16, 2 = 32, 3 = 64-bit lanes */
4666            switch (e->Iex.Binop.op) {
4667               case Iop_Sar8x16: size = 0; break;
4668               case Iop_Sar16x8: size = 1; break;
4669               case Iop_Sar32x4: size = 2; break;
4670               case Iop_Sar64x2: size = 3; break;
4671               default: vassert(0); /* outer case admits only the four ops above */
4672            }
4673            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); /* presumably an all-zero vector -- confirm ARMNImm_TI */
4674            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4675                                           argR2, zero, argR, size, True)); /* argR2 = zero - argR per lane */
4676            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4677                                          res, argL, argR2, size, True)); /* NOTE(review): relies on VSAL shifting right for negative counts -- confirm */
4678            return res;
4679         }
4680         case Iop_Sal8x16:
4681         case Iop_Sal16x8:
4682         case Iop_Sal32x4:
4683         case Iop_Sal64x2: {
4684            HReg res = newVRegV(env);
4685            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4686            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4687            UInt size;
4688            switch (e->Iex.Binop.op) {
4689               case Iop_Sal8x16: size = 0; break;
4690               case Iop_Sal16x8: size = 1; break;
4691               case Iop_Sal32x4: size = 2; break;
4692               case Iop_Sal64x2: size = 3; break;
4693               default: vassert(0);
4694            }
4695            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4696                                          res, argL, argR, size, True));
4697            return res;
4698         }
4699         case Iop_Shr8x16:
4700         case Iop_Shr16x8:
4701         case Iop_Shr32x4:
4702         case Iop_Shr64x2: { /* Shr ops: emitted as ARMneon_VSHL with negated counts */
4703            HReg res = newVRegV(env);
4704            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4705            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4706            HReg argR2 = newVRegV(env); /* receives zero - argR */
4707            HReg zero = newVRegV(env);
4708            UInt size; /* lane-size code: 0 = 8, 1 = 16, 2 = 32, 3 = 64-bit lanes */
4709            switch (e->Iex.Binop.op) {
4710               case Iop_Shr8x16: size = 0; break;
4711               case Iop_Shr16x8: size = 1; break;
4712               case Iop_Shr32x4: size = 2; break;
4713               case Iop_Shr64x2: size = 3; break;
4714               default: vassert(0); /* outer case admits only the four ops above */
4715            }
4716            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0))); /* presumably an all-zero vector -- confirm ARMNImm_TI */
4717            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4718                                           argR2, zero, argR, size, True)); /* argR2 = zero - argR per lane */
4719            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4720                                          res, argL, argR2, size, True)); /* NOTE(review): relies on VSHL shifting right for negative counts -- confirm */
4721            return res;
4722         }
4723         case Iop_Shl8x16:
4724         case Iop_Shl16x8:
4725         case Iop_Shl32x4:
4726         case Iop_Shl64x2: {
4727            HReg res = newVRegV(env);
4728            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4729            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4730            UInt size;
4731            switch (e->Iex.Binop.op) {
4732               case Iop_Shl8x16: size = 0; break;
4733               case Iop_Shl16x8: size = 1; break;
4734               case Iop_Shl32x4: size = 2; break;
4735               case Iop_Shl64x2: size = 3; break;
4736               default: vassert(0);
4737            }
4738            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4739                                          res, argL, argR, size, True));
4740            return res;
4741         }
4742         case Iop_QShl8x16:
4743         case Iop_QShl16x8:
4744         case Iop_QShl32x4:
4745         case Iop_QShl64x2: {
4746            HReg res = newVRegV(env);
4747            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4748            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4749            UInt size;
4750            switch (e->Iex.Binop.op) {
4751               case Iop_QShl8x16: size = 0; break;
4752               case Iop_QShl16x8: size = 1; break;
4753               case Iop_QShl32x4: size = 2; break;
4754               case Iop_QShl64x2: size = 3; break;
4755               default: vassert(0);
4756            }
4757            addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
4758                                          res, argL, argR, size, True));
4759            return res;
4760         }
4761         case Iop_QSal8x16:
4762         case Iop_QSal16x8:
4763         case Iop_QSal32x4:
4764         case Iop_QSal64x2: {
4765            HReg res = newVRegV(env);
4766            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4767            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4768            UInt size;
4769            switch (e->Iex.Binop.op) {
4770               case Iop_QSal8x16: size = 0; break;
4771               case Iop_QSal16x8: size = 1; break;
4772               case Iop_QSal32x4: size = 2; break;
4773               case Iop_QSal64x2: size = 3; break;
4774               default: vassert(0);
4775            }
4776            addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
4777                                          res, argL, argR, size, True));
4778            return res;
4779         }
4780         case Iop_QShlN8x16:
4781         case Iop_QShlN16x8:
4782         case Iop_QShlN32x4:
4783         case Iop_QShlN64x2: { /* QShlN ops: shift by constant, emitted as ARMneon_VQSHLNUU */
4784            HReg res = newVRegV(env);
4785            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4786            UInt size, imm; /* size = lane width (8/16/32/64) OR'd with the shift amount */
4787            if (e->Iex.Binop.arg2->tag != Iex_Const ||
4788                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4789               vpanic("ARM target supports Iop_QShlNAxB with constant "
4790                      "second argument only\n");
4791            }
4792            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4793            switch (e->Iex.Binop.op) { /* imm must stay below the width bit it is OR'd with */
4794               case Iop_QShlN8x16: vassert(imm < 8); size = 8 | imm; break;
4795               case Iop_QShlN16x8: vassert(imm < 16); size = 16 | imm; break;
4796               case Iop_QShlN32x4: vassert(imm < 32); size = 32 | imm; break;
4797               case Iop_QShlN64x2: vassert(imm < 64); size = 64 | imm; break;
4798               default: vassert(0); /* outer case admits only the four ops above */
4799            }
4800            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
4801                                          res, argL, size, True));
4802            return res;
4803         }
4804         case Iop_QShlN8Sx16:
4805         case Iop_QShlN16Sx8:
4806         case Iop_QShlN32Sx4:
4807         case Iop_QShlN64Sx2: { /* QShlN*S ops: shift by constant, emitted as ARMneon_VQSHLNUS */
4808            HReg res = newVRegV(env);
4809            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4810            UInt size, imm; /* size = lane width (8/16/32/64) OR'd with the shift amount */
4811            if (e->Iex.Binop.arg2->tag != Iex_Const ||
4812                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4813               vpanic("ARM target supports Iop_QShlNASxB with constant "
4814                      "second argument only\n");
4815            }
4816            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4817            switch (e->Iex.Binop.op) { /* imm must stay below the width bit it is OR'd with */
4818               case Iop_QShlN8Sx16: vassert(imm < 8); size = 8 | imm; break;
4819               case Iop_QShlN16Sx8: vassert(imm < 16); size = 16 | imm; break;
4820               case Iop_QShlN32Sx4: vassert(imm < 32); size = 32 | imm; break;
4821               case Iop_QShlN64Sx2: vassert(imm < 64); size = 64 | imm; break;
4822               default: vassert(0); /* outer case admits only the four ops above */
4823            }
4824            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
4825                                          res, argL, size, True));
4826            return res;
4827         }
4828         case Iop_QSalN8x16:
4829         case Iop_QSalN16x8:
4830         case Iop_QSalN32x4:
4831         case Iop_QSalN64x2: { /* QSalN ops: shift by constant, emitted as ARMneon_VQSHLNSS */
4832            HReg res = newVRegV(env);
4833            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4834            UInt size, imm; /* size = lane width (8/16/32/64) OR'd with the shift amount */
4835            if (e->Iex.Binop.arg2->tag != Iex_Const ||
4836                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4837               vpanic("ARM target supports Iop_QSalNAxB with constant "
4838                      "second argument only\n");
4839            }
4840            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
4841            switch (e->Iex.Binop.op) { /* imm must stay below the width bit it is OR'd with */
4842               case Iop_QSalN8x16: vassert(imm < 8); size = 8 | imm; break;
4843               case Iop_QSalN16x8: vassert(imm < 16); size = 16 | imm; break;
4844               case Iop_QSalN32x4: vassert(imm < 32); size = 32 | imm; break;
4845               case Iop_QSalN64x2: vassert(imm < 64); size = 64 | imm; break;
4846               default: vassert(0); /* outer case admits only the four ops above */
4847            }
4848            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
4849                                          res, argL, size, True));
4850            return res;
4851         }
4852         case Iop_ShrN8x16:
4853         case Iop_ShrN16x8:
4854         case Iop_ShrN32x4:
4855         case Iop_ShrN64x2: { /* ShrN ops: negate scalar count, DUP it, then ARMneon_VSHL */
4856            HReg res = newVRegV(env);
4857            HReg tmp = newVRegV(env); /* vector operand built from the negated count */
4858            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4859            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); /* shift count, selected as an integer expr */
4860            HReg argR2 = newVRegI(env); /* receives the negated count */
4861            UInt size; /* lane-size code: 0 = 8, 1 = 16, 2 = 32, 3 = 64-bit lanes */
4862            switch (e->Iex.Binop.op) {
4863               case Iop_ShrN8x16: size = 0; break;
4864               case Iop_ShrN16x8: size = 1; break;
4865               case Iop_ShrN32x4: size = 2; break;
4866               case Iop_ShrN64x2: size = 3; break;
4867               default: vassert(0); /* outer case admits only the four ops above */
4868            }
4869            addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR)); /* argR2 = -argR */
4870            addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
4871                                          tmp, argR2, 0, True)); /* presumably replicates argR2 into tmp's lanes -- confirm DUP size arg 0 */
4872            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4873                                          res, argL, tmp, size, True)); /* NOTE(review): relies on VSHL shifting right for negative counts -- confirm */
4874            return res;
4875         }
4876         case Iop_ShlN8x16:
4877         case Iop_ShlN16x8:
4878         case Iop_ShlN32x4:
4879         case Iop_ShlN64x2: {
4880            HReg res = newVRegV(env);
4881            HReg tmp = newVRegV(env);
4882            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4883            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4884            UInt size;
4885            switch (e->Iex.Binop.op) {
4886               case Iop_ShlN8x16: size = 0; break;
4887               case Iop_ShlN16x8: size = 1; break;
4888               case Iop_ShlN32x4: size = 2; break;
4889               case Iop_ShlN64x2: size = 3; break;
4890               default: vassert(0);
4891            }
4892            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
4893            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4894                                          res, argL, tmp, size, True));
4895            return res;
4896         }
4897         case Iop_SarN8x16:
4898         case Iop_SarN16x8:
4899         case Iop_SarN32x4:
4900         case Iop_SarN64x2: { /* SarN ops: negate scalar count, DUP it, then ARMneon_VSAL */
4901            HReg res = newVRegV(env);
4902            HReg tmp = newVRegV(env); /* vector operand built from the negated count */
4903            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4904            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2); /* shift count, selected as an integer expr */
4905            HReg argR2 = newVRegI(env); /* receives the negated count */
4906            UInt size; /* lane-size code: 0 = 8, 1 = 16, 2 = 32, 3 = 64-bit lanes */
4907            switch (e->Iex.Binop.op) {
4908               case Iop_SarN8x16: size = 0; break;
4909               case Iop_SarN16x8: size = 1; break;
4910               case Iop_SarN32x4: size = 2; break;
4911               case Iop_SarN64x2: size = 3; break;
4912               default: vassert(0); /* outer case admits only the four ops above */
4913            }
4914            addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR)); /* argR2 = -argR */
4915            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True)); /* presumably replicates argR2 into tmp's lanes -- confirm */
4916            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4917                                          res, argL, tmp, size, True)); /* NOTE(review): relies on VSAL shifting right for negative counts -- confirm */
4918            return res;
4919         }
4920         case Iop_CmpGT8Ux16:
4921         case Iop_CmpGT16Ux8:
4922         case Iop_CmpGT32Ux4: {
4923            HReg res = newVRegV(env);
4924            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4925            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4926            UInt size;
4927            switch (e->Iex.Binop.op) {
4928               case Iop_CmpGT8Ux16: size = 0; break;
4929               case Iop_CmpGT16Ux8: size = 1; break;
4930               case Iop_CmpGT32Ux4: size = 2; break;
4931               default: vassert(0);
4932            }
4933            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
4934                                           res, argL, argR, size, True));
4935            return res;
4936         }
4937         case Iop_CmpGT8Sx16:
4938         case Iop_CmpGT16Sx8:
4939         case Iop_CmpGT32Sx4: {
4940            HReg res = newVRegV(env);
4941            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4942            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4943            UInt size;
4944            switch (e->Iex.Binop.op) {
4945               case Iop_CmpGT8Sx16: size = 0; break;
4946               case Iop_CmpGT16Sx8: size = 1; break;
4947               case Iop_CmpGT32Sx4: size = 2; break;
4948               default: vassert(0);
4949            }
4950            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
4951                                           res, argL, argR, size, True));
4952            return res;
4953         }
4954         case Iop_CmpEQ8x16:
4955         case Iop_CmpEQ16x8:
4956         case Iop_CmpEQ32x4: {
4957            HReg res = newVRegV(env);
4958            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4959            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4960            UInt size;
4961            switch (e->Iex.Binop.op) {
4962               case Iop_CmpEQ8x16: size = 0; break;
4963               case Iop_CmpEQ16x8: size = 1; break;
4964               case Iop_CmpEQ32x4: size = 2; break;
4965               default: vassert(0);
4966            }
4967            addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
4968                                           res, argL, argR, size, True));
4969            return res;
4970         }
4971         case Iop_Mul8x16:
4972         case Iop_Mul16x8:
4973         case Iop_Mul32x4: {
4974            HReg res = newVRegV(env);
4975            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4976            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4977            UInt size = 0;
4978            switch(e->Iex.Binop.op) {
4979               case Iop_Mul8x16: size = 0; break;
4980               case Iop_Mul16x8: size = 1; break;
4981               case Iop_Mul32x4: size = 2; break;
4982               default: vassert(0);
4983            }
4984            addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
4985                                           res, argL, argR, size, True));
4986            return res;
4987         }
4988         case Iop_Mul32Fx4: {
4989            HReg res = newVRegV(env);
4990            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4991            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4992            UInt size = 0;
4993            addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
4994                                           res, argL, argR, size, True));
4995            return res;
4996         }
4997         case Iop_Mull8Ux8:
4998         case Iop_Mull16Ux4:
4999         case Iop_Mull32Ux2: {
5000            HReg res = newVRegV(env);
5001            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5002            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5003            UInt size = 0;
5004            switch(e->Iex.Binop.op) {
5005               case Iop_Mull8Ux8: size = 0; break;
5006               case Iop_Mull16Ux4: size = 1; break;
5007               case Iop_Mull32Ux2: size = 2; break;
5008               default: vassert(0);
5009            }
5010            addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
5011                                           res, argL, argR, size, True));
5012            return res;
5013         }
5014
5015         case Iop_Mull8Sx8:
5016         case Iop_Mull16Sx4:
5017         case Iop_Mull32Sx2: {
5018            HReg res = newVRegV(env);
5019            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5020            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5021            UInt size = 0;
5022            switch(e->Iex.Binop.op) {
5023               case Iop_Mull8Sx8: size = 0; break;
5024               case Iop_Mull16Sx4: size = 1; break;
5025               case Iop_Mull32Sx2: size = 2; break;
5026               default: vassert(0);
5027            }
5028            addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
5029                                           res, argL, argR, size, True));
5030            return res;
5031         }
5032
5033         case Iop_QDMulHi16Sx8:
5034         case Iop_QDMulHi32Sx4: {
5035            HReg res = newVRegV(env);
5036            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5037            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5038            UInt size = 0;
5039            switch(e->Iex.Binop.op) {
5040               case Iop_QDMulHi16Sx8: size = 1; break;
5041               case Iop_QDMulHi32Sx4: size = 2; break;
5042               default: vassert(0);
5043            }
5044            addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5045                                           res, argL, argR, size, True));
5046            return res;
5047         }
5048
5049         case Iop_QRDMulHi16Sx8:
5050         case Iop_QRDMulHi32Sx4: {
5051            HReg res = newVRegV(env);
5052            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5053            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5054            UInt size = 0;
5055            switch(e->Iex.Binop.op) {
5056               case Iop_QRDMulHi16Sx8: size = 1; break;
5057               case Iop_QRDMulHi32Sx4: size = 2; break;
5058               default: vassert(0);
5059            }
5060            addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5061                                           res, argL, argR, size, True));
5062            return res;
5063         }
5064
5065         case Iop_QDMulLong16Sx4:
5066         case Iop_QDMulLong32Sx2: {
5067            HReg res = newVRegV(env);
5068            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5069            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5070            UInt size = 0;
5071            switch(e->Iex.Binop.op) {
5072               case Iop_QDMulLong16Sx4: size = 1; break;
5073               case Iop_QDMulLong32Sx2: size = 2; break;
5074               default: vassert(0);
5075            }
5076            addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5077                                           res, argL, argR, size, True));
5078            return res;
5079         }
5080         case Iop_PolynomialMul8x16: {
5081            HReg res = newVRegV(env);
5082            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5083            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5084            UInt size = 0;
5085            addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5086                                           res, argL, argR, size, True));
5087            return res;
5088         }
5089         case Iop_Max32Fx4: {
5090            HReg res = newVRegV(env);
5091            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5092            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5093            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5094                                           res, argL, argR, 2, True));
5095            return res;
5096         }
5097         case Iop_Min32Fx4: {
5098            HReg res = newVRegV(env);
5099            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5100            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5101            addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5102                                           res, argL, argR, 2, True));
5103            return res;
5104         }
5105         case Iop_PwMax32Fx4: {
5106            HReg res = newVRegV(env);
5107            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5108            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5109            addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5110                                           res, argL, argR, 2, True));
5111            return res;
5112         }
5113         case Iop_PwMin32Fx4: {
5114            HReg res = newVRegV(env);
5115            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5116            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5117            addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5118                                           res, argL, argR, 2, True));
5119            return res;
5120         }
5121         case Iop_CmpGT32Fx4: {
5122            HReg res = newVRegV(env);
5123            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5124            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5125            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5126                                           res, argL, argR, 2, True));
5127            return res;
5128         }
5129         case Iop_CmpGE32Fx4: {
5130            HReg res = newVRegV(env);
5131            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5132            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5133            addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5134                                           res, argL, argR, 2, True));
5135            return res;
5136         }
5137         case Iop_CmpEQ32Fx4: {
5138            HReg res = newVRegV(env);
5139            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5140            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5141            addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5142                                           res, argL, argR, 2, True));
5143            return res;
5144         }
5145
5146         case Iop_PolynomialMull8x8: {
5147            HReg res = newVRegV(env);
5148            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5149            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5150            UInt size = 0;
5151            addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5152                                           res, argL, argR, size, True));
5153            return res;
5154         }
5155         case Iop_F32ToFixed32Ux4_RZ:
5156         case Iop_F32ToFixed32Sx4_RZ:
5157         case Iop_Fixed32UToF32x4_RN:
5158         case Iop_Fixed32SToF32x4_RN: {
5159            HReg res = newVRegV(env);
5160            HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5161            ARMNeonUnOp op;
5162            UInt imm6;
5163            if (e->Iex.Binop.arg2->tag != Iex_Const ||
5164               typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5165                  vpanic("ARM supports FP <-> Fixed conversion with constant "
5166                         "second argument less than 33 only\n");
5167            }
5168            imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5169            vassert(imm6 <= 32 && imm6 > 0);
5170            imm6 = 64 - imm6;
5171            switch(e->Iex.Binop.op) {
5172               case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5173               case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5174               case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5175               case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5176               default: vassert(0);
5177            }
5178            addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5179            return res;
5180         }
5181         /*
5182         FIXME remove if not used
5183         case Iop_VDup8x16:
5184         case Iop_VDup16x8:
5185         case Iop_VDup32x4: {
5186            HReg res = newVRegV(env);
5187            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5188            UInt imm4;
5189            UInt index;
5190            if (e->Iex.Binop.arg2->tag != Iex_Const ||
5191               typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5192                  vpanic("ARM supports Iop_VDup with constant "
5193                         "second argument less than 16 only\n");
5194            }
5195            index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5196            switch(e->Iex.Binop.op) {
5197               case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5198               case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5199               case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5200               default: vassert(0);
5201            }
5202            if (imm4 >= 16) {
5203               vpanic("ARM supports Iop_VDup with constant "
5204                      "second argument less than 16 only\n");
5205            }
5206            addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5207                                          res, argL, imm4, True));
5208            return res;
5209         }
5210         */
5211         case Iop_PwAdd8x16:
5212         case Iop_PwAdd16x8:
5213         case Iop_PwAdd32x4: {
5214            HReg res = newVRegV(env);
5215            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5216            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5217            UInt size = 0;
5218            switch(e->Iex.Binop.op) {
5219               case Iop_PwAdd8x16: size = 0; break;
5220               case Iop_PwAdd16x8: size = 1; break;
5221               case Iop_PwAdd32x4: size = 2; break;
5222               default: vassert(0);
5223            }
5224            addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5225                                           res, argL, argR, size, True));
5226            return res;
5227         }
5228         /* ... */
5229         default:
5230            break;
5231      }
5232   }
5233
5234   if (e->tag == Iex_Triop) {
5235      switch (e->Iex.Triop.op) {
5236         case Iop_ExtractV128: {
5237            HReg res = newVRegV(env);
5238            HReg argL = iselNeonExpr(env, e->Iex.Triop.arg1);
5239            HReg argR = iselNeonExpr(env, e->Iex.Triop.arg2);
5240            UInt imm4;
5241            if (e->Iex.Triop.arg3->tag != Iex_Const ||
5242                typeOfIRExpr(env->type_env, e->Iex.Triop.arg3) != Ity_I8) {
5243               vpanic("ARM target supports Iop_ExtractV128 with constant "
5244                      "third argument less than 16 only\n");
5245            }
5246            imm4 = e->Iex.Triop.arg3->Iex.Const.con->Ico.U8;
5247            if (imm4 >= 16) {
5248               vpanic("ARM target supports Iop_ExtractV128 with constant "
5249                      "third argument less than 16 only\n");
5250            }
5251            addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5252                                           res, argL, argR, imm4, True));
5253            return res;
5254         }
5255         default:
5256            break;
5257      }
5258   }
5259
5260   if (e->tag == Iex_Mux0X) {
5261      HReg r8;
5262      HReg rX  = iselNeonExpr(env, e->Iex.Mux0X.exprX);
5263      HReg r0  = iselNeonExpr(env, e->Iex.Mux0X.expr0);
5264      HReg dst = newVRegV(env);
5265      addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, rX, 4, True));
5266      r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5267      addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5268                                      ARMRI84_I84(0xFF,0)));
5269      addInstr(env, ARMInstr_NCMovQ(ARMcc_EQ, dst, r0));
5270      return dst;
5271   }
5272
5273  neon_expr_bad:
5274   ppIRExpr(e);
5275   vpanic("iselNeonExpr_wrk");
5276}
5277
5278/*---------------------------------------------------------*/
5279/*--- ISEL: Floating point expressions (64 bit)         ---*/
5280/*---------------------------------------------------------*/
5281
5282/* Compute a 64-bit floating point value into a register, the identity
5283   of which is returned.  As with iselIntExpr_R, the reg may be either
5284   real or virtual; in any case it must not be changed by subsequent
5285   code emitted by the caller.  */
5286
5287static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5288{
5289   HReg r = iselDblExpr_wrk( env, e );
5290#  if 0
5291   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5292#  endif
5293   vassert(hregClass(r) == HRcFlt64);
5294   vassert(hregIsVirtual(r));
5295   return r;
5296}
5297
5298/* DO NOT CALL THIS DIRECTLY */
5299static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
5300{
5301   IRType ty = typeOfIRExpr(env->type_env,e);
5302   vassert(e);
5303   vassert(ty == Ity_F64);
5304
5305   if (e->tag == Iex_RdTmp) {
5306      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5307   }
5308
5309   if (e->tag == Iex_Const) {
5310      /* Just handle the zero case. */
5311      IRConst* con = e->Iex.Const.con;
5312      if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
5313         HReg z32 = newVRegI(env);
5314         HReg dst = newVRegD(env);
5315         addInstr(env, ARMInstr_Imm32(z32, 0));
5316         addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
5317         return dst;
5318      }
5319   }
5320
5321   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5322      ARMAModeV* am;
5323      HReg res = newVRegD(env);
5324      vassert(e->Iex.Load.ty == Ity_F64);
5325      am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5326      addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5327      return res;
5328   }
5329
5330   if (e->tag == Iex_Get) {
5331      // XXX This won't work if offset > 1020 or is not 0 % 4.
5332      // In which case we'll have to generate more longwinded code.
5333      ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5334      HReg       res = newVRegD(env);
5335      addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5336      return res;
5337   }
5338
5339   if (e->tag == Iex_Unop) {
5340      switch (e->Iex.Unop.op) {
5341         case Iop_ReinterpI64asF64: {
5342            if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5343               return iselNeon64Expr(env, e->Iex.Unop.arg);
5344            } else {
5345               HReg srcHi, srcLo;
5346               HReg dst = newVRegD(env);
5347               iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
5348               addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
5349               return dst;
5350            }
5351         }
5352         case Iop_NegF64: {
5353            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5354            HReg dst = newVRegD(env);
5355            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
5356            return dst;
5357         }
5358         case Iop_AbsF64: {
5359            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5360            HReg dst = newVRegD(env);
5361            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
5362            return dst;
5363         }
5364         case Iop_F32toF64: {
5365            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5366            HReg dst = newVRegD(env);
5367            addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
5368            return dst;
5369         }
5370         case Iop_I32UtoF64:
5371         case Iop_I32StoF64: {
5372            HReg src   = iselIntExpr_R(env, e->Iex.Unop.arg);
5373            HReg f32   = newVRegF(env);
5374            HReg dst   = newVRegD(env);
5375            Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
5376            /* VMOV f32, src */
5377            addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
5378            /* FSITOD dst, f32 */
5379            addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
5380                                          dst, f32));
5381            return dst;
5382         }
5383         default:
5384            break;
5385      }
5386   }
5387
5388   if (e->tag == Iex_Binop) {
5389      switch (e->Iex.Binop.op) {
5390         case Iop_SqrtF64: {
5391            /* first arg is rounding mode; we ignore it. */
5392            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
5393            HReg dst = newVRegD(env);
5394            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
5395            return dst;
5396         }
5397         default:
5398            break;
5399      }
5400   }
5401
5402   if (e->tag == Iex_Triop) {
5403      switch (e->Iex.Triop.op) {
5404         case Iop_DivF64:
5405         case Iop_MulF64:
5406         case Iop_AddF64:
5407         case Iop_SubF64: {
5408            ARMVfpOp op = 0; /*INVALID*/
5409            HReg argL = iselDblExpr(env, e->Iex.Triop.arg2);
5410            HReg argR = iselDblExpr(env, e->Iex.Triop.arg3);
5411            HReg dst  = newVRegD(env);
5412            switch (e->Iex.Triop.op) {
5413               case Iop_DivF64: op = ARMvfp_DIV; break;
5414               case Iop_MulF64: op = ARMvfp_MUL; break;
5415               case Iop_AddF64: op = ARMvfp_ADD; break;
5416               case Iop_SubF64: op = ARMvfp_SUB; break;
5417               default: vassert(0);
5418            }
5419            addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
5420            return dst;
5421         }
5422         default:
5423            break;
5424      }
5425   }
5426
5427   if (e->tag == Iex_Mux0X) {
5428      if (ty == Ity_F64
5429          && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
5430         HReg r8;
5431         HReg rX  = iselDblExpr(env, e->Iex.Mux0X.exprX);
5432         HReg r0  = iselDblExpr(env, e->Iex.Mux0X.expr0);
5433         HReg dst = newVRegD(env);
5434         addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, rX));
5435         r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5436         addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5437                                         ARMRI84_I84(0xFF,0)));
5438         addInstr(env, ARMInstr_VCMovD(ARMcc_EQ, dst, r0));
5439         return dst;
5440      }
5441   }
5442
5443   ppIRExpr(e);
5444   vpanic("iselDblExpr_wrk");
5445}
5446
5447
5448/*---------------------------------------------------------*/
5449/*--- ISEL: Floating point expressions (32 bit)         ---*/
5450/*---------------------------------------------------------*/
5451
5452/* Compute a 32-bit floating point value into a register, the identity
5453   of which is returned.  As with iselIntExpr_R, the reg may be either
5454   real or virtual; in any case it must not be changed by subsequent
5455   code emitted by the caller.  */
5456
5457static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
5458{
5459   HReg r = iselFltExpr_wrk( env, e );
5460#  if 0
5461   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5462#  endif
5463   vassert(hregClass(r) == HRcFlt32);
5464   vassert(hregIsVirtual(r));
5465   return r;
5466}
5467
5468/* DO NOT CALL THIS DIRECTLY */
5469static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
5470{
5471   IRType ty = typeOfIRExpr(env->type_env,e);
5472   vassert(e);
5473   vassert(ty == Ity_F32);
5474
5475   if (e->tag == Iex_RdTmp) {
5476      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5477   }
5478
5479   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5480      ARMAModeV* am;
5481      HReg res = newVRegF(env);
5482      vassert(e->Iex.Load.ty == Ity_F32);
5483      am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5484      addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5485      return res;
5486   }
5487
5488   if (e->tag == Iex_Get) {
5489      // XXX This won't work if offset > 1020 or is not 0 % 4.
5490      // In which case we'll have to generate more longwinded code.
5491      ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5492      HReg       res = newVRegF(env);
5493      addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5494      return res;
5495   }
5496
5497   if (e->tag == Iex_Unop) {
5498      switch (e->Iex.Unop.op) {
5499         case Iop_ReinterpI32asF32: {
5500            HReg dst = newVRegF(env);
5501            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5502            addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
5503            return dst;
5504         }
5505         case Iop_NegF32: {
5506            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5507            HReg dst = newVRegF(env);
5508            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
5509            return dst;
5510         }
5511         case Iop_AbsF32: {
5512            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5513            HReg dst = newVRegF(env);
5514            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
5515            return dst;
5516         }
5517         default:
5518            break;
5519      }
5520   }
5521
5522   if (e->tag == Iex_Binop) {
5523      switch (e->Iex.Binop.op) {
5524         case Iop_SqrtF32: {
5525            /* first arg is rounding mode; we ignore it. */
5526            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
5527            HReg dst = newVRegF(env);
5528            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
5529            return dst;
5530         }
5531         case Iop_F64toF32: {
5532            HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
5533            set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5534            HReg valS = newVRegF(env);
5535            /* FCVTSD valS, valD */
5536            addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
5537            set_VFP_rounding_default(env);
5538            return valS;
5539         }
5540         default:
5541            break;
5542      }
5543   }
5544
5545   if (e->tag == Iex_Triop) {
5546      switch (e->Iex.Triop.op) {
5547         case Iop_DivF32:
5548         case Iop_MulF32:
5549         case Iop_AddF32:
5550         case Iop_SubF32: {
5551            ARMVfpOp op = 0; /*INVALID*/
5552            HReg argL = iselFltExpr(env, e->Iex.Triop.arg2);
5553            HReg argR = iselFltExpr(env, e->Iex.Triop.arg3);
5554            HReg dst  = newVRegF(env);
5555            switch (e->Iex.Triop.op) {
5556               case Iop_DivF32: op = ARMvfp_DIV; break;
5557               case Iop_MulF32: op = ARMvfp_MUL; break;
5558               case Iop_AddF32: op = ARMvfp_ADD; break;
5559               case Iop_SubF32: op = ARMvfp_SUB; break;
5560               default: vassert(0);
5561            }
5562            addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
5563            return dst;
5564         }
5565         default:
5566            break;
5567      }
5568   }
5569
5570   if (e->tag == Iex_Mux0X) {
5571      if (ty == Ity_F32
5572          && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
5573         HReg r8;
5574         HReg rX  = iselFltExpr(env, e->Iex.Mux0X.exprX);
5575         HReg r0  = iselFltExpr(env, e->Iex.Mux0X.expr0);
5576         HReg dst = newVRegF(env);
5577         addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, rX));
5578         r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5579         addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5580                                         ARMRI84_I84(0xFF,0)));
5581         addInstr(env, ARMInstr_VCMovS(ARMcc_EQ, dst, r0));
5582         return dst;
5583      }
5584   }
5585
5586   ppIRExpr(e);
5587   vpanic("iselFltExpr_wrk");
5588}
5589
5590
5591/*---------------------------------------------------------*/
5592/*--- ISEL: Statements                                  ---*/
5593/*---------------------------------------------------------*/
5594
/* Select instructions for one IR statement and append them to |env|.
   The statement kinds dealt with are Store, Put, WrTmp, Dirty, LLSC,
   MBE, IMark, NoOp and Exit.  A statement kind, or an operand type
   within a kind, that is not handled leaves the switch and reaches
   stmt_fail, where the statement is printed and vpanic is called.
   (One exception: a load-linked of an unhandled type hits vassert(0)
   instead.)  Nothing is returned. */
5595static void iselStmt ( ISelEnv* env, IRStmt* stmt )
5596{
   /* When vcode tracing is enabled, print the statement first. */
5597   if (vex_traceflags & VEX_TRACE_VCODE) {
5598      vex_printf("\n-- ");
5599      ppIRStmt(stmt);
5600      vex_printf("\n");
5601   }
5602   switch (stmt->tag) {
5603
5604   /* --------- STORE --------- */
5605   /* little-endian write to memory */
5606   case Ist_Store: {
5607      IRType    tya  = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
5608      IRType    tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
5609      IREndness end  = stmt->Ist.Store.end;
5610
      /* Only 32-bit addresses and little-endian stores are handled. */
5611      if (tya != Ity_I32 || end != Iend_LE)
5612         goto stmt_fail;
5613
      /* The store instruction is chosen by the type of the data. */
5614      if (tyd == Ity_I32) {
5615         HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5616         ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5617         addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
5618         return;
5619      }
5620      if (tyd == Ity_I16) {
5621         HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5622         ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
5623         addInstr(env, ARMInstr_LdSt16(False/*!isLoad*/,
5624                                       False/*!isSignedLoad*/, rD, am));
5625         return;
5626      }
5627      if (tyd == Ity_I8) {
5628         HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5629         ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5630         addInstr(env, ARMInstr_LdSt8U(False/*!isLoad*/, rD, am));
5631         return;
5632      }
5633      if (tyd == Ity_I64) {
5634         if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
            /* NEON: the value is computed by iselNeon64Expr and
               written with a single NLdStD. */
5635            HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
5636            ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5637            addInstr(env, ARMInstr_NLdStD(False, dD, am));
5638         } else {
            /* No NEON: the value comes back as a hi/lo register pair
               and is written with two 32-bit stores, hi at rA+4 and
               lo at rA+0. */
5639            HReg rDhi, rDlo, rA;
5640            iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
5641            rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
5642            addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDhi,
5643                                          ARMAMode1_RI(rA,4)));
5644            addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDlo,
5645                                          ARMAMode1_RI(rA,0)));
5646         }
5647         return;
5648      }
5649      if (tyd == Ity_F64) {
5650         HReg       dD = iselDblExpr(env, stmt->Ist.Store.data);
5651         ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5652         addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
5653         return;
5654      }
5655      if (tyd == Ity_F32) {
5656         HReg       fD = iselFltExpr(env, stmt->Ist.Store.data);
5657         ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5658         addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
5659         return;
5660      }
5661      if (tyd == Ity_V128) {
5662         HReg       qD = iselNeonExpr(env, stmt->Ist.Store.data);
5663         ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5664         addInstr(env, ARMInstr_NLdStQ(False, qD, am));
5665         return;
5666      }
5667
      /* Unhandled data type: leave the switch and panic below. */
5668      break;
5669   }
5670
5671   /* --------- PUT --------- */
5672   /* write guest state, fixed offset */
5673   case Ist_Put: {
5674       IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
5675
       /* Every case below addresses the guest state as r8 + offset. */
5676       if (tyd == Ity_I32) {
5677           HReg       rD = iselIntExpr_R(env, stmt->Ist.Put.data);
5678           ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
5679           addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
5680           return;
5681       }
5682       if (tyd == Ity_I64) {
5683          if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
             /* NEON: form r8 + offset in a temporary and store the
                64-bit register through it. */
5684             HReg addr = newVRegI(env);
5685             HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
5686             addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5687                                                stmt->Ist.Put.offset));
5688             addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
5689          } else {
             /* No NEON: two 32-bit stores, hi at offset+4 and lo at
                offset+0. */
5690             HReg rDhi, rDlo;
5691             ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
5692                                           stmt->Ist.Put.offset + 0);
5693             ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
5694                                           stmt->Ist.Put.offset + 4);
5695             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
5696             addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDhi, am4));
5697             addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDlo, am0));
5698          }
5699          return;
5700       }
5701       if (tyd == Ity_F64) {
5702          // XXX This won't work if offset > 1020 or is not 0 % 4.
5703          // In which case we'll have to generate more longwinded code.
5704          ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5705          HReg       rD = iselDblExpr(env, stmt->Ist.Put.data);
5706          addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
5707          return;
5708       }
5709       if (tyd == Ity_F32) {
5710          // XXX This won't work if offset > 1020 or is not 0 % 4.
5711          // In which case we'll have to generate more longwinded code.
5712          ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5713          HReg       rD = iselFltExpr(env, stmt->Ist.Put.data);
5714          addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
5715          return;
5716       }
5717       if (tyd == Ity_V128) {
          /* Form r8 + offset in a temporary and store the 128-bit
             register through it. */
5718          HReg addr = newVRegI(env);
5719          HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
5720          addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5721                                       stmt->Ist.Put.offset));
5722          addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
5723          return;
5724       }
       /* Unhandled data type: leave the switch and panic below. */
5725       break;
5726   }
5727
5728//zz   /* --------- Indexed PUT --------- */
5729//zz   /* write guest state, run-time offset */
5730//zz   case Ist_PutI: {
5731//zz      ARMAMode2* am2
5732//zz           = genGuestArrayOffset(
5733//zz               env, stmt->Ist.PutI.descr,
5734//zz               stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
5735//zz
5736//zz       IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
5737//zz
5738//zz       if (tyd == Ity_I8) {
5739//zz           HReg reg = iselIntExpr_R(env, stmt->Ist.PutI.data);
5740//zz           addInstr(env, ARMInstr_StoreB(reg, am2));
5741//zz           return;
5742//zz       }
5743//zz// CAB: Ity_I32, Ity_I16 ?
5744//zz       break;
5745//zz   }
5746
5747   /* --------- TMP --------- */
5748   /* assign value to temporary */
5749   case Ist_WrTmp: {
5750      IRTemp tmp = stmt->Ist.WrTmp.tmp;
5751      IRType ty = typeOfIRTemp(env->type_env, tmp);
5752
      /* In each case the value is computed and then copied into the
         register(s) that lookupIRTemp / lookupIRTemp64 give for tmp. */
5753      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
5754         ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
5755                                          env, stmt->Ist.WrTmp.data);
5756         HReg     dst  = lookupIRTemp(env, tmp);
5757         addInstr(env, ARMInstr_Mov(dst,ri84));
5758         return;
5759      }
5760      if (ty == Ity_I1) {
         /* Turn the condition code into 0 or 1: start from 0 and
            conditionally move in 1. */
5761         HReg        dst  = lookupIRTemp(env, tmp);
5762         ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
5763         addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
5764         addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
5765         return;
5766      }
5767      if (ty == Ity_I64) {
5768         if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5769            HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
5770            HReg dst = lookupIRTemp(env, tmp);
5771            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
5772         } else {
5773            HReg rHi, rLo, dstHi, dstLo;
5774            iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
5775            lookupIRTemp64( &dstHi, &dstLo, env, tmp);
5776            addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
5777            addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
5778         }
5779         return;
5780      }
5781      if (ty == Ity_F64) {
5782         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
5783         HReg dst = lookupIRTemp(env, tmp);
5784         addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
5785         return;
5786      }
5787      if (ty == Ity_F32) {
5788         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
5789         HReg dst = lookupIRTemp(env, tmp);
5790         addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
5791         return;
5792      }
5793      if (ty == Ity_V128) {
5794         HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
5795         HReg dst = lookupIRTemp(env, tmp);
5796         addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
5797         return;
5798      }
      /* Unhandled temporary type: leave the switch and panic below. */
5799      break;
5800   }
5801
5802   /* --------- Call to DIRTY helper --------- */
5803   /* call complex ("dirty") helper function */
5804   case Ist_Dirty: {
5805      IRType   retty;
5806      IRDirty* d = stmt->Ist.Dirty.details;
5807      Bool     passBBP = False;
5808
      /* needsBBP is only accepted when nFxState is nonzero. */
5809      if (d->nFxState == 0)
5810         vassert(!d->needsBBP);
5811
5812      passBBP = toBool(d->nFxState > 0 && d->needsBBP);
5813
5814      /* Marshal args, do the call, clear stack. */
5815      Bool ok = doHelperCall( env, passBBP, d->guard, d->cee, d->args );
5816      if (!ok)
5817         break; /* will go to stmt_fail: */
5818
5819      /* Now figure out what to do with the returned value, if any. */
5820      if (d->tmp == IRTemp_INVALID)
5821         /* No return value.  Nothing to do. */
5822         return;
5823
5824      retty = typeOfIRTemp(env->type_env, d->tmp);
5825
5826      if (retty == Ity_I64) {
5827         if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
            /* NEON: transfer r1:r0 into the single register bound to
               the temporary. */
5828            HReg tmp = lookupIRTemp(env, d->tmp);
5829            addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
5830                                                     hregARM_R0()));
5831         } else {
5832            HReg dstHi, dstLo;
5833            /* The returned value is in r1:r0.  Park it in the
5834               register-pair associated with tmp. */
5835            lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
5836            addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
5837            addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
5838         }
5839         return;
5840      }
5841      if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
5842         /* The returned value is in r0.  Park it in the register
5843            associated with tmp. */
5844         HReg dst = lookupIRTemp(env, d->tmp);
5845         addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
5846         return;
5847      }
5848
      /* Unhandled return type: leave the switch and panic below. */
5849      break;
5850   }
5851
5852   /* --------- Load Linked and Store Conditional --------- */
5853   case Ist_LLSC: {
      /* A NULL storedata marks a load-linked; otherwise this is a
         store-conditional. */
5854      if (stmt->Ist.LLSC.storedata == NULL) {
5855         /* LL */
5856         IRTemp res = stmt->Ist.LLSC.result;
5857         IRType ty  = typeOfIRTemp(env->type_env, res);
5858         if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
5859            Int  szB   = 0;
5860            HReg r_dst = lookupIRTemp(env, res);
5861            HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5862            switch (ty) {
5863               case Ity_I8:  szB = 1; break;
5864               case Ity_I16: szB = 2; break;
5865               case Ity_I32: szB = 4; break;
5866               default:      vassert(0);
5867            }
            /* The address is placed in r4 before the LdrEX and the
               loaded value is collected from r2 afterwards. */
5868            addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
5869            addInstr(env, ARMInstr_LdrEX(szB));
5870            addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
5871            return;
5872         }
5873         if (ty == Ity_I64) {
5874            HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5875            addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
5876            addInstr(env, ARMInstr_LdrEX(8));
5877            /* Result is in r3:r2.  On a non-NEON capable CPU, we must
5878               move it into a result register pair.  On a NEON capable
5879               CPU, the result register will be a 64 bit NEON
5880               register, so we must move it there instead. */
5881            if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5882               HReg dst = lookupIRTemp(env, res);
5883               addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
5884                                                        hregARM_R2()));
5885            } else {
5886               HReg r_dst_hi, r_dst_lo;
5887               lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
5888               addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
5889               addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
5890            }
5891            return;
5892         }
5893         /*NOTREACHED*/
5894         vassert(0);
5895      } else {
5896         /* SC */
5897         IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
5898         if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
5899            Int  szB = 0;
5900            HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
5901            HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5902            switch (tyd) {
5903               case Ity_I8:  szB = 1; break;
5904               case Ity_I16: szB = 2; break;
5905               case Ity_I32: szB = 4; break;
5906               default:      vassert(0);
5907            }
            /* The data is placed in r2 and the address in r4 before
               the StrEX. */
5908            addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
5909            addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
5910            addInstr(env, ARMInstr_StrEX(szB));
5911         } else {
5912            vassert(tyd == Ity_I64);
5913            /* This is really ugly.  There is no is/is-not NEON
5914               decision akin to the case for LL, because iselInt64Expr
5915               fudges this for us, and always gets the result into two
5916               GPRs even if this means moving it from a NEON
5917               register. */
5918            HReg rDhi, rDlo;
5919            iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
5920            HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
            /* The data is placed in r3:r2 (hi:lo) and the address in
               r4 before the StrEX. */
5921            addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
5922            addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
5923            addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
5924            addInstr(env, ARMInstr_StrEX(8));
5925         }
5926         /* now r0 is 1 if failed, 0 if success.  Change to IR
5927            conventions (0 is fail, 1 is success).  Also transfer
5928            result to r_res. */
5929         IRTemp   res   = stmt->Ist.LLSC.result;
5930         IRType   ty    = typeOfIRTemp(env->type_env, res);
5931         HReg     r_res = lookupIRTemp(env, res);
5932         ARMRI84* one   = ARMRI84_I84(1,0);
5933         vassert(ty == Ity_I1);
5934         addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
5935         /* And be conservative -- mask off all but the lowest bit */
5936         addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
5937         return;
5938      }
5939      break;
5940   }
5941
5942   /* --------- MEM FENCE --------- */
5943   case Ist_MBE:
5944      switch (stmt->Ist.MBE.event) {
5945         case Imbe_Fence:
5946            addInstr(env, ARMInstr_MFence());
5947            return;
5948         case Imbe_CancelReservation:
5949            addInstr(env, ARMInstr_CLREX());
5950            return;
5951         default:
5952            break;
5953      }
      /* Unhandled event kind: leave the switch and panic below. */
5954      break;
5955
5956   /* --------- INSTR MARK --------- */
5957   /* Doesn't generate any executable code ... */
5958   case Ist_IMark:
5959       return;
5960
5961   /* --------- NO-OP --------- */
5962   case Ist_NoOp:
5963       return;
5964
5965   /* --------- EXIT --------- */
5966   case Ist_Exit: {
5967      HReg        gnext;
5968      ARMCondCode cc;
      /* Only 32-bit constant destinations are accepted. */
5969      if (stmt->Ist.Exit.dst->tag != Ico_U32)
5970         vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
      /* Destination first, then the guard condition; env->savedLR is
         copied into r14 just before the conditional goto. */
5971      gnext = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
5972      cc    = iselCondCode(env, stmt->Ist.Exit.guard);
5973      addInstr(env, mk_iMOVds_RR(hregARM_R14(), env->savedLR));
5974      addInstr(env, ARMInstr_Goto(stmt->Ist.Exit.jk, cc, gnext));
5975      return;
5976   }
5977
5978   default: break;
5979   }
   /* Reached by every unhandled case: print the statement and stop. */
5980  stmt_fail:
5981   ppIRStmt(stmt);
5982   vpanic("iselStmt");
5983}
5984
5985
5986/*---------------------------------------------------------*/
5987/*--- ISEL: Basic block terminators (Nexts)             ---*/
5988/*---------------------------------------------------------*/
5989
5990static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
5991{
5992   HReg rDst;
5993   if (vex_traceflags & VEX_TRACE_VCODE) {
5994      vex_printf("\n-- goto {");
5995      ppIRJumpKind(jk);
5996      vex_printf("} ");
5997      ppIRExpr(next);
5998      vex_printf("\n");
5999   }
6000   rDst = iselIntExpr_R(env, next);
6001   addInstr(env, mk_iMOVds_RR(hregARM_R14(), env->savedLR));
6002   addInstr(env, ARMInstr_Goto(jk, ARMcc_AL, rDst));
6003}
6004
6005
6006/*---------------------------------------------------------*/
6007/*--- Insn selector top-level                           ---*/
6008/*---------------------------------------------------------*/
6009
6010/* Translate an entire SB to arm code. */
6011
6012HInstrArray* iselSB_ARM ( IRSB* bb, VexArch      arch_host,
6013                                    VexArchInfo* archinfo_host,
6014                                    VexAbiInfo*  vbi/*UNUSED*/ )
6015{
6016   Int      i, j;
6017   HReg     hreg, hregHI;
6018   ISelEnv* env;
6019   UInt     hwcaps_host = archinfo_host->hwcaps;
6020   static UInt counter = 0;
6021
6022   /* sanity ... */
6023   vassert(arch_host == VexArchARM);
6024
6025   /* hwcaps should not change from one ISEL call to another. */
6026   arm_hwcaps = hwcaps_host;
6027
6028   /* Make up an initial environment to use. */
6029   env = LibVEX_Alloc(sizeof(ISelEnv));
6030   env->vreg_ctr = 0;
6031
6032   /* Set up output code array. */
6033   env->code = newHInstrArray();
6034
6035   /* Copy BB's type env. */
6036   env->type_env = bb->tyenv;
6037
6038   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
6039      change as we go along. */
6040   env->n_vregmap = bb->tyenv->types_used;
6041   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
6042   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
6043
6044   /* For each IR temporary, allocate a suitably-kinded virtual
6045      register. */
6046   j = 0;
6047   for (i = 0; i < env->n_vregmap; i++) {
6048      hregHI = hreg = INVALID_HREG;
6049      switch (bb->tyenv->types[i]) {
6050         case Ity_I1:
6051         case Ity_I8:
6052         case Ity_I16:
6053         case Ity_I32:  hreg   = mkHReg(j++, HRcInt32, True); break;
6054         case Ity_I64:
6055            if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
6056               hreg = mkHReg(j++, HRcFlt64, True);
6057            } else {
6058               hregHI = mkHReg(j++, HRcInt32, True);
6059               hreg   = mkHReg(j++, HRcInt32, True);
6060            }
6061            break;
6062         case Ity_F32:  hreg   = mkHReg(j++, HRcFlt32, True); break;
6063         case Ity_F64:  hreg   = mkHReg(j++, HRcFlt64, True); break;
6064         case Ity_V128: hreg   = mkHReg(j++, HRcVec128, True); break;
6065         default: ppIRType(bb->tyenv->types[i]);
6066                  vpanic("iselBB: IRTemp type");
6067      }
6068      env->vregmap[i]   = hreg;
6069      env->vregmapHI[i] = hregHI;
6070   }
6071   env->vreg_ctr = j;
6072
6073   /* Keep a copy of the link reg, since any call to a helper function
6074      will trash it, and we can't get back to the dispatcher once that
6075      happens. */
6076   env->savedLR = newVRegI(env);
6077   addInstr(env, mk_iMOVds_RR(env->savedLR, hregARM_R14()));
6078
6079   /* Ok, finally we can iterate over the statements. */
6080   for (i = 0; i < bb->stmts_used; i++)
6081      iselStmt(env,bb->stmts[i]);
6082
6083   iselNext(env,bb->next,bb->jumpkind);
6084
6085   /* record the number of vregs we used. */
6086   env->code->n_vregs = env->vreg_ctr;
6087   counter++;
6088   return env->code;
6089}
6090
6091
6092/*---------------------------------------------------------------*/
6093/*--- end                                     host_arm_isel.c ---*/
6094/*---------------------------------------------------------------*/
6095