1
2/*---------------------------------------------------------------*/
3/*--- begin                                   host_arm_isel.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2010 OpenWorks LLP
11      info@open-works.net
12
13   NEON support is
14   Copyright (C) 2010-2010 Samsung Electronics
15   contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16              and Kirill Batuzov <batuzovk@ispras.ru>
17
18   This program is free software; you can redistribute it and/or
19   modify it under the terms of the GNU General Public License as
20   published by the Free Software Foundation; either version 2 of the
21   License, or (at your option) any later version.
22
23   This program is distributed in the hope that it will be useful, but
24   WITHOUT ANY WARRANTY; without even the implied warranty of
25   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
26   General Public License for more details.
27
28   You should have received a copy of the GNU General Public License
29   along with this program; if not, write to the Free Software
30   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31   02110-1301, USA.
32
33   The GNU General Public License is contained in the file COPYING.
34*/
35
36#include "libvex_basictypes.h"
37#include "libvex_ir.h"
38#include "libvex.h"
39#include "ir_match.h"
40
41#include "main_util.h"
42#include "main_globals.h"
43#include "host_generic_regs.h"
44#include "host_generic_simd64.h"  // for 32-bit SIMD helpers
45#include "host_arm_defs.h"
46
47
48/*---------------------------------------------------------*/
49/*--- ARMvfp control word stuff                         ---*/
50/*---------------------------------------------------------*/
51
52/* Vex-generated code expects to run with the FPU set as follows: all
53   exceptions masked, round-to-nearest, non-vector mode, with the NZCV
54   flags cleared, and FZ (flush to zero) disabled.  Curiously enough,
55   this corresponds to a FPSCR value of zero.
56
57   fpscr should therefore be zero on entry to Vex-generated code, and
58   should be unchanged at exit.  (Or at least the bottom 28 bits
59   should be zero).
60*/
61
62#define DEFAULT_FPSCR 0
63
64
65/*---------------------------------------------------------*/
66/*--- ISelEnv                                           ---*/
67/*---------------------------------------------------------*/
68
69/* This carries around:
70
71   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
72     might encounter.  This is computed before insn selection starts,
73     and does not change.
74
75   - A mapping from IRTemp to HReg.  This tells the insn selector
76     which virtual register(s) are associated with each IRTemp
77     temporary.  This is computed before insn selection starts, and
78     does not change.  We expect this mapping to map precisely the
79     same set of IRTemps as the type mapping does.
80
81        - vregmap   holds the primary register for the IRTemp.
82        - vregmapHI is only used for 64-bit integer-typed
83             IRTemps.  It holds the identity of a second
84             32-bit virtual HReg, which holds the high half
85             of the value.
86
87   - The name of the vreg in which we stash a copy of the link reg, so
88     helper functions don't kill it.
89
90   - The code array, that is, the insns selected so far.
91
92   - A counter, for generating new virtual registers.
93
94   - The host hardware capabilities word.  This is set at the start
95     and does not change.
96
97   Note, this is all host-independent.  */
98
99typedef
100   struct {
101      IRTypeEnv*   type_env;
102
103      HReg*        vregmap;
104      HReg*        vregmapHI;
105      Int          n_vregmap;
106
107      HReg         savedLR;
108
109      HInstrArray* code;
110
111      Int          vreg_ctr;
112
113      UInt         hwcaps;
114   }
115   ISelEnv;
116
117static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
118{
119   vassert(tmp >= 0);
120   vassert(tmp < env->n_vregmap);
121   return env->vregmap[tmp];
122}
123
124static void lookupIRTemp64 ( HReg* vrHI, HReg* vrLO, ISelEnv* env, IRTemp tmp )
125{
126   vassert(tmp >= 0);
127   vassert(tmp < env->n_vregmap);
128   vassert(env->vregmapHI[tmp] != INVALID_HREG);
129   *vrLO = env->vregmap[tmp];
130   *vrHI = env->vregmapHI[tmp];
131}
132
133static void addInstr ( ISelEnv* env, ARMInstr* instr )
134{
135   addHInstr(env->code, instr);
136   if (vex_traceflags & VEX_TRACE_VCODE) {
137      ppARMInstr(instr);
138      vex_printf("\n");
139   }
140#if 0
141   if (instr->tag == ARMin_NUnary || instr->tag == ARMin_NBinary
142         || instr->tag == ARMin_NUnaryS || instr->tag == ARMin_NBinaryS
143         || instr->tag == ARMin_NDual || instr->tag == ARMin_NShift) {
144      ppARMInstr(instr);
145      vex_printf("\n");
146   }
147#endif
148}
149
150static HReg newVRegI ( ISelEnv* env )
151{
152   HReg reg = mkHReg(env->vreg_ctr, HRcInt32, True/*virtual reg*/);
153   env->vreg_ctr++;
154   return reg;
155}
156
157static HReg newVRegD ( ISelEnv* env )
158{
159   HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
160   env->vreg_ctr++;
161   return reg;
162}
163
164static HReg newVRegF ( ISelEnv* env )
165{
166   HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
167   env->vreg_ctr++;
168   return reg;
169}
170
171static HReg newVRegV ( ISelEnv* env )
172{
173   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
174   env->vreg_ctr++;
175   return reg;
176}
177
178/* These are duplicated in guest_arm_toIR.c */
179static IRExpr* unop ( IROp op, IRExpr* a )
180{
181   return IRExpr_Unop(op, a);
182}
183
184static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
185{
186   return IRExpr_Binop(op, a1, a2);
187}
188
189static IRExpr* bind ( Int binder )
190{
191   return IRExpr_Binder(binder);
192}
193
194
195/*---------------------------------------------------------*/
196/*--- ISEL: Forward declarations                        ---*/
197/*---------------------------------------------------------*/
198
/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk functions do the real work, but must not be called
   directly.  For each XXX, iselXXX calls its iselXXX_wrk
   counterpart and then sanity-checks the result (for example, that
   returned registers are virtual).
*/
205static ARMAMode1*  iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e );
206static ARMAMode1*  iselIntExpr_AMode1     ( ISelEnv* env, IRExpr* e );
207
208static ARMAMode2*  iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e );
209static ARMAMode2*  iselIntExpr_AMode2     ( ISelEnv* env, IRExpr* e );
210
211static ARMAModeV*  iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
212static ARMAModeV*  iselIntExpr_AModeV     ( ISelEnv* env, IRExpr* e );
213
214static ARMAModeN* iselIntExpr_AModeN_wrk  ( ISelEnv* env, IRExpr* e );
215static ARMAModeN* iselIntExpr_AModeN      ( ISelEnv* env, IRExpr* e );
216
217static ARMRI84*    iselIntExpr_RI84_wrk
218        ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
219static ARMRI84*    iselIntExpr_RI84
220        ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
221
222static ARMRI5*     iselIntExpr_RI5_wrk    ( ISelEnv* env, IRExpr* e );
223static ARMRI5*     iselIntExpr_RI5        ( ISelEnv* env, IRExpr* e );
224
225static ARMCondCode iselCondCode_wrk       ( ISelEnv* env, IRExpr* e );
226static ARMCondCode iselCondCode           ( ISelEnv* env, IRExpr* e );
227
228static HReg        iselIntExpr_R_wrk      ( ISelEnv* env, IRExpr* e );
229static HReg        iselIntExpr_R          ( ISelEnv* env, IRExpr* e );
230
231static void        iselInt64Expr_wrk      ( HReg* rHi, HReg* rLo,
232                                            ISelEnv* env, IRExpr* e );
233static void        iselInt64Expr          ( HReg* rHi, HReg* rLo,
234                                            ISelEnv* env, IRExpr* e );
235
236static HReg        iselDblExpr_wrk        ( ISelEnv* env, IRExpr* e );
237static HReg        iselDblExpr            ( ISelEnv* env, IRExpr* e );
238
239static HReg        iselFltExpr_wrk        ( ISelEnv* env, IRExpr* e );
240static HReg        iselFltExpr            ( ISelEnv* env, IRExpr* e );
241
242static HReg        iselNeon64Expr_wrk     ( ISelEnv* env, IRExpr* e );
243static HReg        iselNeon64Expr         ( ISelEnv* env, IRExpr* e );
244
245static HReg        iselNeonExpr_wrk       ( ISelEnv* env, IRExpr* e );
246static HReg        iselNeonExpr           ( ISelEnv* env, IRExpr* e );
247
248/*---------------------------------------------------------*/
249/*--- ISEL: Misc helpers                                ---*/
250/*---------------------------------------------------------*/
251
252static UInt ROR32 ( UInt x, UInt sh ) {
253   vassert(sh >= 0 && sh < 32);
254   if (sh == 0)
255      return x;
256   else
257      return (x << (32-sh)) | (x >> sh);
258}
259
260/* Figure out if 'u' fits in the special shifter-operand 8x4 immediate
261   form, and if so return the components. */
262static Bool fitsIn8x4 ( /*OUT*/UInt* u8, /*OUT*/UInt* u4, UInt u )
263{
264   UInt i;
265   for (i = 0; i < 16; i++) {
266      if (0 == (u & 0xFFFFFF00)) {
267         *u8 = u;
268         *u4 = i;
269         return True;
270      }
271      u = ROR32(u, 30);
272   }
273   vassert(i == 16);
274   return False;
275}
276
277/* Make a int reg-reg move. */
278static ARMInstr* mk_iMOVds_RR ( HReg dst, HReg src )
279{
280   vassert(hregClass(src) == HRcInt32);
281   vassert(hregClass(dst) == HRcInt32);
282   return ARMInstr_Mov(dst, ARMRI84_R(src));
283}
284
285/* Set the VFP unit's rounding mode to default (round to nearest). */
286static void set_VFP_rounding_default ( ISelEnv* env )
287{
288   /* mov rTmp, #DEFAULT_FPSCR
289      fmxr fpscr, rTmp
290   */
291   HReg rTmp = newVRegI(env);
292   addInstr(env, ARMInstr_Imm32(rTmp, DEFAULT_FPSCR));
293   addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, rTmp));
294}
295
296/* Mess with the VFP unit's rounding mode: 'mode' is an I32-typed
297   expression denoting a value in the range 0 .. 3, indicating a round
298   mode encoded as per type IRRoundingMode.  Set FPSCR to have the
299   same rounding.
300*/
301static
302void set_VFP_rounding_mode ( ISelEnv* env, IRExpr* mode )
303{
304   /* This isn't simple, because 'mode' carries an IR rounding
305      encoding, and we need to translate that to an ARMvfp one:
306      The IR encoding:
307         00  to nearest (the default)
308         10  to +infinity
309         01  to -infinity
310         11  to zero
311      The ARMvfp encoding:
312         00  to nearest
313         01  to +infinity
314         10  to -infinity
315         11  to zero
316      Easy enough to do; just swap the two bits.
317   */
318   HReg irrm = iselIntExpr_R(env, mode);
319   HReg tL   = newVRegI(env);
320   HReg tR   = newVRegI(env);
321   HReg t3   = newVRegI(env);
322   /* tL = irrm << 1;
323      tR = irrm >> 1;  if we're lucky, these will issue together
324      tL &= 2;
325      tR &= 1;         ditto
326      t3 = tL | tR;
327      t3 <<= 22;
328      fmxr fpscr, t3
329   */
330   addInstr(env, ARMInstr_Shift(ARMsh_SHL, tL, irrm, ARMRI5_I5(1)));
331   addInstr(env, ARMInstr_Shift(ARMsh_SHR, tR, irrm, ARMRI5_I5(1)));
332   addInstr(env, ARMInstr_Alu(ARMalu_AND, tL, tL, ARMRI84_I84(2,0)));
333   addInstr(env, ARMInstr_Alu(ARMalu_AND, tR, tR, ARMRI84_I84(1,0)));
334   addInstr(env, ARMInstr_Alu(ARMalu_OR, t3, tL, ARMRI84_R(tR)));
335   addInstr(env, ARMInstr_Shift(ARMsh_SHL, t3, t3, ARMRI5_I5(22)));
336   addInstr(env, ARMInstr_FPSCR(True/*toFPSCR*/, t3));
337}
338
339
340/*---------------------------------------------------------*/
341/*--- ISEL: Function call helpers                       ---*/
342/*---------------------------------------------------------*/
343
344/* Used only in doHelperCall.  See big comment in doHelperCall re
345   handling of register-parameter args.  This function figures out
346   whether evaluation of an expression might require use of a fixed
347   register.  If in doubt return True (safe but suboptimal).
348*/
349static
350Bool mightRequireFixedRegs ( IRExpr* e )
351{
352   switch (e->tag) {
353   case Iex_RdTmp: case Iex_Const: case Iex_Get:
354      return False;
355   default:
356      return True;
357   }
358}
359
360
361/* Do a complete function call.  guard is a Ity_Bit expression
362   indicating whether or not the call happens.  If guard==NULL, the
363   call is unconditional.  Returns True iff it managed to handle this
364   combination of arg/return types, else returns False. */
365
366static
367Bool doHelperCall ( ISelEnv* env,
368                    Bool passBBP,
369                    IRExpr* guard, IRCallee* cee, IRExpr** args )
370{
371   ARMCondCode cc;
372   HReg        argregs[ARM_N_ARGREGS];
373   HReg        tmpregs[ARM_N_ARGREGS];
374   Bool        go_fast;
375   Int         n_args, i, nextArgReg;
376   ULong       target;
377
378   vassert(ARM_N_ARGREGS == 4);
379
380   /* Marshal args for a call and do the call.
381
382      If passBBP is True, r8 (the baseblock pointer) is to be passed
383      as the first arg.
384
385      This function only deals with a tiny set of possibilities, which
386      cover all helpers in practice.  The restrictions are that only
387      arguments in registers are supported, hence only ARM_N_REGPARMS
388      x 32 integer bits in total can be passed.  In fact the only
389      supported arg types are I32 and I64.
390
391      Generating code which is both efficient and correct when
392      parameters are to be passed in registers is difficult, for the
393      reasons elaborated in detail in comments attached to
394      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
395      of the method described in those comments.
396
397      The problem is split into two cases: the fast scheme and the
398      slow scheme.  In the fast scheme, arguments are computed
399      directly into the target (real) registers.  This is only safe
400      when we can be sure that computation of each argument will not
401      trash any real registers set by computation of any other
402      argument.
403
404      In the slow scheme, all args are first computed into vregs, and
405      once they are all done, they are moved to the relevant real
406      regs.  This always gives correct code, but it also gives a bunch
407      of vreg-to-rreg moves which are usually redundant but are hard
408      for the register allocator to get rid of.
409
410      To decide which scheme to use, all argument expressions are
411      first examined.  If they are all so simple that it is clear they
412      will be evaluated without use of any fixed registers, use the
413      fast scheme, else use the slow scheme.  Note also that only
414      unconditional calls may use the fast scheme, since having to
415      compute a condition expression could itself trash real
416      registers.
417
418      Note this requires being able to examine an expression and
419      determine whether or not evaluation of it might use a fixed
420      register.  That requires knowledge of how the rest of this insn
421      selector works.  Currently just the following 3 are regarded as
422      safe -- hopefully they cover the majority of arguments in
423      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
424   */
425
426   /* Note that the cee->regparms field is meaningless on ARM hosts
427      (since there is only one calling convention) and so we always
428      ignore it. */
429
430   n_args = 0;
431   for (i = 0; args[i]; i++)
432      n_args++;
433
434   argregs[0] = hregARM_R0();
435   argregs[1] = hregARM_R1();
436   argregs[2] = hregARM_R2();
437   argregs[3] = hregARM_R3();
438
439   tmpregs[0] = tmpregs[1] = tmpregs[2] =
440   tmpregs[3] = INVALID_HREG;
441
442   /* First decide which scheme (slow or fast) is to be used.  First
443      assume the fast scheme, and select slow if any contraindications
444      (wow) appear. */
445
446   go_fast = True;
447
448   if (guard) {
449      if (guard->tag == Iex_Const
450          && guard->Iex.Const.con->tag == Ico_U1
451          && guard->Iex.Const.con->Ico.U1 == True) {
452         /* unconditional */
453      } else {
454         /* Not manifestly unconditional -- be conservative. */
455         go_fast = False;
456      }
457   }
458
459   if (go_fast) {
460      for (i = 0; i < n_args; i++) {
461         if (mightRequireFixedRegs(args[i])) {
462            go_fast = False;
463            break;
464         }
465      }
466   }
467   /* At this point the scheme to use has been established.  Generate
468      code to get the arg values into the argument rregs.  If we run
469      out of arg regs, give up. */
470
471   if (go_fast) {
472
473      /* FAST SCHEME */
474      nextArgReg = 0;
475      if (passBBP) {
476         addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
477                                     hregARM_R8() ));
478         nextArgReg++;
479      }
480
481      for (i = 0; i < n_args; i++) {
482         IRType aTy = typeOfIRExpr(env->type_env, args[i]);
483         if (nextArgReg >= ARM_N_ARGREGS)
484            return False; /* out of argregs */
485         if (aTy == Ity_I32) {
486            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg],
487                                        iselIntExpr_R(env, args[i]) ));
488            nextArgReg++;
489         }
490         else if (aTy == Ity_I64) {
491            /* 64-bit args must be passed in an a reg-pair of the form
492               n:n+1, where n is even.  Hence either r0:r1 or r2:r3.
493               On a little-endian host, the less significant word is
494               passed in the lower-numbered register. */
495            if (nextArgReg & 1) {
496               if (nextArgReg >= ARM_N_ARGREGS)
497                  return False; /* out of argregs */
498               addInstr(env, ARMInstr_Imm32( argregs[nextArgReg], 0xAA ));
499               nextArgReg++;
500            }
501            if (nextArgReg >= ARM_N_ARGREGS)
502               return False; /* out of argregs */
503            HReg raHi, raLo;
504            iselInt64Expr(&raHi, &raLo, env, args[i]);
505            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raLo ));
506            nextArgReg++;
507            addInstr(env, mk_iMOVds_RR( argregs[nextArgReg], raHi ));
508            nextArgReg++;
509         }
510         else
511            return False; /* unhandled arg type */
512      }
513
514      /* Fast scheme only applies for unconditional calls.  Hence: */
515      cc = ARMcc_AL;
516
517   } else {
518
519      /* SLOW SCHEME; move via temporaries */
520      nextArgReg = 0;
521
522      if (passBBP) {
523         /* This is pretty stupid; better to move directly to r0
524            after the rest of the args are done. */
525         tmpregs[nextArgReg] = newVRegI(env);
526         addInstr(env, mk_iMOVds_RR( tmpregs[nextArgReg],
527                                     hregARM_R8() ));
528         nextArgReg++;
529      }
530
531      for (i = 0; i < n_args; i++) {
532         IRType aTy = typeOfIRExpr(env->type_env, args[i]);
533         if (nextArgReg >= ARM_N_ARGREGS)
534            return False; /* out of argregs */
535         if (aTy == Ity_I32) {
536            tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
537            nextArgReg++;
538         }
539         else if (aTy == Ity_I64) {
540            /* Same comment applies as in the Fast-scheme case. */
541            if (nextArgReg & 1)
542               nextArgReg++;
543            if (nextArgReg + 1 >= ARM_N_ARGREGS)
544               return False; /* out of argregs */
545            HReg raHi, raLo;
546            iselInt64Expr(&raHi, &raLo, env, args[i]);
547            tmpregs[nextArgReg] = raLo;
548            nextArgReg++;
549            tmpregs[nextArgReg] = raHi;
550            nextArgReg++;
551         }
552      }
553
554      /* Now we can compute the condition.  We can't do it earlier
555         because the argument computations could trash the condition
556         codes.  Be a bit clever to handle the common case where the
557         guard is 1:Bit. */
558      cc = ARMcc_AL;
559      if (guard) {
560         if (guard->tag == Iex_Const
561             && guard->Iex.Const.con->tag == Ico_U1
562             && guard->Iex.Const.con->Ico.U1 == True) {
563            /* unconditional -- do nothing */
564         } else {
565            cc = iselCondCode( env, guard );
566         }
567      }
568
569      /* Move the args to their final destinations. */
570      for (i = 0; i < nextArgReg; i++) {
571         if (tmpregs[i] == INVALID_HREG) { // Skip invalid regs
572            addInstr(env, ARMInstr_Imm32( argregs[i], 0xAA ));
573            continue;
574         }
575         /* None of these insns, including any spill code that might
576            be generated, may alter the condition codes. */
577         addInstr( env, mk_iMOVds_RR( argregs[i], tmpregs[i] ) );
578      }
579
580   }
581
582   /* Should be assured by checks above */
583   vassert(nextArgReg <= ARM_N_ARGREGS);
584
585   target = (HWord)Ptr_to_ULong(cee->addr);
586
587   /* nextArgReg doles out argument registers.  Since these are
588      assigned in the order r0, r1, r2, r3, its numeric value at this
589      point, which must be between 0 and 4 inclusive, is going to be
590      equal to the number of arg regs in use for the call.  Hence bake
591      that number into the call (we'll need to know it when doing
592      register allocation, to know what regs the call reads.)
593
594      There is a bit of a twist -- harmless but worth recording.
595      Suppose the arg types are (Ity_I32, Ity_I64).  Then we will have
596      the first arg in r0 and the second in r3:r2, but r1 isn't used.
597      We nevertheless have nextArgReg==4 and bake that into the call
598      instruction.  This will mean the register allocator wil believe
599      this insn reads r1 when in fact it doesn't.  But that's
600      harmless; it just artificially extends the live range of r1
601      unnecessarily.  The best fix would be to put into the
602      instruction, a bitmask indicating which of r0/1/2/3 carry live
603      values.  But that's too much hassle. */
604
605   /* Finally, the call itself. */
606   addInstr(env, ARMInstr_Call( cc, target, nextArgReg ));
607
608   return True; /* success */
609}
610
611
612/*---------------------------------------------------------*/
613/*--- ISEL: Integer expressions (32/16/8 bit)           ---*/
614/*---------------------------------------------------------*/
615
616/* Select insns for an integer-typed expression, and add them to the
617   code list.  Return a reg holding the result.  This reg will be a
618   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
619   want to modify it, ask for a new vreg, copy it in there, and modify
620   the copy.  The register allocator will do its best to map both
621   vregs to the same real register, so the copies will often disappear
622   later in the game.
623
624   This should handle expressions of 32, 16 and 8-bit type.  All
625   results are returned in a 32-bit register.  For 16- and 8-bit
626   expressions, the upper 16/24 bits are arbitrary, so you should mask
627   or sign extend partial values if necessary.
628*/
629
630/* --------------------- AMode1 --------------------- */
631
632/* Return an AMode1 which computes the value of the specified
633   expression, possibly also adding insns to the code list as a
634   result.  The expression may only be a 32-bit one.
635*/
636
637static Bool sane_AMode1 ( ARMAMode1* am )
638{
639   switch (am->tag) {
640      case ARMam1_RI:
641         return
642            toBool( hregClass(am->ARMam1.RI.reg) == HRcInt32
643                    && (hregIsVirtual(am->ARMam1.RI.reg)
644                        || am->ARMam1.RI.reg == hregARM_R8())
645                    && am->ARMam1.RI.simm13 >= -4095
646                    && am->ARMam1.RI.simm13 <= 4095 );
647      case ARMam1_RRS:
648         return
649            toBool( hregClass(am->ARMam1.RRS.base) == HRcInt32
650                    && hregIsVirtual(am->ARMam1.RRS.base)
651                    && hregClass(am->ARMam1.RRS.index) == HRcInt32
652                    && hregIsVirtual(am->ARMam1.RRS.index)
653                    && am->ARMam1.RRS.shift >= 0
654                    && am->ARMam1.RRS.shift <= 3 );
655      default:
656         vpanic("sane_AMode: unknown ARM AMode1 tag");
657   }
658}
659
660static ARMAMode1* iselIntExpr_AMode1 ( ISelEnv* env, IRExpr* e )
661{
662   ARMAMode1* am = iselIntExpr_AMode1_wrk(env, e);
663   vassert(sane_AMode1(am));
664   return am;
665}
666
667static ARMAMode1* iselIntExpr_AMode1_wrk ( ISelEnv* env, IRExpr* e )
668{
669   IRType ty = typeOfIRExpr(env->type_env,e);
670   vassert(ty == Ity_I32);
671
672   /* FIXME: add RRS matching */
673
674   /* {Add32,Sub32}(expr,simm13) */
675   if (e->tag == Iex_Binop
676       && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
677       && e->Iex.Binop.arg2->tag == Iex_Const
678       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
679      Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
680      if (simm >= -4095 && simm <= 4095) {
681         HReg reg;
682         if (e->Iex.Binop.op == Iop_Sub32)
683            simm = -simm;
684         reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
685         return ARMAMode1_RI(reg, simm);
686      }
687   }
688
689   /* Doesn't match anything in particular.  Generate it into
690      a register and use that. */
691   {
692      HReg reg = iselIntExpr_R(env, e);
693      return ARMAMode1_RI(reg, 0);
694   }
695
696}
697
698
699/* --------------------- AMode2 --------------------- */
700
701/* Return an AMode2 which computes the value of the specified
702   expression, possibly also adding insns to the code list as a
703   result.  The expression may only be a 32-bit one.
704*/
705
706static Bool sane_AMode2 ( ARMAMode2* am )
707{
708   switch (am->tag) {
709      case ARMam2_RI:
710         return
711            toBool( hregClass(am->ARMam2.RI.reg) == HRcInt32
712                    && hregIsVirtual(am->ARMam2.RI.reg)
713                    && am->ARMam2.RI.simm9 >= -255
714                    && am->ARMam2.RI.simm9 <= 255 );
715      case ARMam2_RR:
716         return
717            toBool( hregClass(am->ARMam2.RR.base) == HRcInt32
718                    && hregIsVirtual(am->ARMam2.RR.base)
719                    && hregClass(am->ARMam2.RR.index) == HRcInt32
720                    && hregIsVirtual(am->ARMam2.RR.index) );
721      default:
722         vpanic("sane_AMode: unknown ARM AMode2 tag");
723   }
724}
725
726static ARMAMode2* iselIntExpr_AMode2 ( ISelEnv* env, IRExpr* e )
727{
728   ARMAMode2* am = iselIntExpr_AMode2_wrk(env, e);
729   vassert(sane_AMode2(am));
730   return am;
731}
732
733static ARMAMode2* iselIntExpr_AMode2_wrk ( ISelEnv* env, IRExpr* e )
734{
735   IRType ty = typeOfIRExpr(env->type_env,e);
736   vassert(ty == Ity_I32);
737
738   /* FIXME: add RR matching */
739
740   /* {Add32,Sub32}(expr,simm8) */
741   if (e->tag == Iex_Binop
742       && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
743       && e->Iex.Binop.arg2->tag == Iex_Const
744       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
745      Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
746      if (simm >= -255 && simm <= 255) {
747         HReg reg;
748         if (e->Iex.Binop.op == Iop_Sub32)
749            simm = -simm;
750         reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
751         return ARMAMode2_RI(reg, simm);
752      }
753   }
754
755   /* Doesn't match anything in particular.  Generate it into
756      a register and use that. */
757   {
758      HReg reg = iselIntExpr_R(env, e);
759      return ARMAMode2_RI(reg, 0);
760   }
761
762}
763
764
765/* --------------------- AModeV --------------------- */
766
767/* Return an AModeV which computes the value of the specified
768   expression, possibly also adding insns to the code list as a
769   result.  The expression may only be a 32-bit one.
770*/
771
772static Bool sane_AModeV ( ARMAModeV* am )
773{
774  return toBool( hregClass(am->reg) == HRcInt32
775                 && hregIsVirtual(am->reg)
776                 && am->simm11 >= -1020 && am->simm11 <= 1020
777                 && 0 == (am->simm11 & 3) );
778}
779
780static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
781{
782   ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
783   vassert(sane_AModeV(am));
784   return am;
785}
786
787static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
788{
789   IRType ty = typeOfIRExpr(env->type_env,e);
790   vassert(ty == Ity_I32);
791
792   /* {Add32,Sub32}(expr, simm8 << 2) */
793   if (e->tag == Iex_Binop
794       && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
795       && e->Iex.Binop.arg2->tag == Iex_Const
796       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
797      Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
798      if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
799         HReg reg;
800         if (e->Iex.Binop.op == Iop_Sub32)
801            simm = -simm;
802         reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
803         return mkARMAModeV(reg, simm);
804      }
805   }
806
807   /* Doesn't match anything in particular.  Generate it into
808      a register and use that. */
809   {
810      HReg reg = iselIntExpr_R(env, e);
811      return mkARMAModeV(reg, 0);
812   }
813
814}
815
816/* -------------------- AModeN -------------------- */
817
818static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
819{
820   return iselIntExpr_AModeN_wrk(env, e);
821}
822
823static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
824{
825   HReg reg = iselIntExpr_R(env, e);
826   return mkARMAModeN_R(reg);
827}
828
829
830/* --------------------- RI84 --------------------- */
831
/* Select instructions to generate 'e' into a RI84.  If mayInv is
   true, then the caller will also accept an I84 form that denotes
   'not e'.  In this case didInv may not be NULL, and *didInv is set
   to True if and only if the returned I84 denotes 'not e' rather
   than 'e'; otherwise it is set to False.  This complication is so
   as to allow generation of an RI84 which is suitable for use in
   either an AND or BIC instruction, without knowing (before this
   call) which one.
*/
839static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
840                                   ISelEnv* env, IRExpr* e )
841{
842   ARMRI84* ri;
843   if (mayInv)
844      vassert(didInv != NULL);
845   ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
846   /* sanity checks ... */
847   switch (ri->tag) {
848      case ARMri84_I84:
849         return ri;
850      case ARMri84_R:
851         vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
852         vassert(hregIsVirtual(ri->ARMri84.R.reg));
853         return ri;
854      default:
855         vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
856   }
857}
858
859/* DO NOT CALL THIS DIRECTLY ! */
860static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
861                                       ISelEnv* env, IRExpr* e )
862{
863   IRType ty = typeOfIRExpr(env->type_env,e);
864   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
865
866   if (didInv) *didInv = False;
867
868   /* special case: immediate */
869   if (e->tag == Iex_Const) {
870      UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
871      switch (e->Iex.Const.con->tag) {
872         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
873         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
874         case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
875         default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
876      }
877      if (fitsIn8x4(&u8, &u4, u)) {
878         return ARMRI84_I84( (UShort)u8, (UShort)u4 );
879      }
880      if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
881         vassert(didInv);
882         *didInv = True;
883         return ARMRI84_I84( (UShort)u8, (UShort)u4 );
884      }
885      /* else fail, fall through to default case */
886   }
887
888   /* default case: calculate into a register and return that */
889   {
890      HReg r = iselIntExpr_R ( env, e );
891      return ARMRI84_R(r);
892   }
893}
894
895
896/* --------------------- RI5 --------------------- */
897
898/* Select instructions to generate 'e' into a RI5. */
899
900static ARMRI5* iselIntExpr_RI5 ( ISelEnv* env, IRExpr* e )
901{
902   ARMRI5* ri = iselIntExpr_RI5_wrk(env, e);
903   /* sanity checks ... */
904   switch (ri->tag) {
905      case ARMri5_I5:
906         return ri;
907      case ARMri5_R:
908         vassert(hregClass(ri->ARMri5.R.reg) == HRcInt32);
909         vassert(hregIsVirtual(ri->ARMri5.R.reg));
910         return ri;
911      default:
912         vpanic("iselIntExpr_RI5: unknown arm RI5 tag");
913   }
914}
915
916/* DO NOT CALL THIS DIRECTLY ! */
917static ARMRI5* iselIntExpr_RI5_wrk ( ISelEnv* env, IRExpr* e )
918{
919   IRType ty = typeOfIRExpr(env->type_env,e);
920   vassert(ty == Ity_I32 || ty == Ity_I8);
921
922   /* special case: immediate */
923   if (e->tag == Iex_Const) {
924      UInt u; /* both invalid */
925      switch (e->Iex.Const.con->tag) {
926         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
927         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
928         case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
929         default: vpanic("iselIntExpr_RI5.Iex_Const(armh)");
930      }
931      if (u >= 1 && u <= 31) {
932         return ARMRI5_I5(u);
933      }
934      /* else fail, fall through to default case */
935   }
936
937   /* default case: calculate into a register and return that */
938   {
939      HReg r = iselIntExpr_R ( env, e );
940      return ARMRI5_R(r);
941   }
942}
943
944
945/* ------------------- CondCode ------------------- */
946
/* Generate code to evaluate a bit-typed expression, returning the
   condition code which holds exactly when the expression would
   notionally have evaluated to 1. */
950
951static ARMCondCode iselCondCode ( ISelEnv* env, IRExpr* e )
952{
953   ARMCondCode cc = iselCondCode_wrk(env,e);
954   vassert(cc != ARMcc_NV);
955   return cc;
956}
957
958static ARMCondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
959{
960   vassert(e);
961   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
962
963   /* var */
964   if (e->tag == Iex_RdTmp) {
965      HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
966      /* CmpOrTst doesn't modify rTmp; so this is OK. */
967      ARMRI84* one  = ARMRI84_I84(1,0);
968      addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
969      return ARMcc_NE;
970   }
971
972   /* Not1(e) */
973   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
974      /* Generate code for the arg, and negate the test condition */
975      return 1 ^ iselCondCode(env, e->Iex.Unop.arg);
976   }
977
978   /* --- patterns rooted at: 32to1 --- */
979
980   if (e->tag == Iex_Unop
981       && e->Iex.Unop.op == Iop_32to1) {
982      HReg     rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
983      ARMRI84* one  = ARMRI84_I84(1,0);
984      addInstr(env, ARMInstr_CmpOrTst(False/*test*/, rTmp, one));
985      return ARMcc_NE;
986   }
987
988   /* --- patterns rooted at: CmpNEZ8 --- */
989
990   if (e->tag == Iex_Unop
991       && e->Iex.Unop.op == Iop_CmpNEZ8) {
992      HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
993      ARMRI84* xFF  = ARMRI84_I84(0xFF,0);
994      addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r1, xFF));
995      return ARMcc_NE;
996   }
997
998   /* --- patterns rooted at: CmpNEZ32 --- */
999
1000   if (e->tag == Iex_Unop
1001       && e->Iex.Unop.op == Iop_CmpNEZ32) {
1002      HReg     r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1003      ARMRI84* zero = ARMRI84_I84(0,0);
1004      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r1, zero));
1005      return ARMcc_NE;
1006   }
1007
1008   /* --- patterns rooted at: CmpNEZ64 --- */
1009
1010   if (e->tag == Iex_Unop
1011       && e->Iex.Unop.op == Iop_CmpNEZ64) {
1012      HReg     tHi, tLo;
1013      HReg     tmp  = newVRegI(env);
1014      ARMRI84* zero = ARMRI84_I84(0,0);
1015      iselInt64Expr(&tHi, &tLo, env, e->Iex.Unop.arg);
1016      addInstr(env, ARMInstr_Alu(ARMalu_OR, tmp, tHi, ARMRI84_R(tLo)));
1017      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, tmp, zero));
1018      return ARMcc_NE;
1019   }
1020
1021   /* --- Cmp*32*(x,y) --- */
1022   if (e->tag == Iex_Binop
1023       && (e->Iex.Binop.op == Iop_CmpEQ32
1024           || e->Iex.Binop.op == Iop_CmpNE32
1025           || e->Iex.Binop.op == Iop_CmpLT32S
1026           || e->Iex.Binop.op == Iop_CmpLT32U
1027           || e->Iex.Binop.op == Iop_CmpLE32S
1028           || e->Iex.Binop.op == Iop_CmpLE32U)) {
1029      HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1030      ARMRI84* argR = iselIntExpr_RI84(NULL,False,
1031                                       env, e->Iex.Binop.arg2);
1032      addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL, argR));
1033      switch (e->Iex.Binop.op) {
1034         case Iop_CmpEQ32:  return ARMcc_EQ;
1035         case Iop_CmpNE32:  return ARMcc_NE;
1036         case Iop_CmpLT32S: return ARMcc_LT;
1037         case Iop_CmpLT32U: return ARMcc_LO;
1038         case Iop_CmpLE32S: return ARMcc_LE;
1039         case Iop_CmpLE32U: return ARMcc_LS;
1040         default: vpanic("iselCondCode(arm): CmpXX32");
1041      }
1042   }
1043
1044   /* --- CasCmpEQ* --- */
1045   /* Ist_Cas has a dummy argument to compare with, so comparison is
1046      always true. */
1047   if (e->tag == Iex_Binop
1048       && (e->Iex.Binop.op == Iop_CasCmpEQ32
1049           || e->Iex.Binop.op == Iop_CasCmpEQ16
1050           || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1051      return ARMcc_AL;
1052   }
1053
1054   ppIRExpr(e);
1055   vpanic("iselCondCode");
1056}
1057
1058
1059/* --------------------- Reg --------------------- */
1060
1061static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1062{
1063   HReg r = iselIntExpr_R_wrk(env, e);
1064   /* sanity checks ... */
1065#  if 0
1066   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1067#  endif
1068   vassert(hregClass(r) == HRcInt32);
1069   vassert(hregIsVirtual(r));
1070   return r;
1071}
1072
1073/* DO NOT CALL THIS DIRECTLY ! */
1074static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1075{
1076   IRType ty = typeOfIRExpr(env->type_env,e);
1077   vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1078//   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1079
1080   switch (e->tag) {
1081
1082   /* --------- TEMP --------- */
1083   case Iex_RdTmp: {
1084      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1085   }
1086
1087   /* --------- LOAD --------- */
1088   case Iex_Load: {
1089      HReg dst  = newVRegI(env);
1090
1091      if (e->Iex.Load.end != Iend_LE)
1092         goto irreducible;
1093
1094      if (ty == Ity_I32) {
1095         ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1096         addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, dst, amode));
1097         return dst;
1098      }
1099      if (ty == Ity_I16) {
1100         ARMAMode2* amode = iselIntExpr_AMode2 ( env, e->Iex.Load.addr );
1101         addInstr(env, ARMInstr_LdSt16(True/*isLoad*/, False/*!signedLoad*/,
1102                                       dst, amode));
1103         return dst;
1104      }
1105      if (ty == Ity_I8) {
1106         ARMAMode1* amode = iselIntExpr_AMode1 ( env, e->Iex.Load.addr );
1107         addInstr(env, ARMInstr_LdSt8U(True/*isLoad*/, dst, amode));
1108         return dst;
1109      }
1110
1111//zz      if (ty == Ity_I16) {
1112//zz         addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1113//zz         return dst;
1114//zz      }
1115//zz      if (ty == Ity_I8) {
1116//zz         addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1117//zz         return dst;
1118//zz      }
1119      break;
1120   }
1121
1122//zz   /* --------- TERNARY OP --------- */
1123//zz   case Iex_Triop: {
1124//zz      /* C3210 flags following FPU partial remainder (fprem), both
1125//zz         IEEE compliant (PREM1) and non-IEEE compliant (PREM). */
1126//zz      if (e->Iex.Triop.op == Iop_PRemC3210F64
1127//zz          || e->Iex.Triop.op == Iop_PRem1C3210F64) {
1128//zz         HReg junk = newVRegF(env);
1129//zz         HReg dst  = newVRegI(env);
1130//zz         HReg srcL = iselDblExpr(env, e->Iex.Triop.arg2);
1131//zz         HReg srcR = iselDblExpr(env, e->Iex.Triop.arg3);
1132//zz         /* XXXROUNDINGFIXME */
1133//zz         /* set roundingmode here */
1134//zz         addInstr(env, X86Instr_FpBinary(
1135//zz                           e->Iex.Binop.op==Iop_PRemC3210F64
1136//zz                              ? Xfp_PREM : Xfp_PREM1,
1137//zz                           srcL,srcR,junk
1138//zz                 ));
1139//zz         /* The previous pseudo-insn will have left the FPU's C3210
1140//zz            flags set correctly.  So bag them. */
1141//zz         addInstr(env, X86Instr_FpStSW_AX());
1142//zz         addInstr(env, mk_iMOVsd_RR(hregX86_EAX(), dst));
1143//zz         addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(0x4700), dst));
1144//zz         return dst;
1145//zz      }
1146//zz
1147//zz      break;
1148//zz   }
1149
1150   /* --------- BINARY OP --------- */
1151   case Iex_Binop: {
1152
1153      ARMAluOp   aop = 0; /* invalid */
1154      ARMShiftOp sop = 0; /* invalid */
1155
1156      /* ADD/SUB/AND/OR/XOR */
1157      switch (e->Iex.Binop.op) {
1158         case Iop_And32: {
1159            Bool     didInv = False;
1160            HReg     dst    = newVRegI(env);
1161            HReg     argL   = iselIntExpr_R(env, e->Iex.Binop.arg1);
1162            ARMRI84* argR   = iselIntExpr_RI84(&didInv, True/*mayInv*/,
1163                                               env, e->Iex.Binop.arg2);
1164            addInstr(env, ARMInstr_Alu(didInv ? ARMalu_BIC : ARMalu_AND,
1165                                       dst, argL, argR));
1166            return dst;
1167         }
1168         case Iop_Or32:  aop = ARMalu_OR;  goto std_binop;
1169         case Iop_Xor32: aop = ARMalu_XOR; goto std_binop;
1170         case Iop_Sub32: aop = ARMalu_SUB; goto std_binop;
1171         case Iop_Add32: aop = ARMalu_ADD; goto std_binop;
1172         std_binop: {
1173            HReg     dst  = newVRegI(env);
1174            HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1175            ARMRI84* argR = iselIntExpr_RI84(NULL, False/*mayInv*/,
1176                                             env, e->Iex.Binop.arg2);
1177            addInstr(env, ARMInstr_Alu(aop, dst, argL, argR));
1178            return dst;
1179         }
1180         default: break;
1181      }
1182
1183      /* SHL/SHR/SAR */
1184      switch (e->Iex.Binop.op) {
1185         case Iop_Shl32: sop = ARMsh_SHL; goto sh_binop;
1186         case Iop_Shr32: sop = ARMsh_SHR; goto sh_binop;
1187         case Iop_Sar32: sop = ARMsh_SAR; goto sh_binop;
1188         sh_binop: {
1189            HReg    dst  = newVRegI(env);
1190            HReg    argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1191            ARMRI5* argR = iselIntExpr_RI5(env, e->Iex.Binop.arg2);
1192            addInstr(env, ARMInstr_Shift(sop, dst, argL, argR));
1193            vassert(ty == Ity_I32); /* else the IR is ill-typed */
1194            return dst;
1195         }
1196         default: break;
1197      }
1198
1199      /* MUL */
1200      if (e->Iex.Binop.op == Iop_Mul32) {
1201         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1202         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1203         HReg dst  = newVRegI(env);
1204         addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1205         addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1206         addInstr(env, ARMInstr_Mul(ARMmul_PLAIN));
1207         addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1208         return dst;
1209      }
1210
1211      /* Handle misc other ops. */
1212
1213      if (e->Iex.Binop.op == Iop_Max32U) {
1214         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1215         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1216         HReg dst  = newVRegI(env);
1217         addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, argL,
1218                                         ARMRI84_R(argR)));
1219         addInstr(env, mk_iMOVds_RR(dst, argL));
1220         addInstr(env, ARMInstr_CMov(ARMcc_LO, dst, ARMRI84_R(argR)));
1221         return dst;
1222      }
1223
1224      if (e->Iex.Binop.op == Iop_CmpF64) {
1225         HReg dL = iselDblExpr(env, e->Iex.Binop.arg1);
1226         HReg dR = iselDblExpr(env, e->Iex.Binop.arg2);
1227         HReg dst = newVRegI(env);
1228         /* Do the compare (FCMPD) and set NZCV in FPSCR.  Then also do
1229            FMSTAT, so we can examine the results directly. */
1230         addInstr(env, ARMInstr_VCmpD(dL, dR));
1231         /* Create in dst, the IRCmpF64Result encoded result. */
1232         addInstr(env, ARMInstr_Imm32(dst, 0));
1233         addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, ARMRI84_I84(0x40,0))); //EQ
1234         addInstr(env, ARMInstr_CMov(ARMcc_MI, dst, ARMRI84_I84(0x01,0))); //LT
1235         addInstr(env, ARMInstr_CMov(ARMcc_GT, dst, ARMRI84_I84(0x00,0))); //GT
1236         addInstr(env, ARMInstr_CMov(ARMcc_VS, dst, ARMRI84_I84(0x45,0))); //UN
1237         return dst;
1238      }
1239
1240      if (e->Iex.Binop.op == Iop_F64toI32S
1241          || e->Iex.Binop.op == Iop_F64toI32U) {
1242         /* Wretched uglyness all round, due to having to deal
1243            with rounding modes.  Oh well. */
1244         /* FIXME: if arg1 is a constant indicating round-to-zero,
1245            then we could skip all this arsing around with FPSCR and
1246            simply emit FTO{S,U}IZD. */
1247         Bool syned = e->Iex.Binop.op == Iop_F64toI32S;
1248         HReg valD  = iselDblExpr(env, e->Iex.Binop.arg2);
1249         set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
1250         /* FTO{S,U}ID valF, valD */
1251         HReg valF = newVRegF(env);
1252         addInstr(env, ARMInstr_VCvtID(False/*!iToD*/, syned,
1253                                       valF, valD));
1254         set_VFP_rounding_default(env);
1255         /* VMOV dst, valF */
1256         HReg dst = newVRegI(env);
1257         addInstr(env, ARMInstr_VXferS(False/*!toS*/, valF, dst));
1258         return dst;
1259      }
1260
1261      if (e->Iex.Binop.op == Iop_GetElem8x8
1262          || e->Iex.Binop.op == Iop_GetElem16x4
1263          || e->Iex.Binop.op == Iop_GetElem32x2) {
1264         HReg res = newVRegI(env);
1265         HReg arg = iselNeon64Expr(env, e->Iex.Triop.arg1);
1266         UInt index, size;
1267         if (e->Iex.Binop.arg2->tag != Iex_Const ||
1268             typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1269            vpanic("ARM target supports GetElem with constant "
1270                   "second argument only\n");
1271         }
1272         index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1273         switch (e->Iex.Binop.op) {
1274            case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1275            case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1276            case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1277            default: vassert(0);
1278         }
1279         addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1280                                        mkARMNRS(ARMNRS_Reg, res, 0),
1281                                        mkARMNRS(ARMNRS_Scalar, arg, index),
1282                                        size, False));
1283         return res;
1284      }
1285
1286      if (e->Iex.Binop.op == Iop_GetElem8x16
1287          || e->Iex.Binop.op == Iop_GetElem16x8
1288          || e->Iex.Binop.op == Iop_GetElem32x4) {
1289         HReg res = newVRegI(env);
1290         HReg arg = iselNeonExpr(env, e->Iex.Triop.arg1);
1291         UInt index, size;
1292         if (e->Iex.Binop.arg2->tag != Iex_Const ||
1293             typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1294            vpanic("ARM target supports GetElem with constant "
1295                   "second argument only\n");
1296         }
1297         index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1298         switch (e->Iex.Binop.op) {
1299            case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1300            case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1301            case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1302            default: vassert(0);
1303         }
1304         addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1305                                        mkARMNRS(ARMNRS_Reg, res, 0),
1306                                        mkARMNRS(ARMNRS_Scalar, arg, index),
1307                                        size, True));
1308         return res;
1309      }
1310
1311      /* All cases involving host-side helper calls. */
1312      void* fn = NULL;
1313      switch (e->Iex.Binop.op) {
1314         case Iop_Add16x2:
1315            fn = &h_generic_calc_Add16x2; break;
1316         case Iop_Sub16x2:
1317            fn = &h_generic_calc_Sub16x2; break;
1318         case Iop_HAdd16Ux2:
1319            fn = &h_generic_calc_HAdd16Ux2; break;
1320         case Iop_HAdd16Sx2:
1321            fn = &h_generic_calc_HAdd16Sx2; break;
1322         case Iop_HSub16Ux2:
1323            fn = &h_generic_calc_HSub16Ux2; break;
1324         case Iop_HSub16Sx2:
1325            fn = &h_generic_calc_HSub16Sx2; break;
1326         case Iop_QAdd16Sx2:
1327            fn = &h_generic_calc_QAdd16Sx2; break;
1328         case Iop_QSub16Sx2:
1329            fn = &h_generic_calc_QSub16Sx2; break;
1330         case Iop_Add8x4:
1331            fn = &h_generic_calc_Add8x4; break;
1332         case Iop_Sub8x4:
1333            fn = &h_generic_calc_Sub8x4; break;
1334         case Iop_HAdd8Ux4:
1335            fn = &h_generic_calc_HAdd8Ux4; break;
1336         case Iop_HAdd8Sx4:
1337            fn = &h_generic_calc_HAdd8Sx4; break;
1338         case Iop_HSub8Ux4:
1339            fn = &h_generic_calc_HSub8Ux4; break;
1340         case Iop_HSub8Sx4:
1341            fn = &h_generic_calc_HSub8Sx4; break;
1342         case Iop_QAdd8Sx4:
1343            fn = &h_generic_calc_QAdd8Sx4; break;
1344         case Iop_QAdd8Ux4:
1345            fn = &h_generic_calc_QAdd8Ux4; break;
1346         case Iop_QSub8Sx4:
1347            fn = &h_generic_calc_QSub8Sx4; break;
1348         case Iop_QSub8Ux4:
1349            fn = &h_generic_calc_QSub8Ux4; break;
1350         case Iop_Sad8Ux4:
1351            fn = &h_generic_calc_Sad8Ux4; break;
1352         default:
1353            break;
1354      }
1355
1356      if (fn) {
1357         HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1358         HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1359         HReg res  = newVRegI(env);
1360         addInstr(env, mk_iMOVds_RR(hregARM_R0(), regL));
1361         addInstr(env, mk_iMOVds_RR(hregARM_R1(), regR));
1362         addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 2 ));
1363         addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1364         return res;
1365      }
1366
1367      break;
1368   }
1369
1370   /* --------- UNARY OP --------- */
1371   case Iex_Unop: {
1372
1373//zz      /* 1Uto8(32to1(expr32)) */
1374//zz      if (e->Iex.Unop.op == Iop_1Uto8) {
1375//zz         DECLARE_PATTERN(p_32to1_then_1Uto8);
1376//zz         DEFINE_PATTERN(p_32to1_then_1Uto8,
1377//zz                        unop(Iop_1Uto8,unop(Iop_32to1,bind(0))));
1378//zz         if (matchIRExpr(&mi,p_32to1_then_1Uto8,e)) {
1379//zz            IRExpr* expr32 = mi.bindee[0];
1380//zz            HReg dst = newVRegI(env);
1381//zz            HReg src = iselIntExpr_R(env, expr32);
1382//zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
1383//zz            addInstr(env, X86Instr_Alu32R(Xalu_AND,
1384//zz                                          X86RMI_Imm(1), dst));
1385//zz            return dst;
1386//zz         }
1387//zz      }
1388//zz
1389//zz      /* 8Uto32(LDle(expr32)) */
1390//zz      if (e->Iex.Unop.op == Iop_8Uto32) {
1391//zz         DECLARE_PATTERN(p_LDle8_then_8Uto32);
1392//zz         DEFINE_PATTERN(p_LDle8_then_8Uto32,
1393//zz                        unop(Iop_8Uto32,
1394//zz                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1395//zz         if (matchIRExpr(&mi,p_LDle8_then_8Uto32,e)) {
1396//zz            HReg dst = newVRegI(env);
1397//zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1398//zz            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1399//zz            return dst;
1400//zz         }
1401//zz      }
1402//zz
1403//zz      /* 8Sto32(LDle(expr32)) */
1404//zz      if (e->Iex.Unop.op == Iop_8Sto32) {
1405//zz         DECLARE_PATTERN(p_LDle8_then_8Sto32);
1406//zz         DEFINE_PATTERN(p_LDle8_then_8Sto32,
1407//zz                        unop(Iop_8Sto32,
1408//zz                             IRExpr_Load(Iend_LE,Ity_I8,bind(0))) );
1409//zz         if (matchIRExpr(&mi,p_LDle8_then_8Sto32,e)) {
1410//zz            HReg dst = newVRegI(env);
1411//zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1412//zz            addInstr(env, X86Instr_LoadEX(1,True,amode,dst));
1413//zz            return dst;
1414//zz         }
1415//zz      }
1416//zz
1417//zz      /* 16Uto32(LDle(expr32)) */
1418//zz      if (e->Iex.Unop.op == Iop_16Uto32) {
1419//zz         DECLARE_PATTERN(p_LDle16_then_16Uto32);
1420//zz         DEFINE_PATTERN(p_LDle16_then_16Uto32,
1421//zz                        unop(Iop_16Uto32,
1422//zz                             IRExpr_Load(Iend_LE,Ity_I16,bind(0))) );
1423//zz         if (matchIRExpr(&mi,p_LDle16_then_16Uto32,e)) {
1424//zz            HReg dst = newVRegI(env);
1425//zz            X86AMode* amode = iselIntExpr_AMode ( env, mi.bindee[0] );
1426//zz            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1427//zz            return dst;
1428//zz         }
1429//zz      }
1430//zz
1431//zz      /* 8Uto32(GET:I8) */
1432//zz      if (e->Iex.Unop.op == Iop_8Uto32) {
1433//zz         if (e->Iex.Unop.arg->tag == Iex_Get) {
1434//zz            HReg      dst;
1435//zz            X86AMode* amode;
1436//zz            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I8);
1437//zz            dst = newVRegI(env);
1438//zz            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1439//zz                                hregX86_EBP());
1440//zz            addInstr(env, X86Instr_LoadEX(1,False,amode,dst));
1441//zz            return dst;
1442//zz         }
1443//zz      }
1444//zz
1445//zz      /* 16to32(GET:I16) */
1446//zz      if (e->Iex.Unop.op == Iop_16Uto32) {
1447//zz         if (e->Iex.Unop.arg->tag == Iex_Get) {
1448//zz            HReg      dst;
1449//zz            X86AMode* amode;
1450//zz            vassert(e->Iex.Unop.arg->Iex.Get.ty == Ity_I16);
1451//zz            dst = newVRegI(env);
1452//zz            amode = X86AMode_IR(e->Iex.Unop.arg->Iex.Get.offset,
1453//zz                                hregX86_EBP());
1454//zz            addInstr(env, X86Instr_LoadEX(2,False,amode,dst));
1455//zz            return dst;
1456//zz         }
1457//zz      }
1458
1459      switch (e->Iex.Unop.op) {
1460         case Iop_8Uto32: {
1461            HReg dst = newVRegI(env);
1462            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1463            addInstr(env, ARMInstr_Alu(ARMalu_AND,
1464                                       dst, src, ARMRI84_I84(0xFF,0)));
1465            return dst;
1466         }
1467//zz         case Iop_8Uto16:
1468//zz         case Iop_8Uto32:
1469//zz         case Iop_16Uto32: {
1470//zz            HReg dst = newVRegI(env);
1471//zz            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1472//zz            UInt mask = e->Iex.Unop.op==Iop_16Uto32 ? 0xFFFF : 0xFF;
1473//zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
1474//zz            addInstr(env, X86Instr_Alu32R(Xalu_AND,
1475//zz                                          X86RMI_Imm(mask), dst));
1476//zz            return dst;
1477//zz         }
1478//zz         case Iop_8Sto16:
1479//zz         case Iop_8Sto32:
1480         case Iop_16Uto32: {
1481            HReg dst = newVRegI(env);
1482            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1483            ARMRI5* amt = ARMRI5_I5(16);
1484            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1485            addInstr(env, ARMInstr_Shift(ARMsh_SHR, dst, dst, amt));
1486            return dst;
1487         }
1488         case Iop_8Sto32:
1489         case Iop_16Sto32: {
1490            HReg dst = newVRegI(env);
1491            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1492            ARMRI5* amt = ARMRI5_I5(e->Iex.Unop.op==Iop_16Sto32 ? 16 : 24);
1493            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, src, amt));
1494            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1495            return dst;
1496         }
1497//zz         case Iop_Not8:
1498//zz         case Iop_Not16:
1499         case Iop_Not32: {
1500            HReg dst = newVRegI(env);
1501            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1502            addInstr(env, ARMInstr_Unary(ARMun_NOT, dst, src));
1503            return dst;
1504         }
1505         case Iop_64HIto32: {
1506            HReg rHi, rLo;
1507            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1508            return rHi; /* and abandon rLo .. poor wee thing :-) */
1509         }
1510         case Iop_64to32: {
1511            HReg rHi, rLo;
1512            iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1513            return rLo; /* similar stupid comment to the above ... */
1514         }
1515         case Iop_64to8: {
1516            HReg rHi, rLo;
1517            if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
1518               HReg tHi = newVRegI(env);
1519               HReg tLo = newVRegI(env);
1520               HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
1521               addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1522               rHi = tHi;
1523               rLo = tLo;
1524            } else {
1525               iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1526            }
1527            return rLo;
1528         }
1529//zz         case Iop_16HIto8:
1530//zz         case Iop_32HIto16: {
1531//zz            HReg dst  = newVRegI(env);
1532//zz            HReg src  = iselIntExpr_R(env, e->Iex.Unop.arg);
1533//zz            Int shift = e->Iex.Unop.op == Iop_16HIto8 ? 8 : 16;
1534//zz            addInstr(env, mk_iMOVsd_RR(src,dst) );
1535//zz            addInstr(env, X86Instr_Sh32(Xsh_SHR, shift, dst));
1536//zz            return dst;
1537//zz         }
1538         case Iop_1Uto32:
1539         case Iop_1Uto8: {
1540            HReg        dst  = newVRegI(env);
1541            ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1542            addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1543            addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1544            return dst;
1545         }
1546
1547         case Iop_1Sto32: {
1548            HReg        dst  = newVRegI(env);
1549            ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1550            ARMRI5*     amt  = ARMRI5_I5(31);
1551            /* This is really rough.  We could do much better here;
1552               perhaps mvn{cond} dst, #0 as the second insn?
1553               (same applies to 1Sto64) */
1554            addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1555            addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1556            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1557            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1558            return dst;
1559         }
1560
1561
1562//zz         case Iop_1Sto8:
1563//zz         case Iop_1Sto16:
1564//zz         case Iop_1Sto32: {
1565//zz            /* could do better than this, but for now ... */
1566//zz            HReg dst         = newVRegI(env);
1567//zz            X86CondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1568//zz            addInstr(env, X86Instr_Set32(cond,dst));
1569//zz            addInstr(env, X86Instr_Sh32(Xsh_SHL, 31, dst));
1570//zz            addInstr(env, X86Instr_Sh32(Xsh_SAR, 31, dst));
1571//zz            return dst;
1572//zz         }
1573//zz         case Iop_Ctz32: {
1574//zz            /* Count trailing zeroes, implemented by x86 'bsfl' */
1575//zz            HReg dst = newVRegI(env);
1576//zz            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1577//zz            addInstr(env, X86Instr_Bsfr32(True,src,dst));
1578//zz            return dst;
1579//zz         }
1580         case Iop_Clz32: {
1581            /* Count leading zeroes; easy on ARM. */
1582            HReg dst = newVRegI(env);
1583            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1584            addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
1585            return dst;
1586         }
1587
1588         case Iop_CmpwNEZ32: {
1589            HReg dst = newVRegI(env);
1590            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1591            addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1592            addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1593            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
1594            return dst;
1595         }
1596
1597         case Iop_Left32: {
1598            HReg dst = newVRegI(env);
1599            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1600            addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
1601            addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
1602            return dst;
1603         }
1604
1605//zz         case Iop_V128to32: {
1606//zz            HReg      dst  = newVRegI(env);
1607//zz            HReg      vec  = iselVecExpr(env, e->Iex.Unop.arg);
1608//zz            X86AMode* esp0 = X86AMode_IR(0, hregX86_ESP());
1609//zz            sub_from_esp(env, 16);
1610//zz            addInstr(env, X86Instr_SseLdSt(False/*store*/, vec, esp0));
1611//zz            addInstr(env, X86Instr_Alu32R( Xalu_MOV, X86RMI_Mem(esp0), dst ));
1612//zz            add_to_esp(env, 16);
1613//zz            return dst;
1614//zz         }
1615//zz
1616         case Iop_ReinterpF32asI32: {
1617            HReg dst = newVRegI(env);
1618            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1619            addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
1620            return dst;
1621         }
1622
1623//zz
1624//zz         case Iop_16to8:
1625         case Iop_32to8:
1626         case Iop_32to16:
1627            /* These are no-ops. */
1628            return iselIntExpr_R(env, e->Iex.Unop.arg);
1629
1630         default:
1631            break;
1632      }
1633
1634      /* All Unop cases involving host-side helper calls. */
1635      void* fn = NULL;
1636      switch (e->Iex.Unop.op) {
1637         case Iop_CmpNEZ16x2:
1638            fn = &h_generic_calc_CmpNEZ16x2; break;
1639         case Iop_CmpNEZ8x4:
1640            fn = &h_generic_calc_CmpNEZ8x4; break;
1641         default:
1642            break;
1643      }
1644
1645      if (fn) {
1646         HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
1647         HReg res = newVRegI(env);
1648         addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
1649         addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn), 1 ));
1650         addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
1651         return res;
1652      }
1653
1654      break;
1655   }
1656
1657   /* --------- GET --------- */
1658   case Iex_Get: {
1659      if (ty == Ity_I32
1660          && 0 == (e->Iex.Get.offset & 3)
1661          && e->Iex.Get.offset < 4096-4) {
1662         HReg dst = newVRegI(env);
1663         addInstr(env, ARMInstr_LdSt32(
1664                          True/*isLoad*/,
1665                          dst,
1666                          ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset)));
1667         return dst;
1668      }
1669//zz      if (ty == Ity_I8 || ty == Ity_I16) {
1670//zz         HReg dst = newVRegI(env);
1671//zz         addInstr(env, X86Instr_LoadEX(
1672//zz                          toUChar(ty==Ity_I8 ? 1 : 2),
1673//zz                          False,
1674//zz                          X86AMode_IR(e->Iex.Get.offset,hregX86_EBP()),
1675//zz                          dst));
1676//zz         return dst;
1677//zz      }
1678      break;
1679   }
1680
1681//zz   case Iex_GetI: {
1682//zz      X86AMode* am
1683//zz         = genGuestArrayOffset(
1684//zz              env, e->Iex.GetI.descr,
1685//zz                   e->Iex.GetI.ix, e->Iex.GetI.bias );
1686//zz      HReg dst = newVRegI(env);
1687//zz      if (ty == Ity_I8) {
1688//zz         addInstr(env, X86Instr_LoadEX( 1, False, am, dst ));
1689//zz         return dst;
1690//zz      }
1691//zz      if (ty == Ity_I32) {
1692//zz         addInstr(env, X86Instr_Alu32R(Xalu_MOV, X86RMI_Mem(am), dst));
1693//zz         return dst;
1694//zz      }
1695//zz      break;
1696//zz   }
1697
1698   /* --------- CCALL --------- */
1699   case Iex_CCall: {
1700      HReg    dst = newVRegI(env);
1701      vassert(ty == e->Iex.CCall.retty);
1702
1703      /* be very restrictive for now.  Only 32/64-bit ints allowed
1704         for args, and 32 bits for return type. */
1705      if (e->Iex.CCall.retty != Ity_I32)
1706         goto irreducible;
1707
1708      /* Marshal args, do the call, clear stack. */
1709      Bool ok = doHelperCall( env, False,
1710                              NULL, e->Iex.CCall.cee, e->Iex.CCall.args );
1711      if (ok) {
1712         addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()));
1713         return dst;
1714      }
1715      /* else fall through; will hit the irreducible: label */
1716   }
1717
1718   /* --------- LITERAL --------- */
1719   /* 32 literals */
1720   case Iex_Const: {
1721      UInt u   = 0;
1722      HReg dst = newVRegI(env);
1723      switch (e->Iex.Const.con->tag) {
1724         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1725         case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1726         case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
1727         default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm)");
1728      }
1729      addInstr(env, ARMInstr_Imm32(dst, u));
1730      return dst;
1731   }
1732
1733   /* --------- MULTIPLEX --------- */
1734   case Iex_Mux0X: {
1735      IRExpr* cond = e->Iex.Mux0X.cond;
1736
1737      /* Mux0X( 32to8(1Uto32(ccexpr)), expr0, exprX ) */
1738      if (ty == Ity_I32
1739          && cond->tag == Iex_Unop
1740          && cond->Iex.Unop.op == Iop_32to8
1741          && cond->Iex.Unop.arg->tag == Iex_Unop
1742          && cond->Iex.Unop.arg->Iex.Unop.op == Iop_1Uto32) {
1743         ARMCondCode cc;
1744         HReg     rX  = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1745         ARMRI84* r0  = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
1746         HReg     dst = newVRegI(env);
1747         addInstr(env, mk_iMOVds_RR(dst, rX));
1748         cc = iselCondCode(env, cond->Iex.Unop.arg->Iex.Unop.arg);
1749         addInstr(env, ARMInstr_CMov(cc ^ 1, dst, r0));
1750         return dst;
1751      }
1752
1753      /* Mux0X(cond, expr0, exprX) (general case) */
1754      if (ty == Ity_I32) {
1755         HReg     r8;
1756         HReg     rX  = iselIntExpr_R(env, e->Iex.Mux0X.exprX);
1757         ARMRI84* r0  = iselIntExpr_RI84(NULL, False, env, e->Iex.Mux0X.expr0);
1758         HReg     dst = newVRegI(env);
1759         addInstr(env, mk_iMOVds_RR(dst, rX));
1760         r8 = iselIntExpr_R(env, cond);
1761         addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
1762                                         ARMRI84_I84(0xFF,0)));
1763         addInstr(env, ARMInstr_CMov(ARMcc_EQ, dst, r0));
1764         return dst;
1765      }
1766      break;
1767   }
1768
1769   default:
1770   break;
1771   } /* switch (e->tag) */
1772
1773   /* We get here if no pattern matched. */
1774  irreducible:
1775   ppIRExpr(e);
1776   vpanic("iselIntExpr_R: cannot reduce tree");
1777}
1778
1779
1780/* -------------------- 64-bit -------------------- */
1781
1782/* Compute a 64-bit value into a register pair, which is returned as
1783   the first two parameters.  As with iselIntExpr_R, these may be
1784   either real or virtual regs; in any case they must not be changed
1785   by subsequent code emitted by the caller.  */
1786
1787static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1788{
1789   iselInt64Expr_wrk(rHi, rLo, env, e);
1790#  if 0
1791   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1792#  endif
1793   vassert(hregClass(*rHi) == HRcInt32);
1794   vassert(hregIsVirtual(*rHi));
1795   vassert(hregClass(*rLo) == HRcInt32);
1796   vassert(hregIsVirtual(*rLo));
1797}
1798
1799/* DO NOT CALL THIS DIRECTLY ! */
1800static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
1801{
1802   vassert(e);
1803   vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
1804
1805   /* 64-bit literal */
1806   if (e->tag == Iex_Const) {
1807      ULong   w64 = e->Iex.Const.con->Ico.U64;
1808      UInt    wHi = toUInt(w64 >> 32);
1809      UInt    wLo = toUInt(w64);
1810      HReg    tHi = newVRegI(env);
1811      HReg    tLo = newVRegI(env);
1812      vassert(e->Iex.Const.con->tag == Ico_U64);
1813      addInstr(env, ARMInstr_Imm32(tHi, wHi));
1814      addInstr(env, ARMInstr_Imm32(tLo, wLo));
1815      *rHi = tHi;
1816      *rLo = tLo;
1817      return;
1818   }
1819
1820   /* read 64-bit IRTemp */
1821   if (e->tag == Iex_RdTmp) {
1822      if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
1823         HReg tHi = newVRegI(env);
1824         HReg tLo = newVRegI(env);
1825         HReg tmp = iselNeon64Expr(env, e);
1826         addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
1827         *rHi = tHi;
1828         *rLo = tLo;
1829      } else {
1830         lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
1831      }
1832      return;
1833   }
1834
1835   /* 64-bit load */
1836   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
1837      HReg      tLo, tHi, rA;
1838      vassert(e->Iex.Load.ty == Ity_I64);
1839      rA  = iselIntExpr_R(env, e->Iex.Load.addr);
1840      tHi = newVRegI(env);
1841      tLo = newVRegI(env);
1842      addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, ARMAMode1_RI(rA, 4)));
1843      addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, ARMAMode1_RI(rA, 0)));
1844      *rHi = tHi;
1845      *rLo = tLo;
1846      return;
1847   }
1848
1849   /* 64-bit GET */
1850   if (e->tag == Iex_Get) {
1851      ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
1852      ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
1853      HReg tHi = newVRegI(env);
1854      HReg tLo = newVRegI(env);
1855      addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tHi, am4));
1856      addInstr(env, ARMInstr_LdSt32(True/*isLoad*/, tLo, am0));
1857      *rHi = tHi;
1858      *rLo = tLo;
1859      return;
1860   }
1861
1862   /* --------- BINARY ops --------- */
1863   if (e->tag == Iex_Binop) {
1864      switch (e->Iex.Binop.op) {
1865
1866         /* 32 x 32 -> 64 multiply */
1867         case Iop_MullS32:
1868         case Iop_MullU32: {
1869            HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1870            HReg     argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1871            HReg     tHi  = newVRegI(env);
1872            HReg     tLo  = newVRegI(env);
1873            ARMMulOp mop  = e->Iex.Binop.op == Iop_MullS32
1874                               ? ARMmul_SX : ARMmul_ZX;
1875            addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
1876            addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
1877            addInstr(env, ARMInstr_Mul(mop));
1878            addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
1879            addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
1880            *rHi = tHi;
1881            *rLo = tLo;
1882            return;
1883         }
1884
1885         case Iop_Or64: {
1886            HReg xLo, xHi, yLo, yHi;
1887            HReg tHi = newVRegI(env);
1888            HReg tLo = newVRegI(env);
1889            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
1890            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
1891            addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
1892            addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
1893            *rHi = tHi;
1894            *rLo = tLo;
1895            return;
1896         }
1897
1898         case Iop_Add64: {
1899            HReg xLo, xHi, yLo, yHi;
1900            HReg tHi = newVRegI(env);
1901            HReg tLo = newVRegI(env);
1902            iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
1903            iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
1904            addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
1905            addInstr(env, ARMInstr_Alu(ARMalu_ADC,  tHi, xHi, ARMRI84_R(yHi)));
1906            *rHi = tHi;
1907            *rLo = tLo;
1908            return;
1909         }
1910
1911         /* 32HLto64(e1,e2) */
1912         case Iop_32HLto64: {
1913            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
1914            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
1915            return;
1916         }
1917
1918         default:
1919            break;
1920      }
1921   }
1922
1923   /* --------- UNARY ops --------- */
1924   if (e->tag == Iex_Unop) {
1925      switch (e->Iex.Unop.op) {
1926
1927         /* ReinterpF64asI64 */
1928         case Iop_ReinterpF64asI64: {
1929            HReg dstHi = newVRegI(env);
1930            HReg dstLo = newVRegI(env);
1931            HReg src   = iselDblExpr(env, e->Iex.Unop.arg);
1932            addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
1933            *rHi = dstHi;
1934            *rLo = dstLo;
1935            return;
1936         }
1937
1938         /* Left64(e) */
1939         case Iop_Left64: {
1940            HReg yLo, yHi;
1941            HReg tHi  = newVRegI(env);
1942            HReg tLo  = newVRegI(env);
1943            HReg zero = newVRegI(env);
1944            /* yHi:yLo = arg */
1945            iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
1946            /* zero = 0 */
1947            addInstr(env, ARMInstr_Imm32(zero, 0));
1948            /* tLo = 0 - yLo, and set carry */
1949            addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
1950                                       tLo, zero, ARMRI84_R(yLo)));
1951            /* tHi = 0 - yHi - carry */
1952            addInstr(env, ARMInstr_Alu(ARMalu_SBC,
1953                                       tHi, zero, ARMRI84_R(yHi)));
1954            /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
1955               back in, so as to give the final result
1956               tHi:tLo = arg | -arg. */
1957            addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
1958            addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
1959            *rHi = tHi;
1960            *rLo = tLo;
1961            return;
1962         }
1963
1964         /* CmpwNEZ64(e) */
1965         case Iop_CmpwNEZ64: {
1966            HReg srcLo, srcHi;
1967            HReg tmp1 = newVRegI(env);
1968            HReg tmp2 = newVRegI(env);
1969            /* srcHi:srcLo = arg */
1970            iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
1971            /* tmp1 = srcHi | srcLo */
1972            addInstr(env, ARMInstr_Alu(ARMalu_OR,
1973                                       tmp1, srcHi, ARMRI84_R(srcLo)));
1974            /* tmp2 = (tmp1 | -tmp1) >>s 31 */
1975            addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
1976            addInstr(env, ARMInstr_Alu(ARMalu_OR,
1977                                       tmp2, tmp2, ARMRI84_R(tmp1)));
1978            addInstr(env, ARMInstr_Shift(ARMsh_SAR,
1979                                         tmp2, tmp2, ARMRI5_I5(31)));
1980            *rHi = tmp2;
1981            *rLo = tmp2;
1982            return;
1983         }
1984
1985         case Iop_1Sto64: {
1986            HReg        dst  = newVRegI(env);
1987            ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
1988            ARMRI5*     amt  = ARMRI5_I5(31);
1989            /* This is really rough.  We could do much better here;
1990               perhaps mvn{cond} dst, #0 as the second insn?
1991               (same applies to 1Sto32) */
1992            addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
1993            addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
1994            addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
1995            addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
1996            *rHi = dst;
1997            *rLo = dst;
1998            return;
1999         }
2000
2001         default:
2002            break;
2003      }
2004   } /* if (e->tag == Iex_Unop) */
2005
2006   /* --------- MULTIPLEX --------- */
2007   if (e->tag == Iex_Mux0X) {
2008      IRType ty8;
2009      HReg   r8, rXhi, rXlo, r0hi, r0lo, dstHi, dstLo;
2010      ty8 = typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond);
2011      vassert(ty8 == Ity_I8);
2012      iselInt64Expr(&rXhi, &rXlo, env, e->Iex.Mux0X.exprX);
2013      iselInt64Expr(&r0hi, &r0lo, env, e->Iex.Mux0X.expr0);
2014      dstHi = newVRegI(env);
2015      dstLo = newVRegI(env);
2016      addInstr(env, mk_iMOVds_RR(dstHi, rXhi));
2017      addInstr(env, mk_iMOVds_RR(dstLo, rXlo));
2018      r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
2019      addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
2020                                      ARMRI84_I84(0xFF,0)));
2021      addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstHi, ARMRI84_R(r0hi)));
2022      addInstr(env, ARMInstr_CMov(ARMcc_EQ, dstLo, ARMRI84_R(r0lo)));
2023      *rHi = dstHi;
2024      *rLo = dstLo;
2025      return;
2026   }
2027
2028   /* It is convenient sometimes to call iselInt64Expr even when we
2029      have NEON support (e.g. in do_helper_call we need 64-bit
2030      arguments as 2 x 32 regs). */
2031   if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
2032      HReg tHi = newVRegI(env);
2033      HReg tLo = newVRegI(env);
2034      HReg tmp = iselNeon64Expr(env, e);
2035      addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2036      *rHi = tHi;
2037      *rLo = tLo;
2038      return ;
2039   }
2040
2041   ppIRExpr(e);
2042   vpanic("iselInt64Expr");
2043}
2044
2045
2046/*---------------------------------------------------------*/
2047/*--- ISEL: Vector (NEON) expressions (64 or 128 bit)   ---*/
2048/*---------------------------------------------------------*/
2049
2050static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
2051{
2052   HReg r = iselNeon64Expr_wrk( env, e );
2053   vassert(hregClass(r) == HRcFlt64);
2054   vassert(hregIsVirtual(r));
2055   return r;
2056}
2057
2058/* DO NOT CALL THIS DIRECTLY */
2059static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
2060{
2061   IRType ty = typeOfIRExpr(env->type_env, e);
2062   MatchInfo mi;
2063   vassert(e);
2064   vassert(ty == Ity_I64);
2065
2066   if (e->tag == Iex_RdTmp) {
2067      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2068   }
2069
2070   if (e->tag == Iex_Const) {
2071      HReg rLo, rHi;
2072      HReg res = newVRegD(env);
2073      iselInt64Expr(&rHi, &rLo, env, e);
2074      addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2075      return res;
2076   }
2077
2078   /* 64-bit load */
2079   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2080      HReg res = newVRegD(env);
2081      ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2082      vassert(ty == Ity_I64);
2083      addInstr(env, ARMInstr_NLdStD(True, res, am));
2084      return res;
2085   }
2086
2087   /* 64-bit GET */
2088   if (e->tag == Iex_Get) {
2089      HReg addr = newVRegI(env);
2090      HReg res = newVRegD(env);
2091      vassert(ty == Ity_I64);
2092      addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2093      addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2094      return res;
2095   }
2096
2097   /* --------- BINARY ops --------- */
2098   if (e->tag == Iex_Binop) {
2099      switch (e->Iex.Binop.op) {
2100
2101         /* 32 x 32 -> 64 multiply */
2102         case Iop_MullS32:
2103         case Iop_MullU32: {
2104            HReg rLo, rHi;
2105            HReg res = newVRegD(env);
2106            iselInt64Expr(&rHi, &rLo, env, e);
2107            addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2108            return res;
2109         }
2110
2111         case Iop_And64: {
2112            HReg res = newVRegD(env);
2113            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2114            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2115            addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2116                                           res, argL, argR, 4, False));
2117            return res;
2118         }
2119         case Iop_Or64: {
2120            HReg res = newVRegD(env);
2121            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2122            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2123            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2124                                           res, argL, argR, 4, False));
2125            return res;
2126         }
2127         case Iop_Xor64: {
2128            HReg res = newVRegD(env);
2129            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2130            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2131            addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2132                                           res, argL, argR, 4, False));
2133            return res;
2134         }
2135
2136         /* 32HLto64(e1,e2) */
2137         case Iop_32HLto64: {
2138            HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2139            HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2140            HReg res = newVRegD(env);
2141            addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2142            return res;
2143         }
2144
2145         case Iop_Add8x8:
2146         case Iop_Add16x4:
2147         case Iop_Add32x2:
2148         case Iop_Add64: {
2149            HReg res = newVRegD(env);
2150            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2151            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2152            UInt size;
2153            switch (e->Iex.Binop.op) {
2154               case Iop_Add8x8: size = 0; break;
2155               case Iop_Add16x4: size = 1; break;
2156               case Iop_Add32x2: size = 2; break;
2157               case Iop_Add64: size = 3; break;
2158               default: vassert(0);
2159            }
2160            addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2161                                           res, argL, argR, size, False));
2162            return res;
2163         }
2164         case Iop_Add32Fx2: {
2165            HReg res = newVRegD(env);
2166            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2167            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2168            UInt size = 0;
2169            addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2170                                           res, argL, argR, size, False));
2171            return res;
2172         }
2173         case Iop_Recps32Fx2: {
2174            HReg res = newVRegD(env);
2175            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2176            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2177            UInt size = 0;
2178            addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2179                                           res, argL, argR, size, False));
2180            return res;
2181         }
2182         case Iop_Rsqrts32Fx2: {
2183            HReg res = newVRegD(env);
2184            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2185            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2186            UInt size = 0;
2187            addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2188                                           res, argL, argR, size, False));
2189            return res;
2190         }
2191         case Iop_InterleaveOddLanes8x8:
2192         case Iop_InterleaveOddLanes16x4:
2193         case Iop_InterleaveLO32x2:
2194         case Iop_InterleaveEvenLanes8x8:
2195         case Iop_InterleaveEvenLanes16x4:
2196         case Iop_InterleaveHI32x2: {
2197            HReg tmp = newVRegD(env);
2198            HReg res = newVRegD(env);
2199            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2200            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2201            UInt size;
2202            UInt is_lo;
2203            switch (e->Iex.Binop.op) {
2204               case Iop_InterleaveOddLanes8x8: is_lo = 1; size = 0; break;
2205               case Iop_InterleaveEvenLanes8x8: is_lo = 0; size = 0; break;
2206               case Iop_InterleaveOddLanes16x4: is_lo = 1; size = 1; break;
2207               case Iop_InterleaveEvenLanes16x4: is_lo = 0; size = 1; break;
2208               case Iop_InterleaveLO32x2: is_lo = 1; size = 2; break;
2209               case Iop_InterleaveHI32x2: is_lo = 0; size = 2; break;
2210               default: vassert(0);
2211            }
2212            if (is_lo) {
2213               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2214                                             tmp, argL, 4, False));
2215               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2216                                             res, argR, 4, False));
2217               addInstr(env, ARMInstr_NDual(ARMneon_TRN,
2218                                            res, tmp, size, False));
2219            } else {
2220               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2221                                             tmp, argR, 4, False));
2222               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2223                                             res, argL, 4, False));
2224               addInstr(env, ARMInstr_NDual(ARMneon_TRN,
2225                                            tmp, res, size, False));
2226            }
2227            return res;
2228         }
2229         case Iop_InterleaveHI8x8:
2230         case Iop_InterleaveHI16x4:
2231         case Iop_InterleaveLO8x8:
2232         case Iop_InterleaveLO16x4: {
2233            HReg tmp = newVRegD(env);
2234            HReg res = newVRegD(env);
2235            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2236            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2237            UInt size;
2238            UInt is_lo;
2239            switch (e->Iex.Binop.op) {
2240               case Iop_InterleaveHI8x8: is_lo = 1; size = 0; break;
2241               case Iop_InterleaveLO8x8: is_lo = 0; size = 0; break;
2242               case Iop_InterleaveHI16x4: is_lo = 1; size = 1; break;
2243               case Iop_InterleaveLO16x4: is_lo = 0; size = 1; break;
2244               default: vassert(0);
2245            }
2246            if (is_lo) {
2247               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2248                                             tmp, argL, 4, False));
2249               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2250                                             res, argR, 4, False));
2251               addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
2252                                            res, tmp, size, False));
2253            } else {
2254               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2255                                             tmp, argR, 4, False));
2256               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2257                                             res, argL, 4, False));
2258               addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
2259                                            tmp, res, size, False));
2260            }
2261            return res;
2262         }
2263         case Iop_CatOddLanes8x8:
2264         case Iop_CatOddLanes16x4:
2265         case Iop_CatEvenLanes8x8:
2266         case Iop_CatEvenLanes16x4: {
2267            HReg tmp = newVRegD(env);
2268            HReg res = newVRegD(env);
2269            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2270            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2271            UInt size;
2272            UInt is_lo;
2273            switch (e->Iex.Binop.op) {
2274               case Iop_CatOddLanes8x8: is_lo = 1; size = 0; break;
2275               case Iop_CatEvenLanes8x8: is_lo = 0; size = 0; break;
2276               case Iop_CatOddLanes16x4: is_lo = 1; size = 1; break;
2277               case Iop_CatEvenLanes16x4: is_lo = 0; size = 1; break;
2278               default: vassert(0);
2279            }
2280            if (is_lo) {
2281               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2282                                             tmp, argL, 4, False));
2283               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2284                                             res, argR, 4, False));
2285               addInstr(env, ARMInstr_NDual(ARMneon_UZP,
2286                                            res, tmp, size, False));
2287            } else {
2288               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2289                                             tmp, argR, 4, False));
2290               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
2291                                             res, argL, 4, False));
2292               addInstr(env, ARMInstr_NDual(ARMneon_UZP,
2293                                            tmp, res, size, False));
2294            }
2295            return res;
2296         }
2297         case Iop_QAdd8Ux8:
2298         case Iop_QAdd16Ux4:
2299         case Iop_QAdd32Ux2:
2300         case Iop_QAdd64Ux1: {
2301            HReg res = newVRegD(env);
2302            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2303            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2304            UInt size;
2305            switch (e->Iex.Binop.op) {
2306               case Iop_QAdd8Ux8: size = 0; break;
2307               case Iop_QAdd16Ux4: size = 1; break;
2308               case Iop_QAdd32Ux2: size = 2; break;
2309               case Iop_QAdd64Ux1: size = 3; break;
2310               default: vassert(0);
2311            }
2312            addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2313                                           res, argL, argR, size, False));
2314            return res;
2315         }
2316         case Iop_QAdd8Sx8:
2317         case Iop_QAdd16Sx4:
2318         case Iop_QAdd32Sx2:
2319         case Iop_QAdd64Sx1: {
2320            HReg res = newVRegD(env);
2321            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2322            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2323            UInt size;
2324            switch (e->Iex.Binop.op) {
2325               case Iop_QAdd8Sx8: size = 0; break;
2326               case Iop_QAdd16Sx4: size = 1; break;
2327               case Iop_QAdd32Sx2: size = 2; break;
2328               case Iop_QAdd64Sx1: size = 3; break;
2329               default: vassert(0);
2330            }
2331            addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2332                                           res, argL, argR, size, False));
2333            return res;
2334         }
2335         case Iop_Sub8x8:
2336         case Iop_Sub16x4:
2337         case Iop_Sub32x2:
2338         case Iop_Sub64: {
2339            HReg res = newVRegD(env);
2340            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2341            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2342            UInt size;
2343            switch (e->Iex.Binop.op) {
2344               case Iop_Sub8x8: size = 0; break;
2345               case Iop_Sub16x4: size = 1; break;
2346               case Iop_Sub32x2: size = 2; break;
2347               case Iop_Sub64: size = 3; break;
2348               default: vassert(0);
2349            }
2350            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2351                                           res, argL, argR, size, False));
2352            return res;
2353         }
2354         case Iop_Sub32Fx2: {
2355            HReg res = newVRegD(env);
2356            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2357            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2358            UInt size = 0;
2359            addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2360                                           res, argL, argR, size, False));
2361            return res;
2362         }
2363         case Iop_QSub8Ux8:
2364         case Iop_QSub16Ux4:
2365         case Iop_QSub32Ux2:
2366         case Iop_QSub64Ux1: {
2367            HReg res = newVRegD(env);
2368            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2369            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2370            UInt size;
2371            switch (e->Iex.Binop.op) {
2372               case Iop_QSub8Ux8: size = 0; break;
2373               case Iop_QSub16Ux4: size = 1; break;
2374               case Iop_QSub32Ux2: size = 2; break;
2375               case Iop_QSub64Ux1: size = 3; break;
2376               default: vassert(0);
2377            }
2378            addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
2379                                           res, argL, argR, size, False));
2380            return res;
2381         }
2382         case Iop_QSub8Sx8:
2383         case Iop_QSub16Sx4:
2384         case Iop_QSub32Sx2:
2385         case Iop_QSub64Sx1: {
2386            HReg res = newVRegD(env);
2387            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2388            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2389            UInt size;
2390            switch (e->Iex.Binop.op) {
2391               case Iop_QSub8Sx8: size = 0; break;
2392               case Iop_QSub16Sx4: size = 1; break;
2393               case Iop_QSub32Sx2: size = 2; break;
2394               case Iop_QSub64Sx1: size = 3; break;
2395               default: vassert(0);
2396            }
2397            addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
2398                                           res, argL, argR, size, False));
2399            return res;
2400         }
2401         case Iop_Max8Ux8:
2402         case Iop_Max16Ux4:
2403         case Iop_Max32Ux2: {
2404            HReg res = newVRegD(env);
2405            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2406            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2407            UInt size;
2408            switch (e->Iex.Binop.op) {
2409               case Iop_Max8Ux8: size = 0; break;
2410               case Iop_Max16Ux4: size = 1; break;
2411               case Iop_Max32Ux2: size = 2; break;
2412               default: vassert(0);
2413            }
2414            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
2415                                           res, argL, argR, size, False));
2416            return res;
2417         }
2418         case Iop_Max8Sx8:
2419         case Iop_Max16Sx4:
2420         case Iop_Max32Sx2: {
2421            HReg res = newVRegD(env);
2422            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2423            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2424            UInt size;
2425            switch (e->Iex.Binop.op) {
2426               case Iop_Max8Sx8: size = 0; break;
2427               case Iop_Max16Sx4: size = 1; break;
2428               case Iop_Max32Sx2: size = 2; break;
2429               default: vassert(0);
2430            }
2431            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
2432                                           res, argL, argR, size, False));
2433            return res;
2434         }
2435         case Iop_Min8Ux8:
2436         case Iop_Min16Ux4:
2437         case Iop_Min32Ux2: {
2438            HReg res = newVRegD(env);
2439            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2440            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2441            UInt size;
2442            switch (e->Iex.Binop.op) {
2443               case Iop_Min8Ux8: size = 0; break;
2444               case Iop_Min16Ux4: size = 1; break;
2445               case Iop_Min32Ux2: size = 2; break;
2446               default: vassert(0);
2447            }
2448            addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
2449                                           res, argL, argR, size, False));
2450            return res;
2451         }
2452         case Iop_Min8Sx8:
2453         case Iop_Min16Sx4:
2454         case Iop_Min32Sx2: {
2455            HReg res = newVRegD(env);
2456            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2457            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2458            UInt size;
2459            switch (e->Iex.Binop.op) {
2460               case Iop_Min8Sx8: size = 0; break;
2461               case Iop_Min16Sx4: size = 1; break;
2462               case Iop_Min32Sx2: size = 2; break;
2463               default: vassert(0);
2464            }
2465            addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
2466                                           res, argL, argR, size, False));
2467            return res;
2468         }
2469         case Iop_Sar8x8:
2470         case Iop_Sar16x4:
2471         case Iop_Sar32x2: {
2472            HReg res = newVRegD(env);
2473            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2474            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2475            HReg argR2 = newVRegD(env);
2476            HReg zero = newVRegD(env);
2477            UInt size;
2478            switch (e->Iex.Binop.op) {
2479               case Iop_Sar8x8: size = 0; break;
2480               case Iop_Sar16x4: size = 1; break;
2481               case Iop_Sar32x2: size = 2; break;
2482               case Iop_Sar64: size = 3; break;
2483               default: vassert(0);
2484            }
2485            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2486            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2487                                           argR2, zero, argR, size, False));
2488            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2489                                          res, argL, argR2, size, False));
2490            return res;
2491         }
2492         case Iop_Sal8x8:
2493         case Iop_Sal16x4:
2494         case Iop_Sal32x2:
2495         case Iop_Sal64x1: {
2496            HReg res = newVRegD(env);
2497            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2498            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2499            UInt size;
2500            switch (e->Iex.Binop.op) {
2501               case Iop_Sal8x8: size = 0; break;
2502               case Iop_Sal16x4: size = 1; break;
2503               case Iop_Sal32x2: size = 2; break;
2504               case Iop_Sal64x1: size = 3; break;
2505               default: vassert(0);
2506            }
2507            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2508                                          res, argL, argR, size, False));
2509            return res;
2510         }
2511         case Iop_Shr8x8:
2512         case Iop_Shr16x4:
2513         case Iop_Shr32x2: {
2514            HReg res = newVRegD(env);
2515            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2516            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2517            HReg argR2 = newVRegD(env);
2518            HReg zero = newVRegD(env);
2519            UInt size;
2520            switch (e->Iex.Binop.op) {
2521               case Iop_Shr8x8: size = 0; break;
2522               case Iop_Shr16x4: size = 1; break;
2523               case Iop_Shr32x2: size = 2; break;
2524               default: vassert(0);
2525            }
2526            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
2527            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2528                                           argR2, zero, argR, size, False));
2529            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2530                                          res, argL, argR2, size, False));
2531            return res;
2532         }
2533         case Iop_Shl8x8:
2534         case Iop_Shl16x4:
2535         case Iop_Shl32x2: {
2536            HReg res = newVRegD(env);
2537            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2538            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2539            UInt size;
2540            switch (e->Iex.Binop.op) {
2541               case Iop_Shl8x8: size = 0; break;
2542               case Iop_Shl16x4: size = 1; break;
2543               case Iop_Shl32x2: size = 2; break;
2544               default: vassert(0);
2545            }
2546            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2547                                          res, argL, argR, size, False));
2548            return res;
2549         }
2550         case Iop_QShl8x8:
2551         case Iop_QShl16x4:
2552         case Iop_QShl32x2:
2553         case Iop_QShl64x1: {
2554            HReg res = newVRegD(env);
2555            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2556            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2557            UInt size;
2558            switch (e->Iex.Binop.op) {
2559               case Iop_QShl8x8: size = 0; break;
2560               case Iop_QShl16x4: size = 1; break;
2561               case Iop_QShl32x2: size = 2; break;
2562               case Iop_QShl64x1: size = 3; break;
2563               default: vassert(0);
2564            }
2565            addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
2566                                          res, argL, argR, size, False));
2567            return res;
2568         }
2569         case Iop_QSal8x8:
2570         case Iop_QSal16x4:
2571         case Iop_QSal32x2:
2572         case Iop_QSal64x1: {
2573            HReg res = newVRegD(env);
2574            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2575            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2576            UInt size;
2577            switch (e->Iex.Binop.op) {
2578               case Iop_QSal8x8: size = 0; break;
2579               case Iop_QSal16x4: size = 1; break;
2580               case Iop_QSal32x2: size = 2; break;
2581               case Iop_QSal64x1: size = 3; break;
2582               default: vassert(0);
2583            }
2584            addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
2585                                          res, argL, argR, size, False));
2586            return res;
2587         }
2588         case Iop_QShlN8x8:
2589         case Iop_QShlN16x4:
2590         case Iop_QShlN32x2:
2591         case Iop_QShlN64x1: {
2592            HReg res = newVRegD(env);
2593            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2594            UInt size, imm;
2595            if (e->Iex.Binop.arg2->tag != Iex_Const ||
2596                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2597               vpanic("ARM taget supports Iop_QShlNAxB with constant "
2598                      "second argument only\n");
2599            }
2600            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2601            switch (e->Iex.Binop.op) {
2602               case Iop_QShlN8x8: size = 8 | imm; break;
2603               case Iop_QShlN16x4: size = 16 | imm; break;
2604               case Iop_QShlN32x2: size = 32 | imm; break;
2605               case Iop_QShlN64x1: size = 64 | imm; break;
2606               default: vassert(0);
2607            }
2608            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
2609                                          res, argL, size, False));
2610            return res;
2611         }
2612         case Iop_QShlN8Sx8:
2613         case Iop_QShlN16Sx4:
2614         case Iop_QShlN32Sx2:
2615         case Iop_QShlN64Sx1: {
2616            HReg res = newVRegD(env);
2617            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2618            UInt size, imm;
2619            if (e->Iex.Binop.arg2->tag != Iex_Const ||
2620                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2621               vpanic("ARM taget supports Iop_QShlNAxB with constant "
2622                      "second argument only\n");
2623            }
2624            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2625            switch (e->Iex.Binop.op) {
2626               case Iop_QShlN8Sx8: size = 8 | imm; break;
2627               case Iop_QShlN16Sx4: size = 16 | imm; break;
2628               case Iop_QShlN32Sx2: size = 32 | imm; break;
2629               case Iop_QShlN64Sx1: size = 64 | imm; break;
2630               default: vassert(0);
2631            }
2632            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
2633                                          res, argL, size, False));
2634            return res;
2635         }
2636         case Iop_QSalN8x8:
2637         case Iop_QSalN16x4:
2638         case Iop_QSalN32x2:
2639         case Iop_QSalN64x1: {
2640            HReg res = newVRegD(env);
2641            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2642            UInt size, imm;
2643            if (e->Iex.Binop.arg2->tag != Iex_Const ||
2644                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
2645               vpanic("ARM taget supports Iop_QShlNAxB with constant "
2646                      "second argument only\n");
2647            }
2648            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
2649            switch (e->Iex.Binop.op) {
2650               case Iop_QSalN8x8: size = 8 | imm; break;
2651               case Iop_QSalN16x4: size = 16 | imm; break;
2652               case Iop_QSalN32x2: size = 32 | imm; break;
2653               case Iop_QSalN64x1: size = 64 | imm; break;
2654               default: vassert(0);
2655            }
2656            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
2657                                          res, argL, size, False));
2658            return res;
2659         }
2660         case Iop_ShrN8x8:
2661         case Iop_ShrN16x4:
2662         case Iop_ShrN32x2:
2663         case Iop_Shr64: {
2664            HReg res = newVRegD(env);
2665            HReg tmp = newVRegD(env);
2666            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2667            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2668            HReg argR2 = newVRegI(env);
2669            UInt size;
2670            switch (e->Iex.Binop.op) {
2671               case Iop_ShrN8x8: size = 0; break;
2672               case Iop_ShrN16x4: size = 1; break;
2673               case Iop_ShrN32x2: size = 2; break;
2674               case Iop_Shr64: size = 3; break;
2675               default: vassert(0);
2676            }
2677            addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2678            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2679            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2680                                          res, argL, tmp, size, False));
2681            return res;
2682         }
2683         case Iop_ShlN8x8:
2684         case Iop_ShlN16x4:
2685         case Iop_ShlN32x2:
2686         case Iop_Shl64: {
2687            HReg res = newVRegD(env);
2688            HReg tmp = newVRegD(env);
2689            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2690            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2691            UInt size;
2692            switch (e->Iex.Binop.op) {
2693               case Iop_ShlN8x8: size = 0; break;
2694               case Iop_ShlN16x4: size = 1; break;
2695               case Iop_ShlN32x2: size = 2; break;
2696               case Iop_Shl64: size = 3; break;
2697               default: vassert(0);
2698            }
2699            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, False));
2700            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
2701                                          res, argL, tmp, size, False));
2702            return res;
2703         }
2704         case Iop_SarN8x8:
2705         case Iop_SarN16x4:
2706         case Iop_SarN32x2:
2707         case Iop_Sar64: {
2708            HReg res = newVRegD(env);
2709            HReg tmp = newVRegD(env);
2710            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2711            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2712            HReg argR2 = newVRegI(env);
2713            UInt size;
2714            switch (e->Iex.Binop.op) {
2715               case Iop_SarN8x8: size = 0; break;
2716               case Iop_SarN16x4: size = 1; break;
2717               case Iop_SarN32x2: size = 2; break;
2718               case Iop_Sar64: size = 3; break;
2719               default: vassert(0);
2720            }
2721            addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
2722            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
2723            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
2724                                          res, argL, tmp, size, False));
2725            return res;
2726         }
2727         case Iop_CmpGT8Ux8:
2728         case Iop_CmpGT16Ux4:
2729         case Iop_CmpGT32Ux2: {
2730            HReg res = newVRegD(env);
2731            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2732            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2733            UInt size;
2734            switch (e->Iex.Binop.op) {
2735               case Iop_CmpGT8Ux8: size = 0; break;
2736               case Iop_CmpGT16Ux4: size = 1; break;
2737               case Iop_CmpGT32Ux2: size = 2; break;
2738               default: vassert(0);
2739            }
2740            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
2741                                           res, argL, argR, size, False));
2742            return res;
2743         }
2744         case Iop_CmpGT8Sx8:
2745         case Iop_CmpGT16Sx4:
2746         case Iop_CmpGT32Sx2: {
2747            HReg res = newVRegD(env);
2748            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2749            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2750            UInt size;
2751            switch (e->Iex.Binop.op) {
2752               case Iop_CmpGT8Sx8: size = 0; break;
2753               case Iop_CmpGT16Sx4: size = 1; break;
2754               case Iop_CmpGT32Sx2: size = 2; break;
2755               default: vassert(0);
2756            }
2757            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
2758                                           res, argL, argR, size, False));
2759            return res;
2760         }
2761         case Iop_CmpEQ8x8:
2762         case Iop_CmpEQ16x4:
2763         case Iop_CmpEQ32x2: {
2764            HReg res = newVRegD(env);
2765            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2766            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2767            UInt size;
2768            switch (e->Iex.Binop.op) {
2769               case Iop_CmpEQ8x8: size = 0; break;
2770               case Iop_CmpEQ16x4: size = 1; break;
2771               case Iop_CmpEQ32x2: size = 2; break;
2772               default: vassert(0);
2773            }
2774            addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
2775                                           res, argL, argR, size, False));
2776            return res;
2777         }
2778         case Iop_Mul8x8:
2779         case Iop_Mul16x4:
2780         case Iop_Mul32x2: {
2781            HReg res = newVRegD(env);
2782            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2783            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2784            UInt size = 0;
2785            switch(e->Iex.Binop.op) {
2786               case Iop_Mul8x8: size = 0; break;
2787               case Iop_Mul16x4: size = 1; break;
2788               case Iop_Mul32x2: size = 2; break;
2789               default: vassert(0);
2790            }
2791            addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
2792                                           res, argL, argR, size, False));
2793            return res;
2794         }
2795         case Iop_Mul32Fx2: {
2796            HReg res = newVRegD(env);
2797            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2798            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2799            UInt size = 0;
2800            addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
2801                                           res, argL, argR, size, False));
2802            return res;
2803         }
2804         case Iop_QDMulHi16Sx4:
2805         case Iop_QDMulHi32Sx2: {
2806            HReg res = newVRegD(env);
2807            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2808            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2809            UInt size = 0;
2810            switch(e->Iex.Binop.op) {
2811               case Iop_QDMulHi16Sx4: size = 1; break;
2812               case Iop_QDMulHi32Sx2: size = 2; break;
2813               default: vassert(0);
2814            }
2815            addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
2816                                           res, argL, argR, size, False));
2817            return res;
2818         }
2819
2820         case Iop_QRDMulHi16Sx4:
2821         case Iop_QRDMulHi32Sx2: {
2822            HReg res = newVRegD(env);
2823            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2824            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2825            UInt size = 0;
2826            switch(e->Iex.Binop.op) {
2827               case Iop_QRDMulHi16Sx4: size = 1; break;
2828               case Iop_QRDMulHi32Sx2: size = 2; break;
2829               default: vassert(0);
2830            }
2831            addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
2832                                           res, argL, argR, size, False));
2833            return res;
2834         }
2835
2836         case Iop_PwAdd8x8:
2837         case Iop_PwAdd16x4:
2838         case Iop_PwAdd32x2: {
2839            HReg res = newVRegD(env);
2840            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2841            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2842            UInt size = 0;
2843            switch(e->Iex.Binop.op) {
2844               case Iop_PwAdd8x8: size = 0; break;
2845               case Iop_PwAdd16x4: size = 1; break;
2846               case Iop_PwAdd32x2: size = 2; break;
2847               default: vassert(0);
2848            }
2849            addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
2850                                           res, argL, argR, size, False));
2851            return res;
2852         }
2853         case Iop_PwAdd32Fx2: {
2854            HReg res = newVRegD(env);
2855            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2856            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2857            UInt size = 0;
2858            addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
2859                                           res, argL, argR, size, False));
2860            return res;
2861         }
2862         case Iop_PwMin8Ux8:
2863         case Iop_PwMin16Ux4:
2864         case Iop_PwMin32Ux2: {
2865            HReg res = newVRegD(env);
2866            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2867            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2868            UInt size = 0;
2869            switch(e->Iex.Binop.op) {
2870               case Iop_PwMin8Ux8: size = 0; break;
2871               case Iop_PwMin16Ux4: size = 1; break;
2872               case Iop_PwMin32Ux2: size = 2; break;
2873               default: vassert(0);
2874            }
2875            addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
2876                                           res, argL, argR, size, False));
2877            return res;
2878         }
2879         case Iop_PwMin8Sx8:
2880         case Iop_PwMin16Sx4:
2881         case Iop_PwMin32Sx2: {
2882            HReg res = newVRegD(env);
2883            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2884            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2885            UInt size = 0;
2886            switch(e->Iex.Binop.op) {
2887               case Iop_PwMin8Sx8: size = 0; break;
2888               case Iop_PwMin16Sx4: size = 1; break;
2889               case Iop_PwMin32Sx2: size = 2; break;
2890               default: vassert(0);
2891            }
2892            addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
2893                                           res, argL, argR, size, False));
2894            return res;
2895         }
2896         case Iop_PwMax8Ux8:
2897         case Iop_PwMax16Ux4:
2898         case Iop_PwMax32Ux2: {
2899            HReg res = newVRegD(env);
2900            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2901            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2902            UInt size = 0;
2903            switch(e->Iex.Binop.op) {
2904               case Iop_PwMax8Ux8: size = 0; break;
2905               case Iop_PwMax16Ux4: size = 1; break;
2906               case Iop_PwMax32Ux2: size = 2; break;
2907               default: vassert(0);
2908            }
2909            addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
2910                                           res, argL, argR, size, False));
2911            return res;
2912         }
2913         case Iop_PwMax8Sx8:
2914         case Iop_PwMax16Sx4:
2915         case Iop_PwMax32Sx2: {
2916            HReg res = newVRegD(env);
2917            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2918            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2919            UInt size = 0;
2920            switch(e->Iex.Binop.op) {
2921               case Iop_PwMax8Sx8: size = 0; break;
2922               case Iop_PwMax16Sx4: size = 1; break;
2923               case Iop_PwMax32Sx2: size = 2; break;
2924               default: vassert(0);
2925            }
2926            addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
2927                                           res, argL, argR, size, False));
2928            return res;
2929         }
2930         case Iop_Perm8x8: {
2931            HReg res = newVRegD(env);
2932            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2933            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2934            addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
2935                                           res, argL, argR, 0, False));
2936            return res;
2937         }
2938         case Iop_PolynomialMul8x8: {
2939            HReg res = newVRegD(env);
2940            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2941            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2942            UInt size = 0;
2943            addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
2944                                           res, argL, argR, size, False));
2945            return res;
2946         }
2947         case Iop_Max32Fx2: {
2948            HReg res = newVRegD(env);
2949            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2950            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2951            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
2952                                           res, argL, argR, 2, False));
2953            return res;
2954         }
2955         case Iop_Min32Fx2: {
2956            HReg res = newVRegD(env);
2957            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2958            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2959            addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
2960                                           res, argL, argR, 2, False));
2961            return res;
2962         }
2963         case Iop_PwMax32Fx2: {
2964            HReg res = newVRegD(env);
2965            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2966            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2967            addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
2968                                           res, argL, argR, 2, False));
2969            return res;
2970         }
2971         case Iop_PwMin32Fx2: {
2972            HReg res = newVRegD(env);
2973            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2974            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2975            addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
2976                                           res, argL, argR, 2, False));
2977            return res;
2978         }
2979         case Iop_CmpGT32Fx2: {
2980            HReg res = newVRegD(env);
2981            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2982            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2983            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
2984                                           res, argL, argR, 2, False));
2985            return res;
2986         }
2987         case Iop_CmpGE32Fx2: {
2988            HReg res = newVRegD(env);
2989            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2990            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2991            addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
2992                                           res, argL, argR, 2, False));
2993            return res;
2994         }
2995         case Iop_CmpEQ32Fx2: {
2996            HReg res = newVRegD(env);
2997            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2998            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2999            addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3000                                           res, argL, argR, 2, False));
3001            return res;
3002         }
3003         case Iop_F32ToFixed32Ux2_RZ:
3004         case Iop_F32ToFixed32Sx2_RZ:
3005         case Iop_Fixed32UToF32x2_RN:
3006         case Iop_Fixed32SToF32x2_RN: {
3007            HReg res = newVRegD(env);
3008            HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3009            ARMNeonUnOp op;
3010            UInt imm6;
3011            if (e->Iex.Binop.arg2->tag != Iex_Const ||
3012               typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3013                  vpanic("ARM supports FP <-> Fixed conversion with constant "
3014                         "second argument less than 33 only\n");
3015            }
3016            imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3017            vassert(imm6 <= 32 && imm6 > 0);
3018            imm6 = 64 - imm6;
3019            switch(e->Iex.Binop.op) {
3020               case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3021               case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3022               case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3023               case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3024               default: vassert(0);
3025            }
3026            addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3027            return res;
3028         }
3029         /*
3030         FIXME: is this here or not?
3031         case Iop_VDup8x8:
3032         case Iop_VDup16x4:
3033         case Iop_VDup32x2: {
3034            HReg res = newVRegD(env);
3035            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3036            UInt index;
3037            UInt imm4;
3038            UInt size = 0;
3039            if (e->Iex.Binop.arg2->tag != Iex_Const ||
3040               typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3041                  vpanic("ARM supports Iop_VDup with constant "
3042                         "second argument less than 16 only\n");
3043            }
3044            index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3045            switch(e->Iex.Binop.op) {
3046               case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3047               case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3048               case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3049               default: vassert(0);
3050            }
3051            if (imm4 >= 16) {
3052               vpanic("ARM supports Iop_VDup with constant "
3053                      "second argument less than 16 only\n");
3054            }
3055            addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3056                                          res, argL, imm4, False));
3057            return res;
3058         }
3059         */
3060         default:
3061            break;
3062      }
3063   }
3064
3065   /* --------- UNARY ops --------- */
3066   if (e->tag == Iex_Unop) {
3067      switch (e->Iex.Unop.op) {
3068
3069         /* ReinterpF64asI64 */
3070         case Iop_ReinterpF64asI64:
3071         /* Left64(e) */
3072         case Iop_Left64:
3073         /* CmpwNEZ64(e) */
3074         //case Iop_CmpwNEZ64:
3075         case Iop_1Sto64: {
3076            HReg rLo, rHi;
3077            HReg res = newVRegD(env);
3078            iselInt64Expr(&rHi, &rLo, env, e);
3079            addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3080            return res;
3081         }
3082         case Iop_Not64: {
3083            DECLARE_PATTERN(p_veqz_8x8);
3084            DECLARE_PATTERN(p_veqz_16x4);
3085            DECLARE_PATTERN(p_veqz_32x2);
3086            DECLARE_PATTERN(p_vcge_8sx8);
3087            DECLARE_PATTERN(p_vcge_16sx4);
3088            DECLARE_PATTERN(p_vcge_32sx2);
3089            DECLARE_PATTERN(p_vcge_8ux8);
3090            DECLARE_PATTERN(p_vcge_16ux4);
3091            DECLARE_PATTERN(p_vcge_32ux2);
3092            DEFINE_PATTERN(p_veqz_8x8,
3093                  unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3094            DEFINE_PATTERN(p_veqz_16x4,
3095                  unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3096            DEFINE_PATTERN(p_veqz_32x2,
3097                  unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3098            DEFINE_PATTERN(p_vcge_8sx8,
3099                  unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3100            DEFINE_PATTERN(p_vcge_16sx4,
3101                  unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3102            DEFINE_PATTERN(p_vcge_32sx2,
3103                  unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3104            DEFINE_PATTERN(p_vcge_8ux8,
3105                  unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3106            DEFINE_PATTERN(p_vcge_16ux4,
3107                  unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3108            DEFINE_PATTERN(p_vcge_32ux2,
3109                  unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3110            if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3111               HReg res = newVRegD(env);
3112               HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3113               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3114               return res;
3115            } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3116               HReg res = newVRegD(env);
3117               HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3118               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3119               return res;
3120            } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3121               HReg res = newVRegD(env);
3122               HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3123               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3124               return res;
3125            } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3126               HReg res = newVRegD(env);
3127               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3128               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3129               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3130                                              res, argL, argR, 0, False));
3131               return res;
3132            } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3133               HReg res = newVRegD(env);
3134               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3135               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3136               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3137                                              res, argL, argR, 1, False));
3138               return res;
3139            } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3140               HReg res = newVRegD(env);
3141               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3142               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3143               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3144                                              res, argL, argR, 2, False));
3145               return res;
3146            } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3147               HReg res = newVRegD(env);
3148               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3149               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3150               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3151                                              res, argL, argR, 0, False));
3152               return res;
3153            } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3154               HReg res = newVRegD(env);
3155               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3156               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3157               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3158                                              res, argL, argR, 1, False));
3159               return res;
3160            } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3161               HReg res = newVRegD(env);
3162               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3163               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3164               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3165                                              res, argL, argR, 2, False));
3166               return res;
3167            } else {
3168               HReg res = newVRegD(env);
3169               HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3170               addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3171               return res;
3172            }
3173         }
3174         case Iop_Dup8x8:
3175         case Iop_Dup16x4:
3176         case Iop_Dup32x2: {
3177            HReg res, arg;
3178            UInt size;
3179            DECLARE_PATTERN(p_vdup_8x8);
3180            DECLARE_PATTERN(p_vdup_16x4);
3181            DECLARE_PATTERN(p_vdup_32x2);
3182            DEFINE_PATTERN(p_vdup_8x8,
3183                  unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3184            DEFINE_PATTERN(p_vdup_16x4,
3185                  unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3186            DEFINE_PATTERN(p_vdup_32x2,
3187                  unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3188            if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3189               UInt index;
3190               UInt imm4;
3191               if (mi.bindee[1]->tag == Iex_Const &&
3192                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3193                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3194                  imm4 = (index << 1) + 1;
3195                  if (index < 8) {
3196                     res = newVRegD(env);
3197                     arg = iselNeon64Expr(env, mi.bindee[0]);
3198                     addInstr(env, ARMInstr_NUnaryS(
3199                                      ARMneon_VDUP,
3200                                      mkARMNRS(ARMNRS_Reg, res, 0),
3201                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3202                                      imm4, False
3203                             ));
3204                     return res;
3205                  }
3206               }
3207            } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3208               UInt index;
3209               UInt imm4;
3210               if (mi.bindee[1]->tag == Iex_Const &&
3211                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3212                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3213                  imm4 = (index << 2) + 2;
3214                  if (index < 4) {
3215                     res = newVRegD(env);
3216                     arg = iselNeon64Expr(env, mi.bindee[0]);
3217                     addInstr(env, ARMInstr_NUnaryS(
3218                                      ARMneon_VDUP,
3219                                      mkARMNRS(ARMNRS_Reg, res, 0),
3220                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3221                                      imm4, False
3222                             ));
3223                     return res;
3224                  }
3225               }
3226            } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3227               UInt index;
3228               UInt imm4;
3229               if (mi.bindee[1]->tag == Iex_Const &&
3230                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3231                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3232                  imm4 = (index << 3) + 4;
3233                  if (index < 2) {
3234                     res = newVRegD(env);
3235                     arg = iselNeon64Expr(env, mi.bindee[0]);
3236                     addInstr(env, ARMInstr_NUnaryS(
3237                                      ARMneon_VDUP,
3238                                      mkARMNRS(ARMNRS_Reg, res, 0),
3239                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3240                                      imm4, False
3241                             ));
3242                     return res;
3243                  }
3244               }
3245            }
3246            arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3247            res = newVRegD(env);
3248            switch (e->Iex.Unop.op) {
3249               case Iop_Dup8x8: size = 0; break;
3250               case Iop_Dup16x4: size = 1; break;
3251               case Iop_Dup32x2: size = 2; break;
3252               default: vassert(0);
3253            }
3254            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3255            return res;
3256         }
3257         case Iop_Abs8x8:
3258         case Iop_Abs16x4:
3259         case Iop_Abs32x2: {
3260            HReg res = newVRegD(env);
3261            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3262            UInt size = 0;
3263            switch(e->Iex.Binop.op) {
3264               case Iop_Abs8x8: size = 0; break;
3265               case Iop_Abs16x4: size = 1; break;
3266               case Iop_Abs32x2: size = 2; break;
3267               default: vassert(0);
3268            }
3269            addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3270            return res;
3271         }
3272         case Iop_Reverse64_8x8:
3273         case Iop_Reverse64_16x4:
3274         case Iop_Reverse64_32x2: {
3275            HReg res = newVRegD(env);
3276            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3277            UInt size = 0;
3278            switch(e->Iex.Binop.op) {
3279               case Iop_Reverse64_8x8: size = 0; break;
3280               case Iop_Reverse64_16x4: size = 1; break;
3281               case Iop_Reverse64_32x2: size = 2; break;
3282               default: vassert(0);
3283            }
3284            addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3285                                          res, arg, size, False));
3286            return res;
3287         }
3288         case Iop_Reverse32_8x8:
3289         case Iop_Reverse32_16x4: {
3290            HReg res = newVRegD(env);
3291            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3292            UInt size = 0;
3293            switch(e->Iex.Binop.op) {
3294               case Iop_Reverse32_8x8: size = 0; break;
3295               case Iop_Reverse32_16x4: size = 1; break;
3296               default: vassert(0);
3297            }
3298            addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3299                                          res, arg, size, False));
3300            return res;
3301         }
3302         case Iop_Reverse16_8x8: {
3303            HReg res = newVRegD(env);
3304            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3305            UInt size = 0;
3306            addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3307                                          res, arg, size, False));
3308            return res;
3309         }
3310         case Iop_CmpwNEZ64: {
3311            HReg x_lsh = newVRegD(env);
3312            HReg x_rsh = newVRegD(env);
3313            HReg lsh_amt = newVRegD(env);
3314            HReg rsh_amt = newVRegD(env);
3315            HReg zero = newVRegD(env);
3316            HReg tmp = newVRegD(env);
3317            HReg tmp2 = newVRegD(env);
3318            HReg res = newVRegD(env);
3319            HReg x = newVRegD(env);
3320            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3321            addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3322            addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3323            addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3324            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3325            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3326                                           rsh_amt, zero, lsh_amt, 2, False));
3327            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3328                                          x_lsh, x, lsh_amt, 3, False));
3329            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3330                                          x_rsh, x, rsh_amt, 3, False));
3331            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3332                                           tmp, x_lsh, x_rsh, 0, False));
3333            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3334                                           res, tmp, x, 0, False));
3335            return res;
3336         }
3337         case Iop_CmpNEZ8x8:
3338         case Iop_CmpNEZ16x4:
3339         case Iop_CmpNEZ32x2: {
3340            HReg res = newVRegD(env);
3341            HReg tmp = newVRegD(env);
3342            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3343            UInt size;
3344            switch (e->Iex.Unop.op) {
3345               case Iop_CmpNEZ8x8: size = 0; break;
3346               case Iop_CmpNEZ16x4: size = 1; break;
3347               case Iop_CmpNEZ32x2: size = 2; break;
3348               default: vassert(0);
3349            }
3350            addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
3351            addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
3352            return res;
3353         }
3354         case Iop_Shorten16x8:
3355         case Iop_Shorten32x4:
3356         case Iop_Shorten64x2: {
3357            HReg res = newVRegD(env);
3358            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3359            UInt size = 0;
3360            switch(e->Iex.Binop.op) {
3361               case Iop_Shorten16x8: size = 0; break;
3362               case Iop_Shorten32x4: size = 1; break;
3363               case Iop_Shorten64x2: size = 2; break;
3364               default: vassert(0);
3365            }
3366            addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
3367                                          res, arg, size, False));
3368            return res;
3369         }
3370         case Iop_QShortenS16Sx8:
3371         case Iop_QShortenS32Sx4:
3372         case Iop_QShortenS64Sx2: {
3373            HReg res = newVRegD(env);
3374            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3375            UInt size = 0;
3376            switch(e->Iex.Binop.op) {
3377               case Iop_QShortenS16Sx8: size = 0; break;
3378               case Iop_QShortenS32Sx4: size = 1; break;
3379               case Iop_QShortenS64Sx2: size = 2; break;
3380               default: vassert(0);
3381            }
3382            addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
3383                                          res, arg, size, False));
3384            return res;
3385         }
3386         case Iop_QShortenU16Sx8:
3387         case Iop_QShortenU32Sx4:
3388         case Iop_QShortenU64Sx2: {
3389            HReg res = newVRegD(env);
3390            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3391            UInt size = 0;
3392            switch(e->Iex.Binop.op) {
3393               case Iop_QShortenU16Sx8: size = 0; break;
3394               case Iop_QShortenU32Sx4: size = 1; break;
3395               case Iop_QShortenU64Sx2: size = 2; break;
3396               default: vassert(0);
3397            }
3398            addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
3399                                          res, arg, size, False));
3400            return res;
3401         }
3402         case Iop_QShortenU16Ux8:
3403         case Iop_QShortenU32Ux4:
3404         case Iop_QShortenU64Ux2: {
3405            HReg res = newVRegD(env);
3406            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3407            UInt size = 0;
3408            switch(e->Iex.Binop.op) {
3409               case Iop_QShortenU16Ux8: size = 0; break;
3410               case Iop_QShortenU32Ux4: size = 1; break;
3411               case Iop_QShortenU64Ux2: size = 2; break;
3412               default: vassert(0);
3413            }
3414            addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
3415                                          res, arg, size, False));
3416            return res;
3417         }
3418         case Iop_PwAddL8Sx8:
3419         case Iop_PwAddL16Sx4:
3420         case Iop_PwAddL32Sx2: {
3421            HReg res = newVRegD(env);
3422            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3423            UInt size = 0;
3424            switch(e->Iex.Binop.op) {
3425               case Iop_PwAddL8Sx8: size = 0; break;
3426               case Iop_PwAddL16Sx4: size = 1; break;
3427               case Iop_PwAddL32Sx2: size = 2; break;
3428               default: vassert(0);
3429            }
3430            addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
3431                                          res, arg, size, False));
3432            return res;
3433         }
3434         case Iop_PwAddL8Ux8:
3435         case Iop_PwAddL16Ux4:
3436         case Iop_PwAddL32Ux2: {
3437            HReg res = newVRegD(env);
3438            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3439            UInt size = 0;
3440            switch(e->Iex.Binop.op) {
3441               case Iop_PwAddL8Ux8: size = 0; break;
3442               case Iop_PwAddL16Ux4: size = 1; break;
3443               case Iop_PwAddL32Ux2: size = 2; break;
3444               default: vassert(0);
3445            }
3446            addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
3447                                          res, arg, size, False));
3448            return res;
3449         }
3450         case Iop_Cnt8x8: {
3451            HReg res = newVRegD(env);
3452            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3453            UInt size = 0;
3454            addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
3455                                          res, arg, size, False));
3456            return res;
3457         }
3458         case Iop_Clz8Sx8:
3459         case Iop_Clz16Sx4:
3460         case Iop_Clz32Sx2: {
3461            HReg res = newVRegD(env);
3462            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3463            UInt size = 0;
3464            switch(e->Iex.Binop.op) {
3465               case Iop_Clz8Sx8: size = 0; break;
3466               case Iop_Clz16Sx4: size = 1; break;
3467               case Iop_Clz32Sx2: size = 2; break;
3468               default: vassert(0);
3469            }
3470            addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
3471                                          res, arg, size, False));
3472            return res;
3473         }
3474         case Iop_Cls8Sx8:
3475         case Iop_Cls16Sx4:
3476         case Iop_Cls32Sx2: {
3477            HReg res = newVRegD(env);
3478            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3479            UInt size = 0;
3480            switch(e->Iex.Binop.op) {
3481               case Iop_Cls8Sx8: size = 0; break;
3482               case Iop_Cls16Sx4: size = 1; break;
3483               case Iop_Cls32Sx2: size = 2; break;
3484               default: vassert(0);
3485            }
3486            addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
3487                                          res, arg, size, False));
3488            return res;
3489         }
3490         case Iop_FtoI32Sx2_RZ: {
3491            HReg res = newVRegD(env);
3492            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3493            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
3494                                          res, arg, 2, False));
3495            return res;
3496         }
3497         case Iop_FtoI32Ux2_RZ: {
3498            HReg res = newVRegD(env);
3499            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3500            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
3501                                          res, arg, 2, False));
3502            return res;
3503         }
3504         case Iop_I32StoFx2: {
3505            HReg res = newVRegD(env);
3506            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3507            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
3508                                          res, arg, 2, False));
3509            return res;
3510         }
3511         case Iop_I32UtoFx2: {
3512            HReg res = newVRegD(env);
3513            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3514            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
3515                                          res, arg, 2, False));
3516            return res;
3517         }
3518         case Iop_F32toF16x4: {
3519            HReg res = newVRegD(env);
3520            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3521            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
3522                                          res, arg, 2, False));
3523            return res;
3524         }
3525         case Iop_Recip32Fx2: {
3526            HReg res = newVRegD(env);
3527            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3528            addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
3529                                          res, argL, 0, False));
3530            return res;
3531         }
3532         case Iop_Recip32x2: {
3533            HReg res = newVRegD(env);
3534            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3535            addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
3536                                          res, argL, 0, False));
3537            return res;
3538         }
3539         case Iop_Abs32Fx2: {
3540            DECLARE_PATTERN(p_vabd_32fx2);
3541            DEFINE_PATTERN(p_vabd_32fx2,
3542                           unop(Iop_Abs32Fx2,
3543                                binop(Iop_Sub32Fx2,
3544                                      bind(0),
3545                                      bind(1))));
3546            if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
3547               HReg res = newVRegD(env);
3548               HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3549               HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3550               addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
3551                                              res, argL, argR, 0, False));
3552               return res;
3553            } else {
3554               HReg res = newVRegD(env);
3555               HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3556               addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
3557                                             res, arg, 0, False));
3558               return res;
3559            }
3560         }
3561         case Iop_Rsqrte32Fx2: {
3562            HReg res = newVRegD(env);
3563            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3564            addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
3565                                          res, arg, 0, False));
3566            return res;
3567         }
3568         case Iop_Rsqrte32x2: {
3569            HReg res = newVRegD(env);
3570            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3571            addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
3572                                          res, arg, 0, False));
3573            return res;
3574         }
3575         case Iop_Neg32Fx2: {
3576            HReg res = newVRegD(env);
3577            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3578            addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
3579                                          res, arg, 0, False));
3580            return res;
3581         }
3582         default:
3583            break;
3584      }
3585   } /* if (e->tag == Iex_Unop) */
3586
3587   if (e->tag == Iex_Triop) {
3588      switch (e->Iex.Triop.op) {
3589         case Iop_Extract64: {
3590            HReg res = newVRegD(env);
3591            HReg argL = iselNeon64Expr(env, e->Iex.Triop.arg1);
3592            HReg argR = iselNeon64Expr(env, e->Iex.Triop.arg2);
3593            UInt imm4;
3594            if (e->Iex.Triop.arg3->tag != Iex_Const ||
3595                typeOfIRExpr(env->type_env, e->Iex.Triop.arg3) != Ity_I8) {
3596               vpanic("ARM target supports Iop_Extract64 with constant "
3597                      "third argument less than 16 only\n");
3598            }
3599            imm4 = e->Iex.Triop.arg3->Iex.Const.con->Ico.U8;
3600            if (imm4 >= 8) {
3601               vpanic("ARM target supports Iop_Extract64 with constant "
3602                      "third argument less than 16 only\n");
3603            }
3604            addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
3605                                           res, argL, argR, imm4, False));
3606            return res;
3607         }
3608         case Iop_SetElem8x8:
3609         case Iop_SetElem16x4:
3610         case Iop_SetElem32x2: {
3611            HReg res = newVRegD(env);
3612            HReg dreg = iselNeon64Expr(env, e->Iex.Triop.arg1);
3613            HReg arg = iselIntExpr_R(env, e->Iex.Triop.arg3);
3614            UInt index, size;
3615            if (e->Iex.Triop.arg2->tag != Iex_Const ||
3616                typeOfIRExpr(env->type_env, e->Iex.Triop.arg2) != Ity_I8) {
3617               vpanic("ARM target supports SetElem with constant "
3618                      "second argument only\n");
3619            }
3620            index = e->Iex.Triop.arg2->Iex.Const.con->Ico.U8;
3621            switch (e->Iex.Triop.op) {
3622               case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
3623               case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
3624               case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
3625               default: vassert(0);
3626            }
3627            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
3628            addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
3629                                           mkARMNRS(ARMNRS_Scalar, res, index),
3630                                           mkARMNRS(ARMNRS_Reg, arg, 0),
3631                                           size, False));
3632            return res;
3633         }
3634         default:
3635            break;
3636      }
3637   }
3638
3639   /* --------- MULTIPLEX --------- */
3640   if (e->tag == Iex_Mux0X) {
3641      HReg rLo, rHi;
3642      HReg res = newVRegD(env);
3643      iselInt64Expr(&rHi, &rLo, env, e);
3644      addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3645      return res;
3646   }
3647
3648   ppIRExpr(e);
3649   vpanic("iselNeon64Expr");
3650}
3651
3652static HReg iselNeonExpr ( ISelEnv* env, IRExpr* e )
3653{
3654   HReg r = iselNeonExpr_wrk( env, e );
3655   vassert(hregClass(r) == HRcVec128);
3656   vassert(hregIsVirtual(r));
3657   return r;
3658}
3659
3660/* DO NOT CALL THIS DIRECTLY */
3661static HReg iselNeonExpr_wrk ( ISelEnv* env, IRExpr* e )
3662{
3663   IRType ty = typeOfIRExpr(env->type_env, e);
3664   MatchInfo mi;
3665   vassert(e);
3666   vassert(ty == Ity_V128);
3667
3668   if (e->tag == Iex_RdTmp) {
3669      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3670   }
3671
3672   if (e->tag == Iex_Const) {
      /* At the moment there should be no 128-bit constants in the IR
         generated for ARM during disassembly.  They are represented as
         an Iop_64HLtoV128 binary operation and are handled among the
         binary ops. */
      /* However, a zero constant can be created by Valgrind's internal
         optimizer. */
3677      if (e->Iex.Const.con->Ico.V128 == 0) {
3678         HReg res = newVRegV(env);
3679         addInstr(env, ARMInstr_NeonImm(res, ARMNImm_TI(0, 0)));
3680         return res;
3681      }
3682      ppIRExpr(e);
3683      vpanic("128-bit constant is not implemented");
3684   }
3685
3686   if (e->tag == Iex_Load) {
3687      HReg res = newVRegV(env);
3688      ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
3689      vassert(ty == Ity_V128);
3690      addInstr(env, ARMInstr_NLdStQ(True, res, am));
3691      return res;
3692   }
3693
3694   if (e->tag == Iex_Get) {
3695      HReg addr = newVRegI(env);
3696      HReg res = newVRegV(env);
3697      vassert(ty == Ity_V128);
3698      addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
3699      addInstr(env, ARMInstr_NLdStQ(True, res, mkARMAModeN_R(addr)));
3700      return res;
3701   }
3702
3703   if (e->tag == Iex_Unop) {
3704      switch (e->Iex.Unop.op) {
3705         case Iop_NotV128: {
3706            DECLARE_PATTERN(p_veqz_8x16);
3707            DECLARE_PATTERN(p_veqz_16x8);
3708            DECLARE_PATTERN(p_veqz_32x4);
3709            DECLARE_PATTERN(p_vcge_8sx16);
3710            DECLARE_PATTERN(p_vcge_16sx8);
3711            DECLARE_PATTERN(p_vcge_32sx4);
3712            DECLARE_PATTERN(p_vcge_8ux16);
3713            DECLARE_PATTERN(p_vcge_16ux8);
3714            DECLARE_PATTERN(p_vcge_32ux4);
3715            DEFINE_PATTERN(p_veqz_8x16,
3716                  unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
3717            DEFINE_PATTERN(p_veqz_16x8,
3718                  unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
3719            DEFINE_PATTERN(p_veqz_32x4,
3720                  unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
3721            DEFINE_PATTERN(p_vcge_8sx16,
3722                  unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
3723            DEFINE_PATTERN(p_vcge_16sx8,
3724                  unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
3725            DEFINE_PATTERN(p_vcge_32sx4,
3726                  unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
3727            DEFINE_PATTERN(p_vcge_8ux16,
3728                  unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
3729            DEFINE_PATTERN(p_vcge_16ux8,
3730                  unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
3731            DEFINE_PATTERN(p_vcge_32ux4,
3732                  unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
3733            if (matchIRExpr(&mi, p_veqz_8x16, e)) {
3734               HReg res = newVRegV(env);
3735               HReg arg = iselNeonExpr(env, mi.bindee[0]);
3736               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
3737               return res;
3738            } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
3739               HReg res = newVRegV(env);
3740               HReg arg = iselNeonExpr(env, mi.bindee[0]);
3741               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
3742               return res;
3743            } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
3744               HReg res = newVRegV(env);
3745               HReg arg = iselNeonExpr(env, mi.bindee[0]);
3746               addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
3747               return res;
3748            } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
3749               HReg res = newVRegV(env);
3750               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3751               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3752               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3753                                              res, argL, argR, 0, True));
3754               return res;
3755            } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
3756               HReg res = newVRegV(env);
3757               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3758               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3759               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3760                                              res, argL, argR, 1, True));
3761               return res;
3762            } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
3763               HReg res = newVRegV(env);
3764               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3765               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3766               addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3767                                              res, argL, argR, 2, True));
3768               return res;
3769            } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
3770               HReg res = newVRegV(env);
3771               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3772               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3773               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3774                                              res, argL, argR, 0, True));
3775               return res;
3776            } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
3777               HReg res = newVRegV(env);
3778               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3779               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3780               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3781                                              res, argL, argR, 1, True));
3782               return res;
3783            } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
3784               HReg res = newVRegV(env);
3785               HReg argL = iselNeonExpr(env, mi.bindee[0]);
3786               HReg argR = iselNeonExpr(env, mi.bindee[1]);
3787               addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3788                                              res, argL, argR, 2, True));
3789               return res;
3790            } else {
3791               HReg res = newVRegV(env);
3792               HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3793               addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
3794               return res;
3795            }
3796         }
3797         case Iop_Dup8x16:
3798         case Iop_Dup16x8:
3799         case Iop_Dup32x4: {
3800            HReg res, arg;
3801            UInt size;
3802            DECLARE_PATTERN(p_vdup_8x16);
3803            DECLARE_PATTERN(p_vdup_16x8);
3804            DECLARE_PATTERN(p_vdup_32x4);
3805            DEFINE_PATTERN(p_vdup_8x16,
3806                  unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
3807            DEFINE_PATTERN(p_vdup_16x8,
3808                  unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
3809            DEFINE_PATTERN(p_vdup_32x4,
3810                  unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
3811            if (matchIRExpr(&mi, p_vdup_8x16, e)) {
3812               UInt index;
3813               UInt imm4;
3814               if (mi.bindee[1]->tag == Iex_Const &&
3815                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3816                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3817                  imm4 = (index << 1) + 1;
3818                  if (index < 8) {
3819                     res = newVRegV(env);
3820                     arg = iselNeon64Expr(env, mi.bindee[0]);
3821                     addInstr(env, ARMInstr_NUnaryS(
3822                                      ARMneon_VDUP,
3823                                      mkARMNRS(ARMNRS_Reg, res, 0),
3824                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3825                                      imm4, True
3826                             ));
3827                     return res;
3828                  }
3829               }
3830            } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
3831               UInt index;
3832               UInt imm4;
3833               if (mi.bindee[1]->tag == Iex_Const &&
3834                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3835                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3836                  imm4 = (index << 2) + 2;
3837                  if (index < 4) {
3838                     res = newVRegV(env);
3839                     arg = iselNeon64Expr(env, mi.bindee[0]);
3840                     addInstr(env, ARMInstr_NUnaryS(
3841                                      ARMneon_VDUP,
3842                                      mkARMNRS(ARMNRS_Reg, res, 0),
3843                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3844                                      imm4, True
3845                             ));
3846                     return res;
3847                  }
3848               }
3849            } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
3850               UInt index;
3851               UInt imm4;
3852               if (mi.bindee[1]->tag == Iex_Const &&
3853                  typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3854                  index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3855                  imm4 = (index << 3) + 4;
3856                  if (index < 2) {
3857                     res = newVRegV(env);
3858                     arg = iselNeon64Expr(env, mi.bindee[0]);
3859                     addInstr(env, ARMInstr_NUnaryS(
3860                                      ARMneon_VDUP,
3861                                      mkARMNRS(ARMNRS_Reg, res, 0),
3862                                      mkARMNRS(ARMNRS_Scalar, arg, index),
3863                                      imm4, True
3864                             ));
3865                     return res;
3866                  }
3867               }
3868            }
3869            arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3870            res = newVRegV(env);
3871            switch (e->Iex.Unop.op) {
3872               case Iop_Dup8x16: size = 0; break;
3873               case Iop_Dup16x8: size = 1; break;
3874               case Iop_Dup32x4: size = 2; break;
3875               default: vassert(0);
3876            }
3877            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
3878            return res;
3879         }
3880         case Iop_Abs8x16:
3881         case Iop_Abs16x8:
3882         case Iop_Abs32x4: {
3883            HReg res = newVRegV(env);
3884            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3885            UInt size = 0;
3886            switch(e->Iex.Binop.op) {
3887               case Iop_Abs8x16: size = 0; break;
3888               case Iop_Abs16x8: size = 1; break;
3889               case Iop_Abs32x4: size = 2; break;
3890               default: vassert(0);
3891            }
3892            addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
3893            return res;
3894         }
3895         case Iop_Reverse64_8x16:
3896         case Iop_Reverse64_16x8:
3897         case Iop_Reverse64_32x4: {
3898            HReg res = newVRegV(env);
3899            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3900            UInt size = 0;
3901            switch(e->Iex.Binop.op) {
3902               case Iop_Reverse64_8x16: size = 0; break;
3903               case Iop_Reverse64_16x8: size = 1; break;
3904               case Iop_Reverse64_32x4: size = 2; break;
3905               default: vassert(0);
3906            }
3907            addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3908                                          res, arg, size, True));
3909            return res;
3910         }
3911         case Iop_Reverse32_8x16:
3912         case Iop_Reverse32_16x8: {
3913            HReg res = newVRegV(env);
3914            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3915            UInt size = 0;
3916            switch(e->Iex.Binop.op) {
3917               case Iop_Reverse32_8x16: size = 0; break;
3918               case Iop_Reverse32_16x8: size = 1; break;
3919               default: vassert(0);
3920            }
3921            addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3922                                          res, arg, size, True));
3923            return res;
3924         }
3925         case Iop_Reverse16_8x16: {
3926            HReg res = newVRegV(env);
3927            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3928            UInt size = 0;
3929            addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3930                                          res, arg, size, True));
3931            return res;
3932         }
3933         case Iop_CmpNEZ64x2: {
3934            HReg x_lsh = newVRegV(env);
3935            HReg x_rsh = newVRegV(env);
3936            HReg lsh_amt = newVRegV(env);
3937            HReg rsh_amt = newVRegV(env);
3938            HReg zero = newVRegV(env);
3939            HReg tmp = newVRegV(env);
3940            HReg tmp2 = newVRegV(env);
3941            HReg res = newVRegV(env);
3942            HReg x = newVRegV(env);
3943            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3944            addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
3945            addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
3946            addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3947            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3948            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3949                                           rsh_amt, zero, lsh_amt, 2, True));
3950            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3951                                          x_lsh, x, lsh_amt, 3, True));
3952            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3953                                          x_rsh, x, rsh_amt, 3, True));
3954            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3955                                           tmp, x_lsh, x_rsh, 0, True));
3956            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3957                                           res, tmp, x, 0, True));
3958            return res;
3959         }
3960         case Iop_CmpNEZ8x16:
3961         case Iop_CmpNEZ16x8:
3962         case Iop_CmpNEZ32x4: {
3963            HReg res = newVRegV(env);
3964            HReg tmp = newVRegV(env);
3965            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
3966            UInt size;
3967            switch (e->Iex.Unop.op) {
3968               case Iop_CmpNEZ8x16: size = 0; break;
3969               case Iop_CmpNEZ16x8: size = 1; break;
3970               case Iop_CmpNEZ32x4: size = 2; break;
3971               default: vassert(0);
3972            }
3973            addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, True));
3974            addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
3975            return res;
3976         }
3977         case Iop_Longen8Ux8:
3978         case Iop_Longen16Ux4:
3979         case Iop_Longen32Ux2: {
3980            HReg res = newVRegV(env);
3981            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3982            UInt size;
3983            switch (e->Iex.Unop.op) {
3984               case Iop_Longen8Ux8: size = 0; break;
3985               case Iop_Longen16Ux4: size = 1; break;
3986               case Iop_Longen32Ux2: size = 2; break;
3987               default: vassert(0);
3988            }
3989            addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
3990                                          res, arg, size, True));
3991            return res;
3992         }
3993         case Iop_Longen8Sx8:
3994         case Iop_Longen16Sx4:
3995         case Iop_Longen32Sx2: {
3996            HReg res = newVRegV(env);
3997            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3998            UInt size;
3999            switch (e->Iex.Unop.op) {
4000               case Iop_Longen8Sx8: size = 0; break;
4001               case Iop_Longen16Sx4: size = 1; break;
4002               case Iop_Longen32Sx2: size = 2; break;
4003               default: vassert(0);
4004            }
4005            addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4006                                          res, arg, size, True));
4007            return res;
4008         }
4009         case Iop_PwAddL8Sx16:
4010         case Iop_PwAddL16Sx8:
4011         case Iop_PwAddL32Sx4: {
4012            HReg res = newVRegV(env);
4013            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4014            UInt size = 0;
4015            switch(e->Iex.Binop.op) {
4016               case Iop_PwAddL8Sx16: size = 0; break;
4017               case Iop_PwAddL16Sx8: size = 1; break;
4018               case Iop_PwAddL32Sx4: size = 2; break;
4019               default: vassert(0);
4020            }
4021            addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4022                                          res, arg, size, True));
4023            return res;
4024         }
4025         case Iop_PwAddL8Ux16:
4026         case Iop_PwAddL16Ux8:
4027         case Iop_PwAddL32Ux4: {
4028            HReg res = newVRegV(env);
4029            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4030            UInt size = 0;
4031            switch(e->Iex.Binop.op) {
4032               case Iop_PwAddL8Ux16: size = 0; break;
4033               case Iop_PwAddL16Ux8: size = 1; break;
4034               case Iop_PwAddL32Ux4: size = 2; break;
4035               default: vassert(0);
4036            }
4037            addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4038                                          res, arg, size, True));
4039            return res;
4040         }
4041         case Iop_Cnt8x16: {
4042            HReg res = newVRegV(env);
4043            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4044            UInt size = 0;
4045            addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4046            return res;
4047         }
4048         case Iop_Clz8Sx16:
4049         case Iop_Clz16Sx8:
4050         case Iop_Clz32Sx4: {
4051            HReg res = newVRegV(env);
4052            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4053            UInt size = 0;
4054            switch(e->Iex.Binop.op) {
4055               case Iop_Clz8Sx16: size = 0; break;
4056               case Iop_Clz16Sx8: size = 1; break;
4057               case Iop_Clz32Sx4: size = 2; break;
4058               default: vassert(0);
4059            }
4060            addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4061            return res;
4062         }
4063         case Iop_Cls8Sx16:
4064         case Iop_Cls16Sx8:
4065         case Iop_Cls32Sx4: {
4066            HReg res = newVRegV(env);
4067            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4068            UInt size = 0;
4069            switch(e->Iex.Binop.op) {
4070               case Iop_Cls8Sx16: size = 0; break;
4071               case Iop_Cls16Sx8: size = 1; break;
4072               case Iop_Cls32Sx4: size = 2; break;
4073               default: vassert(0);
4074            }
4075            addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4076            return res;
4077         }
4078         case Iop_FtoI32Sx4_RZ: {
4079            HReg res = newVRegV(env);
4080            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4081            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4082                                          res, arg, 2, True));
4083            return res;
4084         }
4085         case Iop_FtoI32Ux4_RZ: {
4086            HReg res = newVRegV(env);
4087            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4088            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4089                                          res, arg, 2, True));
4090            return res;
4091         }
4092         case Iop_I32StoFx4: {
4093            HReg res = newVRegV(env);
4094            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4095            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4096                                          res, arg, 2, True));
4097            return res;
4098         }
4099         case Iop_I32UtoFx4: {
4100            HReg res = newVRegV(env);
4101            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4102            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4103                                          res, arg, 2, True));
4104            return res;
4105         }
4106         case Iop_F16toF32x4: {
4107            HReg res = newVRegV(env);
4108            HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4109            addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4110                                          res, arg, 2, True));
4111            return res;
4112         }
4113         case Iop_Recip32Fx4: {
4114            HReg res = newVRegV(env);
4115            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4116            addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4117                                          res, argL, 0, True));
4118            return res;
4119         }
4120         case Iop_Recip32x4: {
4121            HReg res = newVRegV(env);
4122            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4123            addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4124                                          res, argL, 0, True));
4125            return res;
4126         }
4127         case Iop_Abs32Fx4: {
4128            DECLARE_PATTERN(p_vabd_32fx4);
4129            DEFINE_PATTERN(p_vabd_32fx4,
4130                           unop(Iop_Abs32Fx4,
4131                                binop(Iop_Sub32Fx4,
4132                                      bind(0),
4133                                      bind(1))));
4134            if (matchIRExpr(&mi, p_vabd_32fx4, e)) {
4135               HReg res = newVRegV(env);
4136               HReg argL = iselNeonExpr(env, mi.bindee[0]);
4137               HReg argR = iselNeonExpr(env, mi.bindee[1]);
4138               addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
4139                                              res, argL, argR, 0, True));
4140               return res;
4141            } else {
4142               HReg res = newVRegV(env);
4143               HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4144               addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4145                                             res, argL, 0, True));
4146               return res;
4147            }
4148         }
4149         case Iop_Rsqrte32Fx4: {
4150            HReg res = newVRegV(env);
4151            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4152            addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4153                                          res, argL, 0, True));
4154            return res;
4155         }
4156         case Iop_Rsqrte32x4: {
4157            HReg res = newVRegV(env);
4158            HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4159            addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4160                                          res, argL, 0, True));
4161            return res;
4162         }
4163         case Iop_Neg32Fx4: {
4164            HReg res = newVRegV(env);
4165            HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4166            addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4167                                          res, arg, 0, True));
4168            return res;
4169         }
4170         /* ... */
4171         default:
4172            break;
4173      }
4174   }
4175
4176   if (e->tag == Iex_Binop) {
4177      switch (e->Iex.Binop.op) {
4178         case Iop_64HLtoV128:
4179            /* Try to match into single "VMOV reg, imm" instruction */
4180            if (e->Iex.Binop.arg1->tag == Iex_Const &&
4181                e->Iex.Binop.arg2->tag == Iex_Const &&
4182                typeOfIRExpr(env->type_env, e->Iex.Binop.arg1) == Ity_I64 &&
4183                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) == Ity_I64 &&
4184                e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 ==
4185                           e->Iex.Binop.arg2->Iex.Const.con->Ico.U64) {
4186               ULong imm64 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
4187               ARMNImm *imm = Imm64_to_ARMNImm(imm64);
4188               if (imm) {
4189                  HReg res = newVRegV(env);
4190                  addInstr(env, ARMInstr_NeonImm(res, imm));
4191                  return res;
4192               }
4193               if ((imm64 >> 32) == 0LL &&
4194                   (imm = Imm64_to_ARMNImm(imm64 | (imm64 << 32))) != NULL) {
4195                  HReg tmp1 = newVRegV(env);
4196                  HReg tmp2 = newVRegV(env);
4197                  HReg res = newVRegV(env);
4198                  if (imm->type < 10) {
4199                     addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0x0f)));
4200                     addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4201                     addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4202                                                    res, tmp1, tmp2, 4, True));
4203                     return res;
4204                  }
4205               }
4206               if ((imm64 & 0xFFFFFFFFLL) == 0LL &&
4207                   (imm = Imm64_to_ARMNImm(imm64 | (imm64 >> 32))) != NULL) {
4208                  HReg tmp1 = newVRegV(env);
4209                  HReg tmp2 = newVRegV(env);
4210                  HReg res = newVRegV(env);
4211                  if (imm->type < 10) {
4212                     addInstr(env, ARMInstr_NeonImm(tmp1, ARMNImm_TI(9,0xf0)));
4213                     addInstr(env, ARMInstr_NeonImm(tmp2, imm));
4214                     addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4215                                                    res, tmp1, tmp2, 4, True));
4216                     return res;
4217                  }
4218               }
4219            }
4220            /* Does not match "VMOV Reg, Imm" form */
4221            goto neon_expr_bad;
4222         case Iop_AndV128: {
4223            HReg res = newVRegV(env);
4224            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4225            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4226            addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4227                                           res, argL, argR, 4, True));
4228            return res;
4229         }
4230         case Iop_OrV128: {
4231            HReg res = newVRegV(env);
4232            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4233            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4234            addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4235                                           res, argL, argR, 4, True));
4236            return res;
4237         }
4238         case Iop_XorV128: {
4239            HReg res = newVRegV(env);
4240            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4241            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4242            addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4243                                           res, argL, argR, 4, True));
4244            return res;
4245         }
4246         case Iop_Add8x16:
4247         case Iop_Add16x8:
4248         case Iop_Add32x4:
4249         case Iop_Add64x2: {
4250            /*
4251            FIXME: remove this if not used
4252            DECLARE_PATTERN(p_vrhadd_32sx4);
4253            ULong one = (1LL << 32) | 1LL;
4254            DEFINE_PATTERN(p_vrhadd_32sx4,
4255                  binop(Iop_Add32x4,
4256                        binop(Iop_Add32x4,
4257                              binop(Iop_SarN32x4,
4258                                    bind(0),
4259                                    mkU8(1)),
4260                              binop(Iop_SarN32x4,
4261                                    bind(1),
4262                                    mkU8(1))),
4263                        binop(Iop_SarN32x4,
4264                              binop(Iop_Add32x4,
4265                                    binop(Iop_Add32x4,
4266                                          binop(Iop_AndV128,
4267                                                bind(0),
4268                                                mkU128(one)),
4269                                          binop(Iop_AndV128,
4270                                                bind(1),
4271                                                mkU128(one))),
4272                                    mkU128(one)),
4273                              mkU8(1))));
4274            */
4275            HReg res = newVRegV(env);
4276            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4277            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4278            UInt size;
4279            switch (e->Iex.Binop.op) {
4280               case Iop_Add8x16: size = 0; break;
4281               case Iop_Add16x8: size = 1; break;
4282               case Iop_Add32x4: size = 2; break;
4283               case Iop_Add64x2: size = 3; break;
4284               default:
4285                  ppIROp(e->Iex.Binop.op);
4286                  vpanic("Illegal element size in VADD");
4287            }
4288            addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
4289                                           res, argL, argR, size, True));
4290            return res;
4291         }
4292         case Iop_Add32Fx4: {
4293            HReg res = newVRegV(env);
4294            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4295            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4296            UInt size = 0;
4297            addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
4298                                           res, argL, argR, size, True));
4299            return res;
4300         }
4301         case Iop_Recps32Fx4: {
4302            HReg res = newVRegV(env);
4303            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4304            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4305            UInt size = 0;
4306            addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
4307                                           res, argL, argR, size, True));
4308            return res;
4309         }
4310         case Iop_Rsqrts32Fx4: {
4311            HReg res = newVRegV(env);
4312            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4313            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4314            UInt size = 0;
4315            addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
4316                                           res, argL, argR, size, True));
4317            return res;
4318         }
4319         case Iop_InterleaveEvenLanes8x16:
4320         case Iop_InterleaveEvenLanes16x8:
4321         case Iop_InterleaveEvenLanes32x4:
4322         case Iop_InterleaveOddLanes8x16:
4323         case Iop_InterleaveOddLanes16x8:
4324         case Iop_InterleaveOddLanes32x4: {
4325            HReg tmp = newVRegV(env);
4326            HReg res = newVRegV(env);
4327            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4328            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4329            UInt size;
4330            UInt is_lo;
4331            switch (e->Iex.Binop.op) {
4332               case Iop_InterleaveEvenLanes8x16: is_lo = 0; size = 0; break;
4333               case Iop_InterleaveOddLanes8x16: is_lo = 1; size = 0; break;
4334               case Iop_InterleaveEvenLanes16x8: is_lo = 0; size = 1; break;
4335               case Iop_InterleaveOddLanes16x8: is_lo = 1; size = 1; break;
4336               case Iop_InterleaveEvenLanes32x4: is_lo = 0; size = 2; break;
4337               case Iop_InterleaveOddLanes32x4: is_lo = 1; size = 2; break;
4338               default:
4339                  ppIROp(e->Iex.Binop.op);
4340                  vpanic("Illegal element size in VTRN");
4341            }
4342            if (is_lo) {
4343               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4344                                             tmp, argL, 4, True));
4345               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4346                                             res, argR, 4, True));
4347               addInstr(env, ARMInstr_NDual(ARMneon_TRN,
4348                                            res, tmp, size, True));
4349            } else {
4350               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4351                                             tmp, argR, 4, True));
4352               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4353                                             res, argL, 4, True));
4354               addInstr(env, ARMInstr_NDual(ARMneon_TRN,
4355                                            tmp, res, size, True));
4356            }
4357            return res;
4358         }
4359         case Iop_InterleaveHI8x16:
4360         case Iop_InterleaveHI16x8:
4361         case Iop_InterleaveHI32x4:
4362         case Iop_InterleaveLO8x16:
4363         case Iop_InterleaveLO16x8:
4364         case Iop_InterleaveLO32x4: {
4365            HReg tmp = newVRegV(env);
4366            HReg res = newVRegV(env);
4367            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4368            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4369            UInt size;
4370            UInt is_lo;
4371            switch (e->Iex.Binop.op) {
4372               case Iop_InterleaveHI8x16: is_lo = 1; size = 0; break;
4373               case Iop_InterleaveLO8x16: is_lo = 0; size = 0; break;
4374               case Iop_InterleaveHI16x8: is_lo = 1; size = 1; break;
4375               case Iop_InterleaveLO16x8: is_lo = 0; size = 1; break;
4376               case Iop_InterleaveHI32x4: is_lo = 1; size = 2; break;
4377               case Iop_InterleaveLO32x4: is_lo = 0; size = 2; break;
4378               default:
4379                  ppIROp(e->Iex.Binop.op);
4380                  vpanic("Illegal element size in VZIP");
4381            }
4382            if (is_lo) {
4383               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4384                                             tmp, argL, 4, True));
4385               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4386                                             res, argR, 4, True));
4387               addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
4388                                            res, tmp, size, True));
4389            } else {
4390               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4391                                             tmp, argR, 4, True));
4392               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4393                                             res, argL, 4, True));
4394               addInstr(env, ARMInstr_NDual(ARMneon_ZIP,
4395                                            tmp, res, size, True));
4396            }
4397            return res;
4398         }
4399         case Iop_CatOddLanes8x16:
4400         case Iop_CatOddLanes16x8:
4401         case Iop_CatOddLanes32x4:
4402         case Iop_CatEvenLanes8x16:
4403         case Iop_CatEvenLanes16x8:
4404         case Iop_CatEvenLanes32x4: {
4405            HReg tmp = newVRegV(env);
4406            HReg res = newVRegV(env);
4407            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4408            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4409            UInt size;
4410            UInt is_lo;
4411            switch (e->Iex.Binop.op) {
4412               case Iop_CatOddLanes8x16: is_lo = 1; size = 0; break;
4413               case Iop_CatEvenLanes8x16: is_lo = 0; size = 0; break;
4414               case Iop_CatOddLanes16x8: is_lo = 1; size = 1; break;
4415               case Iop_CatEvenLanes16x8: is_lo = 0; size = 1; break;
4416               case Iop_CatOddLanes32x4: is_lo = 1; size = 2; break;
4417               case Iop_CatEvenLanes32x4: is_lo = 0; size = 2; break;
4418               default:
4419                  ppIROp(e->Iex.Binop.op);
4420                  vpanic("Illegal element size in VUZP");
4421            }
4422            if (is_lo) {
4423               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4424                                             tmp, argL, 4, True));
4425               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4426                                             res, argR, 4, True));
4427               addInstr(env, ARMInstr_NDual(ARMneon_UZP,
4428                                            res, tmp, size, True));
4429            } else {
4430               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4431                                             tmp, argR, 4, True));
4432               addInstr(env, ARMInstr_NUnary(ARMneon_COPY,
4433                                             res, argL, 4, True));
4434               addInstr(env, ARMInstr_NDual(ARMneon_UZP,
4435                                            tmp, res, size, True));
4436            }
4437            return res;
4438         }
4439         case Iop_QAdd8Ux16:
4440         case Iop_QAdd16Ux8:
4441         case Iop_QAdd32Ux4:
4442         case Iop_QAdd64Ux2: {
4443            HReg res = newVRegV(env);
4444            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4445            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4446            UInt size;
4447            switch (e->Iex.Binop.op) {
4448               case Iop_QAdd8Ux16: size = 0; break;
4449               case Iop_QAdd16Ux8: size = 1; break;
4450               case Iop_QAdd32Ux4: size = 2; break;
4451               case Iop_QAdd64Ux2: size = 3; break;
4452               default:
4453                  ppIROp(e->Iex.Binop.op);
4454                  vpanic("Illegal element size in VQADDU");
4455            }
4456            addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
4457                                           res, argL, argR, size, True));
4458            return res;
4459         }
4460         case Iop_QAdd8Sx16:
4461         case Iop_QAdd16Sx8:
4462         case Iop_QAdd32Sx4:
4463         case Iop_QAdd64Sx2: {
4464            HReg res = newVRegV(env);
4465            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4466            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4467            UInt size;
4468            switch (e->Iex.Binop.op) {
4469               case Iop_QAdd8Sx16: size = 0; break;
4470               case Iop_QAdd16Sx8: size = 1; break;
4471               case Iop_QAdd32Sx4: size = 2; break;
4472               case Iop_QAdd64Sx2: size = 3; break;
4473               default:
4474                  ppIROp(e->Iex.Binop.op);
4475                  vpanic("Illegal element size in VQADDS");
4476            }
4477            addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
4478                                           res, argL, argR, size, True));
4479            return res;
4480         }
4481         case Iop_Sub8x16:
4482         case Iop_Sub16x8:
4483         case Iop_Sub32x4:
4484         case Iop_Sub64x2: {
4485            HReg res = newVRegV(env);
4486            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4487            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4488            UInt size;
4489            switch (e->Iex.Binop.op) {
4490               case Iop_Sub8x16: size = 0; break;
4491               case Iop_Sub16x8: size = 1; break;
4492               case Iop_Sub32x4: size = 2; break;
4493               case Iop_Sub64x2: size = 3; break;
4494               default:
4495                  ppIROp(e->Iex.Binop.op);
4496                  vpanic("Illegal element size in VSUB");
4497            }
4498            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4499                                           res, argL, argR, size, True));
4500            return res;
4501         }
4502         case Iop_Sub32Fx4: {
4503            HReg res = newVRegV(env);
4504            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4505            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4506            UInt size = 0;
4507            addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
4508                                           res, argL, argR, size, True));
4509            return res;
4510         }
4511         case Iop_QSub8Ux16:
4512         case Iop_QSub16Ux8:
4513         case Iop_QSub32Ux4:
4514         case Iop_QSub64Ux2: {
4515            HReg res = newVRegV(env);
4516            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4517            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4518            UInt size;
4519            switch (e->Iex.Binop.op) {
4520               case Iop_QSub8Ux16: size = 0; break;
4521               case Iop_QSub16Ux8: size = 1; break;
4522               case Iop_QSub32Ux4: size = 2; break;
4523               case Iop_QSub64Ux2: size = 3; break;
4524               default:
4525                  ppIROp(e->Iex.Binop.op);
4526                  vpanic("Illegal element size in VQSUBU");
4527            }
4528            addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
4529                                           res, argL, argR, size, True));
4530            return res;
4531         }
4532         case Iop_QSub8Sx16:
4533         case Iop_QSub16Sx8:
4534         case Iop_QSub32Sx4:
4535         case Iop_QSub64Sx2: {
4536            HReg res = newVRegV(env);
4537            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4538            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4539            UInt size;
4540            switch (e->Iex.Binop.op) {
4541               case Iop_QSub8Sx16: size = 0; break;
4542               case Iop_QSub16Sx8: size = 1; break;
4543               case Iop_QSub32Sx4: size = 2; break;
4544               case Iop_QSub64Sx2: size = 3; break;
4545               default:
4546                  ppIROp(e->Iex.Binop.op);
4547                  vpanic("Illegal element size in VQSUBS");
4548            }
4549            addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
4550                                           res, argL, argR, size, True));
4551            return res;
4552         }
4553         case Iop_Max8Ux16:
4554         case Iop_Max16Ux8:
4555         case Iop_Max32Ux4: {
4556            HReg res = newVRegV(env);
4557            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4558            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4559            UInt size;
4560            switch (e->Iex.Binop.op) {
4561               case Iop_Max8Ux16: size = 0; break;
4562               case Iop_Max16Ux8: size = 1; break;
4563               case Iop_Max32Ux4: size = 2; break;
4564               default: vpanic("Illegal element size in VMAXU");
4565            }
4566            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
4567                                           res, argL, argR, size, True));
4568            return res;
4569         }
4570         case Iop_Max8Sx16:
4571         case Iop_Max16Sx8:
4572         case Iop_Max32Sx4: {
                /* Iop_Max{8,16,32}Sx*: emit a single ARMneon_VMAXS.  'size'
                   is 0/1/2 for the 8/16/32-bit lane variants respectively. */
4573            HReg res = newVRegV(env);
4574            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4575            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4576            UInt size;
4577            switch (e->Iex.Binop.op) {
4578               case Iop_Max8Sx16: size = 0; break;
4579               case Iop_Max16Sx8: size = 1; break;
4580               case Iop_Max32Sx4: size = 2; break;
                   /* Unreachable given the outer case labels; the message names
                      the instruction actually emitted by this arm. */
4581               default: vpanic("Illegal element size in VMAXS");
4582            }
4583            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
4584                                           res, argL, argR, size, True));
4585            return res;
4586         }
4587         case Iop_Min8Ux16:
4588         case Iop_Min16Ux8:
4589         case Iop_Min32Ux4: {
                /* Iop_Min{8,16,32}Ux*: emit a single ARMneon_VMINU.  'size'
                   is 0/1/2 for the 8/16/32-bit lane variants respectively. */
4590            HReg res = newVRegV(env);
4591            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4592            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4593            UInt size;
4594            switch (e->Iex.Binop.op) {
4595               case Iop_Min8Ux16: size = 0; break;
4596               case Iop_Min16Ux8: size = 1; break;
4597               case Iop_Min32Ux4: size = 2; break;
                   /* Unreachable given the outer case labels; the message names
                      the instruction actually emitted by this arm. */
4598               default: vpanic("Illegal element size in VMINU");
4599            }
4600            addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
4601                                           res, argL, argR, size, True));
4602            return res;
4603         }
4604         case Iop_Min8Sx16:
4605         case Iop_Min16Sx8:
4606         case Iop_Min32Sx4: {
                /* Iop_Min{8,16,32}Sx*: emit a single ARMneon_VMINS.  'size'
                   is 0/1/2 for the 8/16/32-bit lane variants respectively. */
4607            HReg res = newVRegV(env);
4608            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4609            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4610            UInt size;
4611            switch (e->Iex.Binop.op) {
4612               case Iop_Min8Sx16: size = 0; break;
4613               case Iop_Min16Sx8: size = 1; break;
4614               case Iop_Min32Sx4: size = 2; break;
                   /* Unreachable given the outer case labels; the message names
                      the instruction actually emitted by this arm. */
4615               default: vpanic("Illegal element size in VMINS");
4616            }
4617            addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
4618                                           res, argL, argR, size, True));
4619            return res;
4620         }
4621         case Iop_Sar8x16:
4622         case Iop_Sar16x8:
4623         case Iop_Sar32x4:
4624         case Iop_Sar64x2: {
                /* Right shift by a per-lane count held in a vector (arg2).
                   No dedicated right-shift op is emitted: the counts are
                   negated lane-wise (argR2 = zero - argR) and passed to
                   ARMneon_VSAL, relying on the NEON register-shift rule that
                   a negative count shifts right.
                   NOTE(review): VSAL is presumably the signed (arithmetic)
                   variable shift -- confirm in host_arm_defs. */
4625            HReg res = newVRegV(env);
4626            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4627            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4628            HReg argR2 = newVRegV(env);
4629            HReg zero = newVRegV(env);
4630            UInt size;
                /* size: 0/1/2/3 for the 8/16/32/64-bit lane variants. */
4631            switch (e->Iex.Binop.op) {
4632               case Iop_Sar8x16: size = 0; break;
4633               case Iop_Sar16x8: size = 1; break;
4634               case Iop_Sar32x4: size = 2; break;
4635               case Iop_Sar64x2: size = 3; break;
4636               default: vassert(0);
4637            }
                /* Materialise the constant used as the minuend of the negation. */
4638            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
                /* argR2 = zero - argR, computed at the same lane width. */
4639            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4640                                           argR2, zero, argR, size, True));
4641            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4642                                          res, argL, argR2, size, True));
4643            return res;
4644         }
4645         case Iop_Sal8x16:
4646         case Iop_Sal16x8:
4647         case Iop_Sal32x4:
4648         case Iop_Sal64x2: {
4649            HReg res = newVRegV(env);
4650            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4651            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4652            UInt size;
4653            switch (e->Iex.Binop.op) {
4654               case Iop_Sal8x16: size = 0; break;
4655               case Iop_Sal16x8: size = 1; break;
4656               case Iop_Sal32x4: size = 2; break;
4657               case Iop_Sal64x2: size = 3; break;
4658               default: vassert(0);
4659            }
4660            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4661                                          res, argL, argR, size, True));
4662            return res;
4663         }
4664         case Iop_Shr8x16:
4665         case Iop_Shr16x8:
4666         case Iop_Shr32x4:
4667         case Iop_Shr64x2: {
                /* Right shift by a per-lane count held in a vector (arg2).
                   The counts are negated lane-wise (argR2 = zero - argR) and
                   passed to ARMneon_VSHL, relying on the NEON register-shift
                   rule that a negative count shifts right.
                   NOTE(review): VSHL is presumably the unsigned (logical)
                   variable shift -- confirm in host_arm_defs. */
4668            HReg res = newVRegV(env);
4669            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4670            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4671            HReg argR2 = newVRegV(env);
4672            HReg zero = newVRegV(env);
4673            UInt size;
                /* size: 0/1/2/3 for the 8/16/32/64-bit lane variants. */
4674            switch (e->Iex.Binop.op) {
4675               case Iop_Shr8x16: size = 0; break;
4676               case Iop_Shr16x8: size = 1; break;
4677               case Iop_Shr32x4: size = 2; break;
4678               case Iop_Shr64x2: size = 3; break;
4679               default: vassert(0);
4680            }
                /* Materialise the constant used as the minuend of the negation. */
4681            addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
                /* argR2 = zero - argR, computed at the same lane width. */
4682            addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4683                                           argR2, zero, argR, size, True));
4684            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4685                                          res, argL, argR2, size, True));
4686            return res;
4687         }
4688         case Iop_Shl8x16:
4689         case Iop_Shl16x8:
4690         case Iop_Shl32x4:
4691         case Iop_Shl64x2: {
4692            HReg res = newVRegV(env);
4693            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4694            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4695            UInt size;
4696            switch (e->Iex.Binop.op) {
4697               case Iop_Shl8x16: size = 0; break;
4698               case Iop_Shl16x8: size = 1; break;
4699               case Iop_Shl32x4: size = 2; break;
4700               case Iop_Shl64x2: size = 3; break;
4701               default: vassert(0);
4702            }
4703            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4704                                          res, argL, argR, size, True));
4705            return res;
4706         }
4707         case Iop_QShl8x16:
4708         case Iop_QShl16x8:
4709         case Iop_QShl32x4:
4710         case Iop_QShl64x2: {
4711            HReg res = newVRegV(env);
4712            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4713            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4714            UInt size;
4715            switch (e->Iex.Binop.op) {
4716               case Iop_QShl8x16: size = 0; break;
4717               case Iop_QShl16x8: size = 1; break;
4718               case Iop_QShl32x4: size = 2; break;
4719               case Iop_QShl64x2: size = 3; break;
4720               default: vassert(0);
4721            }
4722            addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
4723                                          res, argL, argR, size, True));
4724            return res;
4725         }
4726         case Iop_QSal8x16:
4727         case Iop_QSal16x8:
4728         case Iop_QSal32x4:
4729         case Iop_QSal64x2: {
4730            HReg res = newVRegV(env);
4731            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4732            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4733            UInt size;
4734            switch (e->Iex.Binop.op) {
4735               case Iop_QSal8x16: size = 0; break;
4736               case Iop_QSal16x8: size = 1; break;
4737               case Iop_QSal32x4: size = 2; break;
4738               case Iop_QSal64x2: size = 3; break;
4739               default: vassert(0);
4740            }
4741            addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
4742                                          res, argL, argR, size, True));
4743            return res;
4744         }
4745         case Iop_QShlN8x16:
4746         case Iop_QShlN16x8:
4747         case Iop_QShlN32x4:
4748         case Iop_QShlN64x2: {
                /* Iop_QShlN{8,16,32,64}x*: shift by an immediate, emitted as
                   one ARMneon_VQSHLNUU.  Only a constant Ity_I8 shift amount
                   is accepted; anything else panics. */
4749            HReg res = newVRegV(env);
4750            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4751            UInt size, imm;
4752            if (e->Iex.Binop.arg2->tag != Iex_Const ||
4753                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4754               vpanic("ARM target supports Iop_QShlNAxB with constant "
4755                      "second argument only\n");
4756            }
4757            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
                /* 'size' packs lane width and shift amount into one value:
                   the lane-width bit (8/16/32/64) is ORed with the immediate.
                   NOTE(review): this is only unambiguous when imm is smaller
                   than the lane width; no range check is done here -- confirm
                   that the IR producer guarantees it. */
4758            switch (e->Iex.Binop.op) {
4759               case Iop_QShlN8x16: size = 8 | imm; break;
4760               case Iop_QShlN16x8: size = 16 | imm; break;
4761               case Iop_QShlN32x4: size = 32 | imm; break;
4762               case Iop_QShlN64x2: size = 64 | imm; break;
4763               default: vassert(0);
4764            }
4765            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
4766                                          res, argL, size, True));
4767            return res;
4768         }
4769         case Iop_QShlN8Sx16:
4770         case Iop_QShlN16Sx8:
4771         case Iop_QShlN32Sx4:
4772         case Iop_QShlN64Sx2: {
                /* Iop_QShlN{8,16,32,64}Sx*: shift by an immediate, emitted as
                   one ARMneon_VQSHLNUS.  Only a constant Ity_I8 shift amount
                   is accepted; anything else panics. */
4773            HReg res = newVRegV(env);
4774            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4775            UInt size, imm;
4776            if (e->Iex.Binop.arg2->tag != Iex_Const ||
4777                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
4778               vpanic("ARM target supports Iop_QShlNASxB with constant "
4779                      "second argument only\n");
4780            }
4781            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
                /* 'size' packs lane width and shift amount into one value:
                   the lane-width bit (8/16/32/64) is ORed with the immediate.
                   NOTE(review): this is only unambiguous when imm is smaller
                   than the lane width; no range check is done here -- confirm
                   that the IR producer guarantees it. */
4782            switch (e->Iex.Binop.op) {
4783               case Iop_QShlN8Sx16: size = 8 | imm; break;
4784               case Iop_QShlN16Sx8: size = 16 | imm; break;
4785               case Iop_QShlN32Sx4: size = 32 | imm; break;
4786               case Iop_QShlN64Sx2: size = 64 | imm; break;
4787               default: vassert(0);
4788            }
4789            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
4790                                          res, argL, size, True));
4791            return res;
4792         }
4793         case Iop_QSalN8x16:
4794         case Iop_QSalN16x8:
4795         case Iop_QSalN32x4:
4796         case Iop_QSalN64x2: {
                /* Iop_QSalN{8,16,32,64}x*: shift by an immediate, emitted as
                   one ARMneon_VQSHLNSS.  Only a constant Ity_I8 shift amount
                   is accepted; anything else panics. */
4797            HReg res = newVRegV(env);
4798            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4799            UInt size, imm;
4800            if (e->Iex.Binop.arg2->tag != Iex_Const ||
4801                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
                   /* The message names the QSalN family handled by this arm. */
4802               vpanic("ARM target supports Iop_QSalNAxB with constant "
4803                      "second argument only\n");
4804            }
4805            imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
                /* 'size' packs lane width and shift amount into one value:
                   the lane-width bit (8/16/32/64) is ORed with the immediate.
                   NOTE(review): this is only unambiguous when imm is smaller
                   than the lane width; no range check is done here -- confirm
                   that the IR producer guarantees it. */
4806            switch (e->Iex.Binop.op) {
4807               case Iop_QSalN8x16: size = 8 | imm; break;
4808               case Iop_QSalN16x8: size = 16 | imm; break;
4809               case Iop_QSalN32x4: size = 32 | imm; break;
4810               case Iop_QSalN64x2: size = 64 | imm; break;
4811               default: vassert(0);
4812            }
4813            addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
4814                                          res, argL, size, True));
4815            return res;
4816         }
4817         case Iop_ShrN8x16:
4818         case Iop_ShrN16x8:
4819         case Iop_ShrN32x4:
4820         case Iop_ShrN64x2: {
                /* Right shift of every lane by one scalar count (arg2, taken
                   into an integer register).  The count is negated in the
                   integer domain, duplicated into a vector, and used as the
                   count operand of ARMneon_VSHL, relying on the NEON
                   register-shift rule that a negative count shifts right. */
4821            HReg res = newVRegV(env);
4822            HReg tmp = newVRegV(env);
4823            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4824            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4825            HReg argR2 = newVRegI(env);
4826            UInt size;
                /* size: 0/1/2/3 for the 8/16/32/64-bit lane variants. */
4827            switch (e->Iex.Binop.op) {
4828               case Iop_ShrN8x16: size = 0; break;
4829               case Iop_ShrN16x8: size = 1; break;
4830               case Iop_ShrN32x4: size = 2; break;
4831               case Iop_ShrN64x2: size = 3; break;
4832               default: vassert(0);
4833            }
                /* argR2 = -argR (integer register). */
4834            addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
                /* DUP is always issued with size 0, independent of the lane
                   width.  NOTE(review): presumably a byte-wise broadcast,
                   sufficient because the shift reads only the low byte of
                   each lane's count -- confirm. */
4835            addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
4836                                          tmp, argR2, 0, True));
4837            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4838                                          res, argL, tmp, size, True));
4839            return res;
4840         }
4841         case Iop_ShlN8x16:
4842         case Iop_ShlN16x8:
4843         case Iop_ShlN32x4:
4844         case Iop_ShlN64x2: {
                /* Left shift of every lane by one scalar count (arg2, taken
                   into an integer register).  The count is duplicated into a
                   vector and used as the count operand of ARMneon_VSHL. */
4845            HReg res = newVRegV(env);
4846            HReg tmp = newVRegV(env);
4847            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4848            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4849            UInt size;
                /* size: 0/1/2/3 for the 8/16/32/64-bit lane variants. */
4850            switch (e->Iex.Binop.op) {
4851               case Iop_ShlN8x16: size = 0; break;
4852               case Iop_ShlN16x8: size = 1; break;
4853               case Iop_ShlN32x4: size = 2; break;
4854               case Iop_ShlN64x2: size = 3; break;
4855               default: vassert(0);
4856            }
                /* DUP is always issued with size 0, independent of the lane
                   width.  NOTE(review): presumably a byte-wise broadcast,
                   sufficient because the shift reads only the low byte of
                   each lane's count -- confirm. */
4857            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR, 0, True));
4858            addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4859                                          res, argL, tmp, size, True));
4860            return res;
4861         }
4862         case Iop_SarN8x16:
4863         case Iop_SarN16x8:
4864         case Iop_SarN32x4:
4865         case Iop_SarN64x2: {
                /* Right shift of every lane by one scalar count (arg2, taken
                   into an integer register).  The count is negated in the
                   integer domain, duplicated into a vector, and used as the
                   count operand of ARMneon_VSAL, relying on the NEON
                   register-shift rule that a negative count shifts right.
                   NOTE(review): VSAL is presumably the signed (arithmetic)
                   variable shift -- confirm in host_arm_defs. */
4866            HReg res = newVRegV(env);
4867            HReg tmp = newVRegV(env);
4868            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4869            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4870            HReg argR2 = newVRegI(env);
4871            UInt size;
                /* size: 0/1/2/3 for the 8/16/32/64-bit lane variants. */
4872            switch (e->Iex.Binop.op) {
4873               case Iop_SarN8x16: size = 0; break;
4874               case Iop_SarN16x8: size = 1; break;
4875               case Iop_SarN32x4: size = 2; break;
4876               case Iop_SarN64x2: size = 3; break;
4877               default: vassert(0);
4878            }
                /* argR2 = -argR (integer register), then broadcast it. */
4879            addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
4880            addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, True));
4881            addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
4882                                          res, argL, tmp, size, True));
4883            return res;
4884         }
4885         case Iop_CmpGT8Ux16:
4886         case Iop_CmpGT16Ux8:
4887         case Iop_CmpGT32Ux4: {
4888            HReg res = newVRegV(env);
4889            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4890            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4891            UInt size;
4892            switch (e->Iex.Binop.op) {
4893               case Iop_CmpGT8Ux16: size = 0; break;
4894               case Iop_CmpGT16Ux8: size = 1; break;
4895               case Iop_CmpGT32Ux4: size = 2; break;
4896               default: vassert(0);
4897            }
4898            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
4899                                           res, argL, argR, size, True));
4900            return res;
4901         }
4902         case Iop_CmpGT8Sx16:
4903         case Iop_CmpGT16Sx8:
4904         case Iop_CmpGT32Sx4: {
4905            HReg res = newVRegV(env);
4906            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4907            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4908            UInt size;
4909            switch (e->Iex.Binop.op) {
4910               case Iop_CmpGT8Sx16: size = 0; break;
4911               case Iop_CmpGT16Sx8: size = 1; break;
4912               case Iop_CmpGT32Sx4: size = 2; break;
4913               default: vassert(0);
4914            }
4915            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
4916                                           res, argL, argR, size, True));
4917            return res;
4918         }
4919         case Iop_CmpEQ8x16:
4920         case Iop_CmpEQ16x8:
4921         case Iop_CmpEQ32x4: {
4922            HReg res = newVRegV(env);
4923            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4924            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4925            UInt size;
4926            switch (e->Iex.Binop.op) {
4927               case Iop_CmpEQ8x16: size = 0; break;
4928               case Iop_CmpEQ16x8: size = 1; break;
4929               case Iop_CmpEQ32x4: size = 2; break;
4930               default: vassert(0);
4931            }
4932            addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
4933                                           res, argL, argR, size, True));
4934            return res;
4935         }
4936         case Iop_Mul8x16:
4937         case Iop_Mul16x8:
4938         case Iop_Mul32x4: {
4939            HReg res = newVRegV(env);
4940            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4941            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4942            UInt size = 0;
4943            switch(e->Iex.Binop.op) {
4944               case Iop_Mul8x16: size = 0; break;
4945               case Iop_Mul16x8: size = 1; break;
4946               case Iop_Mul32x4: size = 2; break;
4947               default: vassert(0);
4948            }
4949            addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
4950                                           res, argL, argR, size, True));
4951            return res;
4952         }
4953         case Iop_Mul32Fx4: {
4954            HReg res = newVRegV(env);
4955            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4956            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4957            UInt size = 0;
4958            addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
4959                                           res, argL, argR, size, True));
4960            return res;
4961         }
4962         case Iop_Mull8Ux8:
4963         case Iop_Mull16Ux4:
4964         case Iop_Mull32Ux2: {
                /* Iop_Mull{8,16,32}Ux*: emit a single ARMneon_VMULLU.  Unlike
                   most arms here, both operands are selected with
                   iselNeon64Expr while the result is a newVRegV register --
                   consistent with a widening multiply (64-bit inputs, 128-bit
                   product); confirm against the IROp definitions. */
4965            HReg res = newVRegV(env);
4966            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
4967            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
4968            UInt size = 0;
                /* size: 0/1/2 for 8/16/32-bit source lanes. */
4969            switch(e->Iex.Binop.op) {
4970               case Iop_Mull8Ux8: size = 0; break;
4971               case Iop_Mull16Ux4: size = 1; break;
4972               case Iop_Mull32Ux2: size = 2; break;
4973               default: vassert(0);
4974            }
4975            addInstr(env, ARMInstr_NBinary(ARMneon_VMULLU,
4976                                           res, argL, argR, size, True));
4977            return res;
4978         }
4979
4980         case Iop_Mull8Sx8:
4981         case Iop_Mull16Sx4:
4982         case Iop_Mull32Sx2: {
                /* Iop_Mull{8,16,32}Sx*: emit a single ARMneon_VMULLS.  Both
                   operands are selected with iselNeon64Expr while the result
                   is a newVRegV register -- consistent with a widening
                   multiply (64-bit inputs, 128-bit product); confirm against
                   the IROp definitions. */
4983            HReg res = newVRegV(env);
4984            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
4985            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
4986            UInt size = 0;
                /* size: 0/1/2 for 8/16/32-bit source lanes. */
4987            switch(e->Iex.Binop.op) {
4988               case Iop_Mull8Sx8: size = 0; break;
4989               case Iop_Mull16Sx4: size = 1; break;
4990               case Iop_Mull32Sx2: size = 2; break;
4991               default: vassert(0);
4992            }
4993            addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
4994                                           res, argL, argR, size, True));
4995            return res;
4996         }
4997
4998         case Iop_QDMulHi16Sx8:
4999         case Iop_QDMulHi32Sx4: {
5000            HReg res = newVRegV(env);
5001            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5002            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5003            UInt size = 0;
5004            switch(e->Iex.Binop.op) {
5005               case Iop_QDMulHi16Sx8: size = 1; break;
5006               case Iop_QDMulHi32Sx4: size = 2; break;
5007               default: vassert(0);
5008            }
5009            addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5010                                           res, argL, argR, size, True));
5011            return res;
5012         }
5013
5014         case Iop_QRDMulHi16Sx8:
5015         case Iop_QRDMulHi32Sx4: {
5016            HReg res = newVRegV(env);
5017            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5018            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5019            UInt size = 0;
5020            switch(e->Iex.Binop.op) {
5021               case Iop_QRDMulHi16Sx8: size = 1; break;
5022               case Iop_QRDMulHi32Sx4: size = 2; break;
5023               default: vassert(0);
5024            }
5025            addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5026                                           res, argL, argR, size, True));
5027            return res;
5028         }
5029
5030         case Iop_QDMulLong16Sx4:
5031         case Iop_QDMulLong32Sx2: {
                /* Iop_QDMulLong{16,32}Sx*: emit a single ARMneon_VQDMULL.
                   Operands come from iselNeon64Expr and the result is a
                   newVRegV register.  There is no 8-bit variant: size is 1
                   for 16-bit and 2 for 32-bit source lanes. */
5032            HReg res = newVRegV(env);
5033            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5034            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5035            UInt size = 0;
5036            switch(e->Iex.Binop.op) {
5037               case Iop_QDMulLong16Sx4: size = 1; break;
5038               case Iop_QDMulLong32Sx2: size = 2; break;
5039               default: vassert(0);
5040            }
5041            addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5042                                           res, argL, argR, size, True));
5043            return res;
5044         }
5045         case Iop_PolynomialMul8x16: {
5046            HReg res = newVRegV(env);
5047            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5048            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5049            UInt size = 0;
5050            addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5051                                           res, argL, argR, size, True));
5052            return res;
5053         }
5054         case Iop_Max32Fx4: {
5055            HReg res = newVRegV(env);
5056            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5057            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5058            addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5059                                           res, argL, argR, 2, True));
5060            return res;
5061         }
5062         case Iop_Min32Fx4: {
5063            HReg res = newVRegV(env);
5064            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5065            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5066            addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5067                                           res, argL, argR, 2, True));
5068            return res;
5069         }
5070         case Iop_PwMax32Fx4: {
5071            HReg res = newVRegV(env);
5072            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5073            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5074            addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5075                                           res, argL, argR, 2, True));
5076            return res;
5077         }
5078         case Iop_PwMin32Fx4: {
5079            HReg res = newVRegV(env);
5080            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5081            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5082            addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5083                                           res, argL, argR, 2, True));
5084            return res;
5085         }
5086         case Iop_CmpGT32Fx4: {
5087            HReg res = newVRegV(env);
5088            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5089            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5090            addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5091                                           res, argL, argR, 2, True));
5092            return res;
5093         }
5094         case Iop_CmpGE32Fx4: {
5095            HReg res = newVRegV(env);
5096            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5097            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5098            addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5099                                           res, argL, argR, 2, True));
5100            return res;
5101         }
5102         case Iop_CmpEQ32Fx4: {
5103            HReg res = newVRegV(env);
5104            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5105            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5106            addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5107                                           res, argL, argR, 2, True));
5108            return res;
5109         }
5110
5111         case Iop_PolynomialMull8x8: {
5112            HReg res = newVRegV(env);
5113            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5114            HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5115            UInt size = 0;
5116            addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5117                                           res, argL, argR, size, True));
5118            return res;
5119         }
5120         case Iop_F32ToFixed32Ux4_RZ:
5121         case Iop_F32ToFixed32Sx4_RZ:
5122         case Iop_Fixed32UToF32x4_RN:
5123         case Iop_Fixed32SToF32x4_RN: {
                /* Float <-> fixed-point conversions.  arg1 is the vector to
                   convert; arg2 must be a constant Ity_I8 in the range 1..32
                   (otherwise this panics / asserts).  One NUnary instruction
                   is emitted, with the opcode chosen per IROp below. */
5124            HReg res = newVRegV(env);
5125            HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5126            ARMNeonUnOp op;
5127            UInt imm6;
5128            if (e->Iex.Binop.arg2->tag != Iex_Const ||
5129               typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5130                  vpanic("ARM supports FP <-> Fixed conversion with constant "
5131                         "second argument less than 33 only\n");
5132            }
5133            imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5134            vassert(imm6 <= 32 && imm6 > 0);
                /* The instruction is given 64 - arg2 rather than arg2 itself.
                   NOTE(review): this presumably mirrors the NEON VCVT
                   fixed-point encoding, where the field holds 64 minus the
                   number of fraction bits -- confirm against the emitter. */
5135            imm6 = 64 - imm6;
5136            switch(e->Iex.Binop.op) {
5137               case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5138               case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5139               case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5140               case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5141               default: vassert(0);
5142            }
5143            addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5144            return res;
5145         }
5146         /*
5147         FIXME remove if not used
5148         case Iop_VDup8x16:
5149         case Iop_VDup16x8:
5150         case Iop_VDup32x4: {
5151            HReg res = newVRegV(env);
5152            HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5153            UInt imm4;
5154            UInt index;
5155            if (e->Iex.Binop.arg2->tag != Iex_Const ||
5156               typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5157                  vpanic("ARM supports Iop_VDup with constant "
5158                         "second argument less than 16 only\n");
5159            }
5160            index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5161            switch(e->Iex.Binop.op) {
5162               case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5163               case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5164               case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5165               default: vassert(0);
5166            }
5167            if (imm4 >= 16) {
5168               vpanic("ARM supports Iop_VDup with constant "
5169                      "second argument less than 16 only\n");
5170            }
5171            addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5172                                          res, argL, imm4, True));
5173            return res;
5174         }
5175         */
5176         case Iop_PwAdd8x16:
5177         case Iop_PwAdd16x8:
5178         case Iop_PwAdd32x4: {
5179            HReg res = newVRegV(env);
5180            HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5181            HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5182            UInt size = 0;
5183            switch(e->Iex.Binop.op) {
5184               case Iop_PwAdd8x16: size = 0; break;
5185               case Iop_PwAdd16x8: size = 1; break;
5186               case Iop_PwAdd32x4: size = 2; break;
5187               default: vassert(0);
5188            }
5189            addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5190                                           res, argL, argR, size, True));
5191            return res;
5192         }
5193         /* ... */
5194         default:
5195            break;
5196      }
5197   }
5198
5199   if (e->tag == Iex_Triop) {
5200      switch (e->Iex.Triop.op) {
5201         case Iop_ExtractV128: {
5202            HReg res = newVRegV(env);
5203            HReg argL = iselNeonExpr(env, e->Iex.Triop.arg1);
5204            HReg argR = iselNeonExpr(env, e->Iex.Triop.arg2);
5205            UInt imm4;
5206            if (e->Iex.Triop.arg3->tag != Iex_Const ||
5207                typeOfIRExpr(env->type_env, e->Iex.Triop.arg3) != Ity_I8) {
5208               vpanic("ARM target supports Iop_ExtractV128 with constant "
5209                      "third argument less than 16 only\n");
5210            }
5211            imm4 = e->Iex.Triop.arg3->Iex.Const.con->Ico.U8;
5212            if (imm4 >= 16) {
5213               vpanic("ARM target supports Iop_ExtractV128 with constant "
5214                      "third argument less than 16 only\n");
5215            }
5216            addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5217                                           res, argL, argR, imm4, True));
5218            return res;
5219         }
5220         default:
5221            break;
5222      }
5223   }
5224
5225   if (e->tag == Iex_Mux0X) {
5226      HReg r8;
5227      HReg rX  = iselNeonExpr(env, e->Iex.Mux0X.exprX);
5228      HReg r0  = iselNeonExpr(env, e->Iex.Mux0X.expr0);
5229      HReg dst = newVRegV(env);
5230      addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, rX, 4, True));
5231      r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5232      addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5233                                      ARMRI84_I84(0xFF,0)));
5234      addInstr(env, ARMInstr_NCMovQ(ARMcc_EQ, dst, r0));
5235      return dst;
5236   }
5237
5238  neon_expr_bad:
5239   ppIRExpr(e);
5240   vpanic("iselNeonExpr_wrk");
5241}
5242
5243/*---------------------------------------------------------*/
5244/*--- ISEL: Floating point expressions (64 bit)         ---*/
5245/*---------------------------------------------------------*/
5246
5247/* Compute a 64-bit floating point value into a register, the identity
5248   of which is returned.  As with iselIntExpr_R, the reg may be either
5249   real or virtual; in any case it must not be changed by subsequent
5250   code emitted by the caller.  */
5251
5252static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5253{
5254   HReg r = iselDblExpr_wrk( env, e );
5255#  if 0
5256   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5257#  endif
5258   vassert(hregClass(r) == HRcFlt64);
5259   vassert(hregIsVirtual(r));
5260   return r;
5261}
5262
5263/* DO NOT CALL THIS DIRECTLY */
5264static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
5265{
5266   IRType ty = typeOfIRExpr(env->type_env,e);
5267   vassert(e);
5268   vassert(ty == Ity_F64);
5269
5270   if (e->tag == Iex_RdTmp) {
5271      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5272   }
5273
5274   if (e->tag == Iex_Const) {
5275      /* Just handle the zero case. */
5276      IRConst* con = e->Iex.Const.con;
5277      if (con->tag == Ico_F64i && con->Ico.F64i == 0ULL) {
5278         HReg z32 = newVRegI(env);
5279         HReg dst = newVRegD(env);
5280         addInstr(env, ARMInstr_Imm32(z32, 0));
5281         addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, z32, z32));
5282         return dst;
5283      }
5284   }
5285
5286   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5287      ARMAModeV* am;
5288      HReg res = newVRegD(env);
5289      vassert(e->Iex.Load.ty == Ity_F64);
5290      am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5291      addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5292      return res;
5293   }
5294
5295   if (e->tag == Iex_Get) {
5296      // XXX This won't work if offset > 1020 or is not 0 % 4.
5297      // In which case we'll have to generate more longwinded code.
5298      ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5299      HReg       res = newVRegD(env);
5300      addInstr(env, ARMInstr_VLdStD(True/*isLoad*/, res, am));
5301      return res;
5302   }
5303
5304   if (e->tag == Iex_Unop) {
5305      switch (e->Iex.Unop.op) {
5306         case Iop_ReinterpI64asF64: {
5307            if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5308               return iselNeon64Expr(env, e->Iex.Unop.arg);
5309            } else {
5310               HReg srcHi, srcLo;
5311               HReg dst = newVRegD(env);
5312               iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
5313               addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
5314               return dst;
5315            }
5316         }
5317         case Iop_NegF64: {
5318            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5319            HReg dst = newVRegD(env);
5320            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_NEG, dst, src));
5321            return dst;
5322         }
5323         case Iop_AbsF64: {
5324            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
5325            HReg dst = newVRegD(env);
5326            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_ABS, dst, src));
5327            return dst;
5328         }
5329         case Iop_F32toF64: {
5330            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5331            HReg dst = newVRegD(env);
5332            addInstr(env, ARMInstr_VCvtSD(True/*sToD*/, dst, src));
5333            return dst;
5334         }
5335         case Iop_I32UtoF64:
5336         case Iop_I32StoF64: {
5337            HReg src   = iselIntExpr_R(env, e->Iex.Unop.arg);
5338            HReg f32   = newVRegF(env);
5339            HReg dst   = newVRegD(env);
5340            Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
5341            /* VMOV f32, src */
5342            addInstr(env, ARMInstr_VXferS(True/*toS*/, f32, src));
5343            /* FSITOD dst, f32 */
5344            addInstr(env, ARMInstr_VCvtID(True/*iToD*/, syned,
5345                                          dst, f32));
5346            return dst;
5347         }
5348         default:
5349            break;
5350      }
5351   }
5352
5353   if (e->tag == Iex_Binop) {
5354      switch (e->Iex.Binop.op) {
5355         case Iop_SqrtF64: {
5356            /* first arg is rounding mode; we ignore it. */
5357            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
5358            HReg dst = newVRegD(env);
5359            addInstr(env, ARMInstr_VUnaryD(ARMvfpu_SQRT, dst, src));
5360            return dst;
5361         }
5362         default:
5363            break;
5364      }
5365   }
5366
5367   if (e->tag == Iex_Triop) {
5368      switch (e->Iex.Triop.op) {
5369         case Iop_DivF64:
5370         case Iop_MulF64:
5371         case Iop_AddF64:
5372         case Iop_SubF64: {
5373            ARMVfpOp op = 0; /*INVALID*/
5374            HReg argL = iselDblExpr(env, e->Iex.Triop.arg2);
5375            HReg argR = iselDblExpr(env, e->Iex.Triop.arg3);
5376            HReg dst  = newVRegD(env);
5377            switch (e->Iex.Triop.op) {
5378               case Iop_DivF64: op = ARMvfp_DIV; break;
5379               case Iop_MulF64: op = ARMvfp_MUL; break;
5380               case Iop_AddF64: op = ARMvfp_ADD; break;
5381               case Iop_SubF64: op = ARMvfp_SUB; break;
5382               default: vassert(0);
5383            }
5384            addInstr(env, ARMInstr_VAluD(op, dst, argL, argR));
5385            return dst;
5386         }
5387         default:
5388            break;
5389      }
5390   }
5391
5392   if (e->tag == Iex_Mux0X) {
5393      if (ty == Ity_F64
5394          && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
5395         HReg r8;
5396         HReg rX  = iselDblExpr(env, e->Iex.Mux0X.exprX);
5397         HReg r0  = iselDblExpr(env, e->Iex.Mux0X.expr0);
5398         HReg dst = newVRegD(env);
5399         addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, rX));
5400         r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5401         addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5402                                         ARMRI84_I84(0xFF,0)));
5403         addInstr(env, ARMInstr_VCMovD(ARMcc_EQ, dst, r0));
5404         return dst;
5405      }
5406   }
5407
5408   ppIRExpr(e);
5409   vpanic("iselDblExpr_wrk");
5410}
5411
5412
5413/*---------------------------------------------------------*/
5414/*--- ISEL: Floating point expressions (32 bit)         ---*/
5415/*---------------------------------------------------------*/
5416
/* Compute a 32-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, the reg may be either
   real or virtual; in any case it must not be changed by subsequent
   code emitted by the caller.  */
5421
5422static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
5423{
5424   HReg r = iselFltExpr_wrk( env, e );
5425#  if 0
5426   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5427#  endif
5428   vassert(hregClass(r) == HRcFlt32);
5429   vassert(hregIsVirtual(r));
5430   return r;
5431}
5432
5433/* DO NOT CALL THIS DIRECTLY */
5434static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
5435{
5436   IRType ty = typeOfIRExpr(env->type_env,e);
5437   vassert(e);
5438   vassert(ty == Ity_F32);
5439
5440   if (e->tag == Iex_RdTmp) {
5441      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5442   }
5443
5444   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
5445      ARMAModeV* am;
5446      HReg res = newVRegF(env);
5447      vassert(e->Iex.Load.ty == Ity_F32);
5448      am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
5449      addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5450      return res;
5451   }
5452
5453   if (e->tag == Iex_Get) {
5454      // XXX This won't work if offset > 1020 or is not 0 % 4.
5455      // In which case we'll have to generate more longwinded code.
5456      ARMAModeV* am  = mkARMAModeV(hregARM_R8(), e->Iex.Get.offset);
5457      HReg       res = newVRegF(env);
5458      addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
5459      return res;
5460   }
5461
5462   if (e->tag == Iex_Unop) {
5463      switch (e->Iex.Unop.op) {
5464         case Iop_ReinterpI32asF32: {
5465            HReg dst = newVRegF(env);
5466            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
5467            addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
5468            return dst;
5469         }
5470         case Iop_NegF32: {
5471            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5472            HReg dst = newVRegF(env);
5473            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_NEG, dst, src));
5474            return dst;
5475         }
5476         case Iop_AbsF32: {
5477            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
5478            HReg dst = newVRegF(env);
5479            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_ABS, dst, src));
5480            return dst;
5481         }
5482         default:
5483            break;
5484      }
5485   }
5486
5487   if (e->tag == Iex_Binop) {
5488      switch (e->Iex.Binop.op) {
5489         case Iop_SqrtF32: {
5490            /* first arg is rounding mode; we ignore it. */
5491            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
5492            HReg dst = newVRegF(env);
5493            addInstr(env, ARMInstr_VUnaryS(ARMvfpu_SQRT, dst, src));
5494            return dst;
5495         }
5496         case Iop_F64toF32: {
5497            HReg valD = iselDblExpr(env, e->Iex.Binop.arg2);
5498            set_VFP_rounding_mode(env, e->Iex.Binop.arg1);
5499            HReg valS = newVRegF(env);
5500            /* FCVTSD valS, valD */
5501            addInstr(env, ARMInstr_VCvtSD(False/*!sToD*/, valS, valD));
5502            set_VFP_rounding_default(env);
5503            return valS;
5504         }
5505         default:
5506            break;
5507      }
5508   }
5509
5510   if (e->tag == Iex_Triop) {
5511      switch (e->Iex.Triop.op) {
5512         case Iop_DivF32:
5513         case Iop_MulF32:
5514         case Iop_AddF32:
5515         case Iop_SubF32: {
5516            ARMVfpOp op = 0; /*INVALID*/
5517            HReg argL = iselFltExpr(env, e->Iex.Triop.arg2);
5518            HReg argR = iselFltExpr(env, e->Iex.Triop.arg3);
5519            HReg dst  = newVRegF(env);
5520            switch (e->Iex.Triop.op) {
5521               case Iop_DivF32: op = ARMvfp_DIV; break;
5522               case Iop_MulF32: op = ARMvfp_MUL; break;
5523               case Iop_AddF32: op = ARMvfp_ADD; break;
5524               case Iop_SubF32: op = ARMvfp_SUB; break;
5525               default: vassert(0);
5526            }
5527            addInstr(env, ARMInstr_VAluS(op, dst, argL, argR));
5528            return dst;
5529         }
5530         default:
5531            break;
5532      }
5533   }
5534
5535   if (e->tag == Iex_Mux0X) {
5536      if (ty == Ity_F32
5537          && typeOfIRExpr(env->type_env,e->Iex.Mux0X.cond) == Ity_I8) {
5538         HReg r8;
5539         HReg rX  = iselFltExpr(env, e->Iex.Mux0X.exprX);
5540         HReg r0  = iselFltExpr(env, e->Iex.Mux0X.expr0);
5541         HReg dst = newVRegF(env);
5542         addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, rX));
5543         r8 = iselIntExpr_R(env, e->Iex.Mux0X.cond);
5544         addInstr(env, ARMInstr_CmpOrTst(False/*!isCmp*/, r8,
5545                                         ARMRI84_I84(0xFF,0)));
5546         addInstr(env, ARMInstr_VCMovS(ARMcc_EQ, dst, r0));
5547         return dst;
5548      }
5549   }
5550
5551   ppIRExpr(e);
5552   vpanic("iselFltExpr_wrk");
5553}
5554
5555
5556/*---------------------------------------------------------*/
5557/*--- ISEL: Statements                                  ---*/
5558/*---------------------------------------------------------*/
5559
5560static void iselStmt ( ISelEnv* env, IRStmt* stmt )
5561{
5562   if (vex_traceflags & VEX_TRACE_VCODE) {
5563      vex_printf("\n-- ");
5564      ppIRStmt(stmt);
5565      vex_printf("\n");
5566   }
5567   switch (stmt->tag) {
5568
5569   /* --------- STORE --------- */
5570   /* little-endian write to memory */
5571   case Ist_Store: {
5572      IRType    tya  = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
5573      IRType    tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
5574      IREndness end  = stmt->Ist.Store.end;
5575
5576      if (tya != Ity_I32 || end != Iend_LE)
5577         goto stmt_fail;
5578
5579      if (tyd == Ity_I32) {
5580         HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5581         ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5582         addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
5583         return;
5584      }
5585      if (tyd == Ity_I16) {
5586         HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5587         ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
5588         addInstr(env, ARMInstr_LdSt16(False/*!isLoad*/,
5589                                       False/*!isSignedLoad*/, rD, am));
5590         return;
5591      }
5592      if (tyd == Ity_I8) {
5593         HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
5594         ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
5595         addInstr(env, ARMInstr_LdSt8U(False/*!isLoad*/, rD, am));
5596         return;
5597      }
5598      if (tyd == Ity_I64) {
5599         if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5600            HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
5601            ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5602            addInstr(env, ARMInstr_NLdStD(False, dD, am));
5603         } else {
5604            HReg rDhi, rDlo, rA;
5605            iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
5606            rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
5607            addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDhi,
5608                                          ARMAMode1_RI(rA,4)));
5609            addInstr(env, ARMInstr_LdSt32(False/*!load*/, rDlo,
5610                                          ARMAMode1_RI(rA,0)));
5611         }
5612         return;
5613      }
5614      if (tyd == Ity_F64) {
5615         HReg       dD = iselDblExpr(env, stmt->Ist.Store.data);
5616         ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5617         addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
5618         return;
5619      }
5620      if (tyd == Ity_F32) {
5621         HReg       fD = iselFltExpr(env, stmt->Ist.Store.data);
5622         ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
5623         addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
5624         return;
5625      }
5626      if (tyd == Ity_V128) {
5627         HReg       qD = iselNeonExpr(env, stmt->Ist.Store.data);
5628         ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
5629         addInstr(env, ARMInstr_NLdStQ(False, qD, am));
5630         return;
5631      }
5632
5633      break;
5634   }
5635
5636   /* --------- PUT --------- */
5637   /* write guest state, fixed offset */
5638   case Ist_Put: {
5639       IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
5640
5641       if (tyd == Ity_I32) {
5642           HReg       rD = iselIntExpr_R(env, stmt->Ist.Put.data);
5643           ARMAMode1* am = ARMAMode1_RI(hregARM_R8(), stmt->Ist.Put.offset);
5644           addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rD, am));
5645           return;
5646       }
5647       if (tyd == Ity_I64) {
5648          if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5649             HReg addr = newVRegI(env);
5650             HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
5651             addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5652                                                stmt->Ist.Put.offset));
5653             addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
5654          } else {
5655             HReg rDhi, rDlo;
5656             ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
5657                                           stmt->Ist.Put.offset + 0);
5658             ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
5659                                           stmt->Ist.Put.offset + 4);
5660             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
5661             addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDhi, am4));
5662             addInstr(env, ARMInstr_LdSt32(False/*!isLoad*/, rDlo, am0));
5663          }
5664          return;
5665       }
5666       if (tyd == Ity_F64) {
5667          // XXX This won't work if offset > 1020 or is not 0 % 4.
5668          // In which case we'll have to generate more longwinded code.
5669          ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5670          HReg       rD = iselDblExpr(env, stmt->Ist.Put.data);
5671          addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
5672          return;
5673       }
5674       if (tyd == Ity_F32) {
5675          // XXX This won't work if offset > 1020 or is not 0 % 4.
5676          // In which case we'll have to generate more longwinded code.
5677          ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
5678          HReg       rD = iselFltExpr(env, stmt->Ist.Put.data);
5679          addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
5680          return;
5681       }
5682       if (tyd == Ity_V128) {
5683          HReg addr = newVRegI(env);
5684          HReg qD = iselNeonExpr(env, stmt->Ist.Put.data);
5685          addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
5686                                       stmt->Ist.Put.offset));
5687          addInstr(env, ARMInstr_NLdStQ(False, qD, mkARMAModeN_R(addr)));
5688          return;
5689       }
5690       break;
5691   }
5692
5693//zz   /* --------- Indexed PUT --------- */
5694//zz   /* write guest state, run-time offset */
5695//zz   case Ist_PutI: {
5696//zz      ARMAMode2* am2
5697//zz           = genGuestArrayOffset(
5698//zz               env, stmt->Ist.PutI.descr,
5699//zz               stmt->Ist.PutI.ix, stmt->Ist.PutI.bias );
5700//zz
5701//zz       IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.PutI.data);
5702//zz
5703//zz       if (tyd == Ity_I8) {
5704//zz           HReg reg = iselIntExpr_R(env, stmt->Ist.PutI.data);
5705//zz           addInstr(env, ARMInstr_StoreB(reg, am2));
5706//zz           return;
5707//zz       }
5708//zz// CAB: Ity_I32, Ity_I16 ?
5709//zz       break;
5710//zz   }
5711
5712   /* --------- TMP --------- */
5713   /* assign value to temporary */
5714   case Ist_WrTmp: {
5715      IRTemp tmp = stmt->Ist.WrTmp.tmp;
5716      IRType ty = typeOfIRTemp(env->type_env, tmp);
5717
5718      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
5719         ARMRI84* ri84 = iselIntExpr_RI84(NULL, False,
5720                                          env, stmt->Ist.WrTmp.data);
5721         HReg     dst  = lookupIRTemp(env, tmp);
5722         addInstr(env, ARMInstr_Mov(dst,ri84));
5723         return;
5724      }
5725      if (ty == Ity_I1) {
5726         HReg        dst  = lookupIRTemp(env, tmp);
5727         ARMCondCode cond = iselCondCode(env, stmt->Ist.WrTmp.data);
5728         addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
5729         addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
5730         return;
5731      }
5732      if (ty == Ity_I64) {
5733         if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5734            HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
5735            HReg dst = lookupIRTemp(env, tmp);
5736            addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
5737         } else {
5738            HReg rHi, rLo, dstHi, dstLo;
5739            iselInt64Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
5740            lookupIRTemp64( &dstHi, &dstLo, env, tmp);
5741            addInstr(env, mk_iMOVds_RR(dstHi, rHi) );
5742            addInstr(env, mk_iMOVds_RR(dstLo, rLo) );
5743         }
5744         return;
5745      }
5746      if (ty == Ity_F64) {
5747         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
5748         HReg dst = lookupIRTemp(env, tmp);
5749         addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, src));
5750         return;
5751      }
5752      if (ty == Ity_F32) {
5753         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
5754         HReg dst = lookupIRTemp(env, tmp);
5755         addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, src));
5756         return;
5757      }
5758      if (ty == Ity_V128) {
5759         HReg src = iselNeonExpr(env, stmt->Ist.WrTmp.data);
5760         HReg dst = lookupIRTemp(env, tmp);
5761         addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, True));
5762         return;
5763      }
5764      break;
5765   }
5766
5767   /* --------- Call to DIRTY helper --------- */
5768   /* call complex ("dirty") helper function */
5769   case Ist_Dirty: {
5770      IRType   retty;
5771      IRDirty* d = stmt->Ist.Dirty.details;
5772      Bool     passBBP = False;
5773
5774      if (d->nFxState == 0)
5775         vassert(!d->needsBBP);
5776
5777      passBBP = toBool(d->nFxState > 0 && d->needsBBP);
5778
5779      /* Marshal args, do the call, clear stack. */
5780      Bool ok = doHelperCall( env, passBBP, d->guard, d->cee, d->args );
5781      if (!ok)
5782         break; /* will go to stmt_fail: */
5783
5784      /* Now figure out what to do with the returned value, if any. */
5785      if (d->tmp == IRTemp_INVALID)
5786         /* No return value.  Nothing to do. */
5787         return;
5788
5789      retty = typeOfIRTemp(env->type_env, d->tmp);
5790
5791      if (retty == Ity_I64) {
5792         if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5793            HReg tmp = lookupIRTemp(env, d->tmp);
5794            addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
5795                                                     hregARM_R0()));
5796         } else {
5797            HReg dstHi, dstLo;
5798            /* The returned value is in r1:r0.  Park it in the
5799               register-pair associated with tmp. */
5800            lookupIRTemp64( &dstHi, &dstLo, env, d->tmp);
5801            addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R1()) );
5802            addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R0()) );
5803         }
5804         return;
5805      }
5806      if (retty == Ity_I32 || retty == Ity_I16 || retty == Ity_I8) {
5807         /* The returned value is in r0.  Park it in the register
5808            associated with tmp. */
5809         HReg dst = lookupIRTemp(env, d->tmp);
5810         addInstr(env, mk_iMOVds_RR(dst, hregARM_R0()) );
5811         return;
5812      }
5813
5814      break;
5815   }
5816
5817   /* --------- Load Linked and Store Conditional --------- */
5818   case Ist_LLSC: {
5819      if (stmt->Ist.LLSC.storedata == NULL) {
5820         /* LL */
5821         IRTemp res = stmt->Ist.LLSC.result;
5822         IRType ty  = typeOfIRTemp(env->type_env, res);
5823         if (ty == Ity_I32 || ty == Ity_I8) {
5824            Int  szB   = 0;
5825            HReg r_dst = lookupIRTemp(env, res);
5826            HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5827            switch (ty) {
5828               case Ity_I8:  szB = 1; break;
5829               case Ity_I32: szB = 4; break;
5830               default:      vassert(0);
5831            }
5832            addInstr(env, mk_iMOVds_RR(hregARM_R0(), raddr));
5833            addInstr(env, ARMInstr_LdrEX(szB));
5834            addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
5835            return;
5836         } else if (ty == Ity_I64) {
5837            HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5838            addInstr(env, mk_iMOVds_RR(hregARM_R0(), raddr));
5839            addInstr(env, ARMInstr_LdrEX(8 /* 64-bit */));
5840            if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
5841                HReg tmp = lookupIRTemp(env, res);
5842                addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R2(),
5843                        hregARM_R3()));
5844            } else {
5845                HReg dstHi, dstLo;
5846                /* The returned value is in r1:r0.  Park it in the
5847                   register-pair associated with tmp. */
5848                lookupIRTemp64( &dstHi, &dstLo, env, res);
5849                addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R2()) );
5850                addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R3()) );
5851            }
5852            return;
5853         }
5854         /* else fall thru; is unhandled */
5855      } else {
5856         /* SC */
5857         IRTemp res = stmt->Ist.LLSC.result;
5858         IRType ty  = typeOfIRTemp(env->type_env, res);
5859         IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
5860         vassert(ty == Ity_I1);
5861         if (tyd == Ity_I32 || tyd == Ity_I8) {
5862            Int  szB     = 0;
5863            HReg r_res   = lookupIRTemp(env, res);
5864            HReg rD      = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
5865            HReg rA      = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5866            ARMRI84* one = ARMRI84_I84(1,0);
5867            switch (tyd) {
5868               case Ity_I8:  szB = 1; break;
5869               case Ity_I32: szB = 4; break;
5870               default:      vassert(0);
5871            }
5872            addInstr(env, mk_iMOVds_RR(hregARM_R0(), rA));
5873            addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
5874            addInstr(env, ARMInstr_StrEX(szB));
5875            /* now r1 is 1 if failed, 0 if success.  Change to IR
5876               conventions (0 is fail, 1 is success).  Also transfer
5877               result to r_res. */
5878            addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R1(), one));
5879            /* And be conservative -- mask off all but the lowest bit */
5880            addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
5881            return;
5882         } else if (tyd == Ity_I64) {
5883             HReg r_res   = lookupIRTemp(env, res);
5884             HReg rA      = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
5885             ARMRI84* one = ARMRI84_I84(1,0);
5886             HReg rDHi, rDLo;
5887             iselInt64Expr(&rDHi, &rDLo, env, stmt->Ist.LLSC.storedata);
5888             addInstr(env, mk_iMOVds_RR(hregARM_R0(), rA));
5889             addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDHi));
5890             addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDLo));
5891             addInstr(env, ARMInstr_StrEX(8 /* 64-bit */));
5892             /* now r1 is 1 if failed, 0 if success.  Change to IR
5893                conventions (0 is fail, 1 is success).  Also transfer
5894                result to r_res. */
5895             addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R1(), one));
5896             /* And be conservative -- mask off all but the lowest bit */
5897             addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
5898             return;
5899         }
5900         /* else fall thru; is unhandled */
5901      }
5902      break;
5903   }
5904
5905   /* --------- MEM FENCE --------- */
5906   case Ist_MBE:
5907      switch (stmt->Ist.MBE.event) {
5908         case Imbe_Fence:
5909            addInstr(env,ARMInstr_MFence());
5910            return;
5911         default:
5912            break;
5913      }
5914      break;
5915
5916   /* --------- INSTR MARK --------- */
5917   /* Doesn't generate any executable code ... */
5918   case Ist_IMark:
5919       return;
5920
5921   /* --------- NO-OP --------- */
5922   case Ist_NoOp:
5923       return;
5924
5925   /* --------- EXIT --------- */
5926   case Ist_Exit: {
5927      HReg        gnext;
5928      ARMCondCode cc;
5929      if (stmt->Ist.Exit.dst->tag != Ico_U32)
5930         vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
5931      gnext = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
5932      cc    = iselCondCode(env, stmt->Ist.Exit.guard);
5933      addInstr(env, mk_iMOVds_RR(hregARM_R14(), env->savedLR));
5934      addInstr(env, ARMInstr_Goto(stmt->Ist.Exit.jk, cc, gnext));
5935      return;
5936   }
5937
5938   default: break;
5939   }
5940  stmt_fail:
5941   ppIRStmt(stmt);
5942   vpanic("iselStmt");
5943}
5944
5945
5946/*---------------------------------------------------------*/
5947/*--- ISEL: Basic block terminators (Nexts)             ---*/
5948/*---------------------------------------------------------*/
5949
5950static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
5951{
5952   HReg rDst;
5953   if (vex_traceflags & VEX_TRACE_VCODE) {
5954      vex_printf("\n-- goto {");
5955      ppIRJumpKind(jk);
5956      vex_printf("} ");
5957      ppIRExpr(next);
5958      vex_printf("\n");
5959   }
5960   rDst = iselIntExpr_R(env, next);
5961   addInstr(env, mk_iMOVds_RR(hregARM_R14(), env->savedLR));
5962   addInstr(env, ARMInstr_Goto(jk, ARMcc_AL, rDst));
5963}
5964
5965
5966/*---------------------------------------------------------*/
5967/*--- Insn selector top-level                           ---*/
5968/*---------------------------------------------------------*/
5969
5970/* Translate an entire SB to arm code. */
5971
5972HInstrArray* iselSB_ARM ( IRSB* bb, VexArch      arch_host,
5973                                    VexArchInfo* archinfo_host,
5974                                    VexAbiInfo*  vbi/*UNUSED*/ )
5975{
5976   Int      i, j;
5977   HReg     hreg, hregHI;
5978   ISelEnv* env;
5979   UInt     hwcaps_host = archinfo_host->hwcaps;
5980   Bool     neon = False;
5981   static UInt counter = 0;
5982
5983   /* sanity ... */
5984   vassert(arch_host == VexArchARM);
5985
5986   /* hwcaps should not change from one ISEL call to another. */
5987   arm_hwcaps = hwcaps_host;
5988
5989   /* Make up an initial environment to use. */
5990   env = LibVEX_Alloc(sizeof(ISelEnv));
5991   env->vreg_ctr = 0;
5992
5993   /* Set up output code array. */
5994   env->code = newHInstrArray();
5995
5996   /* Copy BB's type env. */
5997   env->type_env = bb->tyenv;
5998
5999   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
6000      change as we go along. */
6001   env->n_vregmap = bb->tyenv->types_used;
6002   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
6003   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
6004
6005   /* For each IR temporary, allocate a suitably-kinded virtual
6006      register. */
6007   j = 0;
6008   for (i = 0; i < env->n_vregmap; i++) {
6009      hregHI = hreg = INVALID_HREG;
6010      switch (bb->tyenv->types[i]) {
6011         case Ity_I1:
6012         case Ity_I8:
6013         case Ity_I16:
6014         case Ity_I32:  hreg   = mkHReg(j++, HRcInt32, True); break;
6015         case Ity_I64:
6016            if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
6017               hreg = mkHReg(j++, HRcFlt64, True);
6018               neon = True;
6019            } else {
6020               hregHI = mkHReg(j++, HRcInt32, True);
6021               hreg   = mkHReg(j++, HRcInt32, True);
6022            }
6023            break;
6024         case Ity_F32:  hreg   = mkHReg(j++, HRcFlt32, True); break;
6025         case Ity_F64:  hreg   = mkHReg(j++, HRcFlt64, True); break;
6026         case Ity_V128: hreg   = mkHReg(j++, HRcVec128, True);
6027                        neon   = True; break;
6028         default: ppIRType(bb->tyenv->types[i]);
6029                  vpanic("iselBB: IRTemp type");
6030      }
6031      env->vregmap[i]   = hreg;
6032      env->vregmapHI[i] = hregHI;
6033   }
6034   env->vreg_ctr = j;
6035
6036   /* Keep a copy of the link reg, since any call to a helper function
6037      will trash it, and we can't get back to the dispatcher once that
6038      happens. */
6039   env->savedLR = newVRegI(env);
6040   addInstr(env, mk_iMOVds_RR(env->savedLR, hregARM_R14()));
6041
6042   /* Ok, finally we can iterate over the statements. */
6043   for (i = 0; i < bb->stmts_used; i++)
6044      iselStmt(env,bb->stmts[i]);
6045
6046   iselNext(env,bb->next,bb->jumpkind);
6047
6048   /* record the number of vregs we used. */
6049   env->code->n_vregs = env->vreg_ctr;
6050   counter++;
6051   return env->code;
6052}
6053
6054
6055/*---------------------------------------------------------------*/
6056/*--- end                                     host_arm_isel.c ---*/
6057/*---------------------------------------------------------------*/
6058