1
2/*---------------------------------------------------------------*/
3/*--- begin                                 host_arm64_isel.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2013-2013 OpenWorks
11      info@open-works.net
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26   02110-1301, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31#include "libvex_basictypes.h"
32#include "libvex_ir.h"
33#include "libvex.h"
34#include "ir_match.h"
35
36#include "main_util.h"
37#include "main_globals.h"
38#include "host_generic_regs.h"
39#include "host_generic_simd64.h"  // for 32-bit SIMD helpers
40#include "host_arm64_defs.h"
41
42
43//ZZ /*---------------------------------------------------------*/
44//ZZ /*--- ARMvfp control word stuff                         ---*/
45//ZZ /*---------------------------------------------------------*/
46//ZZ
47//ZZ /* Vex-generated code expects to run with the FPU set as follows: all
48//ZZ    exceptions masked, round-to-nearest, non-vector mode, with the NZCV
49//ZZ    flags cleared, and FZ (flush to zero) disabled.  Curiously enough,
50//ZZ    this corresponds to a FPSCR value of zero.
51//ZZ
52//ZZ    fpscr should therefore be zero on entry to Vex-generated code, and
53//ZZ    should be unchanged at exit.  (Or at least the bottom 28 bits
54//ZZ    should be zero).
55//ZZ */
56//ZZ
57//ZZ #define DEFAULT_FPSCR 0
58
59
60/*---------------------------------------------------------*/
61/*--- ISelEnv                                           ---*/
62/*---------------------------------------------------------*/
63
64/* This carries around:
65
66   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
67     might encounter.  This is computed before insn selection starts,
68     and does not change.
69
70   - A mapping from IRTemp to HReg.  This tells the insn selector
71     which virtual register is associated with each IRTemp temporary.
72     This is computed before insn selection starts, and does not
73     change.  We expect this mapping to map precisely the same set of
74     IRTemps as the type mapping does.
75
76     |vregmap|   holds the primary register for the IRTemp.
77     |vregmapHI| is only used for 128-bit integer-typed
78                 IRTemps.  It holds the identity of a second
79                 64-bit virtual HReg, which holds the high half
80                 of the value.
81
82   - The code array, that is, the insns selected so far.
83
84   - A counter, for generating new virtual registers.
85
86   - The host hardware capabilities word.  This is set at the start
87     and does not change.
88
89   - A Bool for indicating whether we may generate chain-me
90     instructions for control flow transfers, or whether we must use
91     XAssisted.
92
93   - The maximum guest address of any guest insn in this block.
94     Actually, the address of the highest-addressed byte from any insn
95     in this block.  Is set at the start and does not change.  This is
96     used for detecting jumps which are definitely forward-edges from
97     this block, and therefore can be made (chained) to the fast entry
98     point of the destination, thereby avoiding the destination's
99     event check.
100
   - An IRExpr*, which may be NULL, holding the IR expression (an
     IRRoundingMode-encoded value) to which the FPU's rounding mode
     was most recently set.  Setting to NULL is always safe.  Used to
     avoid redundant settings of the FPU's rounding mode, as
     described in set_FPCR_rounding_mode below.
106
107   Note, this is all (well, mostly) host-independent.
108*/
109
110typedef
111   struct {
112      /* Constant -- are set at the start and do not change. */
113      IRTypeEnv*   type_env;
114
115      HReg*        vregmap;
116      HReg*        vregmapHI;
117      Int          n_vregmap;
118
119      UInt         hwcaps;
120
121      Bool         chainingAllowed;
122      Addr64       max_ga;
123
124      /* These are modified as we go along. */
125      HInstrArray* code;
126      Int          vreg_ctr;
127
128      IRExpr*      previous_rm;
129   }
130   ISelEnv;
131
132static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
133{
134   vassert(tmp >= 0);
135   vassert(tmp < env->n_vregmap);
136   return env->vregmap[tmp];
137}
138
139static void addInstr ( ISelEnv* env, ARM64Instr* instr )
140{
141   addHInstr(env->code, instr);
142   if (vex_traceflags & VEX_TRACE_VCODE) {
143      ppARM64Instr(instr);
144      vex_printf("\n");
145   }
146}
147
148static HReg newVRegI ( ISelEnv* env )
149{
150   HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True/*virtual reg*/);
151   env->vreg_ctr++;
152   return reg;
153}
154
155static HReg newVRegD ( ISelEnv* env )
156{
157   HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True/*virtual reg*/);
158   env->vreg_ctr++;
159   return reg;
160}
161
162//ZZ static HReg newVRegF ( ISelEnv* env )
163//ZZ {
164//ZZ    HReg reg = mkHReg(env->vreg_ctr, HRcFlt32, True/*virtual reg*/);
165//ZZ    env->vreg_ctr++;
166//ZZ    return reg;
167//ZZ }
168
169static HReg newVRegV ( ISelEnv* env )
170{
171   HReg reg = mkHReg(env->vreg_ctr, HRcVec128, True/*virtual reg*/);
172   env->vreg_ctr++;
173   return reg;
174}
175
176//ZZ /* These are duplicated in guest_arm_toIR.c */
177//ZZ static IRExpr* unop ( IROp op, IRExpr* a )
178//ZZ {
179//ZZ    return IRExpr_Unop(op, a);
180//ZZ }
181//ZZ
182//ZZ static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
183//ZZ {
184//ZZ    return IRExpr_Binop(op, a1, a2);
185//ZZ }
186//ZZ
187//ZZ static IRExpr* bind ( Int binder )
188//ZZ {
189//ZZ    return IRExpr_Binder(binder);
190//ZZ }
191
192
193/*---------------------------------------------------------*/
194/*--- ISEL: Forward declarations                        ---*/
195/*---------------------------------------------------------*/
196
/* These are organised as iselXXX and iselXXX_wrk pairs.  The
   iselXXX_wrk functions do the real work, but must not be called
   directly.  For each XXX, iselXXX calls its iselXXX_wrk counterpart,
   then checks that all returned registers are virtual.
202
203   Because some forms of ARM64 memory amodes are implicitly scaled by
204   the access size, iselIntExpr_AMode takes an IRType which tells it
205   the type of the access for which the amode is to be used.  This
206   type needs to be correct, else you'll get incorrect code.
207*/
/* Address modes.  |dty| is the type of the memory access for which
   the resulting amode will be used. */
static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
                                           IRExpr* e, IRType dty );
static ARM64AMode* iselIntExpr_AMode     ( ISelEnv* env,
                                           IRExpr* e, IRType dty );

/* Integer expressions selected into an ARM64RIA operand. */
static ARM64RIA*   iselIntExpr_RIA_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RIA*   iselIntExpr_RIA       ( ISelEnv* env, IRExpr* e );

/* Integer expressions selected into an ARM64RIL operand. */
static ARM64RIL*   iselIntExpr_RIL_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RIL*   iselIntExpr_RIL       ( ISelEnv* env, IRExpr* e );

/* Integer expressions selected into an ARM64RI6 operand. */
static ARM64RI6*   iselIntExpr_RI6_wrk   ( ISelEnv* env, IRExpr* e );
static ARM64RI6*   iselIntExpr_RI6       ( ISelEnv* env, IRExpr* e );

/* Expressions selected into a condition code. */
static ARM64CondCode iselCondCode_wrk    ( ISelEnv* env, IRExpr* e );
static ARM64CondCode iselCondCode        ( ISelEnv* env, IRExpr* e );

/* Integer expressions selected into a register. */
static HReg        iselIntExpr_R_wrk     ( ISelEnv* env, IRExpr* e );
static HReg        iselIntExpr_R         ( ISelEnv* env, IRExpr* e );

/* 128-bit integer expressions.  The result is delivered as a pair of
   registers through the |rHi| and |rLo| out-parameters. */
static void        iselInt128Expr_wrk    ( /*OUT*/HReg* rHi, HReg* rLo,
                                           ISelEnv* env, IRExpr* e );
static void        iselInt128Expr        ( /*OUT*/HReg* rHi, HReg* rLo,
                                           ISelEnv* env, IRExpr* e );


//ZZ static void        iselInt64Expr_wrk      ( HReg* rHi, HReg* rLo,
//ZZ                                             ISelEnv* env, IRExpr* e );
//ZZ static void        iselInt64Expr          ( HReg* rHi, HReg* rLo,
//ZZ                                             ISelEnv* env, IRExpr* e );

/* Expressions selected by the Dbl (double) selector. */
static HReg        iselDblExpr_wrk        ( ISelEnv* env, IRExpr* e );
static HReg        iselDblExpr            ( ISelEnv* env, IRExpr* e );

/* Expressions selected by the Flt (float) selector. */
static HReg        iselFltExpr_wrk        ( ISelEnv* env, IRExpr* e );
static HReg        iselFltExpr            ( ISelEnv* env, IRExpr* e );

//ZZ static HReg        iselNeon64Expr_wrk     ( ISelEnv* env, IRExpr* e );
//ZZ static HReg        iselNeon64Expr         ( ISelEnv* env, IRExpr* e );

/* Expressions selected by the V128 selector. */
static HReg        iselV128Expr_wrk       ( ISelEnv* env, IRExpr* e );
static HReg        iselV128Expr           ( ISelEnv* env, IRExpr* e );

/* Declared ahead of its definition because set_FPCR_rounding_mode
   below uses it.  That caller asserts the result is non-NULL, so
   presumably NULL signals that |imm64| cannot be encoded -- confirm
   against the definition. */
static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );
252
253
254/*---------------------------------------------------------*/
255/*--- ISEL: Misc helpers                                ---*/
256/*---------------------------------------------------------*/
257
258/* Generate an amode suitable for a 64-bit sized access relative to
259   the baseblock register (X21).  This generates an RI12 amode, which
260   means its scaled by the access size, which is why the access size
261   -- 64 bit -- is stated explicitly here.  Consequently |off| needs
262   to be divisible by 8. */
263static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off )
264{
265   vassert(off < (8 << 12)); /* otherwise it's unrepresentable */
266   vassert((off & 7) == 0);  /* ditto */
267   return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/);
268}
269
270/* Ditto, for 32 bit accesses. */
271static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off )
272{
273   vassert(off < (4 << 12)); /* otherwise it's unrepresentable */
274   vassert((off & 3) == 0);  /* ditto */
275   return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/);
276}
277
278/* Ditto, for 16 bit accesses. */
279static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off )
280{
281   vassert(off < (2 << 12)); /* otherwise it's unrepresentable */
282   vassert((off & 1) == 0);  /* ditto */
283   return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/);
284}
285
286/* Ditto, for 8 bit accesses. */
287static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off )
288{
289   vassert(off < (1 << 12)); /* otherwise it's unrepresentable */
290   return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/);
291}
292
293static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off )
294{
295   vassert(off < (1<<12));
296   HReg r = newVRegI(env);
297   addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(),
298                                     ARM64RIA_I12(off,0), True/*isAdd*/));
299   return r;
300}
301
302static HReg get_baseblock_register ( void )
303{
304   return hregARM64_X21();
305}
306
307/* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in
308   a new register, and return the new register. */
309static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src )
310{
311   HReg      dst  = newVRegI(env);
312   ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */
313   addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
314   return dst;
315}
316
317/* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in
318   a new register, and return the new register. */
319static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src )
320{
321   HReg      dst = newVRegI(env);
322   ARM64RI6* n48 = ARM64RI6_I6(48);
323   addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
324   addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR));
325   return dst;
326}
327
328/* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in
329   a new register, and return the new register. */
330static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src )
331{
332   HReg      dst = newVRegI(env);
333   ARM64RI6* n48 = ARM64RI6_I6(48);
334   addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
335   addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SHR));
336   return dst;
337}
338
339/* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in
340   a new register, and return the new register. */
341static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src )
342{
343   HReg      dst = newVRegI(env);
344   ARM64RI6* n32 = ARM64RI6_I6(32);
345   addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL));
346   addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR));
347   return dst;
348}
349
350/* Generate code to sign extend a 8 bit value in 'src' to 64 bits, in
351   a new register, and return the new register. */
352static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src )
353{
354   HReg      dst = newVRegI(env);
355   ARM64RI6* n56 = ARM64RI6_I6(56);
356   addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
357   addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR));
358   return dst;
359}
360
361static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src )
362{
363   HReg      dst = newVRegI(env);
364   ARM64RI6* n56 = ARM64RI6_I6(56);
365   addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
366   addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SHR));
367   return dst;
368}
369
370/* Is this IRExpr_Const(IRConst_U64(0)) ? */
371static Bool isZeroU64 ( IRExpr* e ) {
372   if (e->tag != Iex_Const) return False;
373   IRConst* con = e->Iex.Const.con;
374   vassert(con->tag == Ico_U64);
375   return con->Ico.U64 == 0;
376}
377
378
379/*---------------------------------------------------------*/
380/*--- ISEL: FP rounding mode helpers                    ---*/
381/*---------------------------------------------------------*/
382
/* Set the FP rounding mode: 'mode' is an I32-typed expression
   denoting a value in the range 0 .. 3, indicating a rounding mode
   encoded as per type IRRoundingMode -- the first four values only
   (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO).  Set the ARM64
   FPCR to have the same rounding.
388
389   For speed & simplicity, we're setting the *entire* FPCR here.
390
391   Setting the rounding mode is expensive.  So this function tries to
392   avoid repeatedly setting the rounding mode to the same thing by
393   first comparing 'mode' to the 'mode' tree supplied in the previous
394   call to this function, if any.  (The previous value is stored in
395   env->previous_rm.)  If 'mode' is a single IR temporary 't' and
396   env->previous_rm is also just 't', then the setting is skipped.
397
398   This is safe because of the SSA property of IR: an IR temporary can
399   only be defined once and so will have the same value regardless of
400   where it appears in the block.  Cool stuff, SSA.
401
402   A safety condition: all attempts to set the RM must be aware of
403   this mechanism - by being routed through the functions here.
404
   Of course this only helps in blocks where the RM is set more than
   once, it is set to the same value each time, *and* that value is
   held in the same IR temporary each time.  In order to assure the
   latter as much as possible, the IR optimiser takes care to do CSE
   on any block with any sign of floating point activity.
410*/
411static
412void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
413{
414   vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);
415
416   /* Do we need to do anything? */
417   if (env->previous_rm
418       && env->previous_rm->tag == Iex_RdTmp
419       && mode->tag == Iex_RdTmp
420       && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
421      /* no - setting it to what it was before.  */
422      vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
423      return;
424   }
425
426   /* No luck - we better set it, and remember what we set it to. */
427   env->previous_rm = mode;
428
429   /* Only supporting the rounding-mode bits - the rest of FPCR is set
430      to zero - so we can set the whole register at once (faster). */
431
432   /* This isn't simple, because 'mode' carries an IR rounding
433      encoding, and we need to translate that to an ARM64 FP one:
434      The IR encoding:
435         00  to nearest (the default)
436         10  to +infinity
437         01  to -infinity
438         11  to zero
439      The ARM64 FP encoding:
440         00  to nearest
441         01  to +infinity
442         10  to -infinity
443         11  to zero
444      Easy enough to do; just swap the two bits.
445   */
446   HReg irrm = iselIntExpr_R(env, mode);
447   HReg tL   = newVRegI(env);
448   HReg tR   = newVRegI(env);
449   HReg t3   = newVRegI(env);
450   /* tL = irrm << 1;
451      tR = irrm >> 1;  if we're lucky, these will issue together
452      tL &= 2;
453      tR &= 1;         ditto
454      t3 = tL | tR;
455      t3 <<= 22;
456      fmxr fpscr, t3
457   */
458   ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
459   ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
460   vassert(ril_one && ril_two);
461   addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
462   addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
463   addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND));
464   addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND));
465   addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
466   addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL));
467   addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
468}
469
470
471/*---------------------------------------------------------*/
472/*--- ISEL: Function call helpers                       ---*/
473/*---------------------------------------------------------*/
474
475/* Used only in doHelperCall.  See big comment in doHelperCall re
476   handling of register-parameter args.  This function figures out
477   whether evaluation of an expression might require use of a fixed
478   register.  If in doubt return True (safe but suboptimal).
479*/
480static
481Bool mightRequireFixedRegs ( IRExpr* e )
482{
483   if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) {
484      // These are always "safe" -- either a copy of SP in some
485      // arbitrary vreg, or a copy of x21, respectively.
486      return False;
487   }
488   /* Else it's a "normal" expression. */
489   switch (e->tag) {
490      case Iex_RdTmp: case Iex_Const: case Iex_Get:
491         return False;
492      default:
493         return True;
494   }
495}
496
497
498/* Do a complete function call.  |guard| is a Ity_Bit expression
499   indicating whether or not the call happens.  If guard==NULL, the
500   call is unconditional.  |retloc| is set to indicate where the
501   return value is after the call.  The caller (of this fn) must
502   generate code to add |stackAdjustAfterCall| to the stack pointer
503   after the call is done.  Returns True iff it managed to handle this
504   combination of arg/return types, else returns False. */
505
506static
507Bool doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
508                    /*OUT*/RetLoc* retloc,
509                    ISelEnv* env,
510                    IRExpr* guard,
511                    IRCallee* cee, IRType retTy, IRExpr** args )
512{
513   ARM64CondCode cc;
514   HReg          argregs[ARM64_N_ARGREGS];
515   HReg          tmpregs[ARM64_N_ARGREGS];
516   Bool          go_fast;
517   Int           n_args, i, nextArgReg;
518   ULong         target;
519
520   vassert(ARM64_N_ARGREGS == 8);
521
522   /* Set default returns.  We'll update them later if needed. */
523   *stackAdjustAfterCall = 0;
524   *retloc               = mk_RetLoc_INVALID();
525
526   /* These are used for cross-checking that IR-level constraints on
527      the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
528   UInt nVECRETs = 0;
529   UInt nBBPTRs  = 0;
530
531   /* Marshal args for a call and do the call.
532
533      This function only deals with a tiny set of possibilities, which
534      cover all helpers in practice.  The restrictions are that only
535      arguments in registers are supported, hence only
536      ARM64_N_REGPARMS x 64 integer bits in total can be passed.  In
537      fact the only supported arg type is I64.
538
539      The return type can be I{64,32} or V128.  In the V128 case, it
540      is expected that |args| will contain the special node
541      IRExpr_VECRET(), in which case this routine generates code to
542      allocate space on the stack for the vector return value.  Since
543      we are not passing any scalars on the stack, it is enough to
544      preallocate the return space before marshalling any arguments,
545      in this case.
546
547      |args| may also contain IRExpr_BBPTR(), in which case the
548      value in x21 is passed as the corresponding argument.
549
550      Generating code which is both efficient and correct when
551      parameters are to be passed in registers is difficult, for the
552      reasons elaborated in detail in comments attached to
553      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
554      of the method described in those comments.
555
556      The problem is split into two cases: the fast scheme and the
557      slow scheme.  In the fast scheme, arguments are computed
558      directly into the target (real) registers.  This is only safe
559      when we can be sure that computation of each argument will not
560      trash any real registers set by computation of any other
561      argument.
562
563      In the slow scheme, all args are first computed into vregs, and
564      once they are all done, they are moved to the relevant real
565      regs.  This always gives correct code, but it also gives a bunch
566      of vreg-to-rreg moves which are usually redundant but are hard
567      for the register allocator to get rid of.
568
569      To decide which scheme to use, all argument expressions are
570      first examined.  If they are all so simple that it is clear they
571      will be evaluated without use of any fixed registers, use the
572      fast scheme, else use the slow scheme.  Note also that only
573      unconditional calls may use the fast scheme, since having to
574      compute a condition expression could itself trash real
575      registers.
576
577      Note this requires being able to examine an expression and
578      determine whether or not evaluation of it might use a fixed
579      register.  That requires knowledge of how the rest of this insn
580      selector works.  Currently just the following 3 are regarded as
581      safe -- hopefully they cover the majority of arguments in
582      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
583   */
584
585   /* Note that the cee->regparms field is meaningless on ARM64 hosts
586      (since there is only one calling convention) and so we always
587      ignore it. */
588
589   n_args = 0;
590   for (i = 0; args[i]; i++) {
591      IRExpr* arg = args[i];
592      if (UNLIKELY(arg->tag == Iex_VECRET)) {
593         nVECRETs++;
594      } else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
595         nBBPTRs++;
596      }
597      n_args++;
598   }
599
600   /* If this fails, the IR is ill-formed */
601   vassert(nBBPTRs == 0 || nBBPTRs == 1);
602
603   /* If we have a VECRET, allocate space on the stack for the return
604      value, and record the stack pointer after that. */
605   HReg r_vecRetAddr = INVALID_HREG;
606   if (nVECRETs == 1) {
607      vassert(retTy == Ity_V128 || retTy == Ity_V256);
608      vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
609      r_vecRetAddr = newVRegI(env);
610      addInstr(env, ARM64Instr_AddToSP(-16));
611      addInstr(env, ARM64Instr_FromSP(r_vecRetAddr));
612   } else {
613      // If either of these fail, the IR is ill-formed
614      vassert(retTy != Ity_V128 && retTy != Ity_V256);
615      vassert(nVECRETs == 0);
616   }
617
618   argregs[0] = hregARM64_X0();
619   argregs[1] = hregARM64_X1();
620   argregs[2] = hregARM64_X2();
621   argregs[3] = hregARM64_X3();
622   argregs[4] = hregARM64_X4();
623   argregs[5] = hregARM64_X5();
624   argregs[6] = hregARM64_X6();
625   argregs[7] = hregARM64_X7();
626
627   tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
628   tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG;
629
630   /* First decide which scheme (slow or fast) is to be used.  First
631      assume the fast scheme, and select slow if any contraindications
632      (wow) appear. */
633
634   go_fast = True;
635
636   if (guard) {
637      if (guard->tag == Iex_Const
638          && guard->Iex.Const.con->tag == Ico_U1
639          && guard->Iex.Const.con->Ico.U1 == True) {
640         /* unconditional */
641      } else {
642         /* Not manifestly unconditional -- be conservative. */
643         go_fast = False;
644      }
645   }
646
647   if (go_fast) {
648      for (i = 0; i < n_args; i++) {
649         if (mightRequireFixedRegs(args[i])) {
650            go_fast = False;
651            break;
652         }
653      }
654   }
655
656   if (go_fast) {
657      if (retTy == Ity_V128 || retTy == Ity_V256)
658         go_fast = False;
659   }
660
661   /* At this point the scheme to use has been established.  Generate
662      code to get the arg values into the argument rregs.  If we run
663      out of arg regs, give up. */
664
665   if (go_fast) {
666
667      /* FAST SCHEME */
668      nextArgReg = 0;
669
670      for (i = 0; i < n_args; i++) {
671         IRExpr* arg = args[i];
672
673         IRType  aTy = Ity_INVALID;
674         if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
675            aTy = typeOfIRExpr(env->type_env, args[i]);
676
677         if (nextArgReg >= ARM64_N_ARGREGS)
678            return False; /* out of argregs */
679
680         if (aTy == Ity_I64) {
681            addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
682                                           iselIntExpr_R(env, args[i]) ));
683            nextArgReg++;
684         }
685         else if (arg->tag == Iex_BBPTR) {
686            vassert(0); //ATC
687            addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
688                                           hregARM64_X21() ));
689            nextArgReg++;
690         }
691         else if (arg->tag == Iex_VECRET) {
692            // because of the go_fast logic above, we can't get here,
693            // since vector return values makes us use the slow path
694            // instead.
695            vassert(0);
696         }
697         else
698            return False; /* unhandled arg type */
699      }
700
701      /* Fast scheme only applies for unconditional calls.  Hence: */
702      cc = ARM64cc_AL;
703
704   } else {
705
706      /* SLOW SCHEME; move via temporaries */
707      nextArgReg = 0;
708
709      for (i = 0; i < n_args; i++) {
710         IRExpr* arg = args[i];
711
712         IRType  aTy = Ity_INVALID;
713         if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
714            aTy = typeOfIRExpr(env->type_env, args[i]);
715
716         if (nextArgReg >= ARM64_N_ARGREGS)
717            return False; /* out of argregs */
718
719         if (aTy == Ity_I64) {
720            tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
721            nextArgReg++;
722         }
723         else if (arg->tag == Iex_BBPTR) {
724            vassert(0); //ATC
725            tmpregs[nextArgReg] = hregARM64_X21();
726            nextArgReg++;
727         }
728         else if (arg->tag == Iex_VECRET) {
729            vassert(!hregIsInvalid(r_vecRetAddr));
730            tmpregs[nextArgReg] = r_vecRetAddr;
731            nextArgReg++;
732         }
733         else
734            return False; /* unhandled arg type */
735      }
736
737      /* Now we can compute the condition.  We can't do it earlier
738         because the argument computations could trash the condition
739         codes.  Be a bit clever to handle the common case where the
740         guard is 1:Bit. */
741      cc = ARM64cc_AL;
742      if (guard) {
743         if (guard->tag == Iex_Const
744             && guard->Iex.Const.con->tag == Ico_U1
745             && guard->Iex.Const.con->Ico.U1 == True) {
746            /* unconditional -- do nothing */
747         } else {
748            cc = iselCondCode( env, guard );
749         }
750      }
751
752      /* Move the args to their final destinations. */
753      for (i = 0; i < nextArgReg; i++) {
754         vassert(!(hregIsInvalid(tmpregs[i])));
755         /* None of these insns, including any spill code that might
756            be generated, may alter the condition codes. */
757         addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) );
758      }
759
760   }
761
762   /* Should be assured by checks above */
763   vassert(nextArgReg <= ARM64_N_ARGREGS);
764
765   /* Do final checks, set the return values, and generate the call
766      instruction proper. */
767   vassert(nBBPTRs == 0 || nBBPTRs == 1);
768   vassert(nVECRETs == (retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0);
769   vassert(*stackAdjustAfterCall == 0);
770   vassert(is_RetLoc_INVALID(*retloc));
771   switch (retTy) {
772      case Ity_INVALID:
773         /* Function doesn't return a value. */
774         *retloc = mk_RetLoc_simple(RLPri_None);
775         break;
776      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
777         *retloc = mk_RetLoc_simple(RLPri_Int);
778         break;
779      case Ity_V128:
780         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
781         *stackAdjustAfterCall = 16;
782         break;
783      case Ity_V256:
784         vassert(0); // ATC
785         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
786         *stackAdjustAfterCall = 32;
787         break;
788      default:
789         /* IR can denote other possible return types, but we don't
790            handle those here. */
791         vassert(0);
792   }
793
794   /* Finally, generate the call itself.  This needs the *retloc value
795      set in the switch above, which is why it's at the end. */
796
797   /* nextArgReg doles out argument registers.  Since these are
798      assigned in the order x0 .. x7, its numeric value at this point,
799      which must be between 0 and 8 inclusive, is going to be equal to
800      the number of arg regs in use for the call.  Hence bake that
801      number into the call (we'll need to know it when doing register
802      allocation, to know what regs the call reads.) */
803
804   target = (HWord)Ptr_to_ULong(cee->addr);
805   addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc ));
806
807   return True; /* success */
808}
809
810
811/*---------------------------------------------------------*/
812/*--- ISEL: Integer expressions (64/32 bit)             ---*/
813/*---------------------------------------------------------*/
814
815/* Select insns for an integer-typed expression, and add them to the
816   code list.  Return a reg holding the result.  This reg will be a
817   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
818   want to modify it, ask for a new vreg, copy it in there, and modify
819   the copy.  The register allocator will do its best to map both
820   vregs to the same real register, so the copies will often disappear
821   later in the game.
822
823   This should handle expressions of 64- and 32-bit type.  All results
824   are returned in a 64-bit register.  For 32-bit expressions, the
825   upper 32 bits are arbitrary, so you should mask or sign extend
826   partial values if necessary.
827*/
828
829/* --------------------- AMode --------------------- */
830
831/* Return an AMode which computes the value of the specified
832   expression, possibly also adding insns to the code list as a
833   result.  The expression may only be a 64-bit one.
834*/
835
836static Bool isValidScale ( UChar scale )
837{
838   switch (scale) {
839      case 1: case 2: case 4: case 8: /* case 16: ??*/ return True;
840      default: return False;
841   }
842}
843
844static Bool sane_AMode ( ARM64AMode* am )
845{
846   switch (am->tag) {
847      case ARM64am_RI9:
848         return
849            toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64
850                    && (hregIsVirtual(am->ARM64am.RI9.reg)
851                        /* || sameHReg(am->ARM64am.RI9.reg,
852                                       hregARM64_X21()) */ )
853                    && am->ARM64am.RI9.simm9 >= -256
854                    && am->ARM64am.RI9.simm9 <= 255 );
855      case ARM64am_RI12:
856         return
857            toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64
858                    && (hregIsVirtual(am->ARM64am.RI12.reg)
859                        /* || sameHReg(am->ARM64am.RI12.reg,
860                                       hregARM64_X21()) */ )
861                    && am->ARM64am.RI12.uimm12 < 4096
862                    && isValidScale(am->ARM64am.RI12.szB) );
863      case ARM64am_RR:
864         return
865            toBool( hregClass(am->ARM64am.RR.base) == HRcInt64
866                    && hregIsVirtual(am->ARM64am.RR.base)
867                    && hregClass(am->ARM64am.RR.index) == HRcInt64
868                    && hregIsVirtual(am->ARM64am.RR.index) );
869      default:
870         vpanic("sane_AMode: unknown ARM64 AMode1 tag");
871   }
872}
873
874static
875ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty )
876{
877   ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty);
878   vassert(sane_AMode(am));
879   return am;
880}
881
882static
883ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty )
884{
885   IRType ty = typeOfIRExpr(env->type_env,e);
886   vassert(ty == Ity_I64);
887
888   ULong szBbits = 0;
889   switch (dty) {
890      case Ity_I64: szBbits = 3; break;
891      case Ity_I32: szBbits = 2; break;
892      case Ity_I16: szBbits = 1; break;
893      case Ity_I8:  szBbits = 0; break;
894      default: vassert(0);
895   }
896
897   /* {Add64,Sub64}(expr,simm9).  We don't care about |dty| here since
898      we're going to create an amode suitable for LDU* or STU*
899      instructions, which use unscaled immediate offsets.  */
900   if (e->tag == Iex_Binop
901       && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64)
902       && e->Iex.Binop.arg2->tag == Iex_Const
903       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
904      Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
905      if (simm >= -255 && simm <= 255) {
906         /* Although the gating condition might seem to be
907               simm >= -256 && simm <= 255
908            we will need to negate simm in the case where the op is Sub64.
909            Hence limit the lower value to -255 in order that its negation
910            is representable. */
911         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
912         if (e->Iex.Binop.op == Iop_Sub64) simm = -simm;
913         return ARM64AMode_RI9(reg, (Int)simm);
914      }
915   }
916
917   /* Add64(expr, uimm12 * transfer-size) */
918   if (e->tag == Iex_Binop
919       && e->Iex.Binop.op == Iop_Add64
920       && e->Iex.Binop.arg2->tag == Iex_Const
921       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
922      ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
923      ULong szB  = 1 << szBbits;
924      if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */
925          && (uimm >> szBbits) < 4096) {
926         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
927         return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB);
928      }
929   }
930
931   /* Add64(expr1, expr2) */
932   if (e->tag == Iex_Binop
933       && e->Iex.Binop.op == Iop_Add64) {
934      HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
935      HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
936      return ARM64AMode_RR(reg1, reg2);
937   }
938
939   /* Doesn't match anything in particular.  Generate it into
940      a register and use that. */
941   HReg reg = iselIntExpr_R(env, e);
942   return ARM64AMode_RI9(reg, 0);
943}
944
945//ZZ /* --------------------- AModeV --------------------- */
946//ZZ
947//ZZ /* Return an AModeV which computes the value of the specified
948//ZZ    expression, possibly also adding insns to the code list as a
949//ZZ    result.  The expression may only be a 32-bit one.
950//ZZ */
951//ZZ
952//ZZ static Bool sane_AModeV ( ARMAModeV* am )
953//ZZ {
954//ZZ   return toBool( hregClass(am->reg) == HRcInt32
955//ZZ                  && hregIsVirtual(am->reg)
956//ZZ                  && am->simm11 >= -1020 && am->simm11 <= 1020
957//ZZ                  && 0 == (am->simm11 & 3) );
958//ZZ }
959//ZZ
960//ZZ static ARMAModeV* iselIntExpr_AModeV ( ISelEnv* env, IRExpr* e )
961//ZZ {
962//ZZ    ARMAModeV* am = iselIntExpr_AModeV_wrk(env, e);
963//ZZ    vassert(sane_AModeV(am));
964//ZZ    return am;
965//ZZ }
966//ZZ
967//ZZ static ARMAModeV* iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e )
968//ZZ {
969//ZZ    IRType ty = typeOfIRExpr(env->type_env,e);
970//ZZ    vassert(ty == Ity_I32);
971//ZZ
972//ZZ    /* {Add32,Sub32}(expr, simm8 << 2) */
973//ZZ    if (e->tag == Iex_Binop
974//ZZ        && (e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Sub32)
975//ZZ        && e->Iex.Binop.arg2->tag == Iex_Const
976//ZZ        && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U32) {
977//ZZ       Int simm = (Int)e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
978//ZZ       if (simm >= -1020 && simm <= 1020 && 0 == (simm & 3)) {
979//ZZ          HReg reg;
980//ZZ          if (e->Iex.Binop.op == Iop_Sub32)
981//ZZ             simm = -simm;
982//ZZ          reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
983//ZZ          return mkARMAModeV(reg, simm);
984//ZZ       }
985//ZZ    }
986//ZZ
987//ZZ    /* Doesn't match anything in particular.  Generate it into
988//ZZ       a register and use that. */
989//ZZ    {
990//ZZ       HReg reg = iselIntExpr_R(env, e);
991//ZZ       return mkARMAModeV(reg, 0);
992//ZZ    }
993//ZZ
994//ZZ }
995//ZZ
996//ZZ /* -------------------- AModeN -------------------- */
997//ZZ
998//ZZ static ARMAModeN* iselIntExpr_AModeN ( ISelEnv* env, IRExpr* e )
999//ZZ {
1000//ZZ    return iselIntExpr_AModeN_wrk(env, e);
1001//ZZ }
1002//ZZ
1003//ZZ static ARMAModeN* iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e )
1004//ZZ {
1005//ZZ    HReg reg = iselIntExpr_R(env, e);
1006//ZZ    return mkARMAModeN_R(reg);
1007//ZZ }
1008//ZZ
1009//ZZ
1010//ZZ /* --------------------- RI84 --------------------- */
1011//ZZ
1012//ZZ /* Select instructions to generate 'e' into a RI84.  If mayInv is
1013//ZZ    true, then the caller will also accept an I84 form that denotes
1014//ZZ    'not e'.  In this case didInv may not be NULL, and *didInv is set
1015//ZZ    to True.  This complication is so as to allow generation of an RI84
1016//ZZ    which is suitable for use in either an AND or BIC instruction,
1017//ZZ    without knowing (before this call) which one.
1018//ZZ */
1019//ZZ static ARMRI84* iselIntExpr_RI84 ( /*OUT*/Bool* didInv, Bool mayInv,
1020//ZZ                                    ISelEnv* env, IRExpr* e )
1021//ZZ {
1022//ZZ    ARMRI84* ri;
1023//ZZ    if (mayInv)
1024//ZZ       vassert(didInv != NULL);
1025//ZZ    ri = iselIntExpr_RI84_wrk(didInv, mayInv, env, e);
1026//ZZ    /* sanity checks ... */
1027//ZZ    switch (ri->tag) {
1028//ZZ       case ARMri84_I84:
1029//ZZ          return ri;
1030//ZZ       case ARMri84_R:
1031//ZZ          vassert(hregClass(ri->ARMri84.R.reg) == HRcInt32);
1032//ZZ          vassert(hregIsVirtual(ri->ARMri84.R.reg));
1033//ZZ          return ri;
1034//ZZ       default:
1035//ZZ          vpanic("iselIntExpr_RI84: unknown arm RI84 tag");
1036//ZZ    }
1037//ZZ }
1038//ZZ
1039//ZZ /* DO NOT CALL THIS DIRECTLY ! */
1040//ZZ static ARMRI84* iselIntExpr_RI84_wrk ( /*OUT*/Bool* didInv, Bool mayInv,
1041//ZZ                                        ISelEnv* env, IRExpr* e )
1042//ZZ {
1043//ZZ    IRType ty = typeOfIRExpr(env->type_env,e);
1044//ZZ    vassert(ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1045//ZZ
1046//ZZ    if (didInv) *didInv = False;
1047//ZZ
1048//ZZ    /* special case: immediate */
1049//ZZ    if (e->tag == Iex_Const) {
1050//ZZ       UInt u, u8 = 0x100, u4 = 0x10; /* both invalid */
1051//ZZ       switch (e->Iex.Const.con->tag) {
1052//ZZ          case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1053//ZZ          case Ico_U16: u = 0xFFFF & (e->Iex.Const.con->Ico.U16); break;
1054//ZZ          case Ico_U8:  u = 0xFF   & (e->Iex.Const.con->Ico.U8); break;
1055//ZZ          default: vpanic("iselIntExpr_RI84.Iex_Const(armh)");
1056//ZZ       }
1057//ZZ       if (fitsIn8x4(&u8, &u4, u)) {
1058//ZZ          return ARMRI84_I84( (UShort)u8, (UShort)u4 );
1059//ZZ       }
1060//ZZ       if (mayInv && fitsIn8x4(&u8, &u4, ~u)) {
1061//ZZ          vassert(didInv);
1062//ZZ          *didInv = True;
1063//ZZ          return ARMRI84_I84( (UShort)u8, (UShort)u4 );
1064//ZZ       }
1065//ZZ       /* else fail, fall through to default case */
1066//ZZ    }
1067//ZZ
1068//ZZ    /* default case: calculate into a register and return that */
1069//ZZ    {
1070//ZZ       HReg r = iselIntExpr_R ( env, e );
1071//ZZ       return ARMRI84_R(r);
1072//ZZ    }
1073//ZZ }
1074
1075
1076/* --------------------- RIA --------------------- */
1077
1078/* Select instructions to generate 'e' into a RIA. */
1079
1080static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e )
1081{
1082   ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e);
1083   /* sanity checks ... */
1084   switch (ri->tag) {
1085      case ARM64riA_I12:
1086         vassert(ri->ARM64riA.I12.imm12 < 4096);
1087         vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12);
1088         return ri;
1089      case ARM64riA_R:
1090         vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64);
1091         vassert(hregIsVirtual(ri->ARM64riA.R.reg));
1092         return ri;
1093      default:
1094         vpanic("iselIntExpr_RIA: unknown arm RIA tag");
1095   }
1096}
1097
1098/* DO NOT CALL THIS DIRECTLY ! */
1099static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e )
1100{
1101   IRType ty = typeOfIRExpr(env->type_env,e);
1102   vassert(ty == Ity_I64 || ty == Ity_I32);
1103
1104   /* special case: immediate */
1105   if (e->tag == Iex_Const) {
1106      ULong u = 0xF000000ULL; /* invalid */
1107      switch (e->Iex.Const.con->tag) {
1108         case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
1109         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
1110         default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)");
1111      }
1112      if (0 == (u & ~(0xFFFULL << 0)))
1113         return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0);
1114      if (0 == (u & ~(0xFFFULL << 12)))
1115         return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12);
1116      /* else fail, fall through to default case */
1117   }
1118
1119   /* default case: calculate into a register and return that */
1120   {
1121      HReg r = iselIntExpr_R ( env, e );
1122      return ARM64RIA_R(r);
1123   }
1124}
1125
1126
1127/* --------------------- RIL --------------------- */
1128
1129/* Select instructions to generate 'e' into a RIL.  At this point we
1130   have to deal with the strange bitfield-immediate encoding for logic
1131   instructions. */
1132
1133
1134// The following four functions
1135//    CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical
1136// are copied, with modifications, from
1137// https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc
1138// which has the following copyright notice:
1139/*
1140   Copyright 2013, ARM Limited
1141   All rights reserved.
1142
1143   Redistribution and use in source and binary forms, with or without
1144   modification, are permitted provided that the following conditions are met:
1145
1146   * Redistributions of source code must retain the above copyright notice,
1147     this list of conditions and the following disclaimer.
1148   * Redistributions in binary form must reproduce the above copyright notice,
1149     this list of conditions and the following disclaimer in the documentation
1150     and/or other materials provided with the distribution.
1151   * Neither the name of ARM Limited nor the names of its contributors may be
1152     used to endorse or promote products derived from this software without
1153     specific prior written permission.
1154
1155   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
1156   ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1157   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1158   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
1159   FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1160   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1161   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
1162   CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
1163   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1164   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1165*/
1166
1167static Int CountLeadingZeros(ULong value, Int width)
1168{
1169   vassert(width == 32 || width == 64);
1170   Int count = 0;
1171   ULong bit_test = 1ULL << (width - 1);
1172   while ((count < width) && ((bit_test & value) == 0)) {
1173      count++;
1174      bit_test >>= 1;
1175   }
1176   return count;
1177}
1178
1179static Int CountTrailingZeros(ULong value, Int width)
1180{
1181   vassert(width == 32 || width == 64);
1182   Int count = 0;
1183   while ((count < width) && (((value >> count) & 1) == 0)) {
1184      count++;
1185   }
1186   return count;
1187}
1188
1189static Int CountSetBits(ULong value, Int width)
1190{
1191   // TODO: Other widths could be added here, as the implementation already
1192   // supports them.
1193   vassert(width == 32 || width == 64);
1194
1195   // Mask out unused bits to ensure that they are not counted.
1196   value &= (0xffffffffffffffffULL >> (64-width));
1197
1198   // Add up the set bits.
1199   // The algorithm works by adding pairs of bit fields together iteratively,
1200   // where the size of each bit field doubles each time.
1201   // An example for an 8-bit value:
1202   // Bits: h g f e d c b a
1203   // \ | \ | \ | \ |
1204   // value = h+g f+e d+c b+a
1205   // \ | \ |
1206   // value = h+g+f+e d+c+b+a
1207   // \ |
1208   // value = h+g+f+e+d+c+b+a
1209   value = ((value >>  1) & 0x5555555555555555ULL)
1210                 + (value & 0x5555555555555555ULL);
1211   value = ((value >>  2) & 0x3333333333333333ULL)
1212                 + (value & 0x3333333333333333ULL);
1213   value = ((value >>  4) & 0x0f0f0f0f0f0f0f0fULL)
1214                 + (value & 0x0f0f0f0f0f0f0f0fULL);
1215   value = ((value >>  8) & 0x00ff00ff00ff00ffULL)
1216                 + (value & 0x00ff00ff00ff00ffULL);
1217   value = ((value >> 16) & 0x0000ffff0000ffffULL)
1218                 + (value & 0x0000ffff0000ffffULL);
1219   value = ((value >> 32) & 0x00000000ffffffffULL)
1220                 + (value & 0x00000000ffffffffULL);
1221
1222   return value;
1223}
1224
1225static Bool isImmLogical ( /*OUT*/UInt* n,
1226                           /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r,
1227                           ULong value, UInt width )
1228{
1229  // Test if a given value can be encoded in the immediate field of a
1230  // logical instruction.
1231
1232  // If it can be encoded, the function returns true, and values
1233  // pointed to by n, imm_s and imm_r are updated with immediates
1234  // encoded in the format required by the corresponding fields in the
1235  // logical instruction.  If it can not be encoded, the function
1236  // returns false, and the values pointed to by n, imm_s and imm_r
1237  // are undefined.
1238  vassert(n != NULL && imm_s != NULL && imm_r != NULL);
1239  vassert(width == 32 || width == 64);
1240
1241  // Logical immediates are encoded using parameters n, imm_s and imm_r using
1242  // the following table:
1243  //
1244  // N imms immr size S R
1245  // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
1246  // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
1247  // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
1248  // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
1249  // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
1250  // 0 11110s xxxxxr 2 UInt(s) UInt(r)
1251  // (s bits must not be all set)
1252  //
1253  // A pattern is constructed of size bits, where the least significant S+1
1254  // bits are set. The pattern is rotated right by R, and repeated across a
1255  // 32 or 64-bit value, depending on destination register width.
1256  //
1257  // To test if an arbitrary immediate can be encoded using this scheme, an
1258  // iterative algorithm is used.
1259  //
1260  // TODO: This code does not consider using X/W register overlap to support
1261  // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits
1262  // are an encodable logical immediate.
1263
1264  // 1. If the value has all set or all clear bits, it can't be encoded.
1265  if ((value == 0) || (value == 0xffffffffffffffffULL) ||
1266      ((width == 32) && (value == 0xffffffff))) {
1267    return False;
1268  }
1269
1270  UInt lead_zero = CountLeadingZeros(value, width);
1271  UInt lead_one = CountLeadingZeros(~value, width);
1272  UInt trail_zero = CountTrailingZeros(value, width);
1273  UInt trail_one = CountTrailingZeros(~value, width);
1274  UInt set_bits = CountSetBits(value, width);
1275
1276  // The fixed bits in the immediate s field.
1277  // If width == 64 (X reg), start at 0xFFFFFF80.
1278  // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
1279  // widths won't be executed.
1280  Int imm_s_fixed = (width == 64) ? -128 : -64;
1281  Int imm_s_mask = 0x3F;
1282
1283  for (;;) {
1284    // 2. If the value is two bits wide, it can be encoded.
1285    if (width == 2) {
1286      *n = 0;
1287      *imm_s = 0x3C;
1288      *imm_r = (value & 3) - 1;
1289      return True;
1290    }
1291
1292    *n = (width == 64) ? 1 : 0;
1293    *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
1294    if ((lead_zero + set_bits) == width) {
1295      *imm_r = 0;
1296    } else {
1297      *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
1298    }
1299
1300    // 3. If the sum of leading zeros, trailing zeros and set bits is equal to
1301    // the bit width of the value, it can be encoded.
1302    if (lead_zero + trail_zero + set_bits == width) {
1303      return True;
1304    }
1305
1306    // 4. If the sum of leading ones, trailing ones and unset bits in the
1307    // value is equal to the bit width of the value, it can be encoded.
1308    if (lead_one + trail_one + (width - set_bits) == width) {
1309      return True;
1310    }
1311
1312    // 5. If the most-significant half of the bitwise value is equal to the
1313    // least-significant half, return to step 2 using the least-significant
1314    // half of the value.
1315    ULong mask = (1ULL << (width >> 1)) - 1;
1316    if ((value & mask) == ((value >> (width >> 1)) & mask)) {
1317      width >>= 1;
1318      set_bits >>= 1;
1319      imm_s_fixed >>= 1;
1320      continue;
1321    }
1322
1323    // 6. Otherwise, the value can't be encoded.
1324    return False;
1325  }
1326}
1327
1328
1329/* Create a RIL for the given immediate, if it is representable, or
1330   return NULL if not. */
1331
1332static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 )
1333{
1334   UInt n = 0, imm_s = 0, imm_r = 0;
1335   Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64);
1336   if (!ok) return NULL;
1337   vassert(n < 2 && imm_s < 64 && imm_r < 64);
1338   return ARM64RIL_I13(n, imm_r, imm_s);
1339}
1340
1341/* So, finally .. */
1342
1343static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e )
1344{
1345   ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e);
1346   /* sanity checks ... */
1347   switch (ri->tag) {
1348      case ARM64riL_I13:
1349         vassert(ri->ARM64riL.I13.bitN < 2);
1350         vassert(ri->ARM64riL.I13.immR < 64);
1351         vassert(ri->ARM64riL.I13.immS < 64);
1352         return ri;
1353      case ARM64riL_R:
1354         vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64);
1355         vassert(hregIsVirtual(ri->ARM64riL.R.reg));
1356         return ri;
1357      default:
1358         vpanic("iselIntExpr_RIL: unknown arm RIL tag");
1359   }
1360}
1361
1362/* DO NOT CALL THIS DIRECTLY ! */
1363static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e )
1364{
1365   IRType ty = typeOfIRExpr(env->type_env,e);
1366   vassert(ty == Ity_I64 || ty == Ity_I32);
1367
1368   /* special case: immediate */
1369   if (e->tag == Iex_Const) {
1370      ARM64RIL* maybe = NULL;
1371      if (ty == Ity_I64) {
1372         vassert(e->Iex.Const.con->tag == Ico_U64);
1373         maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64);
1374      } else {
1375         vassert(ty == Ity_I32);
1376         vassert(e->Iex.Const.con->tag == Ico_U32);
1377         UInt  u32 = e->Iex.Const.con->Ico.U32;
1378         ULong u64 = (ULong)u32;
1379         /* First try with 32 leading zeroes. */
1380         maybe = mb_mkARM64RIL_I(u64);
1381         /* If that doesn't work, try with 2 copies, since it doesn't
1382            matter what winds up in the upper 32 bits. */
1383         if (!maybe) {
1384            maybe = mb_mkARM64RIL_I((u64 << 32) | u64);
1385         }
1386      }
1387      if (maybe) return maybe;
1388      /* else fail, fall through to default case */
1389   }
1390
1391   /* default case: calculate into a register and return that */
1392   {
1393      HReg r = iselIntExpr_R ( env, e );
1394      return ARM64RIL_R(r);
1395   }
1396}
1397
1398
1399/* --------------------- RI6 --------------------- */
1400
1401/* Select instructions to generate 'e' into a RI6. */
1402
1403static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e )
1404{
1405   ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e);
1406   /* sanity checks ... */
1407   switch (ri->tag) {
1408      case ARM64ri6_I6:
1409         vassert(ri->ARM64ri6.I6.imm6 < 64);
1410         vassert(ri->ARM64ri6.I6.imm6 > 0);
1411         return ri;
1412      case ARM64ri6_R:
1413         vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64);
1414         vassert(hregIsVirtual(ri->ARM64ri6.R.reg));
1415         return ri;
1416      default:
1417         vpanic("iselIntExpr_RI6: unknown arm RI6 tag");
1418   }
1419}
1420
1421/* DO NOT CALL THIS DIRECTLY ! */
1422static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e )
1423{
1424   IRType ty = typeOfIRExpr(env->type_env,e);
1425   vassert(ty == Ity_I64 || ty == Ity_I8);
1426
1427   /* special case: immediate */
1428   if (e->tag == Iex_Const) {
1429      switch (e->Iex.Const.con->tag) {
1430         case Ico_U8: {
1431            UInt u = e->Iex.Const.con->Ico.U8;
1432            if (u > 0 && u < 64)
1433              return ARM64RI6_I6(u);
1434            break;
1435         default:
1436            break;
1437         }
1438      }
1439      /* else fail, fall through to default case */
1440   }
1441
1442   /* default case: calculate into a register and return that */
1443   {
1444      HReg r = iselIntExpr_R ( env, e );
1445      return ARM64RI6_R(r);
1446   }
1447}
1448
1449
1450/* ------------------- CondCode ------------------- */
1451
/* Generate code to evaluate a bit-typed (Ity_I1) expression, and
   return the condition code that holds exactly when the expression
   would notionally have evaluated to 1. */
1455
1456static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1457{
1458   ARM64CondCode cc = iselCondCode_wrk(env,e);
1459   vassert(cc != ARM64cc_NV);
1460   return cc;
1461}
1462
1463static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1464{
1465   vassert(e);
1466   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1467
1468   /* var */
1469   if (e->tag == Iex_RdTmp) {
1470      HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1471      /* Cmp doesn't modify rTmp; so this is OK. */
1472      ARM64RIL* one = mb_mkARM64RIL_I(1);
1473      vassert(one);
1474      addInstr(env, ARM64Instr_Test(rTmp, one));
1475      return ARM64cc_NE;
1476   }
1477
1478   /* Not1(e) */
1479   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1480      /* Generate code for the arg, and negate the test condition */
1481      ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
1482      if (cc == ARM64cc_AL || cc == ARM64cc_NV) {
1483        return ARM64cc_AL;
1484      } else {
1485        return 1 ^ cc;
1486      }
1487   }
1488
1489   /* --- patterns rooted at: 64to1 --- */
1490
1491   if (e->tag == Iex_Unop
1492       && e->Iex.Unop.op == Iop_64to1) {
1493      HReg      rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
1494      ARM64RIL* one  = mb_mkARM64RIL_I(1);
1495      vassert(one); /* '1' must be representable */
1496      addInstr(env, ARM64Instr_Test(rTmp, one));
1497      return ARM64cc_NE;
1498   }
1499
1500   /* --- patterns rooted at: CmpNEZ8 --- */
1501
1502   if (e->tag == Iex_Unop
1503       && e->Iex.Unop.op == Iop_CmpNEZ8) {
1504      HReg      r1  = iselIntExpr_R(env, e->Iex.Unop.arg);
1505      ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF);
1506      addInstr(env, ARM64Instr_Test(r1, xFF));
1507      return ARM64cc_NE;
1508   }
1509
1510   /* --- patterns rooted at: CmpNEZ64 --- */
1511
1512   if (e->tag == Iex_Unop
1513       && e->Iex.Unop.op == Iop_CmpNEZ64) {
1514      HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1515      ARM64RIA* zero = ARM64RIA_I12(0,0);
1516      addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/));
1517      return ARM64cc_NE;
1518   }
1519
1520   /* --- patterns rooted at: CmpNEZ32 --- */
1521
1522   if (e->tag == Iex_Unop
1523       && e->Iex.Unop.op == Iop_CmpNEZ32) {
1524      HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1525      ARM64RIA* zero = ARM64RIA_I12(0,0);
1526      addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/));
1527      return ARM64cc_NE;
1528   }
1529
1530   /* --- Cmp*64*(x,y) --- */
1531   if (e->tag == Iex_Binop
1532       && (e->Iex.Binop.op == Iop_CmpEQ64
1533           || e->Iex.Binop.op == Iop_CmpNE64
1534           || e->Iex.Binop.op == Iop_CmpLT64S
1535           || e->Iex.Binop.op == Iop_CmpLT64U
1536           || e->Iex.Binop.op == Iop_CmpLE64S
1537           || e->Iex.Binop.op == Iop_CmpLE64U)) {
1538      HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1539      ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1540      addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/));
1541      switch (e->Iex.Binop.op) {
1542         case Iop_CmpEQ64:  return ARM64cc_EQ;
1543         case Iop_CmpNE64:  return ARM64cc_NE;
1544         case Iop_CmpLT64S: return ARM64cc_LT;
1545         case Iop_CmpLT64U: return ARM64cc_CC;
1546         case Iop_CmpLE64S: return ARM64cc_LE;
1547         case Iop_CmpLE64U: return ARM64cc_LS;
1548         default: vpanic("iselCondCode(arm64): CmpXX64");
1549      }
1550   }
1551
1552   /* --- Cmp*32*(x,y) --- */
1553   if (e->tag == Iex_Binop
1554       && (e->Iex.Binop.op == Iop_CmpEQ32
1555           || e->Iex.Binop.op == Iop_CmpNE32
1556           || e->Iex.Binop.op == Iop_CmpLT32S
1557           || e->Iex.Binop.op == Iop_CmpLT32U
1558           || e->Iex.Binop.op == Iop_CmpLE32S
1559           || e->Iex.Binop.op == Iop_CmpLE32U)) {
1560      HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1561      ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1562      addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/));
1563      switch (e->Iex.Binop.op) {
1564         case Iop_CmpEQ32:  return ARM64cc_EQ;
1565         case Iop_CmpNE32:  return ARM64cc_NE;
1566         case Iop_CmpLT32S: return ARM64cc_LT;
1567         case Iop_CmpLT32U: return ARM64cc_CC;
1568         case Iop_CmpLE32S: return ARM64cc_LE;
1569         case Iop_CmpLE32U: return ARM64cc_LS;
1570         default: vpanic("iselCondCode(arm64): CmpXX32");
1571      }
1572   }
1573
1574//ZZ    /* const */
1575//ZZ    /* Constant 1:Bit */
1576//ZZ    if (e->tag == Iex_Const) {
1577//ZZ       HReg r;
1578//ZZ       vassert(e->Iex.Const.con->tag == Ico_U1);
1579//ZZ       vassert(e->Iex.Const.con->Ico.U1 == True
1580//ZZ               || e->Iex.Const.con->Ico.U1 == False);
1581//ZZ       r = newVRegI(env);
1582//ZZ       addInstr(env, ARMInstr_Imm32(r, 0));
1583//ZZ       addInstr(env, ARMInstr_CmpOrTst(True/*isCmp*/, r, ARMRI84_R(r)));
1584//ZZ       return e->Iex.Const.con->Ico.U1 ? ARMcc_EQ : ARMcc_NE;
1585//ZZ    }
1586//ZZ
1587//ZZ    // JRS 2013-Jan-03: this seems completely nonsensical
1588//ZZ    /* --- CasCmpEQ* --- */
1589//ZZ    /* Ist_Cas has a dummy argument to compare with, so comparison is
1590//ZZ       always true. */
1591//ZZ    //if (e->tag == Iex_Binop
1592//ZZ    //    && (e->Iex.Binop.op == Iop_CasCmpEQ32
1593//ZZ    //        || e->Iex.Binop.op == Iop_CasCmpEQ16
1594//ZZ    //        || e->Iex.Binop.op == Iop_CasCmpEQ8)) {
1595//ZZ    //   return ARMcc_AL;
1596//ZZ    //}
1597
1598   ppIRExpr(e);
1599   vpanic("iselCondCode");
1600}
1601
1602
1603/* --------------------- Reg --------------------- */
1604
1605static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1606{
1607   HReg r = iselIntExpr_R_wrk(env, e);
1608   /* sanity checks ... */
1609#  if 0
1610   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1611#  endif
1612   vassert(hregClass(r) == HRcInt64);
1613   vassert(hregIsVirtual(r));
1614   return r;
1615}
1616
1617/* DO NOT CALL THIS DIRECTLY ! */
1618static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1619{
1620   IRType ty = typeOfIRExpr(env->type_env,e);
1621   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1622
1623   switch (e->tag) {
1624
1625   /* --------- TEMP --------- */
1626   case Iex_RdTmp: {
1627      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1628   }
1629
1630   /* --------- LOAD --------- */
1631   case Iex_Load: {
1632      HReg dst  = newVRegI(env);
1633
1634      if (e->Iex.Load.end != Iend_LE)
1635         goto irreducible;
1636
1637      if (ty == Ity_I64) {
1638         ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1639         addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode));
1640         return dst;
1641      }
1642      if (ty == Ity_I32) {
1643         ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1644         addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode));
1645         return dst;
1646      }
1647      if (ty == Ity_I16) {
1648         ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1649         addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode));
1650         return dst;
1651      }
1652      if (ty == Ity_I8) {
1653         ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1654         addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode));
1655         return dst;
1656      }
1657      break;
1658   }
1659
1660   /* --------- BINARY OP --------- */
1661   case Iex_Binop: {
1662
1663      ARM64LogicOp lop = 0; /* invalid */
1664      ARM64ShiftOp sop = 0; /* invalid */
1665
1666      /* Special-case 0-x into a Neg instruction.  Not because it's
1667         particularly useful but more so as to give value flow using
1668         this instruction, so as to check its assembly correctness for
1669         implementation of Left32/Left64. */
1670      switch (e->Iex.Binop.op) {
1671         case Iop_Sub64:
1672            if (isZeroU64(e->Iex.Binop.arg1)) {
1673               HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1674               HReg dst  = newVRegI(env);
1675               addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG));
1676               return dst;
1677            }
1678            break;
1679         default:
1680            break;
1681      }
1682
1683      /* ADD/SUB */
1684      switch (e->Iex.Binop.op) {
1685         case Iop_Add64: case Iop_Add32:
1686         case Iop_Sub64: case Iop_Sub32: {
1687            Bool      isAdd = e->Iex.Binop.op == Iop_Add64
1688                              || e->Iex.Binop.op == Iop_Add32;
1689            HReg      dst   = newVRegI(env);
1690            HReg      argL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
1691            ARM64RIA* argR  = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1692            addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd));
1693            return dst;
1694         }
1695         default:
1696            break;
1697      }
1698
1699      /* AND/OR/XOR */
1700      switch (e->Iex.Binop.op) {
1701         case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop;
1702         case Iop_Or64:  case Iop_Or32:  lop = ARM64lo_OR;  goto log_binop;
1703         case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop;
1704         log_binop: {
1705            HReg      dst  = newVRegI(env);
1706            HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1707            ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2);
1708            addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop));
1709            return dst;
1710         }
1711         default:
1712            break;
1713      }
1714
1715      /* SHL/SHR/SAR */
1716      switch (e->Iex.Binop.op) {
1717         case Iop_Shr64:                 sop = ARM64sh_SHR; goto sh_binop;
1718         case Iop_Sar64:                 sop = ARM64sh_SAR; goto sh_binop;
1719         case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop;
1720         sh_binop: {
1721            HReg      dst  = newVRegI(env);
1722            HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1723            ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1724            addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop));
1725            return dst;
1726         }
1727         case Iop_Shr32:
1728         case Iop_Sar32: {
1729            Bool      zx   = e->Iex.Binop.op == Iop_Shr32;
1730            HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1731            ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1732            HReg      dst  = zx ? widen_z_32_to_64(env, argL)
1733                                : widen_s_32_to_64(env, argL);
1734            addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR));
1735            return dst;
1736         }
1737         default: break;
1738      }
1739
1740      /* MUL */
1741      if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) {
1742         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1743         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1744         HReg dst  = newVRegI(env);
1745         addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN));
1746         return dst;
1747      }
1748
1749      /* MULL */
1750      if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) {
1751         Bool isS  = e->Iex.Binop.op == Iop_MullS32;
1752         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1753         HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL);
1754         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1755         HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR);
1756         HReg dst  = newVRegI(env);
1757         addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN));
1758         return dst;
1759      }
1760
1761      /* Handle misc other ops. */
1762
1763      if (e->Iex.Binop.op == Iop_Max32U) {
1764         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1765         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1766         HReg dst  = newVRegI(env);
1767         addInstr(env, ARM64Instr_Cmp(argL, ARM64RIA_R(argR), False/*!is64*/));
1768         addInstr(env, ARM64Instr_CSel(dst, argL, argR, ARM64cc_CS));
1769         return dst;
1770      }
1771
1772      if (e->Iex.Binop.op == Iop_32HLto64) {
1773         HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1774         HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1775         HReg lo32  = widen_z_32_to_64(env, lo32s);
1776         HReg hi32  = newVRegI(env);
1777         addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32),
1778                                        ARM64sh_SHL));
1779         addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32),
1780                                        ARM64lo_OR));
1781         return hi32;
1782      }
1783
1784      if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32) {
1785         Bool isD = e->Iex.Binop.op == Iop_CmpF64;
1786         HReg dL  = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg1);
1787         HReg dR  = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg2);
1788         HReg dst = newVRegI(env);
1789         HReg imm = newVRegI(env);
1790         /* Do the compare (FCMP), which sets NZCV in PSTATE.  Then
1791            create in dst, the IRCmpF64Result encoded result. */
1792         addInstr(env, (isD ? ARM64Instr_VCmpD : ARM64Instr_VCmpS)(dL, dR));
1793         addInstr(env, ARM64Instr_Imm64(dst, 0));
1794         addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ
1795         addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ));
1796         addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT
1797         addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI));
1798         addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT
1799         addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT));
1800         addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN
1801         addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS));
1802         return dst;
1803      }
1804
1805      { /* local scope */
1806        ARM64CvtOp cvt_op = ARM64cvt_INVALID;
1807        Bool       srcIsD = False;
1808        switch (e->Iex.Binop.op) {
1809           case Iop_F64toI64S:
1810              cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break;
1811           case Iop_F64toI64U:
1812              cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break;
1813           case Iop_F64toI32S:
1814              cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break;
1815           case Iop_F64toI32U:
1816              cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
1817           case Iop_F32toI32S:
1818              cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
1819           case Iop_F32toI32U:
1820              cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break;
1821           case Iop_F32toI64S:
1822              cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break;
1823           case Iop_F32toI64U:
1824              cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
1825           default:
1826              break;
1827        }
1828        if (cvt_op != ARM64cvt_INVALID) {
1829           /* This is all a bit dodgy, because we can't handle a
1830              non-constant (not-known-at-JIT-time) rounding mode
1831              indication.  That's because there's no instruction
1832              AFAICS that does this conversion but rounds according to
1833              FPCR.RM, so we have to bake the rounding mode into the
1834              instruction right now.  But that should be OK because
1835              (1) the front end attaches a literal Irrm_ value to the
1836              conversion binop, and (2) iropt will never float that
1837              off via CSE, into a literal.  Hence we should always
1838              have an Irrm_ value as the first arg. */
1839           IRExpr* arg1 = e->Iex.Binop.arg1;
1840           if (arg1->tag != Iex_Const) goto irreducible;
1841           IRConst* arg1con = arg1->Iex.Const.con;
1842           vassert(arg1con->tag == Ico_U32); // else ill-typed IR
1843           UInt irrm = arg1con->Ico.U32;
1844           /* Find the ARM-encoded equivalent for |irrm|. */
1845           UInt armrm = 4; /* impossible */
1846           switch (irrm) {
1847              case Irrm_NEAREST: armrm = 0; break;
1848              case Irrm_NegINF:  armrm = 2; break;
1849              case Irrm_PosINF:  armrm = 1; break;
1850              case Irrm_ZERO:    armrm = 3; break;
1851              default: goto irreducible;
1852           }
1853           HReg src = (srcIsD ? iselDblExpr : iselFltExpr)
1854                         (env, e->Iex.Binop.arg2);
1855           HReg dst = newVRegI(env);
1856           addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm));
1857           return dst;
1858        }
1859      } /* local scope */
1860
1861//ZZ       if (e->Iex.Binop.op == Iop_GetElem8x8
1862//ZZ           || e->Iex.Binop.op == Iop_GetElem16x4
1863//ZZ           || e->Iex.Binop.op == Iop_GetElem32x2) {
1864//ZZ          HReg res = newVRegI(env);
1865//ZZ          HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
1866//ZZ          UInt index, size;
1867//ZZ          if (e->Iex.Binop.arg2->tag != Iex_Const ||
1868//ZZ              typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1869//ZZ             vpanic("ARM target supports GetElem with constant "
1870//ZZ                    "second argument only\n");
1871//ZZ          }
1872//ZZ          index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1873//ZZ          switch (e->Iex.Binop.op) {
1874//ZZ             case Iop_GetElem8x8: vassert(index < 8); size = 0; break;
1875//ZZ             case Iop_GetElem16x4: vassert(index < 4); size = 1; break;
1876//ZZ             case Iop_GetElem32x2: vassert(index < 2); size = 2; break;
1877//ZZ             default: vassert(0);
1878//ZZ          }
1879//ZZ          addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1880//ZZ                                         mkARMNRS(ARMNRS_Reg, res, 0),
1881//ZZ                                         mkARMNRS(ARMNRS_Scalar, arg, index),
1882//ZZ                                         size, False));
1883//ZZ          return res;
1884//ZZ       }
1885//ZZ
1886//ZZ       if (e->Iex.Binop.op == Iop_GetElem8x16
1887//ZZ           || e->Iex.Binop.op == Iop_GetElem16x8
1888//ZZ           || e->Iex.Binop.op == Iop_GetElem32x4) {
1889//ZZ          HReg res = newVRegI(env);
1890//ZZ          HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
1891//ZZ          UInt index, size;
1892//ZZ          if (e->Iex.Binop.arg2->tag != Iex_Const ||
1893//ZZ              typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
1894//ZZ             vpanic("ARM target supports GetElem with constant "
1895//ZZ                    "second argument only\n");
1896//ZZ          }
1897//ZZ          index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
1898//ZZ          switch (e->Iex.Binop.op) {
1899//ZZ             case Iop_GetElem8x16: vassert(index < 16); size = 0; break;
1900//ZZ             case Iop_GetElem16x8: vassert(index < 8); size = 1; break;
1901//ZZ             case Iop_GetElem32x4: vassert(index < 4); size = 2; break;
1902//ZZ             default: vassert(0);
1903//ZZ          }
1904//ZZ          addInstr(env, ARMInstr_NUnaryS(ARMneon_GETELEMS,
1905//ZZ                                         mkARMNRS(ARMNRS_Reg, res, 0),
1906//ZZ                                         mkARMNRS(ARMNRS_Scalar, arg, index),
1907//ZZ                                         size, True));
1908//ZZ          return res;
1909//ZZ       }
1910
1911      /* All cases involving host-side helper calls. */
1912      void* fn = NULL;
1913      switch (e->Iex.Binop.op) {
1914//ZZ          case Iop_Add16x2:
1915//ZZ             fn = &h_generic_calc_Add16x2; break;
1916//ZZ          case Iop_Sub16x2:
1917//ZZ             fn = &h_generic_calc_Sub16x2; break;
1918//ZZ          case Iop_HAdd16Ux2:
1919//ZZ             fn = &h_generic_calc_HAdd16Ux2; break;
1920//ZZ          case Iop_HAdd16Sx2:
1921//ZZ             fn = &h_generic_calc_HAdd16Sx2; break;
1922//ZZ          case Iop_HSub16Ux2:
1923//ZZ             fn = &h_generic_calc_HSub16Ux2; break;
1924//ZZ          case Iop_HSub16Sx2:
1925//ZZ             fn = &h_generic_calc_HSub16Sx2; break;
1926//ZZ          case Iop_QAdd16Sx2:
1927//ZZ             fn = &h_generic_calc_QAdd16Sx2; break;
1928//ZZ          case Iop_QAdd16Ux2:
1929//ZZ             fn = &h_generic_calc_QAdd16Ux2; break;
1930//ZZ          case Iop_QSub16Sx2:
1931//ZZ             fn = &h_generic_calc_QSub16Sx2; break;
1932//ZZ          case Iop_Add8x4:
1933//ZZ             fn = &h_generic_calc_Add8x4; break;
1934//ZZ          case Iop_Sub8x4:
1935//ZZ             fn = &h_generic_calc_Sub8x4; break;
1936//ZZ          case Iop_HAdd8Ux4:
1937//ZZ             fn = &h_generic_calc_HAdd8Ux4; break;
1938//ZZ          case Iop_HAdd8Sx4:
1939//ZZ             fn = &h_generic_calc_HAdd8Sx4; break;
1940//ZZ          case Iop_HSub8Ux4:
1941//ZZ             fn = &h_generic_calc_HSub8Ux4; break;
1942//ZZ          case Iop_HSub8Sx4:
1943//ZZ             fn = &h_generic_calc_HSub8Sx4; break;
1944//ZZ          case Iop_QAdd8Sx4:
1945//ZZ             fn = &h_generic_calc_QAdd8Sx4; break;
1946//ZZ          case Iop_QAdd8Ux4:
1947//ZZ             fn = &h_generic_calc_QAdd8Ux4; break;
1948//ZZ          case Iop_QSub8Sx4:
1949//ZZ             fn = &h_generic_calc_QSub8Sx4; break;
1950//ZZ          case Iop_QSub8Ux4:
1951//ZZ             fn = &h_generic_calc_QSub8Ux4; break;
1952//ZZ          case Iop_Sad8Ux4:
1953//ZZ             fn = &h_generic_calc_Sad8Ux4; break;
1954//ZZ          case Iop_QAdd32S:
1955//ZZ             fn = &h_generic_calc_QAdd32S; break;
1956//ZZ          case Iop_QSub32S:
1957//ZZ             fn = &h_generic_calc_QSub32S; break;
1958//ZZ          case Iop_QSub16Ux2:
1959//ZZ             fn = &h_generic_calc_QSub16Ux2; break;
1960         case Iop_DivU32:
1961            fn = &h_calc_udiv32_w_arm_semantics; break;
1962         case Iop_DivS32:
1963            fn = &h_calc_sdiv32_w_arm_semantics; break;
1964         case Iop_DivU64:
1965            fn = &h_calc_udiv64_w_arm_semantics; break;
1966         case Iop_DivS64:
1967            fn = &h_calc_sdiv64_w_arm_semantics; break;
1968         default:
1969            break;
1970      }
1971
1972      if (fn) {
1973         HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1974         HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1975         HReg res  = newVRegI(env);
1976         addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL));
1977         addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR));
1978         addInstr(env, ARM64Instr_Call( ARM64cc_AL, (HWord)Ptr_to_ULong(fn),
1979                                        2, mk_RetLoc_simple(RLPri_Int) ));
1980         addInstr(env, ARM64Instr_MovI(res, hregARM64_X0()));
1981         return res;
1982      }
1983
1984      break;
1985   }
1986
1987   /* --------- UNARY OP --------- */
1988   case Iex_Unop: {
1989
1990      switch (e->Iex.Unop.op) {
1991         case Iop_16Uto64: {
1992            /* This probably doesn't occur often enough to be worth
1993               rolling the extension into the load. */
1994            IRExpr* arg = e->Iex.Unop.arg;
1995            HReg    src = iselIntExpr_R(env, arg);
1996            HReg    dst = widen_z_16_to_64(env, src);
1997            return dst;
1998         }
1999         case Iop_32Uto64: {
2000            IRExpr* arg = e->Iex.Unop.arg;
2001            if (arg->tag == Iex_Load) {
2002               /* This correctly zero extends because _LdSt32 is
2003                  defined to do a zero extending load. */
2004               HReg dst = newVRegI(env);
2005               ARM64AMode* am
2006                  = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32);
2007               addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
2008               return dst;
2009            }
2010            /* else be lame and mask it  */
2011            HReg src  = iselIntExpr_R(env, arg);
2012            HReg dst  = widen_z_32_to_64(env, src);
2013            return dst;
2014         }
2015         case Iop_8Uto32: /* Just freeload on the 8Uto64 case */
2016         case Iop_8Uto64: {
2017            IRExpr* arg = e->Iex.Unop.arg;
2018            if (arg->tag == Iex_Load) {
2019               /* This correctly zero extends because _LdSt8 is
2020                  defined to do a zero extending load. */
2021               HReg dst = newVRegI(env);
2022               ARM64AMode* am
2023                  = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8);
2024               addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
2025               return dst;
2026            }
2027            /* else be lame and mask it  */
2028            HReg src = iselIntExpr_R(env, arg);
2029            HReg dst = widen_z_8_to_64(env, src);
2030            return dst;
2031         }
2032         case Iop_128HIto64: {
2033            HReg rHi, rLo;
2034            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2035            return rHi; /* and abandon rLo */
2036         }
2037         case Iop_8Sto32: case Iop_8Sto64: {
2038            IRExpr* arg = e->Iex.Unop.arg;
2039            HReg    src = iselIntExpr_R(env, arg);
2040            HReg    dst = widen_s_8_to_64(env, src);
2041            return dst;
2042         }
2043         case Iop_16Sto32: case Iop_16Sto64: {
2044            IRExpr* arg = e->Iex.Unop.arg;
2045            HReg    src = iselIntExpr_R(env, arg);
2046            HReg    dst = widen_s_16_to_64(env, src);
2047            return dst;
2048         }
2049         case Iop_32Sto64: {
2050            IRExpr* arg = e->Iex.Unop.arg;
2051            HReg    src = iselIntExpr_R(env, arg);
2052            HReg    dst = widen_s_32_to_64(env, src);
2053            return dst;
2054         }
2055         case Iop_Not32:
2056         case Iop_Not64: {
2057            HReg dst = newVRegI(env);
2058            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2059            addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT));
2060            return dst;
2061         }
2062         case Iop_Clz64: {
2063            HReg dst = newVRegI(env);
2064            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2065            addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ));
2066            return dst;
2067         }
2068         case Iop_Left32:
2069         case Iop_Left64: {
2070            /* Left64(src) = src | -src.  Left32 can use the same
2071               implementation since in that case we don't care what
2072               the upper 32 bits become. */
2073            HReg dst = newVRegI(env);
2074            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2075            addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2076            addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2077                                           ARM64lo_OR));
2078            return dst;
2079         }
2080         case Iop_CmpwNEZ64: {
2081           /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1
2082                             = Left64(src) >>s 63 */
2083            HReg dst = newVRegI(env);
2084            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2085            addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2086            addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2087                                           ARM64lo_OR));
2088            addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2089                                           ARM64sh_SAR));
2090            return dst;
2091         }
2092         case Iop_CmpwNEZ32: {
2093            /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF)
2094                              = Left64(src & 0xFFFFFFFF) >>s 63 */
2095            HReg dst = newVRegI(env);
2096            HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
2097            HReg src = widen_z_32_to_64(env, pre);
2098            addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
2099            addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
2100                                           ARM64lo_OR));
2101            addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2102                                           ARM64sh_SAR));
2103            return dst;
2104         }
2105         case Iop_V128to64: case Iop_V128HIto64: {
2106            HReg dst    = newVRegI(env);
2107            HReg src    = iselV128Expr(env, e->Iex.Unop.arg);
2108            UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 1 : 0;
2109            addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
2110            return dst;
2111         }
2112         case Iop_1Sto32:
2113         case Iop_1Sto64: {
2114            /* As with the iselStmt case for 'tmp:I1 = expr', we could
2115               do a lot better here if it ever became necessary. */
2116            HReg zero = newVRegI(env);
2117            HReg one  = newVRegI(env);
2118            HReg dst  = newVRegI(env);
2119            addInstr(env, ARM64Instr_Imm64(zero, 0));
2120            addInstr(env, ARM64Instr_Imm64(one,  1));
2121            ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
2122            addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
2123            addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2124                                           ARM64sh_SHL));
2125            addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
2126                                           ARM64sh_SAR));
2127            return dst;
2128         }
2129         case Iop_NarrowUn16to8x8:
2130         case Iop_NarrowUn32to16x4:
2131         case Iop_NarrowUn64to32x2: {
2132            HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2133            HReg tmp = newVRegV(env);
2134            HReg dst = newVRegI(env);
2135            UInt dszBlg2 = 3; /* illegal */
2136            switch (e->Iex.Unop.op) {
2137               case Iop_NarrowUn16to8x8:  dszBlg2 = 0; break; // 16to8_x8
2138               case Iop_NarrowUn32to16x4: dszBlg2 = 1; break; // 32to16_x4
2139               case Iop_NarrowUn64to32x2: dszBlg2 = 2; break; // 64to32_x2
2140               default: vassert(0);
2141            }
2142            addInstr(env, ARM64Instr_VNarrowV(dszBlg2, tmp, src));
2143            addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/));
2144            return dst;
2145         }
2146//ZZ          case Iop_64HIto32: {
2147//ZZ             HReg rHi, rLo;
2148//ZZ             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2149//ZZ             return rHi; /* and abandon rLo .. poor wee thing :-) */
2150//ZZ          }
2151//ZZ          case Iop_64to32: {
2152//ZZ             HReg rHi, rLo;
2153//ZZ             iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2154//ZZ             return rLo; /* similar stupid comment to the above ... */
2155//ZZ          }
2156//ZZ          case Iop_64to8: {
2157//ZZ             HReg rHi, rLo;
2158//ZZ             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
2159//ZZ                HReg tHi = newVRegI(env);
2160//ZZ                HReg tLo = newVRegI(env);
2161//ZZ                HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
2162//ZZ                addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2163//ZZ                rHi = tHi;
2164//ZZ                rLo = tLo;
2165//ZZ             } else {
2166//ZZ                iselInt64Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
2167//ZZ             }
2168//ZZ             return rLo;
2169//ZZ          }
2170
2171         case Iop_1Uto64: {
2172            /* 1Uto64(tmp). */
2173            HReg dst = newVRegI(env);
2174            if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
2175               ARM64RIL* one = mb_mkARM64RIL_I(1);
2176               HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
2177               vassert(one);
2178               addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND));
2179            } else {
2180               /* CLONE-01 */
2181               HReg zero = newVRegI(env);
2182               HReg one  = newVRegI(env);
2183               addInstr(env, ARM64Instr_Imm64(zero, 0));
2184               addInstr(env, ARM64Instr_Imm64(one,  1));
2185               ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
2186               addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
2187            }
2188            return dst;
2189         }
2190//ZZ          case Iop_1Uto8: {
2191//ZZ             HReg        dst  = newVRegI(env);
2192//ZZ             ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2193//ZZ             addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2194//ZZ             addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2195//ZZ             return dst;
2196//ZZ          }
2197//ZZ
2198//ZZ          case Iop_1Sto32: {
2199//ZZ             HReg        dst  = newVRegI(env);
2200//ZZ             ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2201//ZZ             ARMRI5*     amt  = ARMRI5_I5(31);
2202//ZZ             /* This is really rough.  We could do much better here;
2203//ZZ                perhaps mvn{cond} dst, #0 as the second insn?
2204//ZZ                (same applies to 1Sto64) */
2205//ZZ             addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2206//ZZ             addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2207//ZZ             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
2208//ZZ             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
2209//ZZ             return dst;
2210//ZZ          }
2211//ZZ
2212//ZZ          case Iop_Clz32: {
2213//ZZ             /* Count leading zeroes; easy on ARM. */
2214//ZZ             HReg dst = newVRegI(env);
2215//ZZ             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2216//ZZ             addInstr(env, ARMInstr_Unary(ARMun_CLZ, dst, src));
2217//ZZ             return dst;
2218//ZZ          }
2219//ZZ
2220//ZZ          case Iop_CmpwNEZ32: {
2221//ZZ             HReg dst = newVRegI(env);
2222//ZZ             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
2223//ZZ             addInstr(env, ARMInstr_Unary(ARMun_NEG, dst, src));
2224//ZZ             addInstr(env, ARMInstr_Alu(ARMalu_OR, dst, dst, ARMRI84_R(src)));
2225//ZZ             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, ARMRI5_I5(31)));
2226//ZZ             return dst;
2227//ZZ          }
2228//ZZ
2229//ZZ          case Iop_ReinterpF32asI32: {
2230//ZZ             HReg dst = newVRegI(env);
2231//ZZ             HReg src = iselFltExpr(env, e->Iex.Unop.arg);
2232//ZZ             addInstr(env, ARMInstr_VXferS(False/*!toS*/, src, dst));
2233//ZZ             return dst;
2234//ZZ          }
2235
2236         case Iop_64to32:
2237         case Iop_64to16:
2238         case Iop_64to8:
2239            /* These are no-ops. */
2240            return iselIntExpr_R(env, e->Iex.Unop.arg);
2241
2242         default:
2243            break;
2244      }
2245
2246//ZZ       /* All Unop cases involving host-side helper calls. */
2247//ZZ       void* fn = NULL;
2248//ZZ       switch (e->Iex.Unop.op) {
2249//ZZ          case Iop_CmpNEZ16x2:
2250//ZZ             fn = &h_generic_calc_CmpNEZ16x2; break;
2251//ZZ          case Iop_CmpNEZ8x4:
2252//ZZ             fn = &h_generic_calc_CmpNEZ8x4; break;
2253//ZZ          default:
2254//ZZ             break;
2255//ZZ       }
2256//ZZ
2257//ZZ       if (fn) {
2258//ZZ          HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2259//ZZ          HReg res = newVRegI(env);
2260//ZZ          addInstr(env, mk_iMOVds_RR(hregARM_R0(), arg));
2261//ZZ          addInstr(env, ARMInstr_Call( ARMcc_AL, (HWord)Ptr_to_ULong(fn),
2262//ZZ                                       1, RetLocInt ));
2263//ZZ          addInstr(env, mk_iMOVds_RR(res, hregARM_R0()));
2264//ZZ          return res;
2265//ZZ       }
2266
2267      break;
2268   }
2269
2270   /* --------- GET --------- */
2271   case Iex_Get: {
2272      if (ty == Ity_I64
2273          && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < (8<<12)-8) {
2274         HReg        dst = newVRegI(env);
2275         ARM64AMode* am
2276            = mk_baseblock_64bit_access_amode(e->Iex.Get.offset);
2277         addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am));
2278         return dst;
2279      }
2280      if (ty == Ity_I32
2281          && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < (4<<12)-4) {
2282         HReg        dst = newVRegI(env);
2283         ARM64AMode* am
2284            = mk_baseblock_32bit_access_amode(e->Iex.Get.offset);
2285         addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
2286         return dst;
2287      }
2288      if (ty == Ity_I16
2289          && 0 == (e->Iex.Get.offset & 1) && e->Iex.Get.offset < (2<<12)-2) {
2290         HReg        dst = newVRegI(env);
2291         ARM64AMode* am
2292            = mk_baseblock_16bit_access_amode(e->Iex.Get.offset);
2293         addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, am));
2294         return dst;
2295      }
2296      if (ty == Ity_I8
2297          /* && no alignment check */ && e->Iex.Get.offset < (1<<12)-1) {
2298         HReg        dst = newVRegI(env);
2299         ARM64AMode* am
2300            = mk_baseblock_8bit_access_amode(e->Iex.Get.offset);
2301         addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
2302         return dst;
2303      }
2304      break;
2305   }
2306
2307   /* --------- CCALL --------- */
2308   case Iex_CCall: {
2309      HReg    dst = newVRegI(env);
2310      vassert(ty == e->Iex.CCall.retty);
2311
2312      /* be very restrictive for now.  Only 64-bit ints allowed for
2313         args, and 64 bits for return type.  Don't forget to change
2314         the RetLoc if more types are allowed in future. */
2315      if (e->Iex.CCall.retty != Ity_I64)
2316         goto irreducible;
2317
2318      /* Marshal args, do the call, clear stack. */
2319      UInt   addToSp = 0;
2320      RetLoc rloc    = mk_RetLoc_INVALID();
2321      Bool   ok      = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2322                                     e->Iex.CCall.cee, e->Iex.CCall.retty,
2323                                     e->Iex.CCall.args );
2324      /* */
2325      if (ok) {
2326         vassert(is_sane_RetLoc(rloc));
2327         vassert(rloc.pri == RLPri_Int);
2328         vassert(addToSp == 0);
2329         addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()));
2330         return dst;
2331      }
2332      /* else fall through; will hit the irreducible: label */
2333   }
2334
2335   /* --------- LITERAL --------- */
2336   /* 64-bit literals */
2337   case Iex_Const: {
2338      ULong u   = 0;
2339      HReg  dst = newVRegI(env);
2340      switch (e->Iex.Const.con->tag) {
2341         case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
2342         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
2343         case Ico_U16: u = e->Iex.Const.con->Ico.U16; break;
2344         case Ico_U8:  u = e->Iex.Const.con->Ico.U8;  break;
2345         default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)");
2346      }
2347      addInstr(env, ARM64Instr_Imm64(dst, u));
2348      return dst;
2349   }
2350
2351   /* --------- MULTIPLEX --------- */
2352   case Iex_ITE: {
2353      /* ITE(ccexpr, iftrue, iffalse) */
2354      if (ty == Ity_I64 || ty == Ity_I32) {
2355         ARM64CondCode cc;
2356         HReg r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
2357         HReg r0  = iselIntExpr_R(env, e->Iex.ITE.iffalse);
2358         HReg dst = newVRegI(env);
2359         cc = iselCondCode(env, e->Iex.ITE.cond);
2360         addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc));
2361         return dst;
2362      }
2363      break;
2364   }
2365
2366   default:
2367   break;
2368   } /* switch (e->tag) */
2369
2370   /* We get here if no pattern matched. */
2371  irreducible:
2372   ppIRExpr(e);
2373   vpanic("iselIntExpr_R: cannot reduce tree");
2374}
2375
2376
2377/*---------------------------------------------------------*/
2378/*--- ISEL: Integer expressions (128 bit)               ---*/
2379/*---------------------------------------------------------*/
2380
/* Compute a 128-bit value into a register pair, which is handed back
   through the first two parameters: *rHi receives the upper 64 bits
   and *rLo the lower 64 bits.  The selection itself is delegated to
   iselInt128Expr_wrk; this wrapper then asserts that both returned
   registers are virtual registers of class HRcInt64.  As stated for
   iselIntExpr_R, the returned registers must not be changed by
   subsequent code emitted by the caller.  */

static void iselInt128Expr ( HReg* rHi, HReg* rLo,
                             ISelEnv* env, IRExpr* e )
{
   /* Do the real work; fills in *rHi and *rLo or panics. */
   iselInt128Expr_wrk(rHi, rLo, env, e);
#  if 0
   /* Debugging aid (disabled): print the expression just selected. */
   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
#  endif
   /* Sanity checks on the high half ... */
   vassert(hregClass(*rHi) == HRcInt64);
   vassert(hregIsVirtual(*rHi));
   /* ... and on the low half. */
   vassert(hregClass(*rLo) == HRcInt64);
   vassert(hregIsVirtual(*rLo));
}
2398
2399/* DO NOT CALL THIS DIRECTLY ! */
2400static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2401                                 ISelEnv* env, IRExpr* e )
2402{
2403   vassert(e);
2404   vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2405
2406   /* --------- BINARY ops --------- */
2407   if (e->tag == Iex_Binop) {
2408      switch (e->Iex.Binop.op) {
2409         /* 64 x 64 -> 128 multiply */
2410         case Iop_MullU64:
2411         case Iop_MullS64: {
2412            Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
2413            HReg argL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
2414            HReg argR  = iselIntExpr_R(env, e->Iex.Binop.arg2);
2415            HReg dstLo = newVRegI(env);
2416            HReg dstHi = newVRegI(env);
2417            addInstr(env, ARM64Instr_Mul(dstLo, argL, argR,
2418                                         ARM64mul_PLAIN));
2419            addInstr(env, ARM64Instr_Mul(dstHi, argL, argR,
2420                                         syned ? ARM64mul_SX : ARM64mul_ZX));
2421            *rHi = dstHi;
2422            *rLo = dstLo;
2423            return;
2424         }
2425         /* 64HLto128(e1,e2) */
2426         case Iop_64HLto128:
2427            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2428            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2429            return;
2430         default:
2431            break;
2432      }
2433   } /* if (e->tag == Iex_Binop) */
2434
2435   ppIRExpr(e);
2436   vpanic("iselInt128Expr(arm64)");
2437}
2438
2439
2440//ZZ /* -------------------- 64-bit -------------------- */
2441//ZZ
2442//ZZ /* Compute a 64-bit value into a register pair, which is returned as
2443//ZZ    the first two parameters.  As with iselIntExpr_R, these may be
2444//ZZ    either real or virtual regs; in any case they must not be changed
2445//ZZ    by subsequent code emitted by the caller.  */
2446//ZZ
2447//ZZ static void iselInt64Expr ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
2448//ZZ {
2449//ZZ    iselInt64Expr_wrk(rHi, rLo, env, e);
2450//ZZ #  if 0
2451//ZZ    vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2452//ZZ #  endif
2453//ZZ    vassert(hregClass(*rHi) == HRcInt32);
2454//ZZ    vassert(hregIsVirtual(*rHi));
2455//ZZ    vassert(hregClass(*rLo) == HRcInt32);
2456//ZZ    vassert(hregIsVirtual(*rLo));
2457//ZZ }
2458//ZZ
2459//ZZ /* DO NOT CALL THIS DIRECTLY ! */
2460//ZZ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
2461//ZZ {
2462//ZZ    vassert(e);
2463//ZZ    vassert(typeOfIRExpr(env->type_env,e) == Ity_I64);
2464//ZZ
2465//ZZ    /* 64-bit literal */
2466//ZZ    if (e->tag == Iex_Const) {
2467//ZZ       ULong   w64 = e->Iex.Const.con->Ico.U64;
2468//ZZ       UInt    wHi = toUInt(w64 >> 32);
2469//ZZ       UInt    wLo = toUInt(w64);
2470//ZZ       HReg    tHi = newVRegI(env);
2471//ZZ       HReg    tLo = newVRegI(env);
2472//ZZ       vassert(e->Iex.Const.con->tag == Ico_U64);
2473//ZZ       addInstr(env, ARMInstr_Imm32(tHi, wHi));
2474//ZZ       addInstr(env, ARMInstr_Imm32(tLo, wLo));
2475//ZZ       *rHi = tHi;
2476//ZZ       *rLo = tLo;
2477//ZZ       return;
2478//ZZ    }
2479//ZZ
2480//ZZ    /* read 64-bit IRTemp */
2481//ZZ    if (e->tag == Iex_RdTmp) {
2482//ZZ       if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
2483//ZZ          HReg tHi = newVRegI(env);
2484//ZZ          HReg tLo = newVRegI(env);
2485//ZZ          HReg tmp = iselNeon64Expr(env, e);
2486//ZZ          addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2487//ZZ          *rHi = tHi;
2488//ZZ          *rLo = tLo;
2489//ZZ       } else {
2490//ZZ          lookupIRTemp64( rHi, rLo, env, e->Iex.RdTmp.tmp);
2491//ZZ       }
2492//ZZ       return;
2493//ZZ    }
2494//ZZ
2495//ZZ    /* 64-bit load */
2496//ZZ    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2497//ZZ       HReg      tLo, tHi, rA;
2498//ZZ       vassert(e->Iex.Load.ty == Ity_I64);
2499//ZZ       rA  = iselIntExpr_R(env, e->Iex.Load.addr);
2500//ZZ       tHi = newVRegI(env);
2501//ZZ       tLo = newVRegI(env);
2502//ZZ       addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
2503//ZZ                                     tHi, ARMAMode1_RI(rA, 4)));
2504//ZZ       addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/,
2505//ZZ                                     tLo, ARMAMode1_RI(rA, 0)));
2506//ZZ       *rHi = tHi;
2507//ZZ       *rLo = tLo;
2508//ZZ       return;
2509//ZZ    }
2510//ZZ
2511//ZZ    /* 64-bit GET */
2512//ZZ    if (e->tag == Iex_Get) {
2513//ZZ       ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 0);
2514//ZZ       ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(), e->Iex.Get.offset + 4);
2515//ZZ       HReg tHi = newVRegI(env);
2516//ZZ       HReg tLo = newVRegI(env);
2517//ZZ       addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tHi, am4));
2518//ZZ       addInstr(env, ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, tLo, am0));
2519//ZZ       *rHi = tHi;
2520//ZZ       *rLo = tLo;
2521//ZZ       return;
2522//ZZ    }
2523//ZZ
2524//ZZ    /* --------- BINARY ops --------- */
2525//ZZ    if (e->tag == Iex_Binop) {
2526//ZZ       switch (e->Iex.Binop.op) {
2527//ZZ
2528//ZZ          /* 32 x 32 -> 64 multiply */
2529//ZZ          case Iop_MullS32:
2530//ZZ          case Iop_MullU32: {
2531//ZZ             HReg     argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2532//ZZ             HReg     argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2533//ZZ             HReg     tHi  = newVRegI(env);
2534//ZZ             HReg     tLo  = newVRegI(env);
2535//ZZ             ARMMulOp mop  = e->Iex.Binop.op == Iop_MullS32
2536//ZZ                                ? ARMmul_SX : ARMmul_ZX;
2537//ZZ             addInstr(env, mk_iMOVds_RR(hregARM_R2(), argL));
2538//ZZ             addInstr(env, mk_iMOVds_RR(hregARM_R3(), argR));
2539//ZZ             addInstr(env, ARMInstr_Mul(mop));
2540//ZZ             addInstr(env, mk_iMOVds_RR(tHi, hregARM_R1()));
2541//ZZ             addInstr(env, mk_iMOVds_RR(tLo, hregARM_R0()));
2542//ZZ             *rHi = tHi;
2543//ZZ             *rLo = tLo;
2544//ZZ             return;
2545//ZZ          }
2546//ZZ
2547//ZZ          case Iop_Or64: {
2548//ZZ             HReg xLo, xHi, yLo, yHi;
2549//ZZ             HReg tHi = newVRegI(env);
2550//ZZ             HReg tLo = newVRegI(env);
2551//ZZ             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2552//ZZ             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2553//ZZ             addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, xHi, ARMRI84_R(yHi)));
2554//ZZ             addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, xLo, ARMRI84_R(yLo)));
2555//ZZ             *rHi = tHi;
2556//ZZ             *rLo = tLo;
2557//ZZ             return;
2558//ZZ          }
2559//ZZ
2560//ZZ          case Iop_Add64: {
2561//ZZ             HReg xLo, xHi, yLo, yHi;
2562//ZZ             HReg tHi = newVRegI(env);
2563//ZZ             HReg tLo = newVRegI(env);
2564//ZZ             iselInt64Expr(&xHi, &xLo, env, e->Iex.Binop.arg1);
2565//ZZ             iselInt64Expr(&yHi, &yLo, env, e->Iex.Binop.arg2);
2566//ZZ             addInstr(env, ARMInstr_Alu(ARMalu_ADDS, tLo, xLo, ARMRI84_R(yLo)));
2567//ZZ             addInstr(env, ARMInstr_Alu(ARMalu_ADC,  tHi, xHi, ARMRI84_R(yHi)));
2568//ZZ             *rHi = tHi;
2569//ZZ             *rLo = tLo;
2570//ZZ             return;
2571//ZZ          }
2572//ZZ
2573//ZZ          /* 32HLto64(e1,e2) */
2574//ZZ          case Iop_32HLto64: {
2575//ZZ             *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2576//ZZ             *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2577//ZZ             return;
2578//ZZ          }
2579//ZZ
2580//ZZ          default:
2581//ZZ             break;
2582//ZZ       }
2583//ZZ    }
2584//ZZ
2585//ZZ    /* --------- UNARY ops --------- */
2586//ZZ    if (e->tag == Iex_Unop) {
2587//ZZ       switch (e->Iex.Unop.op) {
2588//ZZ
2589//ZZ          /* ReinterpF64asI64 */
2590//ZZ          case Iop_ReinterpF64asI64: {
2591//ZZ             HReg dstHi = newVRegI(env);
2592//ZZ             HReg dstLo = newVRegI(env);
2593//ZZ             HReg src   = iselDblExpr(env, e->Iex.Unop.arg);
2594//ZZ             addInstr(env, ARMInstr_VXferD(False/*!toD*/, src, dstHi, dstLo));
2595//ZZ             *rHi = dstHi;
2596//ZZ             *rLo = dstLo;
2597//ZZ             return;
2598//ZZ          }
2599//ZZ
2600//ZZ          /* Left64(e) */
2601//ZZ          case Iop_Left64: {
2602//ZZ             HReg yLo, yHi;
2603//ZZ             HReg tHi  = newVRegI(env);
2604//ZZ             HReg tLo  = newVRegI(env);
2605//ZZ             HReg zero = newVRegI(env);
2606//ZZ             /* yHi:yLo = arg */
2607//ZZ             iselInt64Expr(&yHi, &yLo, env, e->Iex.Unop.arg);
2608//ZZ             /* zero = 0 */
2609//ZZ             addInstr(env, ARMInstr_Imm32(zero, 0));
2610//ZZ             /* tLo = 0 - yLo, and set carry */
2611//ZZ             addInstr(env, ARMInstr_Alu(ARMalu_SUBS,
2612//ZZ                                        tLo, zero, ARMRI84_R(yLo)));
2613//ZZ             /* tHi = 0 - yHi - carry */
2614//ZZ             addInstr(env, ARMInstr_Alu(ARMalu_SBC,
2615//ZZ                                        tHi, zero, ARMRI84_R(yHi)));
2616//ZZ             /* So now we have tHi:tLo = -arg.  To finish off, or 'arg'
2617//ZZ                back in, so as to give the final result
2618//ZZ                tHi:tLo = arg | -arg. */
2619//ZZ             addInstr(env, ARMInstr_Alu(ARMalu_OR, tHi, tHi, ARMRI84_R(yHi)));
2620//ZZ             addInstr(env, ARMInstr_Alu(ARMalu_OR, tLo, tLo, ARMRI84_R(yLo)));
2621//ZZ             *rHi = tHi;
2622//ZZ             *rLo = tLo;
2623//ZZ             return;
2624//ZZ          }
2625//ZZ
2626//ZZ          /* CmpwNEZ64(e) */
2627//ZZ          case Iop_CmpwNEZ64: {
2628//ZZ             HReg srcLo, srcHi;
2629//ZZ             HReg tmp1 = newVRegI(env);
2630//ZZ             HReg tmp2 = newVRegI(env);
2631//ZZ             /* srcHi:srcLo = arg */
2632//ZZ             iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
2633//ZZ             /* tmp1 = srcHi | srcLo */
2634//ZZ             addInstr(env, ARMInstr_Alu(ARMalu_OR,
2635//ZZ                                        tmp1, srcHi, ARMRI84_R(srcLo)));
2636//ZZ             /* tmp2 = (tmp1 | -tmp1) >>s 31 */
2637//ZZ             addInstr(env, ARMInstr_Unary(ARMun_NEG, tmp2, tmp1));
2638//ZZ             addInstr(env, ARMInstr_Alu(ARMalu_OR,
2639//ZZ                                        tmp2, tmp2, ARMRI84_R(tmp1)));
2640//ZZ             addInstr(env, ARMInstr_Shift(ARMsh_SAR,
2641//ZZ                                          tmp2, tmp2, ARMRI5_I5(31)));
2642//ZZ             *rHi = tmp2;
2643//ZZ             *rLo = tmp2;
2644//ZZ             return;
2645//ZZ          }
2646//ZZ
2647//ZZ          case Iop_1Sto64: {
2648//ZZ             HReg        dst  = newVRegI(env);
2649//ZZ             ARMCondCode cond = iselCondCode(env, e->Iex.Unop.arg);
2650//ZZ             ARMRI5*     amt  = ARMRI5_I5(31);
2651//ZZ             /* This is really rough.  We could do much better here;
2652//ZZ                perhaps mvn{cond} dst, #0 as the second insn?
2653//ZZ                (same applies to 1Sto32) */
2654//ZZ             addInstr(env, ARMInstr_Mov(dst, ARMRI84_I84(0,0)));
2655//ZZ             addInstr(env, ARMInstr_CMov(cond, dst, ARMRI84_I84(1,0)));
2656//ZZ             addInstr(env, ARMInstr_Shift(ARMsh_SHL, dst, dst, amt));
2657//ZZ             addInstr(env, ARMInstr_Shift(ARMsh_SAR, dst, dst, amt));
2658//ZZ             *rHi = dst;
2659//ZZ             *rLo = dst;
2660//ZZ             return;
2661//ZZ          }
2662//ZZ
2663//ZZ          default:
2664//ZZ             break;
2665//ZZ       }
2666//ZZ    } /* if (e->tag == Iex_Unop) */
2667//ZZ
2668//ZZ    /* --------- MULTIPLEX --------- */
2669//ZZ    if (e->tag == Iex_ITE) { // VFD
2670//ZZ       IRType tyC;
2671//ZZ       HReg   r1hi, r1lo, r0hi, r0lo, dstHi, dstLo;
2672//ZZ       ARMCondCode cc;
2673//ZZ       tyC = typeOfIRExpr(env->type_env,e->Iex.ITE.cond);
2674//ZZ       vassert(tyC == Ity_I1);
2675//ZZ       iselInt64Expr(&r1hi, &r1lo, env, e->Iex.ITE.iftrue);
2676//ZZ       iselInt64Expr(&r0hi, &r0lo, env, e->Iex.ITE.iffalse);
2677//ZZ       dstHi = newVRegI(env);
2678//ZZ       dstLo = newVRegI(env);
2679//ZZ       addInstr(env, mk_iMOVds_RR(dstHi, r1hi));
2680//ZZ       addInstr(env, mk_iMOVds_RR(dstLo, r1lo));
2681//ZZ       cc = iselCondCode(env, e->Iex.ITE.cond);
2682//ZZ       addInstr(env, ARMInstr_CMov(cc ^ 1, dstHi, ARMRI84_R(r0hi)));
2683//ZZ       addInstr(env, ARMInstr_CMov(cc ^ 1, dstLo, ARMRI84_R(r0lo)));
2684//ZZ       *rHi = dstHi;
2685//ZZ       *rLo = dstLo;
2686//ZZ       return;
2687//ZZ    }
2688//ZZ
2689//ZZ    /* It is convenient sometimes to call iselInt64Expr even when we
2690//ZZ       have NEON support (e.g. in do_helper_call we need 64-bit
2691//ZZ       arguments as 2 x 32 regs). */
2692//ZZ    if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
2693//ZZ       HReg tHi = newVRegI(env);
2694//ZZ       HReg tLo = newVRegI(env);
2695//ZZ       HReg tmp = iselNeon64Expr(env, e);
2696//ZZ       addInstr(env, ARMInstr_VXferD(False, tmp, tHi, tLo));
2697//ZZ       *rHi = tHi;
2698//ZZ       *rLo = tLo;
2699//ZZ       return ;
2700//ZZ    }
2701//ZZ
2702//ZZ    ppIRExpr(e);
2703//ZZ    vpanic("iselInt64Expr");
2704//ZZ }
2705//ZZ
2706//ZZ
2707//ZZ /*---------------------------------------------------------*/
2708//ZZ /*--- ISEL: Vector (NEON) expressions (64 bit)          ---*/
2709//ZZ /*---------------------------------------------------------*/
2710//ZZ
2711//ZZ static HReg iselNeon64Expr ( ISelEnv* env, IRExpr* e )
2712//ZZ {
2713//ZZ    HReg r = iselNeon64Expr_wrk( env, e );
2714//ZZ    vassert(hregClass(r) == HRcFlt64);
2715//ZZ    vassert(hregIsVirtual(r));
2716//ZZ    return r;
2717//ZZ }
2718//ZZ
2719//ZZ /* DO NOT CALL THIS DIRECTLY */
2720//ZZ static HReg iselNeon64Expr_wrk ( ISelEnv* env, IRExpr* e )
2721//ZZ {
2722//ZZ    IRType ty = typeOfIRExpr(env->type_env, e);
2723//ZZ    MatchInfo mi;
2724//ZZ    vassert(e);
2725//ZZ    vassert(ty == Ity_I64);
2726//ZZ
2727//ZZ    if (e->tag == Iex_RdTmp) {
2728//ZZ       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2729//ZZ    }
2730//ZZ
2731//ZZ    if (e->tag == Iex_Const) {
2732//ZZ       HReg rLo, rHi;
2733//ZZ       HReg res = newVRegD(env);
2734//ZZ       iselInt64Expr(&rHi, &rLo, env, e);
2735//ZZ       addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2736//ZZ       return res;
2737//ZZ    }
2738//ZZ
2739//ZZ    /* 64-bit load */
2740//ZZ    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
2741//ZZ       HReg res = newVRegD(env);
2742//ZZ       ARMAModeN* am = iselIntExpr_AModeN(env, e->Iex.Load.addr);
2743//ZZ       vassert(ty == Ity_I64);
2744//ZZ       addInstr(env, ARMInstr_NLdStD(True, res, am));
2745//ZZ       return res;
2746//ZZ    }
2747//ZZ
2748//ZZ    /* 64-bit GET */
2749//ZZ    if (e->tag == Iex_Get) {
2750//ZZ       HReg addr = newVRegI(env);
2751//ZZ       HReg res = newVRegD(env);
2752//ZZ       vassert(ty == Ity_I64);
2753//ZZ       addInstr(env, ARMInstr_Add32(addr, hregARM_R8(), e->Iex.Get.offset));
2754//ZZ       addInstr(env, ARMInstr_NLdStD(True, res, mkARMAModeN_R(addr)));
2755//ZZ       return res;
2756//ZZ    }
2757//ZZ
2758//ZZ    /* --------- BINARY ops --------- */
2759//ZZ    if (e->tag == Iex_Binop) {
2760//ZZ       switch (e->Iex.Binop.op) {
2761//ZZ
2762//ZZ          /* 32 x 32 -> 64 multiply */
2763//ZZ          case Iop_MullS32:
2764//ZZ          case Iop_MullU32: {
2765//ZZ             HReg rLo, rHi;
2766//ZZ             HReg res = newVRegD(env);
2767//ZZ             iselInt64Expr(&rHi, &rLo, env, e);
2768//ZZ             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2769//ZZ             return res;
2770//ZZ          }
2771//ZZ
2772//ZZ          case Iop_And64: {
2773//ZZ             HReg res = newVRegD(env);
2774//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2775//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2776//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
2777//ZZ                                            res, argL, argR, 4, False));
2778//ZZ             return res;
2779//ZZ          }
2780//ZZ          case Iop_Or64: {
2781//ZZ             HReg res = newVRegD(env);
2782//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2783//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2784//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
2785//ZZ                                            res, argL, argR, 4, False));
2786//ZZ             return res;
2787//ZZ          }
2788//ZZ          case Iop_Xor64: {
2789//ZZ             HReg res = newVRegD(env);
2790//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2791//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2792//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
2793//ZZ                                            res, argL, argR, 4, False));
2794//ZZ             return res;
2795//ZZ          }
2796//ZZ
2797//ZZ          /* 32HLto64(e1,e2) */
2798//ZZ          case Iop_32HLto64: {
2799//ZZ             HReg rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2800//ZZ             HReg rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2801//ZZ             HReg res = newVRegD(env);
2802//ZZ             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
2803//ZZ             return res;
2804//ZZ          }
2805//ZZ
2806//ZZ          case Iop_Add8x8:
2807//ZZ          case Iop_Add16x4:
2808//ZZ          case Iop_Add32x2:
2809//ZZ          case Iop_Add64: {
2810//ZZ             HReg res = newVRegD(env);
2811//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2812//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2813//ZZ             UInt size;
2814//ZZ             switch (e->Iex.Binop.op) {
2815//ZZ                case Iop_Add8x8: size = 0; break;
2816//ZZ                case Iop_Add16x4: size = 1; break;
2817//ZZ                case Iop_Add32x2: size = 2; break;
2818//ZZ                case Iop_Add64: size = 3; break;
2819//ZZ                default: vassert(0);
2820//ZZ             }
2821//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VADD,
2822//ZZ                                            res, argL, argR, size, False));
2823//ZZ             return res;
2824//ZZ          }
2825//ZZ          case Iop_Add32Fx2: {
2826//ZZ             HReg res = newVRegD(env);
2827//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2828//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2829//ZZ             UInt size = 0;
2830//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
2831//ZZ                                            res, argL, argR, size, False));
2832//ZZ             return res;
2833//ZZ          }
2834//ZZ          case Iop_Recps32Fx2: {
2835//ZZ             HReg res = newVRegD(env);
2836//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2837//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2838//ZZ             UInt size = 0;
2839//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
2840//ZZ                                            res, argL, argR, size, False));
2841//ZZ             return res;
2842//ZZ          }
2843//ZZ          case Iop_Rsqrts32Fx2: {
2844//ZZ             HReg res = newVRegD(env);
2845//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2846//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2847//ZZ             UInt size = 0;
2848//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
2849//ZZ                                            res, argL, argR, size, False));
2850//ZZ             return res;
2851//ZZ          }
2852//ZZ
2853//ZZ          // These 6 verified 18 Apr 2013
2854//ZZ          case Iop_InterleaveHI32x2:
2855//ZZ          case Iop_InterleaveLO32x2:
2856//ZZ          case Iop_InterleaveOddLanes8x8:
2857//ZZ          case Iop_InterleaveEvenLanes8x8:
2858//ZZ          case Iop_InterleaveOddLanes16x4:
2859//ZZ          case Iop_InterleaveEvenLanes16x4: {
2860//ZZ             HReg rD   = newVRegD(env);
2861//ZZ             HReg rM   = newVRegD(env);
2862//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2863//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2864//ZZ             UInt size;
2865//ZZ             Bool resRd;  // is the result in rD or rM ?
2866//ZZ             switch (e->Iex.Binop.op) {
2867//ZZ                case Iop_InterleaveOddLanes8x8:   resRd = False; size = 0; break;
2868//ZZ                case Iop_InterleaveEvenLanes8x8:  resRd = True;  size = 0; break;
2869//ZZ                case Iop_InterleaveOddLanes16x4:  resRd = False; size = 1; break;
2870//ZZ                case Iop_InterleaveEvenLanes16x4: resRd = True;  size = 1; break;
2871//ZZ                case Iop_InterleaveHI32x2:        resRd = False; size = 2; break;
2872//ZZ                case Iop_InterleaveLO32x2:        resRd = True;  size = 2; break;
2873//ZZ                default: vassert(0);
2874//ZZ             }
2875//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2876//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2877//ZZ             addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, False));
2878//ZZ             return resRd ? rD : rM;
2879//ZZ          }
2880//ZZ
2881//ZZ          // These 4 verified 18 Apr 2013
2882//ZZ          case Iop_InterleaveHI8x8:
2883//ZZ          case Iop_InterleaveLO8x8:
2884//ZZ          case Iop_InterleaveHI16x4:
2885//ZZ          case Iop_InterleaveLO16x4: {
2886//ZZ             HReg rD   = newVRegD(env);
2887//ZZ             HReg rM   = newVRegD(env);
2888//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2889//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2890//ZZ             UInt size;
2891//ZZ             Bool resRd;  // is the result in rD or rM ?
2892//ZZ             switch (e->Iex.Binop.op) {
2893//ZZ                case Iop_InterleaveHI8x8:  resRd = False; size = 0; break;
2894//ZZ                case Iop_InterleaveLO8x8:  resRd = True;  size = 0; break;
2895//ZZ                case Iop_InterleaveHI16x4: resRd = False; size = 1; break;
2896//ZZ                case Iop_InterleaveLO16x4: resRd = True;  size = 1; break;
2897//ZZ                default: vassert(0);
2898//ZZ             }
2899//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2900//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2901//ZZ             addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, False));
2902//ZZ             return resRd ? rD : rM;
2903//ZZ          }
2904//ZZ
2905//ZZ          // These 4 verified 18 Apr 2013
2906//ZZ          case Iop_CatOddLanes8x8:
2907//ZZ          case Iop_CatEvenLanes8x8:
2908//ZZ          case Iop_CatOddLanes16x4:
2909//ZZ          case Iop_CatEvenLanes16x4: {
2910//ZZ             HReg rD   = newVRegD(env);
2911//ZZ             HReg rM   = newVRegD(env);
2912//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2913//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2914//ZZ             UInt size;
2915//ZZ             Bool resRd;  // is the result in rD or rM ?
2916//ZZ             switch (e->Iex.Binop.op) {
2917//ZZ                case Iop_CatOddLanes8x8:   resRd = False; size = 0; break;
2918//ZZ                case Iop_CatEvenLanes8x8:  resRd = True;  size = 0; break;
2919//ZZ                case Iop_CatOddLanes16x4:  resRd = False; size = 1; break;
2920//ZZ                case Iop_CatEvenLanes16x4: resRd = True;  size = 1; break;
2921//ZZ                default: vassert(0);
2922//ZZ             }
2923//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, False));
2924//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, False));
2925//ZZ             addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, False));
2926//ZZ             return resRd ? rD : rM;
2927//ZZ          }
2928//ZZ
2929//ZZ          case Iop_QAdd8Ux8:
2930//ZZ          case Iop_QAdd16Ux4:
2931//ZZ          case Iop_QAdd32Ux2:
2932//ZZ          case Iop_QAdd64Ux1: {
2933//ZZ             HReg res = newVRegD(env);
2934//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2935//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2936//ZZ             UInt size;
2937//ZZ             switch (e->Iex.Binop.op) {
2938//ZZ                case Iop_QAdd8Ux8: size = 0; break;
2939//ZZ                case Iop_QAdd16Ux4: size = 1; break;
2940//ZZ                case Iop_QAdd32Ux2: size = 2; break;
2941//ZZ                case Iop_QAdd64Ux1: size = 3; break;
2942//ZZ                default: vassert(0);
2943//ZZ             }
2944//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
2945//ZZ                                            res, argL, argR, size, False));
2946//ZZ             return res;
2947//ZZ          }
2948//ZZ          case Iop_QAdd8Sx8:
2949//ZZ          case Iop_QAdd16Sx4:
2950//ZZ          case Iop_QAdd32Sx2:
2951//ZZ          case Iop_QAdd64Sx1: {
2952//ZZ             HReg res = newVRegD(env);
2953//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2954//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2955//ZZ             UInt size;
2956//ZZ             switch (e->Iex.Binop.op) {
2957//ZZ                case Iop_QAdd8Sx8: size = 0; break;
2958//ZZ                case Iop_QAdd16Sx4: size = 1; break;
2959//ZZ                case Iop_QAdd32Sx2: size = 2; break;
2960//ZZ                case Iop_QAdd64Sx1: size = 3; break;
2961//ZZ                default: vassert(0);
2962//ZZ             }
2963//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
2964//ZZ                                            res, argL, argR, size, False));
2965//ZZ             return res;
2966//ZZ          }
2967//ZZ          case Iop_Sub8x8:
2968//ZZ          case Iop_Sub16x4:
2969//ZZ          case Iop_Sub32x2:
2970//ZZ          case Iop_Sub64: {
2971//ZZ             HReg res = newVRegD(env);
2972//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2973//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2974//ZZ             UInt size;
2975//ZZ             switch (e->Iex.Binop.op) {
2976//ZZ                case Iop_Sub8x8: size = 0; break;
2977//ZZ                case Iop_Sub16x4: size = 1; break;
2978//ZZ                case Iop_Sub32x2: size = 2; break;
2979//ZZ                case Iop_Sub64: size = 3; break;
2980//ZZ                default: vassert(0);
2981//ZZ             }
2982//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
2983//ZZ                                            res, argL, argR, size, False));
2984//ZZ             return res;
2985//ZZ          }
2986//ZZ          case Iop_Sub32Fx2: {
2987//ZZ             HReg res = newVRegD(env);
2988//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
2989//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
2990//ZZ             UInt size = 0;
2991//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
2992//ZZ                                            res, argL, argR, size, False));
2993//ZZ             return res;
2994//ZZ          }
2995//ZZ          case Iop_QSub8Ux8:
2996//ZZ          case Iop_QSub16Ux4:
2997//ZZ          case Iop_QSub32Ux2:
2998//ZZ          case Iop_QSub64Ux1: {
2999//ZZ             HReg res = newVRegD(env);
3000//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3001//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3002//ZZ             UInt size;
3003//ZZ             switch (e->Iex.Binop.op) {
3004//ZZ                case Iop_QSub8Ux8: size = 0; break;
3005//ZZ                case Iop_QSub16Ux4: size = 1; break;
3006//ZZ                case Iop_QSub32Ux2: size = 2; break;
3007//ZZ                case Iop_QSub64Ux1: size = 3; break;
3008//ZZ                default: vassert(0);
3009//ZZ             }
3010//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
3011//ZZ                                            res, argL, argR, size, False));
3012//ZZ             return res;
3013//ZZ          }
3014//ZZ          case Iop_QSub8Sx8:
3015//ZZ          case Iop_QSub16Sx4:
3016//ZZ          case Iop_QSub32Sx2:
3017//ZZ          case Iop_QSub64Sx1: {
3018//ZZ             HReg res = newVRegD(env);
3019//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3020//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3021//ZZ             UInt size;
3022//ZZ             switch (e->Iex.Binop.op) {
3023//ZZ                case Iop_QSub8Sx8: size = 0; break;
3024//ZZ                case Iop_QSub16Sx4: size = 1; break;
3025//ZZ                case Iop_QSub32Sx2: size = 2; break;
3026//ZZ                case Iop_QSub64Sx1: size = 3; break;
3027//ZZ                default: vassert(0);
3028//ZZ             }
3029//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
3030//ZZ                                            res, argL, argR, size, False));
3031//ZZ             return res;
3032//ZZ          }
3033//ZZ          case Iop_Max8Ux8:
3034//ZZ          case Iop_Max16Ux4:
3035//ZZ          case Iop_Max32Ux2: {
3036//ZZ             HReg res = newVRegD(env);
3037//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3038//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3039//ZZ             UInt size;
3040//ZZ             switch (e->Iex.Binop.op) {
3041//ZZ                case Iop_Max8Ux8: size = 0; break;
3042//ZZ                case Iop_Max16Ux4: size = 1; break;
3043//ZZ                case Iop_Max32Ux2: size = 2; break;
3044//ZZ                default: vassert(0);
3045//ZZ             }
3046//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
3047//ZZ                                            res, argL, argR, size, False));
3048//ZZ             return res;
3049//ZZ          }
3050//ZZ          case Iop_Max8Sx8:
3051//ZZ          case Iop_Max16Sx4:
3052//ZZ          case Iop_Max32Sx2: {
3053//ZZ             HReg res = newVRegD(env);
3054//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3055//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3056//ZZ             UInt size;
3057//ZZ             switch (e->Iex.Binop.op) {
3058//ZZ                case Iop_Max8Sx8: size = 0; break;
3059//ZZ                case Iop_Max16Sx4: size = 1; break;
3060//ZZ                case Iop_Max32Sx2: size = 2; break;
3061//ZZ                default: vassert(0);
3062//ZZ             }
3063//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
3064//ZZ                                            res, argL, argR, size, False));
3065//ZZ             return res;
3066//ZZ          }
3067//ZZ          case Iop_Min8Ux8:
3068//ZZ          case Iop_Min16Ux4:
3069//ZZ          case Iop_Min32Ux2: {
3070//ZZ             HReg res = newVRegD(env);
3071//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3072//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3073//ZZ             UInt size;
3074//ZZ             switch (e->Iex.Binop.op) {
3075//ZZ                case Iop_Min8Ux8: size = 0; break;
3076//ZZ                case Iop_Min16Ux4: size = 1; break;
3077//ZZ                case Iop_Min32Ux2: size = 2; break;
3078//ZZ                default: vassert(0);
3079//ZZ             }
3080//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
3081//ZZ                                            res, argL, argR, size, False));
3082//ZZ             return res;
3083//ZZ          }
3084//ZZ          case Iop_Min8Sx8:
3085//ZZ          case Iop_Min16Sx4:
3086//ZZ          case Iop_Min32Sx2: {
3087//ZZ             HReg res = newVRegD(env);
3088//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3089//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3090//ZZ             UInt size;
3091//ZZ             switch (e->Iex.Binop.op) {
3092//ZZ                case Iop_Min8Sx8: size = 0; break;
3093//ZZ                case Iop_Min16Sx4: size = 1; break;
3094//ZZ                case Iop_Min32Sx2: size = 2; break;
3095//ZZ                default: vassert(0);
3096//ZZ             }
3097//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
3098//ZZ                                            res, argL, argR, size, False));
3099//ZZ             return res;
3100//ZZ          }
3101//ZZ          case Iop_Sar8x8:
3102//ZZ          case Iop_Sar16x4:
3103//ZZ          case Iop_Sar32x2: {
3104//ZZ             HReg res = newVRegD(env);
3105//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3106//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3107//ZZ             HReg argR2 = newVRegD(env);
3108//ZZ             HReg zero = newVRegD(env);
3109//ZZ             UInt size;
3110//ZZ             switch (e->Iex.Binop.op) {
3111//ZZ                case Iop_Sar8x8: size = 0; break;
3112//ZZ                case Iop_Sar16x4: size = 1; break;
3113//ZZ                case Iop_Sar32x2: size = 2; break;
3114//ZZ                case Iop_Sar64: size = 3; break;
3115//ZZ                default: vassert(0);
3116//ZZ             }
3117//ZZ             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
3118//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3119//ZZ                                            argR2, zero, argR, size, False));
3120//ZZ             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
3121//ZZ                                           res, argL, argR2, size, False));
3122//ZZ             return res;
3123//ZZ          }
3124//ZZ          case Iop_Sal8x8:
3125//ZZ          case Iop_Sal16x4:
3126//ZZ          case Iop_Sal32x2:
3127//ZZ          case Iop_Sal64x1: {
3128//ZZ             HReg res = newVRegD(env);
3129//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3130//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3131//ZZ             UInt size;
3132//ZZ             switch (e->Iex.Binop.op) {
3133//ZZ                case Iop_Sal8x8: size = 0; break;
3134//ZZ                case Iop_Sal16x4: size = 1; break;
3135//ZZ                case Iop_Sal32x2: size = 2; break;
3136//ZZ                case Iop_Sal64x1: size = 3; break;
3137//ZZ                default: vassert(0);
3138//ZZ             }
3139//ZZ             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
3140//ZZ                                           res, argL, argR, size, False));
3141//ZZ             return res;
3142//ZZ          }
3143//ZZ          case Iop_Shr8x8:
3144//ZZ          case Iop_Shr16x4:
3145//ZZ          case Iop_Shr32x2: {
3146//ZZ             HReg res = newVRegD(env);
3147//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3148//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3149//ZZ             HReg argR2 = newVRegD(env);
3150//ZZ             HReg zero = newVRegD(env);
3151//ZZ             UInt size;
3152//ZZ             switch (e->Iex.Binop.op) {
3153//ZZ                case Iop_Shr8x8: size = 0; break;
3154//ZZ                case Iop_Shr16x4: size = 1; break;
3155//ZZ                case Iop_Shr32x2: size = 2; break;
3156//ZZ                default: vassert(0);
3157//ZZ             }
3158//ZZ             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
3159//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3160//ZZ                                            argR2, zero, argR, size, False));
3161//ZZ             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3162//ZZ                                           res, argL, argR2, size, False));
3163//ZZ             return res;
3164//ZZ          }
3165//ZZ          case Iop_Shl8x8:
3166//ZZ          case Iop_Shl16x4:
3167//ZZ          case Iop_Shl32x2: {
3168//ZZ             HReg res = newVRegD(env);
3169//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3170//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3171//ZZ             UInt size;
3172//ZZ             switch (e->Iex.Binop.op) {
3173//ZZ                case Iop_Shl8x8: size = 0; break;
3174//ZZ                case Iop_Shl16x4: size = 1; break;
3175//ZZ                case Iop_Shl32x2: size = 2; break;
3176//ZZ                default: vassert(0);
3177//ZZ             }
3178//ZZ             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3179//ZZ                                           res, argL, argR, size, False));
3180//ZZ             return res;
3181//ZZ          }
3182//ZZ          case Iop_QShl8x8:
3183//ZZ          case Iop_QShl16x4:
3184//ZZ          case Iop_QShl32x2:
3185//ZZ          case Iop_QShl64x1: {
3186//ZZ             HReg res = newVRegD(env);
3187//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3188//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3189//ZZ             UInt size;
3190//ZZ             switch (e->Iex.Binop.op) {
3191//ZZ                case Iop_QShl8x8: size = 0; break;
3192//ZZ                case Iop_QShl16x4: size = 1; break;
3193//ZZ                case Iop_QShl32x2: size = 2; break;
3194//ZZ                case Iop_QShl64x1: size = 3; break;
3195//ZZ                default: vassert(0);
3196//ZZ             }
3197//ZZ             addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
3198//ZZ                                           res, argL, argR, size, False));
3199//ZZ             return res;
3200//ZZ          }
3201//ZZ          case Iop_QSal8x8:
3202//ZZ          case Iop_QSal16x4:
3203//ZZ          case Iop_QSal32x2:
3204//ZZ          case Iop_QSal64x1: {
3205//ZZ             HReg res = newVRegD(env);
3206//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3207//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3208//ZZ             UInt size;
3209//ZZ             switch (e->Iex.Binop.op) {
3210//ZZ                case Iop_QSal8x8: size = 0; break;
3211//ZZ                case Iop_QSal16x4: size = 1; break;
3212//ZZ                case Iop_QSal32x2: size = 2; break;
3213//ZZ                case Iop_QSal64x1: size = 3; break;
3214//ZZ                default: vassert(0);
3215//ZZ             }
3216//ZZ             addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
3217//ZZ                                           res, argL, argR, size, False));
3218//ZZ             return res;
3219//ZZ          }
3220//ZZ          case Iop_QShlN8x8:
3221//ZZ          case Iop_QShlN16x4:
3222//ZZ          case Iop_QShlN32x2:
3223//ZZ          case Iop_QShlN64x1: {
3224//ZZ             HReg res = newVRegD(env);
3225//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3226//ZZ             UInt size, imm;
3227//ZZ             if (e->Iex.Binop.arg2->tag != Iex_Const ||
3228//ZZ                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3229//ZZ                vpanic("ARM target supports Iop_QShlNAxB with constant "
3230//ZZ                       "second argument only\n");
3231//ZZ             }
3232//ZZ             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3233//ZZ             switch (e->Iex.Binop.op) {
3234//ZZ                case Iop_QShlN8x8: size = 8 | imm; break;
3235//ZZ                case Iop_QShlN16x4: size = 16 | imm; break;
3236//ZZ                case Iop_QShlN32x2: size = 32 | imm; break;
3237//ZZ                case Iop_QShlN64x1: size = 64 | imm; break;
3238//ZZ                default: vassert(0);
3239//ZZ             }
3240//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
3241//ZZ                                           res, argL, size, False));
3242//ZZ             return res;
3243//ZZ          }
3244//ZZ          case Iop_QShlN8Sx8:
3245//ZZ          case Iop_QShlN16Sx4:
3246//ZZ          case Iop_QShlN32Sx2:
3247//ZZ          case Iop_QShlN64Sx1: {
3248//ZZ             HReg res = newVRegD(env);
3249//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3250//ZZ             UInt size, imm;
3251//ZZ             if (e->Iex.Binop.arg2->tag != Iex_Const ||
3252//ZZ                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3253//ZZ                vpanic("ARM target supports Iop_QShlNAxB with constant "
3254//ZZ                       "second argument only\n");
3255//ZZ             }
3256//ZZ             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3257//ZZ             switch (e->Iex.Binop.op) {
3258//ZZ                case Iop_QShlN8Sx8: size = 8 | imm; break;
3259//ZZ                case Iop_QShlN16Sx4: size = 16 | imm; break;
3260//ZZ                case Iop_QShlN32Sx2: size = 32 | imm; break;
3261//ZZ                case Iop_QShlN64Sx1: size = 64 | imm; break;
3262//ZZ                default: vassert(0);
3263//ZZ             }
3264//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
3265//ZZ                                           res, argL, size, False));
3266//ZZ             return res;
3267//ZZ          }
3268//ZZ          case Iop_QSalN8x8:
3269//ZZ          case Iop_QSalN16x4:
3270//ZZ          case Iop_QSalN32x2:
3271//ZZ          case Iop_QSalN64x1: {
3272//ZZ             HReg res = newVRegD(env);
3273//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3274//ZZ             UInt size, imm;
3275//ZZ             if (e->Iex.Binop.arg2->tag != Iex_Const ||
3276//ZZ                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3277//ZZ                vpanic("ARM target supports Iop_QShlNAxB with constant "
3278//ZZ                       "second argument only\n");
3279//ZZ             }
3280//ZZ             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3281//ZZ             switch (e->Iex.Binop.op) {
3282//ZZ                case Iop_QSalN8x8: size = 8 | imm; break;
3283//ZZ                case Iop_QSalN16x4: size = 16 | imm; break;
3284//ZZ                case Iop_QSalN32x2: size = 32 | imm; break;
3285//ZZ                case Iop_QSalN64x1: size = 64 | imm; break;
3286//ZZ                default: vassert(0);
3287//ZZ             }
3288//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
3289//ZZ                                           res, argL, size, False));
3290//ZZ             return res;
3291//ZZ          }
3292//ZZ          case Iop_ShrN8x8:
3293//ZZ          case Iop_ShrN16x4:
3294//ZZ          case Iop_ShrN32x2:
3295//ZZ          case Iop_Shr64: {
3296//ZZ             HReg res = newVRegD(env);
3297//ZZ             HReg tmp = newVRegD(env);
3298//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3299//ZZ             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3300//ZZ             HReg argR2 = newVRegI(env);
3301//ZZ             UInt size;
3302//ZZ             switch (e->Iex.Binop.op) {
3303//ZZ                case Iop_ShrN8x8: size = 0; break;
3304//ZZ                case Iop_ShrN16x4: size = 1; break;
3305//ZZ                case Iop_ShrN32x2: size = 2; break;
3306//ZZ                case Iop_Shr64: size = 3; break;
3307//ZZ                default: vassert(0);
3308//ZZ             }
3309//ZZ             addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
3310//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
3311//ZZ             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3312//ZZ                                           res, argL, tmp, size, False));
3313//ZZ             return res;
3314//ZZ          }
3315//ZZ          case Iop_ShlN8x8:
3316//ZZ          case Iop_ShlN16x4:
3317//ZZ          case Iop_ShlN32x2:
3318//ZZ          case Iop_Shl64: {
3319//ZZ             HReg res = newVRegD(env);
3320//ZZ             HReg tmp = newVRegD(env);
3321//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3322//ZZ             /* special-case Shl64(x, imm8) since the Neon front
3323//ZZ                end produces a lot of those for V{LD,ST}{1,2,3,4}. */
3324//ZZ             if (e->Iex.Binop.op == Iop_Shl64
3325//ZZ                 && e->Iex.Binop.arg2->tag == Iex_Const) {
3326//ZZ                vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
3327//ZZ                Int nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3328//ZZ                if (nshift >= 1 && nshift <= 63) {
3329//ZZ                   addInstr(env, ARMInstr_NShl64(res, argL, nshift));
3330//ZZ                   return res;
3331//ZZ                }
3332//ZZ                /* else fall through to general case */
3333//ZZ             }
3334//ZZ             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3335//ZZ             UInt size;
3336//ZZ             switch (e->Iex.Binop.op) {
3337//ZZ                case Iop_ShlN8x8:  size = 0; break;
3338//ZZ                case Iop_ShlN16x4: size = 1; break;
3339//ZZ                case Iop_ShlN32x2: size = 2; break;
3340//ZZ                case Iop_Shl64:    size = 3; break;
3341//ZZ                default: vassert(0);
3342//ZZ             }
3343//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_DUP,
3344//ZZ                                           tmp, argR, 0, False));
3345//ZZ             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3346//ZZ                                           res, argL, tmp, size, False));
3347//ZZ             return res;
3348//ZZ          }
3349//ZZ          case Iop_SarN8x8:
3350//ZZ          case Iop_SarN16x4:
3351//ZZ          case Iop_SarN32x2:
3352//ZZ          case Iop_Sar64: {
3353//ZZ             HReg res = newVRegD(env);
3354//ZZ             HReg tmp = newVRegD(env);
3355//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3356//ZZ             HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
3357//ZZ             HReg argR2 = newVRegI(env);
3358//ZZ             UInt size;
3359//ZZ             switch (e->Iex.Binop.op) {
3360//ZZ                case Iop_SarN8x8: size = 0; break;
3361//ZZ                case Iop_SarN16x4: size = 1; break;
3362//ZZ                case Iop_SarN32x2: size = 2; break;
3363//ZZ                case Iop_Sar64: size = 3; break;
3364//ZZ                default: vassert(0);
3365//ZZ             }
3366//ZZ             addInstr(env, ARMInstr_Unary(ARMun_NEG, argR2, argR));
3367//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, tmp, argR2, 0, False));
3368//ZZ             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
3369//ZZ                                           res, argL, tmp, size, False));
3370//ZZ             return res;
3371//ZZ          }
3372//ZZ          case Iop_CmpGT8Ux8:
3373//ZZ          case Iop_CmpGT16Ux4:
3374//ZZ          case Iop_CmpGT32Ux2: {
3375//ZZ             HReg res = newVRegD(env);
3376//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3377//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3378//ZZ             UInt size;
3379//ZZ             switch (e->Iex.Binop.op) {
3380//ZZ                case Iop_CmpGT8Ux8: size = 0; break;
3381//ZZ                case Iop_CmpGT16Ux4: size = 1; break;
3382//ZZ                case Iop_CmpGT32Ux2: size = 2; break;
3383//ZZ                default: vassert(0);
3384//ZZ             }
3385//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
3386//ZZ                                            res, argL, argR, size, False));
3387//ZZ             return res;
3388//ZZ          }
3389//ZZ          case Iop_CmpGT8Sx8:
3390//ZZ          case Iop_CmpGT16Sx4:
3391//ZZ          case Iop_CmpGT32Sx2: {
3392//ZZ             HReg res = newVRegD(env);
3393//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3394//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3395//ZZ             UInt size;
3396//ZZ             switch (e->Iex.Binop.op) {
3397//ZZ                case Iop_CmpGT8Sx8: size = 0; break;
3398//ZZ                case Iop_CmpGT16Sx4: size = 1; break;
3399//ZZ                case Iop_CmpGT32Sx2: size = 2; break;
3400//ZZ                default: vassert(0);
3401//ZZ             }
3402//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
3403//ZZ                                            res, argL, argR, size, False));
3404//ZZ             return res;
3405//ZZ          }
3406//ZZ          case Iop_CmpEQ8x8:
3407//ZZ          case Iop_CmpEQ16x4:
3408//ZZ          case Iop_CmpEQ32x2: {
3409//ZZ             HReg res = newVRegD(env);
3410//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3411//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3412//ZZ             UInt size;
3413//ZZ             switch (e->Iex.Binop.op) {
3414//ZZ                case Iop_CmpEQ8x8: size = 0; break;
3415//ZZ                case Iop_CmpEQ16x4: size = 1; break;
3416//ZZ                case Iop_CmpEQ32x2: size = 2; break;
3417//ZZ                default: vassert(0);
3418//ZZ             }
3419//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
3420//ZZ                                            res, argL, argR, size, False));
3421//ZZ             return res;
3422//ZZ          }
3423//ZZ          case Iop_Mul8x8:
3424//ZZ          case Iop_Mul16x4:
3425//ZZ          case Iop_Mul32x2: {
3426//ZZ             HReg res = newVRegD(env);
3427//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3428//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3429//ZZ             UInt size = 0;
3430//ZZ             switch(e->Iex.Binop.op) {
3431//ZZ                case Iop_Mul8x8: size = 0; break;
3432//ZZ                case Iop_Mul16x4: size = 1; break;
3433//ZZ                case Iop_Mul32x2: size = 2; break;
3434//ZZ                default: vassert(0);
3435//ZZ             }
3436//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
3437//ZZ                                            res, argL, argR, size, False));
3438//ZZ             return res;
3439//ZZ          }
3440//ZZ          case Iop_Mul32Fx2: {
3441//ZZ             HReg res = newVRegD(env);
3442//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3443//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3444//ZZ             UInt size = 0;
3445//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
3446//ZZ                                            res, argL, argR, size, False));
3447//ZZ             return res;
3448//ZZ          }
3449//ZZ          case Iop_QDMulHi16Sx4:
3450//ZZ          case Iop_QDMulHi32Sx2: {
3451//ZZ             HReg res = newVRegD(env);
3452//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3453//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3454//ZZ             UInt size = 0;
3455//ZZ             switch(e->Iex.Binop.op) {
3456//ZZ                case Iop_QDMulHi16Sx4: size = 1; break;
3457//ZZ                case Iop_QDMulHi32Sx2: size = 2; break;
3458//ZZ                default: vassert(0);
3459//ZZ             }
3460//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
3461//ZZ                                            res, argL, argR, size, False));
3462//ZZ             return res;
3463//ZZ          }
3464//ZZ
3465//ZZ          case Iop_QRDMulHi16Sx4:
3466//ZZ          case Iop_QRDMulHi32Sx2: {
3467//ZZ             HReg res = newVRegD(env);
3468//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3469//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3470//ZZ             UInt size = 0;
3471//ZZ             switch(e->Iex.Binop.op) {
3472//ZZ                case Iop_QRDMulHi16Sx4: size = 1; break;
3473//ZZ                case Iop_QRDMulHi32Sx2: size = 2; break;
3474//ZZ                default: vassert(0);
3475//ZZ             }
3476//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
3477//ZZ                                            res, argL, argR, size, False));
3478//ZZ             return res;
3479//ZZ          }
3480//ZZ
3481//ZZ          case Iop_PwAdd8x8:
3482//ZZ          case Iop_PwAdd16x4:
3483//ZZ          case Iop_PwAdd32x2: {
3484//ZZ             HReg res = newVRegD(env);
3485//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3486//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3487//ZZ             UInt size = 0;
3488//ZZ             switch(e->Iex.Binop.op) {
3489//ZZ                case Iop_PwAdd8x8: size = 0; break;
3490//ZZ                case Iop_PwAdd16x4: size = 1; break;
3491//ZZ                case Iop_PwAdd32x2: size = 2; break;
3492//ZZ                default: vassert(0);
3493//ZZ             }
3494//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
3495//ZZ                                            res, argL, argR, size, False));
3496//ZZ             return res;
3497//ZZ          }
3498//ZZ          case Iop_PwAdd32Fx2: {
3499//ZZ             HReg res = newVRegD(env);
3500//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3501//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3502//ZZ             UInt size = 0;
3503//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VPADDFP,
3504//ZZ                                            res, argL, argR, size, False));
3505//ZZ             return res;
3506//ZZ          }
3507//ZZ          case Iop_PwMin8Ux8:
3508//ZZ          case Iop_PwMin16Ux4:
3509//ZZ          case Iop_PwMin32Ux2: {
3510//ZZ             HReg res = newVRegD(env);
3511//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3512//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3513//ZZ             UInt size = 0;
3514//ZZ             switch(e->Iex.Binop.op) {
3515//ZZ                case Iop_PwMin8Ux8: size = 0; break;
3516//ZZ                case Iop_PwMin16Ux4: size = 1; break;
3517//ZZ                case Iop_PwMin32Ux2: size = 2; break;
3518//ZZ                default: vassert(0);
3519//ZZ             }
3520//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINU,
3521//ZZ                                            res, argL, argR, size, False));
3522//ZZ             return res;
3523//ZZ          }
3524//ZZ          case Iop_PwMin8Sx8:
3525//ZZ          case Iop_PwMin16Sx4:
3526//ZZ          case Iop_PwMin32Sx2: {
3527//ZZ             HReg res = newVRegD(env);
3528//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3529//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3530//ZZ             UInt size = 0;
3531//ZZ             switch(e->Iex.Binop.op) {
3532//ZZ                case Iop_PwMin8Sx8: size = 0; break;
3533//ZZ                case Iop_PwMin16Sx4: size = 1; break;
3534//ZZ                case Iop_PwMin32Sx2: size = 2; break;
3535//ZZ                default: vassert(0);
3536//ZZ             }
3537//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINS,
3538//ZZ                                            res, argL, argR, size, False));
3539//ZZ             return res;
3540//ZZ          }
3541//ZZ          case Iop_PwMax8Ux8:
3542//ZZ          case Iop_PwMax16Ux4:
3543//ZZ          case Iop_PwMax32Ux2: {
3544//ZZ             HReg res = newVRegD(env);
3545//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3546//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3547//ZZ             UInt size = 0;
3548//ZZ             switch(e->Iex.Binop.op) {
3549//ZZ                case Iop_PwMax8Ux8: size = 0; break;
3550//ZZ                case Iop_PwMax16Ux4: size = 1; break;
3551//ZZ                case Iop_PwMax32Ux2: size = 2; break;
3552//ZZ                default: vassert(0);
3553//ZZ             }
3554//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXU,
3555//ZZ                                            res, argL, argR, size, False));
3556//ZZ             return res;
3557//ZZ          }
3558//ZZ          case Iop_PwMax8Sx8:
3559//ZZ          case Iop_PwMax16Sx4:
3560//ZZ          case Iop_PwMax32Sx2: {
3561//ZZ             HReg res = newVRegD(env);
3562//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3563//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3564//ZZ             UInt size = 0;
3565//ZZ             switch(e->Iex.Binop.op) {
3566//ZZ                case Iop_PwMax8Sx8: size = 0; break;
3567//ZZ                case Iop_PwMax16Sx4: size = 1; break;
3568//ZZ                case Iop_PwMax32Sx2: size = 2; break;
3569//ZZ                default: vassert(0);
3570//ZZ             }
3571//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXS,
3572//ZZ                                            res, argL, argR, size, False));
3573//ZZ             return res;
3574//ZZ          }
3575//ZZ          case Iop_Perm8x8: {
3576//ZZ             HReg res = newVRegD(env);
3577//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3578//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3579//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VTBL,
3580//ZZ                                            res, argL, argR, 0, False));
3581//ZZ             return res;
3582//ZZ          }
3583//ZZ          case Iop_PolynomialMul8x8: {
3584//ZZ             HReg res = newVRegD(env);
3585//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3586//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3587//ZZ             UInt size = 0;
3588//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
3589//ZZ                                            res, argL, argR, size, False));
3590//ZZ             return res;
3591//ZZ          }
3592//ZZ          case Iop_Max32Fx2: {
3593//ZZ             HReg res = newVRegD(env);
3594//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3595//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3596//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
3597//ZZ                                            res, argL, argR, 2, False));
3598//ZZ             return res;
3599//ZZ          }
3600//ZZ          case Iop_Min32Fx2: {
3601//ZZ             HReg res = newVRegD(env);
3602//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3603//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3604//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
3605//ZZ                                            res, argL, argR, 2, False));
3606//ZZ             return res;
3607//ZZ          }
3608//ZZ          case Iop_PwMax32Fx2: {
3609//ZZ             HReg res = newVRegD(env);
3610//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3611//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3612//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
3613//ZZ                                            res, argL, argR, 2, False));
3614//ZZ             return res;
3615//ZZ          }
3616//ZZ          case Iop_PwMin32Fx2: {
3617//ZZ             HReg res = newVRegD(env);
3618//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3619//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3620//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
3621//ZZ                                            res, argL, argR, 2, False));
3622//ZZ             return res;
3623//ZZ          }
3624//ZZ          case Iop_CmpGT32Fx2: {
3625//ZZ             HReg res = newVRegD(env);
3626//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3627//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3628//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
3629//ZZ                                            res, argL, argR, 2, False));
3630//ZZ             return res;
3631//ZZ          }
3632//ZZ          case Iop_CmpGE32Fx2: {
3633//ZZ             HReg res = newVRegD(env);
3634//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3635//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3636//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
3637//ZZ                                            res, argL, argR, 2, False));
3638//ZZ             return res;
3639//ZZ          }
3640//ZZ          case Iop_CmpEQ32Fx2: {
3641//ZZ             HReg res = newVRegD(env);
3642//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3643//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
3644//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
3645//ZZ                                            res, argL, argR, 2, False));
3646//ZZ             return res;
3647//ZZ          }
3648//ZZ          case Iop_F32ToFixed32Ux2_RZ:
3649//ZZ          case Iop_F32ToFixed32Sx2_RZ:
3650//ZZ          case Iop_Fixed32UToF32x2_RN:
3651//ZZ          case Iop_Fixed32SToF32x2_RN: {
3652//ZZ             HReg res = newVRegD(env);
3653//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Binop.arg1);
3654//ZZ             ARMNeonUnOp op;
3655//ZZ             UInt imm6;
3656//ZZ             if (e->Iex.Binop.arg2->tag != Iex_Const ||
3657//ZZ                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3658//ZZ                   vpanic("ARM supports FP <-> Fixed conversion with constant "
3659//ZZ                          "second argument less than 33 only\n");
3660//ZZ             }
3661//ZZ             imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3662//ZZ             vassert(imm6 <= 32 && imm6 > 0);
3663//ZZ             imm6 = 64 - imm6;
3664//ZZ             switch(e->Iex.Binop.op) {
3665//ZZ                case Iop_F32ToFixed32Ux2_RZ: op = ARMneon_VCVTFtoFixedU; break;
3666//ZZ                case Iop_F32ToFixed32Sx2_RZ: op = ARMneon_VCVTFtoFixedS; break;
3667//ZZ                case Iop_Fixed32UToF32x2_RN: op = ARMneon_VCVTFixedUtoF; break;
3668//ZZ                case Iop_Fixed32SToF32x2_RN: op = ARMneon_VCVTFixedStoF; break;
3669//ZZ                default: vassert(0);
3670//ZZ             }
3671//ZZ             addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, False));
3672//ZZ             return res;
3673//ZZ          }
3674//ZZ          /*
3675//ZZ          FIXME: is this here or not?
3676//ZZ          case Iop_VDup8x8:
3677//ZZ          case Iop_VDup16x4:
3678//ZZ          case Iop_VDup32x2: {
3679//ZZ             HReg res = newVRegD(env);
3680//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
3681//ZZ             UInt index;
3682//ZZ             UInt imm4;
3683//ZZ             UInt size = 0;
3684//ZZ             if (e->Iex.Binop.arg2->tag != Iex_Const ||
3685//ZZ                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
3686//ZZ                   vpanic("ARM supports Iop_VDup with constant "
3687//ZZ                          "second argument less than 16 only\n");
3688//ZZ             }
3689//ZZ             index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
3690//ZZ             switch(e->Iex.Binop.op) {
3691//ZZ                case Iop_VDup8x8: imm4 = (index << 1) + 1; break;
3692//ZZ                case Iop_VDup16x4: imm4 = (index << 2) + 2; break;
3693//ZZ                case Iop_VDup32x2: imm4 = (index << 3) + 4; break;
3694//ZZ                default: vassert(0);
3695//ZZ             }
3696//ZZ             if (imm4 >= 16) {
3697//ZZ                vpanic("ARM supports Iop_VDup with constant "
3698//ZZ                       "second argument less than 16 only\n");
3699//ZZ             }
3700//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
3701//ZZ                                           res, argL, imm4, False));
3702//ZZ             return res;
3703//ZZ          }
3704//ZZ          */
3705//ZZ          default:
3706//ZZ             break;
3707//ZZ       }
3708//ZZ    }
3709//ZZ
3710//ZZ    /* --------- UNARY ops --------- */
3711//ZZ    if (e->tag == Iex_Unop) {
3712//ZZ       switch (e->Iex.Unop.op) {
3713//ZZ
3714//ZZ          /* 32Uto64 */
3715//ZZ          case Iop_32Uto64: {
3716//ZZ             HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3717//ZZ             HReg rHi = newVRegI(env);
3718//ZZ             HReg res = newVRegD(env);
3719//ZZ             addInstr(env, ARMInstr_Imm32(rHi, 0));
3720//ZZ             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3721//ZZ             return res;
3722//ZZ          }
3723//ZZ
3724//ZZ          /* 32Sto64 */
3725//ZZ          case Iop_32Sto64: {
3726//ZZ             HReg rLo = iselIntExpr_R(env, e->Iex.Unop.arg);
3727//ZZ             HReg rHi = newVRegI(env);
3728//ZZ             addInstr(env, mk_iMOVds_RR(rHi, rLo));
3729//ZZ             addInstr(env, ARMInstr_Shift(ARMsh_SAR, rHi, rHi, ARMRI5_I5(31)));
3730//ZZ             HReg res = newVRegD(env);
3731//ZZ             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3732//ZZ             return res;
3733//ZZ          }
3734//ZZ
3735//ZZ          /* The next 3 are pass-throughs */
3736//ZZ          /* ReinterpF64asI64 */
3737//ZZ          case Iop_ReinterpF64asI64:
3738//ZZ          /* Left64(e) */
3739//ZZ          case Iop_Left64:
3740//ZZ          /* CmpwNEZ64(e) */
3741//ZZ          case Iop_1Sto64: {
3742//ZZ             HReg rLo, rHi;
3743//ZZ             HReg res = newVRegD(env);
3744//ZZ             iselInt64Expr(&rHi, &rLo, env, e);
3745//ZZ             addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
3746//ZZ             return res;
3747//ZZ          }
3748//ZZ
3749//ZZ          case Iop_Not64: {
3750//ZZ             DECLARE_PATTERN(p_veqz_8x8);
3751//ZZ             DECLARE_PATTERN(p_veqz_16x4);
3752//ZZ             DECLARE_PATTERN(p_veqz_32x2);
3753//ZZ             DECLARE_PATTERN(p_vcge_8sx8);
3754//ZZ             DECLARE_PATTERN(p_vcge_16sx4);
3755//ZZ             DECLARE_PATTERN(p_vcge_32sx2);
3756//ZZ             DECLARE_PATTERN(p_vcge_8ux8);
3757//ZZ             DECLARE_PATTERN(p_vcge_16ux4);
3758//ZZ             DECLARE_PATTERN(p_vcge_32ux2);
3759//ZZ             DEFINE_PATTERN(p_veqz_8x8,
3760//ZZ                   unop(Iop_Not64, unop(Iop_CmpNEZ8x8, bind(0))));
3761//ZZ             DEFINE_PATTERN(p_veqz_16x4,
3762//ZZ                   unop(Iop_Not64, unop(Iop_CmpNEZ16x4, bind(0))));
3763//ZZ             DEFINE_PATTERN(p_veqz_32x2,
3764//ZZ                   unop(Iop_Not64, unop(Iop_CmpNEZ32x2, bind(0))));
3765//ZZ             DEFINE_PATTERN(p_vcge_8sx8,
3766//ZZ                   unop(Iop_Not64, binop(Iop_CmpGT8Sx8, bind(1), bind(0))));
3767//ZZ             DEFINE_PATTERN(p_vcge_16sx4,
3768//ZZ                   unop(Iop_Not64, binop(Iop_CmpGT16Sx4, bind(1), bind(0))));
3769//ZZ             DEFINE_PATTERN(p_vcge_32sx2,
3770//ZZ                   unop(Iop_Not64, binop(Iop_CmpGT32Sx2, bind(1), bind(0))));
3771//ZZ             DEFINE_PATTERN(p_vcge_8ux8,
3772//ZZ                   unop(Iop_Not64, binop(Iop_CmpGT8Ux8, bind(1), bind(0))));
3773//ZZ             DEFINE_PATTERN(p_vcge_16ux4,
3774//ZZ                   unop(Iop_Not64, binop(Iop_CmpGT16Ux4, bind(1), bind(0))));
3775//ZZ             DEFINE_PATTERN(p_vcge_32ux2,
3776//ZZ                   unop(Iop_Not64, binop(Iop_CmpGT32Ux2, bind(1), bind(0))));
3777//ZZ             if (matchIRExpr(&mi, p_veqz_8x8, e)) {
3778//ZZ                HReg res = newVRegD(env);
3779//ZZ                HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3780//ZZ                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, False));
3781//ZZ                return res;
3782//ZZ             } else if (matchIRExpr(&mi, p_veqz_16x4, e)) {
3783//ZZ                HReg res = newVRegD(env);
3784//ZZ                HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3785//ZZ                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, False));
3786//ZZ                return res;
3787//ZZ             } else if (matchIRExpr(&mi, p_veqz_32x2, e)) {
3788//ZZ                HReg res = newVRegD(env);
3789//ZZ                HReg arg = iselNeon64Expr(env, mi.bindee[0]);
3790//ZZ                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, False));
3791//ZZ                return res;
3792//ZZ             } else if (matchIRExpr(&mi, p_vcge_8sx8, e)) {
3793//ZZ                HReg res = newVRegD(env);
3794//ZZ                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3795//ZZ                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3796//ZZ                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3797//ZZ                                               res, argL, argR, 0, False));
3798//ZZ                return res;
3799//ZZ             } else if (matchIRExpr(&mi, p_vcge_16sx4, e)) {
3800//ZZ                HReg res = newVRegD(env);
3801//ZZ                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3802//ZZ                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3803//ZZ                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3804//ZZ                                               res, argL, argR, 1, False));
3805//ZZ                return res;
3806//ZZ             } else if (matchIRExpr(&mi, p_vcge_32sx2, e)) {
3807//ZZ                HReg res = newVRegD(env);
3808//ZZ                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3809//ZZ                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3810//ZZ                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
3811//ZZ                                               res, argL, argR, 2, False));
3812//ZZ                return res;
3813//ZZ             } else if (matchIRExpr(&mi, p_vcge_8ux8, e)) {
3814//ZZ                HReg res = newVRegD(env);
3815//ZZ                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3816//ZZ                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3817//ZZ                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3818//ZZ                                               res, argL, argR, 0, False));
3819//ZZ                return res;
3820//ZZ             } else if (matchIRExpr(&mi, p_vcge_16ux4, e)) {
3821//ZZ                HReg res = newVRegD(env);
3822//ZZ                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3823//ZZ                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3824//ZZ                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3825//ZZ                                               res, argL, argR, 1, False));
3826//ZZ                return res;
3827//ZZ             } else if (matchIRExpr(&mi, p_vcge_32ux2, e)) {
3828//ZZ                HReg res = newVRegD(env);
3829//ZZ                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
3830//ZZ                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
3831//ZZ                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
3832//ZZ                                               res, argL, argR, 2, False));
3833//ZZ                return res;
3834//ZZ             } else {
3835//ZZ                HReg res = newVRegD(env);
3836//ZZ                HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3837//ZZ                addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, False));
3838//ZZ                return res;
3839//ZZ             }
3840//ZZ          }
3841//ZZ          case Iop_Dup8x8:
3842//ZZ          case Iop_Dup16x4:
3843//ZZ          case Iop_Dup32x2: {
3844//ZZ             HReg res, arg;
3845//ZZ             UInt size;
3846//ZZ             DECLARE_PATTERN(p_vdup_8x8);
3847//ZZ             DECLARE_PATTERN(p_vdup_16x4);
3848//ZZ             DECLARE_PATTERN(p_vdup_32x2);
3849//ZZ             DEFINE_PATTERN(p_vdup_8x8,
3850//ZZ                   unop(Iop_Dup8x8, binop(Iop_GetElem8x8, bind(0), bind(1))));
3851//ZZ             DEFINE_PATTERN(p_vdup_16x4,
3852//ZZ                   unop(Iop_Dup16x4, binop(Iop_GetElem16x4, bind(0), bind(1))));
3853//ZZ             DEFINE_PATTERN(p_vdup_32x2,
3854//ZZ                   unop(Iop_Dup32x2, binop(Iop_GetElem32x2, bind(0), bind(1))));
3855//ZZ             if (matchIRExpr(&mi, p_vdup_8x8, e)) {
3856//ZZ                UInt index;
3857//ZZ                UInt imm4;
3858//ZZ                if (mi.bindee[1]->tag == Iex_Const &&
3859//ZZ                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3860//ZZ                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3861//ZZ                   imm4 = (index << 1) + 1;
3862//ZZ                   if (index < 8) {
3863//ZZ                      res = newVRegD(env);
3864//ZZ                      arg = iselNeon64Expr(env, mi.bindee[0]);
3865//ZZ                      addInstr(env, ARMInstr_NUnaryS(
3866//ZZ                                       ARMneon_VDUP,
3867//ZZ                                       mkARMNRS(ARMNRS_Reg, res, 0),
3868//ZZ                                       mkARMNRS(ARMNRS_Scalar, arg, index),
3869//ZZ                                       imm4, False
3870//ZZ                              ));
3871//ZZ                      return res;
3872//ZZ                   }
3873//ZZ                }
3874//ZZ             } else if (matchIRExpr(&mi, p_vdup_16x4, e)) {
3875//ZZ                UInt index;
3876//ZZ                UInt imm4;
3877//ZZ                if (mi.bindee[1]->tag == Iex_Const &&
3878//ZZ                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3879//ZZ                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3880//ZZ                   imm4 = (index << 2) + 2;
3881//ZZ                   if (index < 4) {
3882//ZZ                      res = newVRegD(env);
3883//ZZ                      arg = iselNeon64Expr(env, mi.bindee[0]);
3884//ZZ                      addInstr(env, ARMInstr_NUnaryS(
3885//ZZ                                       ARMneon_VDUP,
3886//ZZ                                       mkARMNRS(ARMNRS_Reg, res, 0),
3887//ZZ                                       mkARMNRS(ARMNRS_Scalar, arg, index),
3888//ZZ                                       imm4, False
3889//ZZ                              ));
3890//ZZ                      return res;
3891//ZZ                   }
3892//ZZ                }
3893//ZZ             } else if (matchIRExpr(&mi, p_vdup_32x2, e)) {
3894//ZZ                UInt index;
3895//ZZ                UInt imm4;
3896//ZZ                if (mi.bindee[1]->tag == Iex_Const &&
3897//ZZ                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
3898//ZZ                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
3899//ZZ                   imm4 = (index << 3) + 4;
3900//ZZ                   if (index < 2) {
3901//ZZ                      res = newVRegD(env);
3902//ZZ                      arg = iselNeon64Expr(env, mi.bindee[0]);
3903//ZZ                      addInstr(env, ARMInstr_NUnaryS(
3904//ZZ                                       ARMneon_VDUP,
3905//ZZ                                       mkARMNRS(ARMNRS_Reg, res, 0),
3906//ZZ                                       mkARMNRS(ARMNRS_Scalar, arg, index),
3907//ZZ                                       imm4, False
3908//ZZ                              ));
3909//ZZ                      return res;
3910//ZZ                   }
3911//ZZ                }
3912//ZZ             }
3913//ZZ             arg = iselIntExpr_R(env, e->Iex.Unop.arg);
3914//ZZ             res = newVRegD(env);
3915//ZZ             switch (e->Iex.Unop.op) {
3916//ZZ                case Iop_Dup8x8: size = 0; break;
3917//ZZ                case Iop_Dup16x4: size = 1; break;
3918//ZZ                case Iop_Dup32x2: size = 2; break;
3919//ZZ                default: vassert(0);
3920//ZZ             }
3921//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, False));
3922//ZZ             return res;
3923//ZZ          }
3924//ZZ          case Iop_Abs8x8:
3925//ZZ          case Iop_Abs16x4:
3926//ZZ          case Iop_Abs32x2: {
3927//ZZ             HReg res = newVRegD(env);
3928//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3929//ZZ             UInt size = 0;
3930//ZZ             switch(e->Iex.Binop.op) {
3931//ZZ                case Iop_Abs8x8: size = 0; break;
3932//ZZ                case Iop_Abs16x4: size = 1; break;
3933//ZZ                case Iop_Abs32x2: size = 2; break;
3934//ZZ                default: vassert(0);
3935//ZZ             }
3936//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, False));
3937//ZZ             return res;
3938//ZZ          }
3939//ZZ          case Iop_Reverse64_8x8:
3940//ZZ          case Iop_Reverse64_16x4:
3941//ZZ          case Iop_Reverse64_32x2: {
3942//ZZ             HReg res = newVRegD(env);
3943//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3944//ZZ             UInt size = 0;
3945//ZZ             switch(e->Iex.Binop.op) {
3946//ZZ                case Iop_Reverse64_8x8: size = 0; break;
3947//ZZ                case Iop_Reverse64_16x4: size = 1; break;
3948//ZZ                case Iop_Reverse64_32x2: size = 2; break;
3949//ZZ                default: vassert(0);
3950//ZZ             }
3951//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
3952//ZZ                                           res, arg, size, False));
3953//ZZ             return res;
3954//ZZ          }
3955//ZZ          case Iop_Reverse32_8x8:
3956//ZZ          case Iop_Reverse32_16x4: {
3957//ZZ             HReg res = newVRegD(env);
3958//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3959//ZZ             UInt size = 0;
3960//ZZ             switch(e->Iex.Binop.op) {
3961//ZZ                case Iop_Reverse32_8x8: size = 0; break;
3962//ZZ                case Iop_Reverse32_16x4: size = 1; break;
3963//ZZ                default: vassert(0);
3964//ZZ             }
3965//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
3966//ZZ                                           res, arg, size, False));
3967//ZZ             return res;
3968//ZZ          }
3969//ZZ          case Iop_Reverse16_8x8: {
3970//ZZ             HReg res = newVRegD(env);
3971//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3972//ZZ             UInt size = 0;
3973//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
3974//ZZ                                           res, arg, size, False));
3975//ZZ             return res;
3976//ZZ          }
3977//ZZ          case Iop_CmpwNEZ64: {
3978//ZZ             HReg x_lsh = newVRegD(env);
3979//ZZ             HReg x_rsh = newVRegD(env);
3980//ZZ             HReg lsh_amt = newVRegD(env);
3981//ZZ             HReg rsh_amt = newVRegD(env);
3982//ZZ             HReg zero = newVRegD(env);
3983//ZZ             HReg tmp = newVRegD(env);
3984//ZZ             HReg tmp2 = newVRegD(env);
3985//ZZ             HReg res = newVRegD(env);
3986//ZZ             HReg x = newVRegD(env);
3987//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
3988//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, False));
3989//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, False));
3990//ZZ             addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
3991//ZZ             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
3992//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
3993//ZZ                                            rsh_amt, zero, lsh_amt, 2, False));
3994//ZZ             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3995//ZZ                                           x_lsh, x, lsh_amt, 3, False));
3996//ZZ             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
3997//ZZ                                           x_rsh, x, rsh_amt, 3, False));
3998//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
3999//ZZ                                            tmp, x_lsh, x_rsh, 0, False));
4000//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4001//ZZ                                            res, tmp, x, 0, False));
4002//ZZ             return res;
4003//ZZ          }
4004//ZZ          case Iop_CmpNEZ8x8:
4005//ZZ          case Iop_CmpNEZ16x4:
4006//ZZ          case Iop_CmpNEZ32x2: {
4007//ZZ             HReg res = newVRegD(env);
4008//ZZ             HReg tmp = newVRegD(env);
4009//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4010//ZZ             UInt size;
4011//ZZ             switch (e->Iex.Unop.op) {
4012//ZZ                case Iop_CmpNEZ8x8: size = 0; break;
4013//ZZ                case Iop_CmpNEZ16x4: size = 1; break;
4014//ZZ                case Iop_CmpNEZ32x2: size = 2; break;
4015//ZZ                default: vassert(0);
4016//ZZ             }
4017//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp, arg, size, False));
4018//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
4019//ZZ             return res;
4020//ZZ          }
4021//ZZ          case Iop_NarrowUn16to8x8:
4022//ZZ          case Iop_NarrowUn32to16x4:
4023//ZZ          case Iop_NarrowUn64to32x2: {
4024//ZZ             HReg res = newVRegD(env);
4025//ZZ             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4026//ZZ             UInt size = 0;
4027//ZZ             switch(e->Iex.Binop.op) {
4028//ZZ                case Iop_NarrowUn16to8x8:  size = 0; break;
4029//ZZ                case Iop_NarrowUn32to16x4: size = 1; break;
4030//ZZ                case Iop_NarrowUn64to32x2: size = 2; break;
4031//ZZ                default: vassert(0);
4032//ZZ             }
4033//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
4034//ZZ                                           res, arg, size, False));
4035//ZZ             return res;
4036//ZZ          }
4037//ZZ          case Iop_QNarrowUn16Sto8Sx8:
4038//ZZ          case Iop_QNarrowUn32Sto16Sx4:
4039//ZZ          case Iop_QNarrowUn64Sto32Sx2: {
4040//ZZ             HReg res = newVRegD(env);
4041//ZZ             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4042//ZZ             UInt size = 0;
4043//ZZ             switch(e->Iex.Binop.op) {
4044//ZZ                case Iop_QNarrowUn16Sto8Sx8:  size = 0; break;
4045//ZZ                case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
4046//ZZ                case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
4047//ZZ                default: vassert(0);
4048//ZZ             }
4049//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
4050//ZZ                                           res, arg, size, False));
4051//ZZ             return res;
4052//ZZ          }
4053//ZZ          case Iop_QNarrowUn16Sto8Ux8:
4054//ZZ          case Iop_QNarrowUn32Sto16Ux4:
4055//ZZ          case Iop_QNarrowUn64Sto32Ux2: {
4056//ZZ             HReg res = newVRegD(env);
4057//ZZ             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4058//ZZ             UInt size = 0;
4059//ZZ             switch(e->Iex.Binop.op) {
4060//ZZ                case Iop_QNarrowUn16Sto8Ux8:  size = 0; break;
4061//ZZ                case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
4062//ZZ                case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
4063//ZZ                default: vassert(0);
4064//ZZ             }
4065//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
4066//ZZ                                           res, arg, size, False));
4067//ZZ             return res;
4068//ZZ          }
4069//ZZ          case Iop_QNarrowUn16Uto8Ux8:
4070//ZZ          case Iop_QNarrowUn32Uto16Ux4:
4071//ZZ          case Iop_QNarrowUn64Uto32Ux2: {
4072//ZZ             HReg res = newVRegD(env);
4073//ZZ             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4074//ZZ             UInt size = 0;
4075//ZZ             switch(e->Iex.Binop.op) {
4076//ZZ                case Iop_QNarrowUn16Uto8Ux8:  size = 0; break;
4077//ZZ                case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
4078//ZZ                case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
4079//ZZ                default: vassert(0);
4080//ZZ             }
4081//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
4082//ZZ                                           res, arg, size, False));
4083//ZZ             return res;
4084//ZZ          }
4085//ZZ          case Iop_PwAddL8Sx8:
4086//ZZ          case Iop_PwAddL16Sx4:
4087//ZZ          case Iop_PwAddL32Sx2: {
4088//ZZ             HReg res = newVRegD(env);
4089//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4090//ZZ             UInt size = 0;
4091//ZZ             switch(e->Iex.Binop.op) {
4092//ZZ                case Iop_PwAddL8Sx8: size = 0; break;
4093//ZZ                case Iop_PwAddL16Sx4: size = 1; break;
4094//ZZ                case Iop_PwAddL32Sx2: size = 2; break;
4095//ZZ                default: vassert(0);
4096//ZZ             }
4097//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4098//ZZ                                           res, arg, size, False));
4099//ZZ             return res;
4100//ZZ          }
4101//ZZ          case Iop_PwAddL8Ux8:
4102//ZZ          case Iop_PwAddL16Ux4:
4103//ZZ          case Iop_PwAddL32Ux2: {
4104//ZZ             HReg res = newVRegD(env);
4105//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4106//ZZ             UInt size = 0;
4107//ZZ             switch(e->Iex.Binop.op) {
4108//ZZ                case Iop_PwAddL8Ux8: size = 0; break;
4109//ZZ                case Iop_PwAddL16Ux4: size = 1; break;
4110//ZZ                case Iop_PwAddL32Ux2: size = 2; break;
4111//ZZ                default: vassert(0);
4112//ZZ             }
4113//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4114//ZZ                                           res, arg, size, False));
4115//ZZ             return res;
4116//ZZ          }
4117//ZZ          case Iop_Cnt8x8: {
4118//ZZ             HReg res = newVRegD(env);
4119//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4120//ZZ             UInt size = 0;
4121//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_CNT,
4122//ZZ                                           res, arg, size, False));
4123//ZZ             return res;
4124//ZZ          }
4125//ZZ          case Iop_Clz8Sx8:
4126//ZZ          case Iop_Clz16Sx4:
4127//ZZ          case Iop_Clz32Sx2: {
4128//ZZ             HReg res = newVRegD(env);
4129//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4130//ZZ             UInt size = 0;
4131//ZZ             switch(e->Iex.Binop.op) {
4132//ZZ                case Iop_Clz8Sx8: size = 0; break;
4133//ZZ                case Iop_Clz16Sx4: size = 1; break;
4134//ZZ                case Iop_Clz32Sx2: size = 2; break;
4135//ZZ                default: vassert(0);
4136//ZZ             }
4137//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_CLZ,
4138//ZZ                                           res, arg, size, False));
4139//ZZ             return res;
4140//ZZ          }
4141//ZZ          case Iop_Cls8Sx8:
4142//ZZ          case Iop_Cls16Sx4:
4143//ZZ          case Iop_Cls32Sx2: {
4144//ZZ             HReg res = newVRegD(env);
4145//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4146//ZZ             UInt size = 0;
4147//ZZ             switch(e->Iex.Binop.op) {
4148//ZZ                case Iop_Cls8Sx8: size = 0; break;
4149//ZZ                case Iop_Cls16Sx4: size = 1; break;
4150//ZZ                case Iop_Cls32Sx2: size = 2; break;
4151//ZZ                default: vassert(0);
4152//ZZ             }
4153//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_CLS,
4154//ZZ                                           res, arg, size, False));
4155//ZZ             return res;
4156//ZZ          }
4157//ZZ          case Iop_FtoI32Sx2_RZ: {
4158//ZZ             HReg res = newVRegD(env);
4159//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4160//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4161//ZZ                                           res, arg, 2, False));
4162//ZZ             return res;
4163//ZZ          }
4164//ZZ          case Iop_FtoI32Ux2_RZ: {
4165//ZZ             HReg res = newVRegD(env);
4166//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4167//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4168//ZZ                                           res, arg, 2, False));
4169//ZZ             return res;
4170//ZZ          }
4171//ZZ          case Iop_I32StoFx2: {
4172//ZZ             HReg res = newVRegD(env);
4173//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4174//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4175//ZZ                                           res, arg, 2, False));
4176//ZZ             return res;
4177//ZZ          }
4178//ZZ          case Iop_I32UtoFx2: {
4179//ZZ             HReg res = newVRegD(env);
4180//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4181//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4182//ZZ                                           res, arg, 2, False));
4183//ZZ             return res;
4184//ZZ          }
4185//ZZ          case Iop_F32toF16x4: {
4186//ZZ             HReg res = newVRegD(env);
4187//ZZ             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4188//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF32toF16,
4189//ZZ                                           res, arg, 2, False));
4190//ZZ             return res;
4191//ZZ          }
4192//ZZ          case Iop_Recip32Fx2: {
4193//ZZ             HReg res = newVRegD(env);
4194//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
4195//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4196//ZZ                                           res, argL, 0, False));
4197//ZZ             return res;
4198//ZZ          }
4199//ZZ          case Iop_Recip32x2: {
4200//ZZ             HReg res = newVRegD(env);
4201//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
4202//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4203//ZZ                                           res, argL, 0, False));
4204//ZZ             return res;
4205//ZZ          }
4206//ZZ          case Iop_Abs32Fx2: {
4207//ZZ             DECLARE_PATTERN(p_vabd_32fx2);
4208//ZZ             DEFINE_PATTERN(p_vabd_32fx2,
4209//ZZ                            unop(Iop_Abs32Fx2,
4210//ZZ                                 binop(Iop_Sub32Fx2,
4211//ZZ                                       bind(0),
4212//ZZ                                       bind(1))));
4213//ZZ             if (matchIRExpr(&mi, p_vabd_32fx2, e)) {
4214//ZZ                HReg res = newVRegD(env);
4215//ZZ                HReg argL = iselNeon64Expr(env, mi.bindee[0]);
4216//ZZ                HReg argR = iselNeon64Expr(env, mi.bindee[1]);
4217//ZZ                addInstr(env, ARMInstr_NBinary(ARMneon_VABDFP,
4218//ZZ                                               res, argL, argR, 0, False));
4219//ZZ                return res;
4220//ZZ             } else {
4221//ZZ                HReg res = newVRegD(env);
4222//ZZ                HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4223//ZZ                addInstr(env, ARMInstr_NUnary(ARMneon_VABSFP,
4224//ZZ                                              res, arg, 0, False));
4225//ZZ                return res;
4226//ZZ             }
4227//ZZ          }
4228//ZZ          case Iop_Rsqrte32Fx2: {
4229//ZZ             HReg res = newVRegD(env);
4230//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4231//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4232//ZZ                                           res, arg, 0, False));
4233//ZZ             return res;
4234//ZZ          }
4235//ZZ          case Iop_Rsqrte32x2: {
4236//ZZ             HReg res = newVRegD(env);
4237//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4238//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4239//ZZ                                           res, arg, 0, False));
4240//ZZ             return res;
4241//ZZ          }
4242//ZZ          case Iop_Neg32Fx2: {
4243//ZZ             HReg res = newVRegD(env);
4244//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4245//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VNEGF,
4246//ZZ                                           res, arg, 0, False));
4247//ZZ             return res;
4248//ZZ          }
4249//ZZ          default:
4250//ZZ             break;
4251//ZZ       }
4252//ZZ    } /* if (e->tag == Iex_Unop) */
4253//ZZ
4254//ZZ    if (e->tag == Iex_Triop) {
4255//ZZ       IRTriop *triop = e->Iex.Triop.details;
4256//ZZ
4257//ZZ       switch (triop->op) {
4258//ZZ          case Iop_Extract64: {
4259//ZZ             HReg res = newVRegD(env);
4260//ZZ             HReg argL = iselNeon64Expr(env, triop->arg1);
4261//ZZ             HReg argR = iselNeon64Expr(env, triop->arg2);
4262//ZZ             UInt imm4;
4263//ZZ             if (triop->arg3->tag != Iex_Const ||
4264//ZZ                 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
4265//ZZ                vpanic("ARM target supports Iop_Extract64 with constant "
4266//ZZ                       "third argument less than 16 only\n");
4267//ZZ             }
4268//ZZ             imm4 = triop->arg3->Iex.Const.con->Ico.U8;
4269//ZZ             if (imm4 >= 8) {
4270//ZZ                vpanic("ARM target supports Iop_Extract64 with constant "
4271//ZZ                       "third argument less than 16 only\n");
4272//ZZ             }
4273//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
4274//ZZ                                            res, argL, argR, imm4, False));
4275//ZZ             return res;
4276//ZZ          }
4277//ZZ          case Iop_SetElem8x8:
4278//ZZ          case Iop_SetElem16x4:
4279//ZZ          case Iop_SetElem32x2: {
4280//ZZ             HReg res = newVRegD(env);
4281//ZZ             HReg dreg = iselNeon64Expr(env, triop->arg1);
4282//ZZ             HReg arg = iselIntExpr_R(env, triop->arg3);
4283//ZZ             UInt index, size;
4284//ZZ             if (triop->arg2->tag != Iex_Const ||
4285//ZZ                 typeOfIRExpr(env->type_env, triop->arg2) != Ity_I8) {
4286//ZZ                vpanic("ARM target supports SetElem with constant "
4287//ZZ                       "second argument only\n");
4288//ZZ             }
4289//ZZ             index = triop->arg2->Iex.Const.con->Ico.U8;
4290//ZZ             switch (triop->op) {
4291//ZZ                case Iop_SetElem8x8: vassert(index < 8); size = 0; break;
4292//ZZ                case Iop_SetElem16x4: vassert(index < 4); size = 1; break;
4293//ZZ                case Iop_SetElem32x2: vassert(index < 2); size = 2; break;
4294//ZZ                default: vassert(0);
4295//ZZ             }
4296//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, res, dreg, 4, False));
4297//ZZ             addInstr(env, ARMInstr_NUnaryS(ARMneon_SETELEM,
4298//ZZ                                            mkARMNRS(ARMNRS_Scalar, res, index),
4299//ZZ                                            mkARMNRS(ARMNRS_Reg, arg, 0),
4300//ZZ                                            size, False));
4301//ZZ             return res;
4302//ZZ          }
4303//ZZ          default:
4304//ZZ             break;
4305//ZZ       }
4306//ZZ    }
4307//ZZ
4308//ZZ    /* --------- MULTIPLEX --------- */
4309//ZZ    if (e->tag == Iex_ITE) { // VFD
4310//ZZ       HReg rLo, rHi;
4311//ZZ       HReg res = newVRegD(env);
4312//ZZ       iselInt64Expr(&rHi, &rLo, env, e);
4313//ZZ       addInstr(env, ARMInstr_VXferD(True/*toD*/, res, rHi, rLo));
4314//ZZ       return res;
4315//ZZ    }
4316//ZZ
4317//ZZ    ppIRExpr(e);
4318//ZZ    vpanic("iselNeon64Expr");
4319//ZZ }
4320
4321
4322/*---------------------------------------------------------*/
4323/*--- ISEL: Vector (NEON) expressions (128 bit)         ---*/
4324/*---------------------------------------------------------*/
4325
4326static HReg iselV128Expr ( ISelEnv* env, IRExpr* e )
4327{
4328   HReg r = iselV128Expr_wrk( env, e );
4329   vassert(hregClass(r) == HRcVec128);
4330   vassert(hregIsVirtual(r));
4331   return r;
4332}
4333
4334/* DO NOT CALL THIS DIRECTLY */
4335static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
4336{
4337   IRType ty = typeOfIRExpr(env->type_env, e);
4338   vassert(e);
4339   vassert(ty == Ity_V128);
4340
4341   if (e->tag == Iex_RdTmp) {
4342      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
4343   }
4344
4345   if (e->tag == Iex_Const) {
4346      /* Only a very limited range of constants is handled. */
4347      vassert(e->Iex.Const.con->tag == Ico_V128);
4348      UShort con = e->Iex.Const.con->Ico.V128;
4349      if (con == 0x0000) {
4350         HReg res = newVRegV(env);
4351         addInstr(env, ARM64Instr_VImmQ(res, con));
4352         return res;
4353      }
4354      /* Unhandled */
4355      goto v128_expr_bad;
4356   }
4357
4358   if (e->tag == Iex_Load) {
4359      HReg res = newVRegV(env);
4360      HReg rN  = iselIntExpr_R(env, e->Iex.Load.addr);
4361      vassert(ty == Ity_V128);
4362      addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN));
4363      return res;
4364   }
4365
4366   if (e->tag == Iex_Get) {
4367      UInt offs = (UInt)e->Iex.Get.offset;
4368      if (offs < (1<<12)) {
4369         HReg addr = mk_baseblock_128bit_access_addr(env, offs);
4370         HReg res  = newVRegV(env);
4371         vassert(ty == Ity_V128);
4372         addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr));
4373         return res;
4374      }
4375      goto v128_expr_bad;
4376   }
4377
4378   if (e->tag == Iex_Unop) {
4379
4380     /* Iop_ZeroHIXXofV128 cases */
4381      UShort imm16 = 0;
4382      switch (e->Iex.Unop.op) {
4383         case Iop_ZeroHI64ofV128:  imm16 = 0x00FF; break;
4384         case Iop_ZeroHI96ofV128:  imm16 = 0x000F; break;
4385         case Iop_ZeroHI112ofV128: imm16 = 0x0003; break;
4386         case Iop_ZeroHI120ofV128: imm16 = 0x0001; break;
4387         default: break;
4388      }
4389      if (imm16 != 0) {
4390         HReg src = iselV128Expr(env, e->Iex.Unop.arg);
4391         HReg imm = newVRegV(env);
4392         HReg res = newVRegV(env);
4393         addInstr(env, ARM64Instr_VImmQ(imm, imm16));
4394         addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm));
4395         return res;
4396      }
4397
4398      /* Other cases */
4399      switch (e->Iex.Unop.op) {
4400         case Iop_Cnt8x16:
4401         case Iop_NotV128:
4402         case Iop_AddLV8Ux16:
4403         case Iop_AddLV16Ux8:
4404         case Iop_AddLV32Ux4:
4405         case Iop_AddLV8Sx16:
4406         case Iop_AddLV16Sx8:
4407         case Iop_AddLV32Sx4:
4408         case Iop_Abs64Fx2:
4409         case Iop_Abs32Fx4:
4410         case Iop_Neg64Fx2:
4411         case Iop_Neg32Fx4: {
4412            HReg res = newVRegV(env);
4413            HReg arg = iselV128Expr(env, e->Iex.Unop.arg);
4414            ARM64VecUnaryOp op = ARM64vecu_INVALID;
4415            switch (e->Iex.Unop.op) {
4416               case Iop_NotV128:  op = ARM64vecu_NOT;      break;
4417               case Iop_Cnt8x16:  op = ARM64vecu_CNT;      break;
4418               case Iop_Abs64Fx2: op = ARM64vecu_FABS64x2; break;
4419               case Iop_Abs32Fx4: op = ARM64vecu_FABS32x4; break;
4420               case Iop_Neg64Fx2: op = ARM64vecu_FNEG64x2; break;
4421               case Iop_Neg32Fx4: op = ARM64vecu_FNEG32x4; break;
4422               case Iop_AddLV8Ux16: op = ARM64vecu_UADDLV8x16; break;
4423               case Iop_AddLV16Ux8: op = ARM64vecu_UADDLV16x8; break;
4424               case Iop_AddLV32Ux4: op = ARM64vecu_UADDLV32x4; break;
4425               case Iop_AddLV8Sx16: op = ARM64vecu_SADDLV8x16; break;
4426               case Iop_AddLV16Sx8: op = ARM64vecu_SADDLV16x8; break;
4427               case Iop_AddLV32Sx4: op = ARM64vecu_SADDLV32x4; break;
4428               default: vassert(0);
4429            }
4430            addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
4431            return res;
4432         }
4433         case Iop_CmpNEZ8x16:
4434         case Iop_CmpNEZ16x8:
4435         case Iop_CmpNEZ32x4:
4436         case Iop_CmpNEZ64x2: {
4437            HReg arg  = iselV128Expr(env, e->Iex.Unop.arg);
4438            HReg zero = newVRegV(env);
4439            HReg res  = newVRegV(env);
4440            ARM64VecBinOp cmp = ARM64vecb_INVALID;
4441            switch (e->Iex.Unop.op) {
4442               case Iop_CmpNEZ64x2: cmp = ARM64vecb_CMEQ64x2; break;
4443               case Iop_CmpNEZ32x4: cmp = ARM64vecb_CMEQ32x4; break;
4444               case Iop_CmpNEZ16x8: cmp = ARM64vecb_CMEQ16x8; break;
4445               case Iop_CmpNEZ8x16: cmp = ARM64vecb_CMEQ8x16; break;
4446               default: vassert(0);
4447            }
4448            // This is pretty feeble.  Better: use CMP against zero
4449            // and avoid the extra instruction and extra register.
4450            addInstr(env, ARM64Instr_VImmQ(zero, 0x0000));
4451            addInstr(env, ARM64Instr_VBinV(cmp, res, arg, zero));
4452            addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
4453            return res;
4454         }
4455
4456         case Iop_Widen8Uto16x8:
4457         case Iop_Widen16Uto32x4:
4458         case Iop_Widen32Uto64x2:
4459         case Iop_Widen8Sto16x8:
4460         case Iop_Widen16Sto32x4:
4461         case Iop_Widen32Sto64x2: {
4462            HReg res = newVRegV(env);
4463            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
4464            ARM64VecUnaryOp wop = ARM64vecu_INVALID;
4465            switch (e->Iex.Unop.op) {
4466               case Iop_Widen8Uto16x8:  wop = ARM64vecu_VMOVL8U; break;
4467               case Iop_Widen16Uto32x4: wop = ARM64vecu_VMOVL16U; break;
4468               case Iop_Widen32Uto64x2: wop = ARM64vecu_VMOVL32U; break;
4469               case Iop_Widen8Sto16x8:  wop = ARM64vecu_VMOVL8S; break;
4470               case Iop_Widen16Sto32x4: wop = ARM64vecu_VMOVL16S; break;
4471               case Iop_Widen32Sto64x2: wop = ARM64vecu_VMOVL32S; break;
4472               default: vassert(0);
4473            }
4474            addInstr(env, ARM64Instr_VUnaryV(wop, res, arg));
4475            return res;
4476         }
4477//ZZ          case Iop_NotV128: {
4478//ZZ             DECLARE_PATTERN(p_veqz_8x16);
4479//ZZ             DECLARE_PATTERN(p_veqz_16x8);
4480//ZZ             DECLARE_PATTERN(p_veqz_32x4);
4481//ZZ             DECLARE_PATTERN(p_vcge_8sx16);
4482//ZZ             DECLARE_PATTERN(p_vcge_16sx8);
4483//ZZ             DECLARE_PATTERN(p_vcge_32sx4);
4484//ZZ             DECLARE_PATTERN(p_vcge_8ux16);
4485//ZZ             DECLARE_PATTERN(p_vcge_16ux8);
4486//ZZ             DECLARE_PATTERN(p_vcge_32ux4);
4487//ZZ             DEFINE_PATTERN(p_veqz_8x16,
4488//ZZ                   unop(Iop_NotV128, unop(Iop_CmpNEZ8x16, bind(0))));
4489//ZZ             DEFINE_PATTERN(p_veqz_16x8,
4490//ZZ                   unop(Iop_NotV128, unop(Iop_CmpNEZ16x8, bind(0))));
4491//ZZ             DEFINE_PATTERN(p_veqz_32x4,
4492//ZZ                   unop(Iop_NotV128, unop(Iop_CmpNEZ32x4, bind(0))));
4493//ZZ             DEFINE_PATTERN(p_vcge_8sx16,
4494//ZZ                   unop(Iop_NotV128, binop(Iop_CmpGT8Sx16, bind(1), bind(0))));
4495//ZZ             DEFINE_PATTERN(p_vcge_16sx8,
4496//ZZ                   unop(Iop_NotV128, binop(Iop_CmpGT16Sx8, bind(1), bind(0))));
4497//ZZ             DEFINE_PATTERN(p_vcge_32sx4,
4498//ZZ                   unop(Iop_NotV128, binop(Iop_CmpGT32Sx4, bind(1), bind(0))));
4499//ZZ             DEFINE_PATTERN(p_vcge_8ux16,
4500//ZZ                   unop(Iop_NotV128, binop(Iop_CmpGT8Ux16, bind(1), bind(0))));
4501//ZZ             DEFINE_PATTERN(p_vcge_16ux8,
4502//ZZ                   unop(Iop_NotV128, binop(Iop_CmpGT16Ux8, bind(1), bind(0))));
4503//ZZ             DEFINE_PATTERN(p_vcge_32ux4,
4504//ZZ                   unop(Iop_NotV128, binop(Iop_CmpGT32Ux4, bind(1), bind(0))));
4505//ZZ             if (matchIRExpr(&mi, p_veqz_8x16, e)) {
4506//ZZ                HReg res = newVRegV(env);
4507//ZZ                HReg arg = iselNeonExpr(env, mi.bindee[0]);
4508//ZZ                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 0, True));
4509//ZZ                return res;
4510//ZZ             } else if (matchIRExpr(&mi, p_veqz_16x8, e)) {
4511//ZZ                HReg res = newVRegV(env);
4512//ZZ                HReg arg = iselNeonExpr(env, mi.bindee[0]);
4513//ZZ                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 1, True));
4514//ZZ                return res;
4515//ZZ             } else if (matchIRExpr(&mi, p_veqz_32x4, e)) {
4516//ZZ                HReg res = newVRegV(env);
4517//ZZ                HReg arg = iselNeonExpr(env, mi.bindee[0]);
4518//ZZ                addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, res, arg, 2, True));
4519//ZZ                return res;
4520//ZZ             } else if (matchIRExpr(&mi, p_vcge_8sx16, e)) {
4521//ZZ                HReg res = newVRegV(env);
4522//ZZ                HReg argL = iselNeonExpr(env, mi.bindee[0]);
4523//ZZ                HReg argR = iselNeonExpr(env, mi.bindee[1]);
4524//ZZ                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4525//ZZ                                               res, argL, argR, 0, True));
4526//ZZ                return res;
4527//ZZ             } else if (matchIRExpr(&mi, p_vcge_16sx8, e)) {
4528//ZZ                HReg res = newVRegV(env);
4529//ZZ                HReg argL = iselNeonExpr(env, mi.bindee[0]);
4530//ZZ                HReg argR = iselNeonExpr(env, mi.bindee[1]);
4531//ZZ                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4532//ZZ                                               res, argL, argR, 1, True));
4533//ZZ                return res;
4534//ZZ             } else if (matchIRExpr(&mi, p_vcge_32sx4, e)) {
4535//ZZ                HReg res = newVRegV(env);
4536//ZZ                HReg argL = iselNeonExpr(env, mi.bindee[0]);
4537//ZZ                HReg argR = iselNeonExpr(env, mi.bindee[1]);
4538//ZZ                addInstr(env, ARMInstr_NBinary(ARMneon_VCGES,
4539//ZZ                                               res, argL, argR, 2, True));
4540//ZZ                return res;
4541//ZZ             } else if (matchIRExpr(&mi, p_vcge_8ux16, e)) {
4542//ZZ                HReg res = newVRegV(env);
4543//ZZ                HReg argL = iselNeonExpr(env, mi.bindee[0]);
4544//ZZ                HReg argR = iselNeonExpr(env, mi.bindee[1]);
4545//ZZ                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4546//ZZ                                               res, argL, argR, 0, True));
4547//ZZ                return res;
4548//ZZ             } else if (matchIRExpr(&mi, p_vcge_16ux8, e)) {
4549//ZZ                HReg res = newVRegV(env);
4550//ZZ                HReg argL = iselNeonExpr(env, mi.bindee[0]);
4551//ZZ                HReg argR = iselNeonExpr(env, mi.bindee[1]);
4552//ZZ                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4553//ZZ                                               res, argL, argR, 1, True));
4554//ZZ                return res;
4555//ZZ             } else if (matchIRExpr(&mi, p_vcge_32ux4, e)) {
4556//ZZ                HReg res = newVRegV(env);
4557//ZZ                HReg argL = iselNeonExpr(env, mi.bindee[0]);
4558//ZZ                HReg argR = iselNeonExpr(env, mi.bindee[1]);
4559//ZZ                addInstr(env, ARMInstr_NBinary(ARMneon_VCGEU,
4560//ZZ                                               res, argL, argR, 2, True));
4561//ZZ                return res;
4562//ZZ             } else {
4563//ZZ                HReg res = newVRegV(env);
4564//ZZ                HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4565//ZZ                addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, arg, 4, True));
4566//ZZ                return res;
4567//ZZ             }
4568//ZZ          }
4569//ZZ          case Iop_Dup8x16:
4570//ZZ          case Iop_Dup16x8:
4571//ZZ          case Iop_Dup32x4: {
4572//ZZ             HReg res, arg;
4573//ZZ             UInt size;
4574//ZZ             DECLARE_PATTERN(p_vdup_8x16);
4575//ZZ             DECLARE_PATTERN(p_vdup_16x8);
4576//ZZ             DECLARE_PATTERN(p_vdup_32x4);
4577//ZZ             DEFINE_PATTERN(p_vdup_8x16,
4578//ZZ                   unop(Iop_Dup8x16, binop(Iop_GetElem8x8, bind(0), bind(1))));
4579//ZZ             DEFINE_PATTERN(p_vdup_16x8,
4580//ZZ                   unop(Iop_Dup16x8, binop(Iop_GetElem16x4, bind(0), bind(1))));
4581//ZZ             DEFINE_PATTERN(p_vdup_32x4,
4582//ZZ                   unop(Iop_Dup32x4, binop(Iop_GetElem32x2, bind(0), bind(1))));
4583//ZZ             if (matchIRExpr(&mi, p_vdup_8x16, e)) {
4584//ZZ                UInt index;
4585//ZZ                UInt imm4;
4586//ZZ                if (mi.bindee[1]->tag == Iex_Const &&
4587//ZZ                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4588//ZZ                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4589//ZZ                   imm4 = (index << 1) + 1;
4590//ZZ                   if (index < 8) {
4591//ZZ                      res = newVRegV(env);
4592//ZZ                      arg = iselNeon64Expr(env, mi.bindee[0]);
4593//ZZ                      addInstr(env, ARMInstr_NUnaryS(
4594//ZZ                                       ARMneon_VDUP,
4595//ZZ                                       mkARMNRS(ARMNRS_Reg, res, 0),
4596//ZZ                                       mkARMNRS(ARMNRS_Scalar, arg, index),
4597//ZZ                                       imm4, True
4598//ZZ                              ));
4599//ZZ                      return res;
4600//ZZ                   }
4601//ZZ                }
4602//ZZ             } else if (matchIRExpr(&mi, p_vdup_16x8, e)) {
4603//ZZ                UInt index;
4604//ZZ                UInt imm4;
4605//ZZ                if (mi.bindee[1]->tag == Iex_Const &&
4606//ZZ                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4607//ZZ                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4608//ZZ                   imm4 = (index << 2) + 2;
4609//ZZ                   if (index < 4) {
4610//ZZ                      res = newVRegV(env);
4611//ZZ                      arg = iselNeon64Expr(env, mi.bindee[0]);
4612//ZZ                      addInstr(env, ARMInstr_NUnaryS(
4613//ZZ                                       ARMneon_VDUP,
4614//ZZ                                       mkARMNRS(ARMNRS_Reg, res, 0),
4615//ZZ                                       mkARMNRS(ARMNRS_Scalar, arg, index),
4616//ZZ                                       imm4, True
4617//ZZ                              ));
4618//ZZ                      return res;
4619//ZZ                   }
4620//ZZ                }
4621//ZZ             } else if (matchIRExpr(&mi, p_vdup_32x4, e)) {
4622//ZZ                UInt index;
4623//ZZ                UInt imm4;
4624//ZZ                if (mi.bindee[1]->tag == Iex_Const &&
4625//ZZ                   typeOfIRExpr(env->type_env, mi.bindee[1]) == Ity_I8) {
4626//ZZ                   index = mi.bindee[1]->Iex.Const.con->Ico.U8;
4627//ZZ                   imm4 = (index << 3) + 4;
4628//ZZ                   if (index < 2) {
4629//ZZ                      res = newVRegV(env);
4630//ZZ                      arg = iselNeon64Expr(env, mi.bindee[0]);
4631//ZZ                      addInstr(env, ARMInstr_NUnaryS(
4632//ZZ                                       ARMneon_VDUP,
4633//ZZ                                       mkARMNRS(ARMNRS_Reg, res, 0),
4634//ZZ                                       mkARMNRS(ARMNRS_Scalar, arg, index),
4635//ZZ                                       imm4, True
4636//ZZ                              ));
4637//ZZ                      return res;
4638//ZZ                   }
4639//ZZ                }
4640//ZZ             }
4641//ZZ             arg = iselIntExpr_R(env, e->Iex.Unop.arg);
4642//ZZ             res = newVRegV(env);
4643//ZZ             switch (e->Iex.Unop.op) {
4644//ZZ                case Iop_Dup8x16: size = 0; break;
4645//ZZ                case Iop_Dup16x8: size = 1; break;
4646//ZZ                case Iop_Dup32x4: size = 2; break;
4647//ZZ                default: vassert(0);
4648//ZZ             }
4649//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_DUP, res, arg, size, True));
4650//ZZ             return res;
4651//ZZ          }
4652//ZZ          case Iop_Abs8x16:
4653//ZZ          case Iop_Abs16x8:
4654//ZZ          case Iop_Abs32x4: {
4655//ZZ             HReg res = newVRegV(env);
4656//ZZ             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4657//ZZ             UInt size = 0;
4658//ZZ             switch(e->Iex.Binop.op) {
4659//ZZ                case Iop_Abs8x16: size = 0; break;
4660//ZZ                case Iop_Abs16x8: size = 1; break;
4661//ZZ                case Iop_Abs32x4: size = 2; break;
4662//ZZ                default: vassert(0);
4663//ZZ             }
4664//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_ABS, res, arg, size, True));
4665//ZZ             return res;
4666//ZZ          }
4667//ZZ          case Iop_Reverse64_8x16:
4668//ZZ          case Iop_Reverse64_16x8:
4669//ZZ          case Iop_Reverse64_32x4: {
4670//ZZ             HReg res = newVRegV(env);
4671//ZZ             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4672//ZZ             UInt size = 0;
4673//ZZ             switch(e->Iex.Binop.op) {
4674//ZZ                case Iop_Reverse64_8x16: size = 0; break;
4675//ZZ                case Iop_Reverse64_16x8: size = 1; break;
4676//ZZ                case Iop_Reverse64_32x4: size = 2; break;
4677//ZZ                default: vassert(0);
4678//ZZ             }
4679//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_REV64,
4680//ZZ                                           res, arg, size, True));
4681//ZZ             return res;
4682//ZZ          }
4683//ZZ          case Iop_Reverse32_8x16:
4684//ZZ          case Iop_Reverse32_16x8: {
4685//ZZ             HReg res = newVRegV(env);
4686//ZZ             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4687//ZZ             UInt size = 0;
4688//ZZ             switch(e->Iex.Binop.op) {
4689//ZZ                case Iop_Reverse32_8x16: size = 0; break;
4690//ZZ                case Iop_Reverse32_16x8: size = 1; break;
4691//ZZ                default: vassert(0);
4692//ZZ             }
4693//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_REV32,
4694//ZZ                                           res, arg, size, True));
4695//ZZ             return res;
4696//ZZ          }
4697//ZZ          case Iop_Reverse16_8x16: {
4698//ZZ             HReg res = newVRegV(env);
4699//ZZ             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4700//ZZ             UInt size = 0;
4701//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_REV16,
4702//ZZ                                           res, arg, size, True));
4703//ZZ             return res;
4704//ZZ          }
4705//ZZ          case Iop_CmpNEZ64x2: {
4706//ZZ             HReg x_lsh = newVRegV(env);
4707//ZZ             HReg x_rsh = newVRegV(env);
4708//ZZ             HReg lsh_amt = newVRegV(env);
4709//ZZ             HReg rsh_amt = newVRegV(env);
4710//ZZ             HReg zero = newVRegV(env);
4711//ZZ             HReg tmp = newVRegV(env);
4712//ZZ             HReg tmp2 = newVRegV(env);
4713//ZZ             HReg res = newVRegV(env);
4714//ZZ             HReg x = newVRegV(env);
4715//ZZ             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4716//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_EQZ, tmp2, arg, 2, True));
4717//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, x, tmp2, 4, True));
4718//ZZ             addInstr(env, ARMInstr_NeonImm(lsh_amt, ARMNImm_TI(0, 32)));
4719//ZZ             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0, 0)));
4720//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
4721//ZZ                                            rsh_amt, zero, lsh_amt, 2, True));
4722//ZZ             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4723//ZZ                                           x_lsh, x, lsh_amt, 3, True));
4724//ZZ             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
4725//ZZ                                           x_rsh, x, rsh_amt, 3, True));
4726//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4727//ZZ                                            tmp, x_lsh, x_rsh, 0, True));
4728//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4729//ZZ                                            res, tmp, x, 0, True));
4730//ZZ             return res;
4731//ZZ          }
4732//ZZ          case Iop_Widen8Sto16x8:
4733//ZZ          case Iop_Widen16Sto32x4:
4734//ZZ          case Iop_Widen32Sto64x2: {
4735//ZZ             HReg res = newVRegV(env);
4736//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4737//ZZ             UInt size;
4738//ZZ             switch (e->Iex.Unop.op) {
4739//ZZ                case Iop_Widen8Sto16x8:  size = 0; break;
4740//ZZ                case Iop_Widen16Sto32x4: size = 1; break;
4741//ZZ                case Iop_Widen32Sto64x2: size = 2; break;
4742//ZZ                default: vassert(0);
4743//ZZ             }
4744//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
4745//ZZ                                           res, arg, size, True));
4746//ZZ             return res;
4747//ZZ          }
4748//ZZ          case Iop_PwAddL8Sx16:
4749//ZZ          case Iop_PwAddL16Sx8:
4750//ZZ          case Iop_PwAddL32Sx4: {
4751//ZZ             HReg res = newVRegV(env);
4752//ZZ             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4753//ZZ             UInt size = 0;
4754//ZZ             switch(e->Iex.Binop.op) {
4755//ZZ                case Iop_PwAddL8Sx16: size = 0; break;
4756//ZZ                case Iop_PwAddL16Sx8: size = 1; break;
4757//ZZ                case Iop_PwAddL32Sx4: size = 2; break;
4758//ZZ                default: vassert(0);
4759//ZZ             }
4760//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLS,
4761//ZZ                                           res, arg, size, True));
4762//ZZ             return res;
4763//ZZ          }
4764//ZZ          case Iop_PwAddL8Ux16:
4765//ZZ          case Iop_PwAddL16Ux8:
4766//ZZ          case Iop_PwAddL32Ux4: {
4767//ZZ             HReg res = newVRegV(env);
4768//ZZ             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4769//ZZ             UInt size = 0;
4770//ZZ             switch(e->Iex.Binop.op) {
4771//ZZ                case Iop_PwAddL8Ux16: size = 0; break;
4772//ZZ                case Iop_PwAddL16Ux8: size = 1; break;
4773//ZZ                case Iop_PwAddL32Ux4: size = 2; break;
4774//ZZ                default: vassert(0);
4775//ZZ             }
4776//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_PADDLU,
4777//ZZ                                           res, arg, size, True));
4778//ZZ             return res;
4779//ZZ          }
4780//ZZ          case Iop_Cnt8x16: {
4781//ZZ             HReg res = newVRegV(env);
4782//ZZ             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4783//ZZ             UInt size = 0;
4784//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_CNT, res, arg, size, True));
4785//ZZ             return res;
4786//ZZ          }
4787//ZZ          case Iop_Clz8Sx16:
4788//ZZ          case Iop_Clz16Sx8:
4789//ZZ          case Iop_Clz32Sx4: {
4790//ZZ             HReg res = newVRegV(env);
4791//ZZ             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4792//ZZ             UInt size = 0;
4793//ZZ             switch(e->Iex.Binop.op) {
4794//ZZ                case Iop_Clz8Sx16: size = 0; break;
4795//ZZ                case Iop_Clz16Sx8: size = 1; break;
4796//ZZ                case Iop_Clz32Sx4: size = 2; break;
4797//ZZ                default: vassert(0);
4798//ZZ             }
4799//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_CLZ, res, arg, size, True));
4800//ZZ             return res;
4801//ZZ          }
4802//ZZ          case Iop_Cls8Sx16:
4803//ZZ          case Iop_Cls16Sx8:
4804//ZZ          case Iop_Cls32Sx4: {
4805//ZZ             HReg res = newVRegV(env);
4806//ZZ             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4807//ZZ             UInt size = 0;
4808//ZZ             switch(e->Iex.Binop.op) {
4809//ZZ                case Iop_Cls8Sx16: size = 0; break;
4810//ZZ                case Iop_Cls16Sx8: size = 1; break;
4811//ZZ                case Iop_Cls32Sx4: size = 2; break;
4812//ZZ                default: vassert(0);
4813//ZZ             }
4814//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_CLS, res, arg, size, True));
4815//ZZ             return res;
4816//ZZ          }
4817//ZZ          case Iop_FtoI32Sx4_RZ: {
4818//ZZ             HReg res = newVRegV(env);
4819//ZZ             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4820//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoS,
4821//ZZ                                           res, arg, 2, True));
4822//ZZ             return res;
4823//ZZ          }
4824//ZZ          case Iop_FtoI32Ux4_RZ: {
4825//ZZ             HReg res = newVRegV(env);
4826//ZZ             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4827//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTFtoU,
4828//ZZ                                           res, arg, 2, True));
4829//ZZ             return res;
4830//ZZ          }
4831//ZZ          case Iop_I32StoFx4: {
4832//ZZ             HReg res = newVRegV(env);
4833//ZZ             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4834//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTStoF,
4835//ZZ                                           res, arg, 2, True));
4836//ZZ             return res;
4837//ZZ          }
4838//ZZ          case Iop_I32UtoFx4: {
4839//ZZ             HReg res = newVRegV(env);
4840//ZZ             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
4841//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTUtoF,
4842//ZZ                                           res, arg, 2, True));
4843//ZZ             return res;
4844//ZZ          }
4845//ZZ          case Iop_F16toF32x4: {
4846//ZZ             HReg res = newVRegV(env);
4847//ZZ             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
4848//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VCVTF16toF32,
4849//ZZ                                           res, arg, 2, True));
4850//ZZ             return res;
4851//ZZ          }
4852//ZZ          case Iop_Recip32Fx4: {
4853//ZZ             HReg res = newVRegV(env);
4854//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4855//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIPF,
4856//ZZ                                           res, argL, 0, True));
4857//ZZ             return res;
4858//ZZ          }
4859//ZZ          case Iop_Recip32x4: {
4860//ZZ             HReg res = newVRegV(env);
4861//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4862//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VRECIP,
4863//ZZ                                           res, argL, 0, True));
4864//ZZ             return res;
4865//ZZ          }
4866//ZZ          case Iop_Rsqrte32Fx4: {
4867//ZZ             HReg res = newVRegV(env);
4868//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4869//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTEFP,
4870//ZZ                                           res, argL, 0, True));
4871//ZZ             return res;
4872//ZZ          }
4873//ZZ          case Iop_Rsqrte32x4: {
4874//ZZ             HReg res = newVRegV(env);
4875//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Unop.arg);
4876//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VRSQRTE,
4877//ZZ                                           res, argL, 0, True));
4878//ZZ             return res;
4879//ZZ          }
4880         /* ... */
4881         default:
4882            break;
4883      } /* switch on the unop */
4884   } /* if (e->tag == Iex_Unop) */
4885
4886   if (e->tag == Iex_Binop) {
4887      switch (e->Iex.Binop.op) {
4888         case Iop_64HLtoV128: {
4889            HReg res  = newVRegV(env);
4890            HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
4891            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
4892            addInstr(env, ARM64Instr_VQfromXX(res, argL, argR));
4893            return res;
4894         }
4895//ZZ          case Iop_AndV128: {
4896//ZZ             HReg res = newVRegV(env);
4897//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4898//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4899//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VAND,
4900//ZZ                                            res, argL, argR, 4, True));
4901//ZZ             return res;
4902//ZZ          }
4903//ZZ          case Iop_OrV128: {
4904//ZZ             HReg res = newVRegV(env);
4905//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4906//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4907//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VORR,
4908//ZZ                                            res, argL, argR, 4, True));
4909//ZZ             return res;
4910//ZZ          }
4911//ZZ          case Iop_XorV128: {
4912//ZZ             HReg res = newVRegV(env);
4913//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
4914//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
4915//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VXOR,
4916//ZZ                                            res, argL, argR, 4, True));
4917//ZZ             return res;
4918//ZZ          }
4919//ZZ          case Iop_Add8x16:
4920//ZZ          case Iop_Add16x8:
4921//ZZ          case Iop_Add32x4:
4922         case Iop_AndV128:
4923         case Iop_OrV128:
4924         case Iop_XorV128:
4925         case Iop_Max32Ux4:
4926         case Iop_Max16Ux8:
4927         case Iop_Max8Ux16:
4928         case Iop_Min32Ux4:
4929         case Iop_Min16Ux8:
4930         case Iop_Min8Ux16:
4931         case Iop_Max32Sx4:
4932         case Iop_Max16Sx8:
4933         case Iop_Max8Sx16:
4934         case Iop_Min32Sx4:
4935         case Iop_Min16Sx8:
4936         case Iop_Min8Sx16:
4937         case Iop_Add64x2:
4938         case Iop_Add32x4:
4939         case Iop_Add16x8:
4940         case Iop_Add8x16:
4941         case Iop_Sub64x2:
4942         case Iop_Sub32x4:
4943         case Iop_Sub16x8:
4944         case Iop_Sub8x16:
4945         case Iop_Mul32x4:
4946         case Iop_Mul16x8:
4947         case Iop_Mul8x16:
4948         case Iop_CmpEQ64x2:
4949         case Iop_CmpEQ32x4:
4950         case Iop_CmpEQ16x8:
4951         case Iop_CmpEQ8x16:
4952         case Iop_CmpGT64Ux2:
4953         case Iop_CmpGT32Ux4:
4954         case Iop_CmpGT16Ux8:
4955         case Iop_CmpGT8Ux16:
4956         case Iop_CmpGT64Sx2:
4957         case Iop_CmpGT32Sx4:
4958         case Iop_CmpGT16Sx8:
4959         case Iop_CmpGT8Sx16:
4960         case Iop_CmpEQ64Fx2:
4961         case Iop_CmpEQ32Fx4:
4962         case Iop_CmpLE64Fx2:
4963         case Iop_CmpLE32Fx4:
4964         case Iop_CmpLT64Fx2:
4965         case Iop_CmpLT32Fx4:
4966         case Iop_Perm8x16:
4967         {
4968            HReg res  = newVRegV(env);
4969            HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
4970            HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
4971            Bool sw   = False;
4972            ARM64VecBinOp op = ARM64vecb_INVALID;
4973            switch (e->Iex.Binop.op) {
4974               case Iop_AndV128:    op = ARM64vecb_AND; break;
4975               case Iop_OrV128:     op = ARM64vecb_ORR; break;
4976               case Iop_XorV128:    op = ARM64vecb_XOR; break;
4977               case Iop_Max32Ux4:   op = ARM64vecb_UMAX32x4; break;
4978               case Iop_Max16Ux8:   op = ARM64vecb_UMAX16x8; break;
4979               case Iop_Max8Ux16:   op = ARM64vecb_UMAX8x16; break;
4980               case Iop_Min32Ux4:   op = ARM64vecb_UMIN32x4; break;
4981               case Iop_Min16Ux8:   op = ARM64vecb_UMIN16x8; break;
4982               case Iop_Min8Ux16:   op = ARM64vecb_UMIN8x16; break;
4983               case Iop_Max32Sx4:   op = ARM64vecb_SMAX32x4; break;
4984               case Iop_Max16Sx8:   op = ARM64vecb_SMAX16x8; break;
4985               case Iop_Max8Sx16:   op = ARM64vecb_SMAX8x16; break;
4986               case Iop_Min32Sx4:   op = ARM64vecb_SMIN32x4; break;
4987               case Iop_Min16Sx8:   op = ARM64vecb_SMIN16x8; break;
4988               case Iop_Min8Sx16:   op = ARM64vecb_SMIN8x16; break;
4989               case Iop_Add64x2:    op = ARM64vecb_ADD64x2; break;
4990               case Iop_Add32x4:    op = ARM64vecb_ADD32x4; break;
4991               case Iop_Add16x8:    op = ARM64vecb_ADD16x8; break;
4992               case Iop_Add8x16:    op = ARM64vecb_ADD8x16; break;
4993               case Iop_Sub64x2:    op = ARM64vecb_SUB64x2; break;
4994               case Iop_Sub32x4:    op = ARM64vecb_SUB32x4; break;
4995               case Iop_Sub16x8:    op = ARM64vecb_SUB16x8; break;
4996               case Iop_Sub8x16:    op = ARM64vecb_SUB8x16; break;
4997               case Iop_Mul32x4:    op = ARM64vecb_MUL32x4; break;
4998               case Iop_Mul16x8:    op = ARM64vecb_MUL16x8; break;
4999               case Iop_Mul8x16:    op = ARM64vecb_MUL8x16; break;
5000               case Iop_CmpEQ64x2:  op = ARM64vecb_CMEQ64x2; break;
5001               case Iop_CmpEQ32x4:  op = ARM64vecb_CMEQ32x4; break;
5002               case Iop_CmpEQ16x8:  op = ARM64vecb_CMEQ16x8; break;
5003               case Iop_CmpEQ8x16:  op = ARM64vecb_CMEQ8x16; break;
5004               case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2; break;
5005               case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4; break;
5006               case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8; break;
5007               case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break;
5008               case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2; break;
5009               case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4; break;
5010               case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8; break;
5011               case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16; break;
5012               case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break;
5013               case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break;
5014               case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break;
5015               case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break;
5016               case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break;
5017               case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break;
5018               case Iop_Perm8x16:   op = ARM64vecb_TBL1; break;
5019               default: vassert(0);
5020            }
5021            if (sw) {
5022               addInstr(env, ARM64Instr_VBinV(op, res, argR, argL));
5023            } else {
5024               addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
5025            }
5026            return res;
5027         }
5028//ZZ          case Iop_Add32Fx4: {
5029//ZZ             HReg res = newVRegV(env);
5030//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5031//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5032//ZZ             UInt size = 0;
5033//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VADDFP,
5034//ZZ                                            res, argL, argR, size, True));
5035//ZZ             return res;
5036//ZZ          }
5037//ZZ          case Iop_Recps32Fx4: {
5038//ZZ             HReg res = newVRegV(env);
5039//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5040//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5041//ZZ             UInt size = 0;
5042//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VRECPS,
5043//ZZ                                            res, argL, argR, size, True));
5044//ZZ             return res;
5045//ZZ          }
5046//ZZ          case Iop_Rsqrts32Fx4: {
5047//ZZ             HReg res = newVRegV(env);
5048//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5049//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5050//ZZ             UInt size = 0;
5051//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VRSQRTS,
5052//ZZ                                            res, argL, argR, size, True));
5053//ZZ             return res;
5054//ZZ          }
5055//ZZ
5056//ZZ          // These 6 verified 18 Apr 2013
5057//ZZ          case Iop_InterleaveEvenLanes8x16:
5058//ZZ          case Iop_InterleaveOddLanes8x16:
5059//ZZ          case Iop_InterleaveEvenLanes16x8:
5060//ZZ          case Iop_InterleaveOddLanes16x8:
5061//ZZ          case Iop_InterleaveEvenLanes32x4:
5062//ZZ          case Iop_InterleaveOddLanes32x4: {
5063//ZZ             HReg rD   = newVRegV(env);
5064//ZZ             HReg rM   = newVRegV(env);
5065//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5066//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5067//ZZ             UInt size;
5068//ZZ             Bool resRd;  // is the result in rD or rM ?
5069//ZZ             switch (e->Iex.Binop.op) {
5070//ZZ                case Iop_InterleaveOddLanes8x16:  resRd = False; size = 0; break;
5071//ZZ                case Iop_InterleaveEvenLanes8x16: resRd = True;  size = 0; break;
5072//ZZ                case Iop_InterleaveOddLanes16x8:  resRd = False; size = 1; break;
5073//ZZ                case Iop_InterleaveEvenLanes16x8: resRd = True;  size = 1; break;
5074//ZZ                case Iop_InterleaveOddLanes32x4:  resRd = False; size = 2; break;
5075//ZZ                case Iop_InterleaveEvenLanes32x4: resRd = True;  size = 2; break;
5076//ZZ                default: vassert(0);
5077//ZZ             }
5078//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
5079//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
5080//ZZ             addInstr(env, ARMInstr_NDual(ARMneon_TRN, rD, rM, size, True));
5081//ZZ             return resRd ? rD : rM;
5082//ZZ          }
5083//ZZ
5084//ZZ          // These 6 verified 18 Apr 2013
5085//ZZ          case Iop_InterleaveHI8x16:
5086//ZZ          case Iop_InterleaveLO8x16:
5087//ZZ          case Iop_InterleaveHI16x8:
5088//ZZ          case Iop_InterleaveLO16x8:
5089//ZZ          case Iop_InterleaveHI32x4:
5090//ZZ          case Iop_InterleaveLO32x4: {
5091//ZZ             HReg rD   = newVRegV(env);
5092//ZZ             HReg rM   = newVRegV(env);
5093//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5094//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5095//ZZ             UInt size;
5096//ZZ             Bool resRd;  // is the result in rD or rM ?
5097//ZZ             switch (e->Iex.Binop.op) {
5098//ZZ                case Iop_InterleaveHI8x16: resRd = False; size = 0; break;
5099//ZZ                case Iop_InterleaveLO8x16: resRd = True;  size = 0; break;
5100//ZZ                case Iop_InterleaveHI16x8: resRd = False; size = 1; break;
5101//ZZ                case Iop_InterleaveLO16x8: resRd = True;  size = 1; break;
5102//ZZ                case Iop_InterleaveHI32x4: resRd = False; size = 2; break;
5103//ZZ                case Iop_InterleaveLO32x4: resRd = True;  size = 2; break;
5104//ZZ                default: vassert(0);
5105//ZZ             }
5106//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
5107//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
5108//ZZ             addInstr(env, ARMInstr_NDual(ARMneon_ZIP, rD, rM, size, True));
5109//ZZ             return resRd ? rD : rM;
5110//ZZ          }
5111//ZZ
5112//ZZ          // These 6 verified 18 Apr 2013
5113//ZZ          case Iop_CatOddLanes8x16:
5114//ZZ          case Iop_CatEvenLanes8x16:
5115//ZZ          case Iop_CatOddLanes16x8:
5116//ZZ          case Iop_CatEvenLanes16x8:
5117//ZZ          case Iop_CatOddLanes32x4:
5118//ZZ          case Iop_CatEvenLanes32x4: {
5119//ZZ             HReg rD   = newVRegV(env);
5120//ZZ             HReg rM   = newVRegV(env);
5121//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5122//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5123//ZZ             UInt size;
5124//ZZ             Bool resRd;  // is the result in rD or rM ?
5125//ZZ             switch (e->Iex.Binop.op) {
5126//ZZ                case Iop_CatOddLanes8x16:  resRd = False; size = 0; break;
5127//ZZ                case Iop_CatEvenLanes8x16: resRd = True;  size = 0; break;
5128//ZZ                case Iop_CatOddLanes16x8:  resRd = False; size = 1; break;
5129//ZZ                case Iop_CatEvenLanes16x8: resRd = True;  size = 1; break;
5130//ZZ                case Iop_CatOddLanes32x4:  resRd = False; size = 2; break;
5131//ZZ                case Iop_CatEvenLanes32x4: resRd = True;  size = 2; break;
5132//ZZ                default: vassert(0);
5133//ZZ             }
5134//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rM, argL, 4, True));
5135//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_COPY, rD, argR, 4, True));
5136//ZZ             addInstr(env, ARMInstr_NDual(ARMneon_UZP, rD, rM, size, True));
5137//ZZ             return resRd ? rD : rM;
5138//ZZ          }
5139//ZZ
5140//ZZ          case Iop_QAdd8Ux16:
5141//ZZ          case Iop_QAdd16Ux8:
5142//ZZ          case Iop_QAdd32Ux4:
5143//ZZ          case Iop_QAdd64Ux2: {
5144//ZZ             HReg res = newVRegV(env);
5145//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5146//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5147//ZZ             UInt size;
5148//ZZ             switch (e->Iex.Binop.op) {
5149//ZZ                case Iop_QAdd8Ux16: size = 0; break;
5150//ZZ                case Iop_QAdd16Ux8: size = 1; break;
5151//ZZ                case Iop_QAdd32Ux4: size = 2; break;
5152//ZZ                case Iop_QAdd64Ux2: size = 3; break;
5153//ZZ                default:
5154//ZZ                   ppIROp(e->Iex.Binop.op);
5155//ZZ                   vpanic("Illegal element size in VQADDU");
5156//ZZ             }
5157//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDU,
5158//ZZ                                            res, argL, argR, size, True));
5159//ZZ             return res;
5160//ZZ          }
5161//ZZ          case Iop_QAdd8Sx16:
5162//ZZ          case Iop_QAdd16Sx8:
5163//ZZ          case Iop_QAdd32Sx4:
5164//ZZ          case Iop_QAdd64Sx2: {
5165//ZZ             HReg res = newVRegV(env);
5166//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5167//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5168//ZZ             UInt size;
5169//ZZ             switch (e->Iex.Binop.op) {
5170//ZZ                case Iop_QAdd8Sx16: size = 0; break;
5171//ZZ                case Iop_QAdd16Sx8: size = 1; break;
5172//ZZ                case Iop_QAdd32Sx4: size = 2; break;
5173//ZZ                case Iop_QAdd64Sx2: size = 3; break;
5174//ZZ                default:
5175//ZZ                   ppIROp(e->Iex.Binop.op);
5176//ZZ                   vpanic("Illegal element size in VQADDS");
5177//ZZ             }
5178//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VQADDS,
5179//ZZ                                            res, argL, argR, size, True));
5180//ZZ             return res;
5181//ZZ          }
5182//ZZ          case Iop_Sub8x16:
5183//ZZ          case Iop_Sub16x8:
5184//ZZ          case Iop_Sub32x4:
5185//ZZ          case Iop_Sub64x2: {
5186//ZZ             HReg res = newVRegV(env);
5187//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5188//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5189//ZZ             UInt size;
5190//ZZ             switch (e->Iex.Binop.op) {
5191//ZZ                case Iop_Sub8x16: size = 0; break;
5192//ZZ                case Iop_Sub16x8: size = 1; break;
5193//ZZ                case Iop_Sub32x4: size = 2; break;
5194//ZZ                case Iop_Sub64x2: size = 3; break;
5195//ZZ                default:
5196//ZZ                   ppIROp(e->Iex.Binop.op);
5197//ZZ                   vpanic("Illegal element size in VSUB");
5198//ZZ             }
5199//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
5200//ZZ                                            res, argL, argR, size, True));
5201//ZZ             return res;
5202//ZZ          }
5203//ZZ          case Iop_Sub32Fx4: {
5204//ZZ             HReg res = newVRegV(env);
5205//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5206//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5207//ZZ             UInt size = 0;
5208//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VSUBFP,
5209//ZZ                                            res, argL, argR, size, True));
5210//ZZ             return res;
5211//ZZ          }
5212//ZZ          case Iop_QSub8Ux16:
5213//ZZ          case Iop_QSub16Ux8:
5214//ZZ          case Iop_QSub32Ux4:
5215//ZZ          case Iop_QSub64Ux2: {
5216//ZZ             HReg res = newVRegV(env);
5217//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5218//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5219//ZZ             UInt size;
5220//ZZ             switch (e->Iex.Binop.op) {
5221//ZZ                case Iop_QSub8Ux16: size = 0; break;
5222//ZZ                case Iop_QSub16Ux8: size = 1; break;
5223//ZZ                case Iop_QSub32Ux4: size = 2; break;
5224//ZZ                case Iop_QSub64Ux2: size = 3; break;
5225//ZZ                default:
5226//ZZ                   ppIROp(e->Iex.Binop.op);
5227//ZZ                   vpanic("Illegal element size in VQSUBU");
5228//ZZ             }
5229//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBU,
5230//ZZ                                            res, argL, argR, size, True));
5231//ZZ             return res;
5232//ZZ          }
5233//ZZ          case Iop_QSub8Sx16:
5234//ZZ          case Iop_QSub16Sx8:
5235//ZZ          case Iop_QSub32Sx4:
5236//ZZ          case Iop_QSub64Sx2: {
5237//ZZ             HReg res = newVRegV(env);
5238//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5239//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5240//ZZ             UInt size;
5241//ZZ             switch (e->Iex.Binop.op) {
5242//ZZ                case Iop_QSub8Sx16: size = 0; break;
5243//ZZ                case Iop_QSub16Sx8: size = 1; break;
5244//ZZ                case Iop_QSub32Sx4: size = 2; break;
5245//ZZ                case Iop_QSub64Sx2: size = 3; break;
5246//ZZ                default:
5247//ZZ                   ppIROp(e->Iex.Binop.op);
5248//ZZ                   vpanic("Illegal element size in VQSUBS");
5249//ZZ             }
5250//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VQSUBS,
5251//ZZ                                            res, argL, argR, size, True));
5252//ZZ             return res;
5253//ZZ          }
5254//ZZ          case Iop_Max8Ux16:
5255//ZZ          case Iop_Max16Ux8:
5256//ZZ          case Iop_Max32Ux4: {
5257//ZZ             HReg res = newVRegV(env);
5258//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5259//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5260//ZZ             UInt size;
5261//ZZ             switch (e->Iex.Binop.op) {
5262//ZZ                case Iop_Max8Ux16: size = 0; break;
5263//ZZ                case Iop_Max16Ux8: size = 1; break;
5264//ZZ                case Iop_Max32Ux4: size = 2; break;
5265//ZZ                default: vpanic("Illegal element size in VMAXU");
5266//ZZ             }
5267//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXU,
5268//ZZ                                            res, argL, argR, size, True));
5269//ZZ             return res;
5270//ZZ          }
5271//ZZ          case Iop_Max8Sx16:
5272//ZZ          case Iop_Max16Sx8:
5273//ZZ          case Iop_Max32Sx4: {
5274//ZZ             HReg res = newVRegV(env);
5275//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5276//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5277//ZZ             UInt size;
5278//ZZ             switch (e->Iex.Binop.op) {
5279//ZZ                case Iop_Max8Sx16: size = 0; break;
5280//ZZ                case Iop_Max16Sx8: size = 1; break;
5281//ZZ                case Iop_Max32Sx4: size = 2; break;
5282//ZZ                default: vpanic("Illegal element size in VMAXU");
5283//ZZ             }
5284//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXS,
5285//ZZ                                            res, argL, argR, size, True));
5286//ZZ             return res;
5287//ZZ          }
5288//ZZ          case Iop_Min8Ux16:
5289//ZZ          case Iop_Min16Ux8:
5290//ZZ          case Iop_Min32Ux4: {
5291//ZZ             HReg res = newVRegV(env);
5292//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5293//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5294//ZZ             UInt size;
5295//ZZ             switch (e->Iex.Binop.op) {
5296//ZZ                case Iop_Min8Ux16: size = 0; break;
5297//ZZ                case Iop_Min16Ux8: size = 1; break;
5298//ZZ                case Iop_Min32Ux4: size = 2; break;
5299//ZZ                default: vpanic("Illegal element size in VMAXU");
5300//ZZ             }
5301//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VMINU,
5302//ZZ                                            res, argL, argR, size, True));
5303//ZZ             return res;
5304//ZZ          }
5305//ZZ          case Iop_Min8Sx16:
5306//ZZ          case Iop_Min16Sx8:
5307//ZZ          case Iop_Min32Sx4: {
5308//ZZ             HReg res = newVRegV(env);
5309//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5310//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5311//ZZ             UInt size;
5312//ZZ             switch (e->Iex.Binop.op) {
5313//ZZ                case Iop_Min8Sx16: size = 0; break;
5314//ZZ                case Iop_Min16Sx8: size = 1; break;
5315//ZZ                case Iop_Min32Sx4: size = 2; break;
5316//ZZ                default: vpanic("Illegal element size in VMAXU");
5317//ZZ             }
5318//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VMINS,
5319//ZZ                                            res, argL, argR, size, True));
5320//ZZ             return res;
5321//ZZ          }
5322//ZZ          case Iop_Sar8x16:
5323//ZZ          case Iop_Sar16x8:
5324//ZZ          case Iop_Sar32x4:
5325//ZZ          case Iop_Sar64x2: {
5326//ZZ             HReg res = newVRegV(env);
5327//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5328//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5329//ZZ             HReg argR2 = newVRegV(env);
5330//ZZ             HReg zero = newVRegV(env);
5331//ZZ             UInt size;
5332//ZZ             switch (e->Iex.Binop.op) {
5333//ZZ                case Iop_Sar8x16: size = 0; break;
5334//ZZ                case Iop_Sar16x8: size = 1; break;
5335//ZZ                case Iop_Sar32x4: size = 2; break;
5336//ZZ                case Iop_Sar64x2: size = 3; break;
5337//ZZ                default: vassert(0);
5338//ZZ             }
5339//ZZ             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
5340//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
5341//ZZ                                            argR2, zero, argR, size, True));
5342//ZZ             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
5343//ZZ                                           res, argL, argR2, size, True));
5344//ZZ             return res;
5345//ZZ          }
5346//ZZ          case Iop_Sal8x16:
5347//ZZ          case Iop_Sal16x8:
5348//ZZ          case Iop_Sal32x4:
5349//ZZ          case Iop_Sal64x2: {
5350//ZZ             HReg res = newVRegV(env);
5351//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5352//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5353//ZZ             UInt size;
5354//ZZ             switch (e->Iex.Binop.op) {
5355//ZZ                case Iop_Sal8x16: size = 0; break;
5356//ZZ                case Iop_Sal16x8: size = 1; break;
5357//ZZ                case Iop_Sal32x4: size = 2; break;
5358//ZZ                case Iop_Sal64x2: size = 3; break;
5359//ZZ                default: vassert(0);
5360//ZZ             }
5361//ZZ             addInstr(env, ARMInstr_NShift(ARMneon_VSAL,
5362//ZZ                                           res, argL, argR, size, True));
5363//ZZ             return res;
5364//ZZ          }
5365//ZZ          case Iop_Shr8x16:
5366//ZZ          case Iop_Shr16x8:
5367//ZZ          case Iop_Shr32x4:
5368//ZZ          case Iop_Shr64x2: {
5369//ZZ             HReg res = newVRegV(env);
5370//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5371//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5372//ZZ             HReg argR2 = newVRegV(env);
5373//ZZ             HReg zero = newVRegV(env);
5374//ZZ             UInt size;
5375//ZZ             switch (e->Iex.Binop.op) {
5376//ZZ                case Iop_Shr8x16: size = 0; break;
5377//ZZ                case Iop_Shr16x8: size = 1; break;
5378//ZZ                case Iop_Shr32x4: size = 2; break;
5379//ZZ                case Iop_Shr64x2: size = 3; break;
5380//ZZ                default: vassert(0);
5381//ZZ             }
5382//ZZ             addInstr(env, ARMInstr_NeonImm(zero, ARMNImm_TI(0,0)));
5383//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VSUB,
5384//ZZ                                            argR2, zero, argR, size, True));
5385//ZZ             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5386//ZZ                                           res, argL, argR2, size, True));
5387//ZZ             return res;
5388//ZZ          }
5389//ZZ          case Iop_Shl8x16:
5390//ZZ          case Iop_Shl16x8:
5391//ZZ          case Iop_Shl32x4:
5392//ZZ          case Iop_Shl64x2: {
5393//ZZ             HReg res = newVRegV(env);
5394//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5395//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5396//ZZ             UInt size;
5397//ZZ             switch (e->Iex.Binop.op) {
5398//ZZ                case Iop_Shl8x16: size = 0; break;
5399//ZZ                case Iop_Shl16x8: size = 1; break;
5400//ZZ                case Iop_Shl32x4: size = 2; break;
5401//ZZ                case Iop_Shl64x2: size = 3; break;
5402//ZZ                default: vassert(0);
5403//ZZ             }
5404//ZZ             addInstr(env, ARMInstr_NShift(ARMneon_VSHL,
5405//ZZ                                           res, argL, argR, size, True));
5406//ZZ             return res;
5407//ZZ          }
5408//ZZ          case Iop_QShl8x16:
5409//ZZ          case Iop_QShl16x8:
5410//ZZ          case Iop_QShl32x4:
5411//ZZ          case Iop_QShl64x2: {
5412//ZZ             HReg res = newVRegV(env);
5413//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5414//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5415//ZZ             UInt size;
5416//ZZ             switch (e->Iex.Binop.op) {
5417//ZZ                case Iop_QShl8x16: size = 0; break;
5418//ZZ                case Iop_QShl16x8: size = 1; break;
5419//ZZ                case Iop_QShl32x4: size = 2; break;
5420//ZZ                case Iop_QShl64x2: size = 3; break;
5421//ZZ                default: vassert(0);
5422//ZZ             }
5423//ZZ             addInstr(env, ARMInstr_NShift(ARMneon_VQSHL,
5424//ZZ                                           res, argL, argR, size, True));
5425//ZZ             return res;
5426//ZZ          }
5427//ZZ          case Iop_QSal8x16:
5428//ZZ          case Iop_QSal16x8:
5429//ZZ          case Iop_QSal32x4:
5430//ZZ          case Iop_QSal64x2: {
5431//ZZ             HReg res = newVRegV(env);
5432//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5433//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5434//ZZ             UInt size;
5435//ZZ             switch (e->Iex.Binop.op) {
5436//ZZ                case Iop_QSal8x16: size = 0; break;
5437//ZZ                case Iop_QSal16x8: size = 1; break;
5438//ZZ                case Iop_QSal32x4: size = 2; break;
5439//ZZ                case Iop_QSal64x2: size = 3; break;
5440//ZZ                default: vassert(0);
5441//ZZ             }
5442//ZZ             addInstr(env, ARMInstr_NShift(ARMneon_VQSAL,
5443//ZZ                                           res, argL, argR, size, True));
5444//ZZ             return res;
5445//ZZ          }
5446//ZZ          case Iop_QShlN8x16:
5447//ZZ          case Iop_QShlN16x8:
5448//ZZ          case Iop_QShlN32x4:
5449//ZZ          case Iop_QShlN64x2: {
5450//ZZ             HReg res = newVRegV(env);
5451//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5452//ZZ             UInt size, imm;
5453//ZZ             if (e->Iex.Binop.arg2->tag != Iex_Const ||
5454//ZZ                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5455//ZZ                vpanic("ARM taget supports Iop_QShlNAxB with constant "
5456//ZZ                       "second argument only\n");
5457//ZZ             }
5458//ZZ             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5459//ZZ             switch (e->Iex.Binop.op) {
5460//ZZ                case Iop_QShlN8x16: size = 8 | imm; break;
5461//ZZ                case Iop_QShlN16x8: size = 16 | imm; break;
5462//ZZ                case Iop_QShlN32x4: size = 32 | imm; break;
5463//ZZ                case Iop_QShlN64x2: size = 64 | imm; break;
5464//ZZ                default: vassert(0);
5465//ZZ             }
5466//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUU,
5467//ZZ                                           res, argL, size, True));
5468//ZZ             return res;
5469//ZZ          }
5470//ZZ          case Iop_QShlN8Sx16:
5471//ZZ          case Iop_QShlN16Sx8:
5472//ZZ          case Iop_QShlN32Sx4:
5473//ZZ          case Iop_QShlN64Sx2: {
5474//ZZ             HReg res = newVRegV(env);
5475//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5476//ZZ             UInt size, imm;
5477//ZZ             if (e->Iex.Binop.arg2->tag != Iex_Const ||
5478//ZZ                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5479//ZZ                vpanic("ARM taget supports Iop_QShlNASxB with constant "
5480//ZZ                       "second argument only\n");
5481//ZZ             }
5482//ZZ             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5483//ZZ             switch (e->Iex.Binop.op) {
5484//ZZ                case Iop_QShlN8Sx16: size = 8 | imm; break;
5485//ZZ                case Iop_QShlN16Sx8: size = 16 | imm; break;
5486//ZZ                case Iop_QShlN32Sx4: size = 32 | imm; break;
5487//ZZ                case Iop_QShlN64Sx2: size = 64 | imm; break;
5488//ZZ                default: vassert(0);
5489//ZZ             }
5490//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNUS,
5491//ZZ                                           res, argL, size, True));
5492//ZZ             return res;
5493//ZZ          }
5494//ZZ          case Iop_QSalN8x16:
5495//ZZ          case Iop_QSalN16x8:
5496//ZZ          case Iop_QSalN32x4:
5497//ZZ          case Iop_QSalN64x2: {
5498//ZZ             HReg res = newVRegV(env);
5499//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5500//ZZ             UInt size, imm;
5501//ZZ             if (e->Iex.Binop.arg2->tag != Iex_Const ||
5502//ZZ                 typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5503//ZZ                vpanic("ARM taget supports Iop_QShlNAxB with constant "
5504//ZZ                       "second argument only\n");
5505//ZZ             }
5506//ZZ             imm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5507//ZZ             switch (e->Iex.Binop.op) {
5508//ZZ                case Iop_QSalN8x16: size = 8 | imm; break;
5509//ZZ                case Iop_QSalN16x8: size = 16 | imm; break;
5510//ZZ                case Iop_QSalN32x4: size = 32 | imm; break;
5511//ZZ                case Iop_QSalN64x2: size = 64 | imm; break;
5512//ZZ                default: vassert(0);
5513//ZZ             }
5514//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VQSHLNSS,
5515//ZZ                                           res, argL, size, True));
5516//ZZ             return res;
5517//ZZ          }
5518         case Iop_ShrN64x2:
5519         case Iop_ShrN32x4:
5520         case Iop_ShrN16x8:
5521         case Iop_ShrN8x16:
5522         case Iop_SarN64x2:
5523         case Iop_SarN32x4:
5524         case Iop_SarN16x8:
5525         case Iop_SarN8x16:
5526         case Iop_ShlN64x2:
5527         case Iop_ShlN32x4:
5528         case Iop_ShlN16x8:
5529         case Iop_ShlN8x16:
5530         {
5531            IRExpr* argL = e->Iex.Binop.arg1;
5532            IRExpr* argR = e->Iex.Binop.arg2;
5533            if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
5534               UInt amt   = argR->Iex.Const.con->Ico.U8;
5535               UInt limit = 0;
5536               ARM64VecShiftOp op = ARM64vecsh_INVALID;
5537               switch (e->Iex.Binop.op) {
5538                  case Iop_ShrN64x2:
5539                     op = ARM64vecsh_USHR64x2; limit = 63; break;
5540                  case Iop_ShrN32x4:
5541                     op = ARM64vecsh_USHR32x4; limit = 31; break;
5542                  case Iop_ShrN16x8:
5543                     op = ARM64vecsh_USHR16x8; limit = 15; break;
5544                  case Iop_ShrN8x16:
5545                     op = ARM64vecsh_USHR8x16; limit = 7;  break;
5546                  case Iop_SarN64x2:
5547                     op = ARM64vecsh_SSHR64x2; limit = 63; break;
5548                  case Iop_SarN32x4:
5549                     op = ARM64vecsh_SSHR32x4; limit = 31; break;
5550                  case Iop_SarN16x8:
5551                     op = ARM64vecsh_SSHR16x8; limit = 15; break;
5552                  case Iop_SarN8x16:
5553                     op = ARM64vecsh_SSHR8x16; limit = 7;  break;
5554                  case Iop_ShlN64x2:
5555                     op = ARM64vecsh_SHL64x2;  limit = 63; break;
5556                  case Iop_ShlN32x4:
5557                     op = ARM64vecsh_SHL32x4;  limit = 31; break;
5558                  case Iop_ShlN16x8:
5559                     op = ARM64vecsh_SHL16x8;  limit = 15; break;
5560                  case Iop_ShlN8x16:
5561                     op = ARM64vecsh_SHL8x16;  limit = 7;  break;
5562                  default:
5563                     vassert(0);
5564               }
5565               if (op != ARM64vecsh_INVALID && amt >= 0 && amt <= limit) {
5566                  HReg src = iselV128Expr(env, argL);
5567                  HReg dst = newVRegV(env);
5568                  if (amt > 0) {
5569                     addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
5570                  } else {
5571                     dst = src;
5572                  }
5573                  return dst;
5574               }
5575            }
5576            /* else fall out; this is unhandled */
5577            break;
5578         }
5579//ZZ          case Iop_CmpGT8Ux16:
5580//ZZ          case Iop_CmpGT16Ux8:
5581//ZZ          case Iop_CmpGT32Ux4: {
5582//ZZ             HReg res = newVRegV(env);
5583//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5584//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5585//ZZ             UInt size;
5586//ZZ             switch (e->Iex.Binop.op) {
5587//ZZ                case Iop_CmpGT8Ux16: size = 0; break;
5588//ZZ                case Iop_CmpGT16Ux8: size = 1; break;
5589//ZZ                case Iop_CmpGT32Ux4: size = 2; break;
5590//ZZ                default: vassert(0);
5591//ZZ             }
5592//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTU,
5593//ZZ                                            res, argL, argR, size, True));
5594//ZZ             return res;
5595//ZZ          }
5596//ZZ          case Iop_CmpGT8Sx16:
5597//ZZ          case Iop_CmpGT16Sx8:
5598//ZZ          case Iop_CmpGT32Sx4: {
5599//ZZ             HReg res = newVRegV(env);
5600//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5601//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5602//ZZ             UInt size;
5603//ZZ             switch (e->Iex.Binop.op) {
5604//ZZ                case Iop_CmpGT8Sx16: size = 0; break;
5605//ZZ                case Iop_CmpGT16Sx8: size = 1; break;
5606//ZZ                case Iop_CmpGT32Sx4: size = 2; break;
5607//ZZ                default: vassert(0);
5608//ZZ             }
5609//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTS,
5610//ZZ                                            res, argL, argR, size, True));
5611//ZZ             return res;
5612//ZZ          }
5613//ZZ          case Iop_CmpEQ8x16:
5614//ZZ          case Iop_CmpEQ16x8:
5615//ZZ          case Iop_CmpEQ32x4: {
5616//ZZ             HReg res = newVRegV(env);
5617//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5618//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5619//ZZ             UInt size;
5620//ZZ             switch (e->Iex.Binop.op) {
5621//ZZ                case Iop_CmpEQ8x16: size = 0; break;
5622//ZZ                case Iop_CmpEQ16x8: size = 1; break;
5623//ZZ                case Iop_CmpEQ32x4: size = 2; break;
5624//ZZ                default: vassert(0);
5625//ZZ             }
5626//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQ,
5627//ZZ                                            res, argL, argR, size, True));
5628//ZZ             return res;
5629//ZZ          }
5630//ZZ          case Iop_Mul8x16:
5631//ZZ          case Iop_Mul16x8:
5632//ZZ          case Iop_Mul32x4: {
5633//ZZ             HReg res = newVRegV(env);
5634//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5635//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5636//ZZ             UInt size = 0;
5637//ZZ             switch(e->Iex.Binop.op) {
5638//ZZ                case Iop_Mul8x16: size = 0; break;
5639//ZZ                case Iop_Mul16x8: size = 1; break;
5640//ZZ                case Iop_Mul32x4: size = 2; break;
5641//ZZ                default: vassert(0);
5642//ZZ             }
5643//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VMUL,
5644//ZZ                                            res, argL, argR, size, True));
5645//ZZ             return res;
5646//ZZ          }
5647//ZZ          case Iop_Mul32Fx4: {
5648//ZZ             HReg res = newVRegV(env);
5649//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5650//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5651//ZZ             UInt size = 0;
5652//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VMULFP,
5653//ZZ                                            res, argL, argR, size, True));
5654//ZZ             return res;
5655//ZZ          }
5656         case Iop_Mull8Ux8:
5657         case Iop_Mull16Ux4:
5658         case Iop_Mull32Ux2: {
5659            HReg res = newVRegV(env);
5660            HReg argL = iselDblExpr(env, e->Iex.Binop.arg1);
5661            HReg argR = iselDblExpr(env, e->Iex.Binop.arg2);
5662            UInt size = 0;
5663            ARM64VecBinOp op = ARM64vecb_INVALID;
5664
5665            switch(e->Iex.Binop.op) {
5666               case Iop_Mull8Ux8: op = ARM64vecb_UMULL8x8; break;
5667               case Iop_Mull16Ux4: op = ARM64vecb_UMULL16x4; break;
5668               case Iop_Mull32Ux2: op = ARM64vecb_UMULL32x2; break;
5669               default: vassert(0);
5670            }
5671            addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
5672            return res;
5673         }
5674//ZZ
5675//ZZ          case Iop_Mull8Sx8:
5676//ZZ          case Iop_Mull16Sx4:
5677//ZZ          case Iop_Mull32Sx2: {
5678//ZZ             HReg res = newVRegV(env);
5679//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5680//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5681//ZZ             UInt size = 0;
5682//ZZ             switch(e->Iex.Binop.op) {
5683//ZZ                case Iop_Mull8Sx8: size = 0; break;
5684//ZZ                case Iop_Mull16Sx4: size = 1; break;
5685//ZZ                case Iop_Mull32Sx2: size = 2; break;
5686//ZZ                default: vassert(0);
5687//ZZ             }
5688//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VMULLS,
5689//ZZ                                            res, argL, argR, size, True));
5690//ZZ             return res;
5691//ZZ          }
5692//ZZ
5693//ZZ          case Iop_QDMulHi16Sx8:
5694//ZZ          case Iop_QDMulHi32Sx4: {
5695//ZZ             HReg res = newVRegV(env);
5696//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5697//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5698//ZZ             UInt size = 0;
5699//ZZ             switch(e->Iex.Binop.op) {
5700//ZZ                case Iop_QDMulHi16Sx8: size = 1; break;
5701//ZZ                case Iop_QDMulHi32Sx4: size = 2; break;
5702//ZZ                default: vassert(0);
5703//ZZ             }
5704//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULH,
5705//ZZ                                            res, argL, argR, size, True));
5706//ZZ             return res;
5707//ZZ          }
5708//ZZ
5709//ZZ          case Iop_QRDMulHi16Sx8:
5710//ZZ          case Iop_QRDMulHi32Sx4: {
5711//ZZ             HReg res = newVRegV(env);
5712//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5713//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5714//ZZ             UInt size = 0;
5715//ZZ             switch(e->Iex.Binop.op) {
5716//ZZ                case Iop_QRDMulHi16Sx8: size = 1; break;
5717//ZZ                case Iop_QRDMulHi32Sx4: size = 2; break;
5718//ZZ                default: vassert(0);
5719//ZZ             }
5720//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VQRDMULH,
5721//ZZ                                            res, argL, argR, size, True));
5722//ZZ             return res;
5723//ZZ          }
5724//ZZ
5725//ZZ          case Iop_QDMulLong16Sx4:
5726//ZZ          case Iop_QDMulLong32Sx2: {
5727//ZZ             HReg res = newVRegV(env);
5728//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5729//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5730//ZZ             UInt size = 0;
5731//ZZ             switch(e->Iex.Binop.op) {
5732//ZZ                case Iop_QDMulLong16Sx4: size = 1; break;
5733//ZZ                case Iop_QDMulLong32Sx2: size = 2; break;
5734//ZZ                default: vassert(0);
5735//ZZ             }
5736//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VQDMULL,
5737//ZZ                                            res, argL, argR, size, True));
5738//ZZ             return res;
5739//ZZ          }
5740//ZZ          case Iop_PolynomialMul8x16: {
5741//ZZ             HReg res = newVRegV(env);
5742//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5743//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5744//ZZ             UInt size = 0;
5745//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VMULP,
5746//ZZ                                            res, argL, argR, size, True));
5747//ZZ             return res;
5748//ZZ          }
5749//ZZ          case Iop_Max32Fx4: {
5750//ZZ             HReg res = newVRegV(env);
5751//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5752//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5753//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VMAXF,
5754//ZZ                                            res, argL, argR, 2, True));
5755//ZZ             return res;
5756//ZZ          }
5757//ZZ          case Iop_Min32Fx4: {
5758//ZZ             HReg res = newVRegV(env);
5759//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5760//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5761//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VMINF,
5762//ZZ                                            res, argL, argR, 2, True));
5763//ZZ             return res;
5764//ZZ          }
5765//ZZ          case Iop_PwMax32Fx4: {
5766//ZZ             HReg res = newVRegV(env);
5767//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5768//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5769//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VPMAXF,
5770//ZZ                                            res, argL, argR, 2, True));
5771//ZZ             return res;
5772//ZZ          }
5773//ZZ          case Iop_PwMin32Fx4: {
5774//ZZ             HReg res = newVRegV(env);
5775//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5776//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5777//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VPMINF,
5778//ZZ                                            res, argL, argR, 2, True));
5779//ZZ             return res;
5780//ZZ          }
5781//ZZ          case Iop_CmpGT32Fx4: {
5782//ZZ             HReg res = newVRegV(env);
5783//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5784//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5785//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VCGTF,
5786//ZZ                                            res, argL, argR, 2, True));
5787//ZZ             return res;
5788//ZZ          }
5789//ZZ          case Iop_CmpGE32Fx4: {
5790//ZZ             HReg res = newVRegV(env);
5791//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5792//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5793//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VCGEF,
5794//ZZ                                            res, argL, argR, 2, True));
5795//ZZ             return res;
5796//ZZ          }
5797//ZZ          case Iop_CmpEQ32Fx4: {
5798//ZZ             HReg res = newVRegV(env);
5799//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5800//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5801//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VCEQF,
5802//ZZ                                            res, argL, argR, 2, True));
5803//ZZ             return res;
5804//ZZ          }
5805//ZZ
5806//ZZ          case Iop_PolynomialMull8x8: {
5807//ZZ             HReg res = newVRegV(env);
5808//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5809//ZZ             HReg argR = iselNeon64Expr(env, e->Iex.Binop.arg2);
5810//ZZ             UInt size = 0;
5811//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VMULLP,
5812//ZZ                                            res, argL, argR, size, True));
5813//ZZ             return res;
5814//ZZ          }
5815//ZZ          case Iop_F32ToFixed32Ux4_RZ:
5816//ZZ          case Iop_F32ToFixed32Sx4_RZ:
5817//ZZ          case Iop_Fixed32UToF32x4_RN:
5818//ZZ          case Iop_Fixed32SToF32x4_RN: {
5819//ZZ             HReg res = newVRegV(env);
5820//ZZ             HReg arg = iselNeonExpr(env, e->Iex.Binop.arg1);
5821//ZZ             ARMNeonUnOp op;
5822//ZZ             UInt imm6;
5823//ZZ             if (e->Iex.Binop.arg2->tag != Iex_Const ||
5824//ZZ                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5825//ZZ                   vpanic("ARM supports FP <-> Fixed conversion with constant "
5826//ZZ                          "second argument less than 33 only\n");
5827//ZZ             }
5828//ZZ             imm6 = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5829//ZZ             vassert(imm6 <= 32 && imm6 > 0);
5830//ZZ             imm6 = 64 - imm6;
5831//ZZ             switch(e->Iex.Binop.op) {
5832//ZZ                case Iop_F32ToFixed32Ux4_RZ: op = ARMneon_VCVTFtoFixedU; break;
5833//ZZ                case Iop_F32ToFixed32Sx4_RZ: op = ARMneon_VCVTFtoFixedS; break;
5834//ZZ                case Iop_Fixed32UToF32x4_RN: op = ARMneon_VCVTFixedUtoF; break;
5835//ZZ                case Iop_Fixed32SToF32x4_RN: op = ARMneon_VCVTFixedStoF; break;
5836//ZZ                default: vassert(0);
5837//ZZ             }
5838//ZZ             addInstr(env, ARMInstr_NUnary(op, res, arg, imm6, True));
5839//ZZ             return res;
5840//ZZ          }
5841//ZZ          /*
5842//ZZ          FIXME remove if not used
5843//ZZ          case Iop_VDup8x16:
5844//ZZ          case Iop_VDup16x8:
5845//ZZ          case Iop_VDup32x4: {
5846//ZZ             HReg res = newVRegV(env);
5847//ZZ             HReg argL = iselNeon64Expr(env, e->Iex.Binop.arg1);
5848//ZZ             UInt imm4;
5849//ZZ             UInt index;
5850//ZZ             if (e->Iex.Binop.arg2->tag != Iex_Const ||
5851//ZZ                typeOfIRExpr(env->type_env, e->Iex.Binop.arg2) != Ity_I8) {
5852//ZZ                   vpanic("ARM supports Iop_VDup with constant "
5853//ZZ                          "second argument less than 16 only\n");
5854//ZZ             }
5855//ZZ             index = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
5856//ZZ             switch(e->Iex.Binop.op) {
5857//ZZ                case Iop_VDup8x16: imm4 = (index << 1) + 1; break;
5858//ZZ                case Iop_VDup16x8: imm4 = (index << 2) + 2; break;
5859//ZZ                case Iop_VDup32x4: imm4 = (index << 3) + 4; break;
5860//ZZ                default: vassert(0);
5861//ZZ             }
5862//ZZ             if (imm4 >= 16) {
5863//ZZ                vpanic("ARM supports Iop_VDup with constant "
5864//ZZ                       "second argument less than 16 only\n");
5865//ZZ             }
5866//ZZ             addInstr(env, ARMInstr_NUnary(ARMneon_VDUP,
5867//ZZ                                           res, argL, imm4, True));
5868//ZZ             return res;
5869//ZZ          }
5870//ZZ          */
5871//ZZ          case Iop_PwAdd8x16:
5872//ZZ          case Iop_PwAdd16x8:
5873//ZZ          case Iop_PwAdd32x4: {
5874//ZZ             HReg res = newVRegV(env);
5875//ZZ             HReg argL = iselNeonExpr(env, e->Iex.Binop.arg1);
5876//ZZ             HReg argR = iselNeonExpr(env, e->Iex.Binop.arg2);
5877//ZZ             UInt size = 0;
5878//ZZ             switch(e->Iex.Binop.op) {
5879//ZZ                case Iop_PwAdd8x16: size = 0; break;
5880//ZZ                case Iop_PwAdd16x8: size = 1; break;
5881//ZZ                case Iop_PwAdd32x4: size = 2; break;
5882//ZZ                default: vassert(0);
5883//ZZ             }
5884//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VPADD,
5885//ZZ                                            res, argL, argR, size, True));
5886//ZZ             return res;
5887//ZZ          }
5888         /* ... */
5889         default:
5890            break;
5891      } /* switch on the binop */
5892   } /* if (e->tag == Iex_Binop) */
5893
5894   if (e->tag == Iex_Triop) {
5895      IRTriop*      triop  = e->Iex.Triop.details;
5896      ARM64VecBinOp vecbop = ARM64vecb_INVALID;
5897      switch (triop->op) {
5898         case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break;
5899         case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break;
5900         case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break;
5901         case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break;
5902         case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break;
5903         case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break;
5904         case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break;
5905         case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break;
5906         default: break;
5907      }
5908      if (vecbop != ARM64vecb_INVALID) {
5909         HReg argL = iselV128Expr(env, triop->arg2);
5910         HReg argR = iselV128Expr(env, triop->arg3);
5911         HReg dst  = newVRegV(env);
5912         set_FPCR_rounding_mode(env, triop->arg1);
5913         addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR));
5914         return dst;
5915      }
5916
5917//ZZ       switch (triop->op) {
5918//ZZ          case Iop_ExtractV128: {
5919//ZZ             HReg res = newVRegV(env);
5920//ZZ             HReg argL = iselNeonExpr(env, triop->arg1);
5921//ZZ             HReg argR = iselNeonExpr(env, triop->arg2);
5922//ZZ             UInt imm4;
5923//ZZ             if (triop->arg3->tag != Iex_Const ||
5924//ZZ                 typeOfIRExpr(env->type_env, triop->arg3) != Ity_I8) {
5925//ZZ                vpanic("ARM target supports Iop_ExtractV128 with constant "
5926//ZZ                       "third argument less than 16 only\n");
5927//ZZ             }
5928//ZZ             imm4 = triop->arg3->Iex.Const.con->Ico.U8;
5929//ZZ             if (imm4 >= 16) {
5930//ZZ                vpanic("ARM target supports Iop_ExtractV128 with constant "
5931//ZZ                       "third argument less than 16 only\n");
5932//ZZ             }
5933//ZZ             addInstr(env, ARMInstr_NBinary(ARMneon_VEXT,
5934//ZZ                                            res, argL, argR, imm4, True));
5935//ZZ             return res;
5936//ZZ          }
5937//ZZ          default:
5938//ZZ             break;
5939//ZZ       }
5940   }
5941
5942//ZZ    if (e->tag == Iex_ITE) { // VFD
5943//ZZ       ARMCondCode cc;
5944//ZZ       HReg r1  = iselNeonExpr(env, e->Iex.ITE.iftrue);
5945//ZZ       HReg r0  = iselNeonExpr(env, e->Iex.ITE.iffalse);
5946//ZZ       HReg dst = newVRegV(env);
5947//ZZ       addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, r1, 4, True));
5948//ZZ       cc = iselCondCode(env, e->Iex.ITE.cond);
5949//ZZ       addInstr(env, ARMInstr_NCMovQ(cc ^ 1, dst, r0));
5950//ZZ       return dst;
5951//ZZ    }
5952
5953  v128_expr_bad:
5954   ppIRExpr(e);
5955   vpanic("iselV128Expr_wrk");
5956}
5957
5958
5959/*---------------------------------------------------------*/
5960/*--- ISEL: Floating point expressions (64 bit)         ---*/
5961/*---------------------------------------------------------*/
5962
/* Compute a 64-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, callers should not
   rely on the reg being real or virtual (although iselDblExpr
   currently asserts that it is a virtual register of class
   HRcFlt64); in any case it must not be changed by subsequent code
   emitted by the caller.  */
5967
5968static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
5969{
5970   HReg r = iselDblExpr_wrk( env, e );
5971#  if 0
5972   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5973#  endif
5974   vassert(hregClass(r) == HRcFlt64);
5975   vassert(hregIsVirtual(r));
5976   return r;
5977}
5978
5979/* DO NOT CALL THIS DIRECTLY */
5980static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
5981{
5982   IRType ty = typeOfIRExpr(env->type_env,e);
5983   vassert(e);
5984   vassert(ty == Ity_F64 || ty == Ity_I64);
5985
5986   if (e->tag == Iex_RdTmp) {
5987      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
5988   }
5989
5990   if (e->tag == Iex_Const) {
5991      IRConst* con = e->Iex.Const.con;
5992      if (con->tag == Ico_F64i) {
5993         HReg src = newVRegI(env);
5994         HReg dst = newVRegD(env);
5995         addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i));
5996         addInstr(env, ARM64Instr_VDfromX(dst, src));
5997         return dst;
5998      }
5999   }
6000
6001   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
6002      vassert(e->Iex.Load.ty == Ity_F64 || e->Iex.Load.ty == Ity_I64);
6003      HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
6004      HReg res  = newVRegD(env);
6005      addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
6006      return res;
6007   }
6008
6009   if (e->tag == Iex_Get) {
6010      Int offs = e->Iex.Get.offset;
6011      if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) {
6012         HReg rD = newVRegD(env);
6013         HReg rN = get_baseblock_register();
6014         addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs));
6015         return rD;
6016      }
6017   }
6018
6019   if (e->tag == Iex_Unop) {
6020      switch (e->Iex.Unop.op) {
6021//ZZ          case Iop_ReinterpI64asF64: {
6022//ZZ             if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
6023//ZZ                return iselNeon64Expr(env, e->Iex.Unop.arg);
6024//ZZ             } else {
6025//ZZ                HReg srcHi, srcLo;
6026//ZZ                HReg dst = newVRegD(env);
6027//ZZ                iselInt64Expr(&srcHi, &srcLo, env, e->Iex.Unop.arg);
6028//ZZ                addInstr(env, ARMInstr_VXferD(True/*toD*/, dst, srcHi, srcLo));
6029//ZZ                return dst;
6030//ZZ             }
6031//ZZ          }
6032         case Iop_NegF64: {
6033            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
6034            HReg dst = newVRegD(env);
6035            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src));
6036            return dst;
6037         }
6038         case Iop_AbsF64: {
6039            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
6040            HReg dst = newVRegD(env);
6041            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src));
6042            return dst;
6043         }
6044         case Iop_F32toF64: {
6045            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
6046            HReg dst = newVRegD(env);
6047            addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
6048            return dst;
6049         }
6050         case Iop_I32UtoF64:
6051         case Iop_I32StoF64: {
6052            /* Rounding mode is not involved here, since the
6053               conversion can always be done without loss of
6054               precision. */
6055            HReg src   = iselIntExpr_R(env, e->Iex.Unop.arg);
6056            HReg dst   = newVRegD(env);
6057            Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
6058            ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U;
6059            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
6060            return dst;
6061         }
6062         default:
6063            break;
6064      }
6065   }
6066
6067   if (e->tag == Iex_Binop) {
6068      switch (e->Iex.Binop.op) {
6069         case Iop_RoundF64toInt: {
6070            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
6071            HReg dst = newVRegD(env);
6072            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
6073            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_RINT, dst, src));
6074            return dst;
6075         }
6076         case Iop_SqrtF64: {
6077            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
6078            HReg dst = newVRegD(env);
6079            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
6080            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_SQRT, dst, src));
6081            return dst;
6082         }
6083         case Iop_I64StoF64:
6084         case Iop_I64UtoF64: {
6085            ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64
6086                                   ? ARM64cvt_F64_I64S : ARM64cvt_F64_I64U;
6087            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
6088            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
6089            HReg dstS = newVRegD(env);
6090            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
6091            return dstS;
6092         }
6093         default:
6094            break;
6095      }
6096   }
6097
6098   if (e->tag == Iex_Triop) {
6099      IRTriop*     triop = e->Iex.Triop.details;
6100      ARM64FpBinOp dblop = ARM64fpb_INVALID;
6101      switch (triop->op) {
6102         case Iop_DivF64: dblop = ARM64fpb_DIV; break;
6103         case Iop_MulF64: dblop = ARM64fpb_MUL; break;
6104         case Iop_SubF64: dblop = ARM64fpb_SUB; break;
6105         case Iop_AddF64: dblop = ARM64fpb_ADD; break;
6106         default: break;
6107      }
6108      if (dblop != ARM64fpb_INVALID) {
6109         HReg argL = iselDblExpr(env, triop->arg2);
6110         HReg argR = iselDblExpr(env, triop->arg3);
6111         HReg dst  = newVRegD(env);
6112         set_FPCR_rounding_mode(env, triop->arg1);
6113         addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR));
6114         return dst;
6115      }
6116   }
6117
6118//ZZ    if (e->tag == Iex_ITE) { // VFD
6119//ZZ       if (ty == Ity_F64
6120//ZZ           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
6121//ZZ          HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
6122//ZZ          HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
6123//ZZ          HReg dst = newVRegD(env);
6124//ZZ          addInstr(env, ARMInstr_VUnaryD(ARMvfpu_COPY, dst, r1));
6125//ZZ          ARMCondCode cc = iselCondCode(env, e->Iex.ITE.cond);
6126//ZZ          addInstr(env, ARMInstr_VCMovD(cc ^ 1, dst, r0));
6127//ZZ          return dst;
6128//ZZ       }
6129//ZZ    }
6130
6131   ppIRExpr(e);
6132   vpanic("iselDblExpr_wrk");
6133}
6134
6135
6136/*---------------------------------------------------------*/
6137/*--- ISEL: Floating point expressions (32 bit)         ---*/
6138/*---------------------------------------------------------*/
6139
/* Compute a 32-bit floating point value into a register, the identity
   of which is returned.  As with iselIntExpr_R, callers should not
   rely on the reg being real or virtual (although iselFltExpr
   currently asserts that it is virtual); in any case it must not be
   changed by subsequent code emitted by the caller.  Values are
   generated into HRcFlt64 registers despite the values themselves
   being Ity_F32s. */
6145
6146static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
6147{
6148   HReg r = iselFltExpr_wrk( env, e );
6149#  if 0
6150   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
6151#  endif
6152   vassert(hregClass(r) == HRcFlt64);
6153   vassert(hregIsVirtual(r));
6154   return r;
6155}
6156
6157/* DO NOT CALL THIS DIRECTLY */
6158static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
6159{
6160   IRType ty = typeOfIRExpr(env->type_env,e);
6161   vassert(e);
6162   vassert(ty == Ity_F32);
6163
6164   if (e->tag == Iex_RdTmp) {
6165      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
6166   }
6167
6168   if (e->tag == Iex_Const) {
6169      /* This is something of a kludge.  Since a 32 bit floating point
6170         zero is just .. all zeroes, just create a 64 bit zero word
6171         and transfer it.  This avoids having to create a SfromW
6172         instruction for this specific case. */
6173      IRConst* con = e->Iex.Const.con;
6174      if (con->tag == Ico_F32i && con->Ico.F32i == 0) {
6175         HReg src = newVRegI(env);
6176         HReg dst = newVRegD(env);
6177         addInstr(env, ARM64Instr_Imm64(src, 0));
6178         addInstr(env, ARM64Instr_VDfromX(dst, src));
6179         return dst;
6180      }
6181   }
6182
6183//ZZ    if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
6184//ZZ       ARMAModeV* am;
6185//ZZ       HReg res = newVRegF(env);
6186//ZZ       vassert(e->Iex.Load.ty == Ity_F32);
6187//ZZ       am = iselIntExpr_AModeV(env, e->Iex.Load.addr);
6188//ZZ       addInstr(env, ARMInstr_VLdStS(True/*isLoad*/, res, am));
6189//ZZ       return res;
6190//ZZ    }
6191
6192   if (e->tag == Iex_Get) {
6193      Int offs = e->Iex.Get.offset;
6194      if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) {
6195         HReg rD = newVRegD(env);
6196         HReg rN = get_baseblock_register();
6197         addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs));
6198         return rD;
6199      }
6200   }
6201
6202   if (e->tag == Iex_Unop) {
6203      switch (e->Iex.Unop.op) {
6204//ZZ          case Iop_ReinterpI32asF32: {
6205//ZZ             HReg dst = newVRegF(env);
6206//ZZ             HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
6207//ZZ             addInstr(env, ARMInstr_VXferS(True/*toS*/, dst, src));
6208//ZZ             return dst;
6209//ZZ          }
6210         case Iop_NegF32: {
6211            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
6212            HReg dst = newVRegD(env);
6213            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src));
6214            return dst;
6215         }
6216         case Iop_AbsF32: {
6217            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
6218            HReg dst = newVRegD(env);
6219            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
6220            return dst;
6221         }
6222         default:
6223            break;
6224      }
6225   }
6226
6227   if (e->tag == Iex_Binop) {
6228      switch (e->Iex.Binop.op) {
6229         case Iop_RoundF32toInt: {
6230            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
6231            HReg dst = newVRegD(env);
6232            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
6233            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_RINT, dst, src));
6234            return dst;
6235         }
6236         case Iop_SqrtF32: {
6237            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
6238            HReg dst = newVRegD(env);
6239            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
6240            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_SQRT, dst, src));
6241            return dst;
6242         }
6243         case Iop_F64toF32: {
6244            HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
6245            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
6246            HReg dstS = newVRegD(env);
6247            addInstr(env, ARM64Instr_VCvtSD(False/*dToS*/, dstS, srcD));
6248            return dstS;
6249         }
6250         case Iop_I32UtoF32:
6251         case Iop_I32StoF32:
6252         case Iop_I64UtoF32:
6253         case Iop_I64StoF32: {
6254            ARM64CvtOp cvt_op = ARM64cvt_INVALID;
6255            switch (e->Iex.Binop.op) {
6256               case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break;
6257               case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
6258               case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
6259               case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
6260               default: vassert(0);
6261            }
6262            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
6263            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
6264            HReg dstS = newVRegD(env);
6265            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
6266            return dstS;
6267         }
6268         default:
6269            break;
6270      }
6271   }
6272
6273   if (e->tag == Iex_Triop) {
6274      IRTriop*     triop = e->Iex.Triop.details;
6275      ARM64FpBinOp sglop = ARM64fpb_INVALID;
6276      switch (triop->op) {
6277         case Iop_DivF32: sglop = ARM64fpb_DIV; break;
6278         case Iop_MulF32: sglop = ARM64fpb_MUL; break;
6279         case Iop_SubF32: sglop = ARM64fpb_SUB; break;
6280         case Iop_AddF32: sglop = ARM64fpb_ADD; break;
6281         default: break;
6282      }
6283      if (sglop != ARM64fpb_INVALID) {
6284         HReg argL = iselFltExpr(env, triop->arg2);
6285         HReg argR = iselFltExpr(env, triop->arg3);
6286         HReg dst  = newVRegD(env);
6287         set_FPCR_rounding_mode(env, triop->arg1);
6288         addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR));
6289         return dst;
6290      }
6291   }
6292
6293//ZZ
6294//ZZ    if (e->tag == Iex_ITE) { // VFD
6295//ZZ       if (ty == Ity_F32
6296//ZZ           && typeOfIRExpr(env->type_env,e->Iex.ITE.cond) == Ity_I1) {
6297//ZZ          ARMCondCode cc;
6298//ZZ          HReg r1  = iselFltExpr(env, e->Iex.ITE.iftrue);
6299//ZZ          HReg r0  = iselFltExpr(env, e->Iex.ITE.iffalse);
6300//ZZ          HReg dst = newVRegF(env);
6301//ZZ          addInstr(env, ARMInstr_VUnaryS(ARMvfpu_COPY, dst, r1));
6302//ZZ          cc = iselCondCode(env, e->Iex.ITE.cond);
6303//ZZ          addInstr(env, ARMInstr_VCMovS(cc ^ 1, dst, r0));
6304//ZZ          return dst;
6305//ZZ       }
6306//ZZ    }
6307
6308   ppIRExpr(e);
6309   vpanic("iselFltExpr_wrk");
6310}
6311
6312
6313/*---------------------------------------------------------*/
6314/*--- ISEL: Statements                                  ---*/
6315/*---------------------------------------------------------*/
6316
/* Select ARM64 instructions for a single IR statement, emitting them
   through addInstr.  The statement kinds handled here are Store, Put,
   WrTmp, Dirty, LLSC, MBE (fence only), IMark, NoOp and Exit
   (Ijk_Boring only).  Any statement kind, data type or operand
   combination that is not matched by one of the cases below ends up
   at stmt_fail, which prints the statement and panics -- so this
   function either returns after emitting code or does not return
   normally.

   The //ZZ blocks are commented-out code using 32-bit ARM names
   (ARMInstr_*, ARMAMode*), kept for reference only. */
static void iselStmt ( ISelEnv* env, IRStmt* stmt )
{
   /* Optionally trace the statement being translated. */
   if (vex_traceflags & VEX_TRACE_VCODE) {
      vex_printf("\n-- ");
      ppIRStmt(stmt);
      vex_printf("\n");
   }
   switch (stmt->tag) {

   /* --------- STORE --------- */
   /* little-endian write to memory */
   case Ist_Store: {
      IRType    tya  = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
      IRType    tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
      IREndness end  = stmt->Ist.Store.end;

      /* Only 64-bit addresses and little-endian stores are handled. */
      if (tya != Ity_I64 || end != Iend_LE)
         goto stmt_fail;

      /* Integer stores: the data expression is selected first, then
         the address is turned into an addressing mode suitable for
         the access size (tyd). */
      if (tyd == Ity_I64) {
         HReg        rD = iselIntExpr_R(env, stmt->Ist.Store.data);
         ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
         addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
         return;
      }
      if (tyd == Ity_I32) {
         HReg        rD = iselIntExpr_R(env, stmt->Ist.Store.data);
         ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
         addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
         return;
      }
      if (tyd == Ity_I16) {
         HReg        rD = iselIntExpr_R(env, stmt->Ist.Store.data);
         ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
         addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
         return;
      }
      if (tyd == Ity_I8) {
         HReg        rD = iselIntExpr_R(env, stmt->Ist.Store.data);
         ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
         addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
         return;
      }
      /* Vector and FP stores: the address is computed into a plain
         integer register; for F64/F32 the immediate offset passed to
         the store is always zero. */
      if (tyd == Ity_V128) {
         HReg qD   = iselV128Expr(env, stmt->Ist.Store.data);
         HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
         addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
         return;
      }
      if (tyd == Ity_F64) {
         HReg dD   = iselDblExpr(env, stmt->Ist.Store.data);
         HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
         addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0));
         return;
      }
      if (tyd == Ity_F32) {
         HReg sD   = iselFltExpr(env, stmt->Ist.Store.data);
         HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
         addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, addr, 0));
         return;
      }

//ZZ       if (tyd == Ity_I16) {
//ZZ          HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
//ZZ          ARMAMode2* am = iselIntExpr_AMode2(env, stmt->Ist.Store.addr);
//ZZ          addInstr(env, ARMInstr_LdSt16(ARMcc_AL,
//ZZ                                        False/*!isLoad*/,
//ZZ                                        False/*!isSignedLoad*/, rD, am));
//ZZ          return;
//ZZ       }
//ZZ       if (tyd == Ity_I8) {
//ZZ          HReg       rD = iselIntExpr_R(env, stmt->Ist.Store.data);
//ZZ          ARMAMode1* am = iselIntExpr_AMode1(env, stmt->Ist.Store.addr);
//ZZ          addInstr(env, ARMInstr_LdSt8U(ARMcc_AL, False/*!isLoad*/, rD, am));
//ZZ          return;
//ZZ       }
//ZZ       if (tyd == Ity_I64) {
//ZZ          if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
//ZZ             HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
//ZZ             ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
//ZZ             addInstr(env, ARMInstr_NLdStD(False, dD, am));
//ZZ          } else {
//ZZ             HReg rDhi, rDlo, rA;
//ZZ             iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Store.data);
//ZZ             rA = iselIntExpr_R(env, stmt->Ist.Store.addr);
//ZZ             addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDhi,
//ZZ                                           ARMAMode1_RI(rA,4)));
//ZZ             addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!load*/, rDlo,
//ZZ                                           ARMAMode1_RI(rA,0)));
//ZZ          }
//ZZ          return;
//ZZ       }
//ZZ       if (tyd == Ity_F64) {
//ZZ          HReg       dD = iselDblExpr(env, stmt->Ist.Store.data);
//ZZ          ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
//ZZ          addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, dD, am));
//ZZ          return;
//ZZ       }
//ZZ       if (tyd == Ity_F32) {
//ZZ          HReg       fD = iselFltExpr(env, stmt->Ist.Store.data);
//ZZ          ARMAModeV* am = iselIntExpr_AModeV(env, stmt->Ist.Store.addr);
//ZZ          addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, fD, am));
//ZZ          return;
//ZZ       }
//ZZ       if (tyd == Ity_V128) {
//ZZ          HReg       qD = iselNeonExpr(env, stmt->Ist.Store.data);
//ZZ          ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
//ZZ          addInstr(env, ARMInstr_NLdStQ(False, qD, am));
//ZZ          return;
//ZZ       }

      /* Unhandled data type: leave the switch and fail. */
      break;
   }

//ZZ    /* --------- CONDITIONAL STORE --------- */
//ZZ    /* conditional little-endian write to memory */
//ZZ    case Ist_StoreG: {
//ZZ       IRStoreG* sg   = stmt->Ist.StoreG.details;
//ZZ       IRType    tya  = typeOfIRExpr(env->type_env, sg->addr);
//ZZ       IRType    tyd  = typeOfIRExpr(env->type_env, sg->data);
//ZZ       IREndness end  = sg->end;
//ZZ
//ZZ       if (tya != Ity_I32 || end != Iend_LE)
//ZZ          goto stmt_fail;
//ZZ
//ZZ       switch (tyd) {
//ZZ          case Ity_I8:
//ZZ          case Ity_I32: {
//ZZ             HReg        rD = iselIntExpr_R(env, sg->data);
//ZZ             ARMAMode1*  am = iselIntExpr_AMode1(env, sg->addr);
//ZZ             ARMCondCode cc = iselCondCode(env, sg->guard);
//ZZ             addInstr(env, (tyd == Ity_I32 ? ARMInstr_LdSt32 : ARMInstr_LdSt8U)
//ZZ                              (cc, False/*!isLoad*/, rD, am));
//ZZ             return;
//ZZ          }
//ZZ          case Ity_I16: {
//ZZ             HReg        rD = iselIntExpr_R(env, sg->data);
//ZZ             ARMAMode2*  am = iselIntExpr_AMode2(env, sg->addr);
//ZZ             ARMCondCode cc = iselCondCode(env, sg->guard);
//ZZ             addInstr(env, ARMInstr_LdSt16(cc,
//ZZ                                           False/*!isLoad*/,
//ZZ                                           False/*!isSignedLoad*/, rD, am));
//ZZ             return;
//ZZ          }
//ZZ          default:
//ZZ             break;
//ZZ       }
//ZZ       break;
//ZZ    }
//ZZ
//ZZ    /* --------- CONDITIONAL LOAD --------- */
//ZZ    /* conditional little-endian load from memory */
//ZZ    case Ist_LoadG: {
//ZZ       IRLoadG*  lg   = stmt->Ist.LoadG.details;
//ZZ       IRType    tya  = typeOfIRExpr(env->type_env, lg->addr);
//ZZ       IREndness end  = lg->end;
//ZZ
//ZZ       if (tya != Ity_I32 || end != Iend_LE)
//ZZ          goto stmt_fail;
//ZZ
//ZZ       switch (lg->cvt) {
//ZZ          case ILGop_8Uto32:
//ZZ          case ILGop_Ident32: {
//ZZ             HReg        rAlt = iselIntExpr_R(env, lg->alt);
//ZZ             ARMAMode1*  am   = iselIntExpr_AMode1(env, lg->addr);
//ZZ             HReg        rD   = lookupIRTemp(env, lg->dst);
//ZZ             addInstr(env, mk_iMOVds_RR(rD, rAlt));
//ZZ             ARMCondCode cc   = iselCondCode(env, lg->guard);
//ZZ             addInstr(env, (lg->cvt == ILGop_Ident32 ? ARMInstr_LdSt32
//ZZ                                                     : ARMInstr_LdSt8U)
//ZZ                              (cc, True/*isLoad*/, rD, am));
//ZZ             return;
//ZZ          }
//ZZ          case ILGop_16Sto32:
//ZZ          case ILGop_16Uto32:
//ZZ          case ILGop_8Sto32: {
//ZZ             HReg        rAlt = iselIntExpr_R(env, lg->alt);
//ZZ             ARMAMode2*  am   = iselIntExpr_AMode2(env, lg->addr);
//ZZ             HReg        rD   = lookupIRTemp(env, lg->dst);
//ZZ             addInstr(env, mk_iMOVds_RR(rD, rAlt));
//ZZ             ARMCondCode cc   = iselCondCode(env, lg->guard);
//ZZ             if (lg->cvt == ILGop_8Sto32) {
//ZZ                addInstr(env, ARMInstr_Ld8S(cc, rD, am));
//ZZ             } else {
//ZZ                vassert(lg->cvt == ILGop_16Sto32 || lg->cvt == ILGop_16Uto32);
//ZZ                Bool sx = lg->cvt == ILGop_16Sto32;
//ZZ                addInstr(env, ARMInstr_LdSt16(cc, True/*isLoad*/, sx, rD, am));
//ZZ             }
//ZZ             return;
//ZZ          }
//ZZ          default:
//ZZ             break;
//ZZ       }
//ZZ       break;
//ZZ    }

   /* --------- PUT --------- */
   /* write guest state, fixed offset */
   case Ist_Put: {
      IRType tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
      UInt   offs = (UInt)stmt->Ist.Put.offset;
      /* Each scalar case below is taken only if the offset is aligned
         to the access size and is less than (size << 12).
         NOTE(review): presumably this is the range of the scaled
         12-bit unsigned immediate of the load/store encoding; confirm
         against the mk_baseblock_*_access_amode helpers.  An offset
         that fails the test falls through to stmt_fail. */
      if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) {
         HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
         ARM64AMode* am = mk_baseblock_64bit_access_amode(offs);
         addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
         return;
      }
      if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) {
         HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
         ARM64AMode* am = mk_baseblock_32bit_access_amode(offs);
         addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
         return;
      }
      if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) {
         HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
         ARM64AMode* am = mk_baseblock_16bit_access_amode(offs);
         addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
         return;
      }
      if (tyd == Ity_I8 && offs < (1<<12)) {
         HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
         ARM64AMode* am = mk_baseblock_8bit_access_amode(offs);
         addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
         return;
      }
      /* V128: the target address is materialised in a register by
         mk_baseblock_128bit_access_addr; no alignment test is made
         here. */
      if (tyd == Ity_V128 && offs < (1<<12)) {
         HReg qD   = iselV128Expr(env, stmt->Ist.Put.data);
         HReg addr = mk_baseblock_128bit_access_addr(env, offs);
         addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
         return;
      }
      /* F64/F32: store relative to the baseblock register, passing
         the guest-state offset as the immediate. */
      if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) {
         HReg dD   = iselDblExpr(env, stmt->Ist.Put.data);
         HReg bbp  = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs));
         return;
      }
      if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
         HReg dD   = iselFltExpr(env, stmt->Ist.Put.data);
         HReg bbp  = get_baseblock_register();
         addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, dD, bbp, offs));
         return;
      }

//ZZ        if (tyd == Ity_I64) {
//ZZ           if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
//ZZ              HReg addr = newVRegI(env);
//ZZ              HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
//ZZ              addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
//ZZ                                                 stmt->Ist.Put.offset));
//ZZ              addInstr(env, ARMInstr_NLdStD(False, qD, mkARMAModeN_R(addr)));
//ZZ           } else {
//ZZ              HReg rDhi, rDlo;
//ZZ              ARMAMode1* am0 = ARMAMode1_RI(hregARM_R8(),
//ZZ                                            stmt->Ist.Put.offset + 0);
//ZZ              ARMAMode1* am4 = ARMAMode1_RI(hregARM_R8(),
//ZZ                                            stmt->Ist.Put.offset + 4);
//ZZ              iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.Put.data);
//ZZ              addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
//ZZ                                            rDhi, am4));
//ZZ              addInstr(env, ARMInstr_LdSt32(ARMcc_AL, False/*!isLoad*/,
//ZZ                                            rDlo, am0));
//ZZ           }
//ZZ           return;
//ZZ        }
//ZZ        if (tyd == Ity_F64) {
//ZZ           // XXX This won't work if offset > 1020 or is not 0 % 4.
//ZZ           // In which case we'll have to generate more longwinded code.
//ZZ           ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
//ZZ           HReg       rD = iselDblExpr(env, stmt->Ist.Put.data);
//ZZ           addInstr(env, ARMInstr_VLdStD(False/*!isLoad*/, rD, am));
//ZZ           return;
//ZZ        }
//ZZ        if (tyd == Ity_F32) {
//ZZ           // XXX This won't work if offset > 1020 or is not 0 % 4.
//ZZ           // In which case we'll have to generate more longwinded code.
//ZZ           ARMAModeV* am = mkARMAModeV(hregARM_R8(), stmt->Ist.Put.offset);
//ZZ           HReg       rD = iselFltExpr(env, stmt->Ist.Put.data);
//ZZ           addInstr(env, ARMInstr_VLdStS(False/*!isLoad*/, rD, am));
//ZZ           return;
//ZZ        }
      /* Unhandled type or offset: leave the switch and fail. */
      break;
   }

   /* --------- TMP --------- */
   /* assign value to temporary */
   case Ist_WrTmp: {
      IRTemp tmp = stmt->Ist.WrTmp.tmp;
      IRType ty  = typeOfIRTemp(env->type_env, tmp);

      /* All integer widths up to 64 bits are computed into an integer
         register and then copied into the temporary's register. */
      if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
         /* We could do a lot better here.  But for the time being: */
         HReg dst = lookupIRTemp(env, tmp);
         HReg rD  = iselIntExpr_R(env, stmt->Ist.WrTmp.data);
         addInstr(env, ARM64Instr_MovI(dst, rD));
         return;
      }
      if (ty == Ity_I1) {
         /* Here, we are generating a I1 value into a 64 bit register.
            Make sure the value in the register is only zero or one,
            but no other.  This allows optimisation of the
            1Uto64(tmp:I1) case, by making it simply a copy of the
            register holding 'tmp'.  The point being that the value in
            the register holding 'tmp' can only have been created
            here.  LATER: that seems dangerous; safer to do 'tmp & 1'
            in that case.  Also, could do this just with a single CINC
            insn. */
         /* CLONE-01 */
         HReg zero = newVRegI(env);
         HReg one  = newVRegI(env);
         HReg dst  = lookupIRTemp(env, tmp);
         /* The two constants are loaded before the condition is
            evaluated; the conditional select then picks between
            them. */
         addInstr(env, ARM64Instr_Imm64(zero, 0));
         addInstr(env, ARM64Instr_Imm64(one,  1));
         ARM64CondCode cc = iselCondCode(env, stmt->Ist.WrTmp.data);
         addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
         return;
      }
      if (ty == Ity_F64) {
         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, ARM64Instr_VMov(8, dst, src));
         return;
      }
      if (ty == Ity_F32) {
         /* The move size is 8, the same as for F64: F32 temporaries
            are given HRcFlt64 registers when the vreg map is built in
            iselSB_ARM64. */
         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
         return;
      }
      if (ty == Ity_V128) {
         HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data);
         HReg dst = lookupIRTemp(env, tmp);
         addInstr(env, ARM64Instr_VMov(16, dst, src));
         return;
      }
      /* Unhandled temporary type: leave the switch and fail. */
      break;
   }

   /* --------- Call to DIRTY helper --------- */
   /* call complex ("dirty") helper function */
   case Ist_Dirty: {
      IRDirty* d = stmt->Ist.Dirty.details;

      /* Figure out the return type, if any. */
      IRType retty = Ity_INVALID;
      if (d->tmp != IRTemp_INVALID)
         retty = typeOfIRTemp(env->type_env, d->tmp);

      /* Accept only: no result, an integer result of up to 64 bits,
         or a V128 result. */
      Bool retty_ok = False;
      switch (retty) {
         case Ity_INVALID: /* function doesn't return anything */
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
         case Ity_V128:
            retty_ok = True; break;
         default:
            break;
      }
      if (!retty_ok)
         break; /* will go to stmt_fail: */

      /* Marshal args, do the call, and set the return value to 0x555..555
         if this is a conditional call that returns a value and the
         call is skipped. */
      UInt   addToSp = 0;
      RetLoc rloc    = mk_RetLoc_INVALID();
      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
      vassert(is_sane_RetLoc(rloc));

      /* Now figure out what to do with the returned value, if any. */
      switch (retty) {
         case Ity_INVALID: {
            /* No return value.  Nothing to do. */
            vassert(d->tmp == IRTemp_INVALID);
            vassert(rloc.pri == RLPri_None);
            vassert(addToSp == 0);
            return;
         }
         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
            vassert(rloc.pri == RLPri_Int);
            vassert(addToSp == 0);
            /* The returned value is in x0.  Park it in the register
               associated with tmp. */
            HReg dst = lookupIRTemp(env, d->tmp);
            addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) );
            return;
         }
         case Ity_V128: {
            /* The returned value is on the stack, and *retloc tells
               us where.  Fish it off the stack and then move the
               stack pointer upwards to clear it, as directed by
               doHelperCall. */
            vassert(rloc.pri == RLPri_V128SpRel);
            vassert(rloc.spOff < 256); // stay sane
            vassert(addToSp >= 16); // ditto
            vassert(addToSp < 256); // ditto
            HReg dst = lookupIRTemp(env, d->tmp);
            HReg tmp = newVRegI(env); // the address of the returned value
            addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP
            /* tmp = SP + rloc.spOff */
            addInstr(env, ARM64Instr_Arith(tmp, tmp,
                                           ARM64RIA_I12((UShort)rloc.spOff, 0),
                                           True/*isAdd*/ ));
            addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp));
            addInstr(env, ARM64Instr_AddToSP(addToSp));
            return;
         }
         default:
            /*NOTREACHED*/
            vassert(0);
      }
      break;
   }

   /* --------- Load Linked and Store Conditional --------- */
   /* A NULL storedata marks a load-linked; otherwise this is a
      store-conditional.  Both forms pass their operands in fixed
      real registers: the address goes in x4 and the data (loaded or
      to be stored) in x2. */
   case Ist_LLSC: {
      if (stmt->Ist.LLSC.storedata == NULL) {
         /* LL */
         IRTemp res = stmt->Ist.LLSC.result;
         IRType ty  = typeOfIRTemp(env->type_env, res);
         if (ty == Ity_I64 || ty == Ity_I32
             || ty == Ity_I16 || ty == Ity_I8) {
            Int  szB   = 0;
            HReg r_dst = lookupIRTemp(env, res);
            HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
            /* Access size in bytes. */
            switch (ty) {
               case Ity_I8:  szB = 1; break;
               case Ity_I16: szB = 2; break;
               case Ity_I32: szB = 4; break;
               case Ity_I64: szB = 8; break;
               default:      vassert(0);
            }
            /* x4 = address; do the exclusive load; result = x2. */
            addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
            addInstr(env, ARM64Instr_LdrEX(szB));
            addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2()));
            return;
         }
         goto stmt_fail;
      } else {
         /* SC */
         IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
         if (tyd == Ity_I64 || tyd == Ity_I32
             || tyd == Ity_I16 || tyd == Ity_I8) {
            Int  szB = 0;
            HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
            HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
            /* Access size in bytes. */
            switch (tyd) {
               case Ity_I8:  szB = 1; break;
               case Ity_I16: szB = 2; break;
               case Ity_I32: szB = 4; break;
               case Ity_I64: szB = 8; break;
               default:      vassert(0);
            }
            /* x2 = data, x4 = address; do the exclusive store. */
            addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD));
            addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
            addInstr(env, ARM64Instr_StrEX(szB));
         } else {
            goto stmt_fail;
         }
         /* now x0 is 1 if failed, 0 if success.  Change to IR
            conventions (0 is fail, 1 is success).  Also transfer
            result to r_res. */
         IRTemp    res   = stmt->Ist.LLSC.result;
         IRType    ty    = typeOfIRTemp(env->type_env, res);
         HReg      r_res = lookupIRTemp(env, res);
         ARM64RIL* one   = mb_mkARM64RIL_I(1);
         vassert(ty == Ity_I1);
         vassert(one);
         /* r_res = x0 ^ 1 */
         addInstr(env, ARM64Instr_Logic(r_res, hregARM64_X0(), one,
                                        ARM64lo_XOR));
         /* And be conservative -- mask off all but the lowest bit. */
         addInstr(env, ARM64Instr_Logic(r_res, r_res, one,
                                        ARM64lo_AND));
         return;
      }
      break;
   }

   /* --------- MEM FENCE --------- */
   /* Only Imbe_Fence is handled; any other event fails. */
   case Ist_MBE:
      switch (stmt->Ist.MBE.event) {
         case Imbe_Fence:
            addInstr(env, ARM64Instr_MFence());
            return;
//ZZ          case Imbe_CancelReservation:
//ZZ             addInstr(env, ARMInstr_CLREX());
//ZZ             return;
         default:
            break;
      }
      break;

   /* --------- INSTR MARK --------- */
   /* Doesn't generate any executable code ... */
   case Ist_IMark:
       return;

   /* --------- NO-OP --------- */
   case Ist_NoOp:
       return;

   /* --------- EXIT --------- */
   /* Conditional side exit from the block.  Only Ijk_Boring exits to
      a 64-bit constant destination are handled. */
   case Ist_Exit: {
      if (stmt->Ist.Exit.dst->tag != Ico_U64)
         vpanic("isel_arm: Ist_Exit: dst is not a 64-bit value");

      /* The guard is turned into a condition code, and amPC addresses
         the guest-state slot at offsIP; both are passed on to the
         transfer instruction. */
      ARM64CondCode cc
         = iselCondCode(env, stmt->Ist.Exit.guard);
      ARM64AMode* amPC
         = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP);

      /* Case: boring transfer to known address */
      if (stmt->Ist.Exit.jk == Ijk_Boring
          /*ATC || stmt->Ist.Exit.jk == Ijk_Call */
          /*ATC || stmt->Ist.Exit.jk == Ijk_Ret */ ) {
         if (env->chainingAllowed) {
            /* .. almost always true .. */
            /* Skip the event check at the dst if this is a forwards
               edge. */
            Bool toFastEP
               = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
            addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
                                             amPC, cc, toFastEP));
         } else {
            /* .. very occasionally .. */
            /* We can't use chaining, so ask for an assisted transfer,
               as that's the only alternative that is allowable. */
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring));
         }
         return;
      }

//ZZ       /* Case: assisted transfer to arbitrary address */
//ZZ       switch (stmt->Ist.Exit.jk) {
//ZZ          /* Keep this list in sync with that in iselNext below */
//ZZ          case Ijk_ClientReq:
//ZZ          case Ijk_NoDecode:
//ZZ          case Ijk_NoRedir:
//ZZ          case Ijk_Sys_syscall:
//ZZ          case Ijk_InvalICache:
//ZZ          case Ijk_Yield:
//ZZ          {
//ZZ             HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
//ZZ             addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
//ZZ                                              stmt->Ist.Exit.jk));
//ZZ             return;
//ZZ          }
//ZZ          default:
//ZZ             break;
//ZZ       }

      /* Do we ever expect to see any other kind? */
      goto stmt_fail;
   }

   default: break;
   }
   /* Common failure exit: show the offending statement, then panic. */
  stmt_fail:
   ppIRStmt(stmt);
   vpanic("iselStmt");
}
6878
6879
6880/*---------------------------------------------------------*/
6881/*--- ISEL: Basic block terminators (Nexts)             ---*/
6882/*---------------------------------------------------------*/
6883
6884static void iselNext ( ISelEnv* env,
6885                       IRExpr* next, IRJumpKind jk, Int offsIP )
6886{
6887   if (vex_traceflags & VEX_TRACE_VCODE) {
6888      vex_printf( "\n-- PUT(%d) = ", offsIP);
6889      ppIRExpr( next );
6890      vex_printf( "; exit-");
6891      ppIRJumpKind(jk);
6892      vex_printf( "\n");
6893   }
6894
6895   /* Case: boring transfer to known address */
6896   if (next->tag == Iex_Const) {
6897      IRConst* cdst = next->Iex.Const.con;
6898      vassert(cdst->tag == Ico_U64);
6899      if (jk == Ijk_Boring || jk == Ijk_Call) {
6900         /* Boring transfer to known address */
6901         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
6902         if (env->chainingAllowed) {
6903            /* .. almost always true .. */
6904            /* Skip the event check at the dst if this is a forwards
6905               edge. */
6906            Bool toFastEP
6907               = ((Addr64)cdst->Ico.U64) > env->max_ga;
6908            if (0) vex_printf("%s", toFastEP ? "X" : ".");
6909            addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64,
6910                                             amPC, ARM64cc_AL,
6911                                             toFastEP));
6912         } else {
6913            /* .. very occasionally .. */
6914            /* We can't use chaining, so ask for an assisted transfer,
6915               as that's the only alternative that is allowable. */
6916            HReg r = iselIntExpr_R(env, next);
6917            addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
6918                                               Ijk_Boring));
6919         }
6920         return;
6921      }
6922   }
6923
6924   /* Case: call/return (==boring) transfer to any address */
6925   switch (jk) {
6926      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
6927         HReg        r    = iselIntExpr_R(env, next);
6928         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
6929         if (env->chainingAllowed) {
6930            addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL));
6931         } else {
6932            addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
6933                                               Ijk_Boring));
6934         }
6935         return;
6936      }
6937      default:
6938         break;
6939   }
6940
6941   /* Case: assisted transfer to arbitrary address */
6942   switch (jk) {
6943      /* Keep this list in sync with that for Ist_Exit above */
6944      case Ijk_ClientReq:
6945      case Ijk_NoDecode:
6946      case Ijk_NoRedir:
6947      case Ijk_Sys_syscall:
6948      case Ijk_InvalICache:
6949      case Ijk_FlushDCache:
6950//ZZ       case Ijk_Yield:
6951      {
6952         HReg        r    = iselIntExpr_R(env, next);
6953         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
6954         addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk));
6955         return;
6956      }
6957      default:
6958         break;
6959   }
6960
6961   vex_printf( "\n-- PUT(%d) = ", offsIP);
6962   ppIRExpr( next );
6963   vex_printf( "; exit-");
6964   ppIRJumpKind(jk);
6965   vex_printf( "\n");
6966   vassert(0); // are we expecting any other kind?
6967}
6968
6969
6970/*---------------------------------------------------------*/
6971/*--- Insn selector top-level                           ---*/
6972/*---------------------------------------------------------*/
6973
6974/* Translate an entire SB to arm64 code. */
6975
6976HInstrArray* iselSB_ARM64 ( IRSB* bb,
6977                            VexArch      arch_host,
6978                            VexArchInfo* archinfo_host,
6979                            VexAbiInfo*  vbi/*UNUSED*/,
6980                            Int offs_Host_EvC_Counter,
6981                            Int offs_Host_EvC_FailAddr,
6982                            Bool chainingAllowed,
6983                            Bool addProfInc,
6984                            Addr64 max_ga )
6985{
6986   Int        i, j;
6987   HReg       hreg, hregHI;
6988   ISelEnv*   env;
6989   UInt       hwcaps_host = archinfo_host->hwcaps;
6990   ARM64AMode *amCounter, *amFailAddr;
6991
6992   /* sanity ... */
6993   vassert(arch_host == VexArchARM64);
6994
6995   /* guard against unexpected space regressions */
6996   vassert(sizeof(ARM64Instr) <= 32);
6997
6998   /* Make up an initial environment to use. */
6999   env = LibVEX_Alloc(sizeof(ISelEnv));
7000   env->vreg_ctr = 0;
7001
7002   /* Set up output code array. */
7003   env->code = newHInstrArray();
7004
7005   /* Copy BB's type env. */
7006   env->type_env = bb->tyenv;
7007
7008   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
7009      change as we go along. */
7010   env->n_vregmap = bb->tyenv->types_used;
7011   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
7012   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
7013
7014   /* and finally ... */
7015   env->chainingAllowed = chainingAllowed;
7016   env->hwcaps          = hwcaps_host;
7017   env->previous_rm     = NULL;
7018   env->max_ga          = max_ga;
7019
7020   /* For each IR temporary, allocate a suitably-kinded virtual
7021      register. */
7022   j = 0;
7023   for (i = 0; i < env->n_vregmap; i++) {
7024      hregHI = hreg = INVALID_HREG;
7025      switch (bb->tyenv->types[i]) {
7026         case Ity_I1:
7027         case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
7028            hreg = mkHReg(j++, HRcInt64, True);
7029            break;
7030         case Ity_I128:
7031            hreg   = mkHReg(j++, HRcInt64, True);
7032            hregHI = mkHReg(j++, HRcInt64, True);
7033            break;
7034         case Ity_F32: // we'll use HRcFlt64 regs for F32 too
7035         case Ity_F64:
7036            hreg = mkHReg(j++, HRcFlt64, True);
7037            break;
7038         case Ity_V128:
7039            hreg = mkHReg(j++, HRcVec128, True);
7040            break;
7041         default:
7042            ppIRType(bb->tyenv->types[i]);
7043            vpanic("iselBB(arm64): IRTemp type");
7044      }
7045      env->vregmap[i]   = hreg;
7046      env->vregmapHI[i] = hregHI;
7047   }
7048   env->vreg_ctr = j;
7049
7050   /* The very first instruction must be an event check. */
7051   amCounter  = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
7052   amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
7053   addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr));
7054
7055   /* Possibly a block counter increment (for profiling).  At this
7056      point we don't know the address of the counter, so just pretend
7057      it is zero.  It will have to be patched later, but before this
7058      translation is used, by a call to LibVEX_patchProfCtr. */
7059   if (addProfInc) {
7060      vassert(0);
7061      //addInstr(env, ARM64Instr_ProfInc());
7062   }
7063
7064   /* Ok, finally we can iterate over the statements. */
7065   for (i = 0; i < bb->stmts_used; i++)
7066      iselStmt(env, bb->stmts[i]);
7067
7068   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
7069
7070   /* record the number of vregs we used. */
7071   env->code->n_vregs = env->vreg_ctr;
7072   return env->code;
7073}
7074
7075
7076/*---------------------------------------------------------------*/
7077/*--- end                                   host_arm64_isel.c ---*/
7078/*---------------------------------------------------------------*/
7079