1
2/*---------------------------------------------------------------*/
3/*--- begin                                 host_arm64_isel.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2013-2015 OpenWorks
11      info@open-works.net
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26   02110-1301, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31#include "libvex_basictypes.h"
32#include "libvex_ir.h"
33#include "libvex.h"
34#include "ir_match.h"
35
36#include "main_util.h"
37#include "main_globals.h"
38#include "host_generic_regs.h"
39#include "host_generic_simd64.h"  // for 32-bit SIMD helpers
40#include "host_arm64_defs.h"
41
42
43/*---------------------------------------------------------*/
44/*--- ISelEnv                                           ---*/
45/*---------------------------------------------------------*/
46
47/* This carries around:
48
49   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
50     might encounter.  This is computed before insn selection starts,
51     and does not change.
52
53   - A mapping from IRTemp to HReg.  This tells the insn selector
54     which virtual register is associated with each IRTemp temporary.
55     This is computed before insn selection starts, and does not
56     change.  We expect this mapping to map precisely the same set of
57     IRTemps as the type mapping does.
58
59     |vregmap|   holds the primary register for the IRTemp.
60     |vregmapHI| is only used for 128-bit integer-typed
61                 IRTemps.  It holds the identity of a second
62                 64-bit virtual HReg, which holds the high half
63                 of the value.
64
65   - The code array, that is, the insns selected so far.
66
67   - A counter, for generating new virtual registers.
68
69   - The host hardware capabilities word.  This is set at the start
70     and does not change.
71
72   - A Bool for indicating whether we may generate chain-me
73     instructions for control flow transfers, or whether we must use
74     XAssisted.
75
76   - The maximum guest address of any guest insn in this block.
77     Actually, the address of the highest-addressed byte from any insn
78     in this block.  Is set at the start and does not change.  This is
79     used for detecting jumps which are definitely forward-edges from
80     this block, and therefore can be made (chained) to the fast entry
81     point of the destination, thereby avoiding the destination's
82     event check.
83
84    - An IRExpr*, which may be NULL, holding the IR expression (an
85      IRRoundingMode-encoded value) to which the FPU's rounding mode
86      was most recently set.  Setting to NULL is always safe.  Used to
87      avoid redundant settings of the FPU's rounding mode, as
88      described in set_FPCR_rounding_mode below.
89
90   Note, this is all (well, mostly) host-independent.
91*/
92
93typedef
94   struct {
95      /* Constant -- are set at the start and do not change. */
96      IRTypeEnv*   type_env;
97
98      HReg*        vregmap;
99      HReg*        vregmapHI;
100      Int          n_vregmap;
101
102      UInt         hwcaps;
103
104      Bool         chainingAllowed;
105      Addr64       max_ga;
106
107      /* These are modified as we go along. */
108      HInstrArray* code;
109      Int          vreg_ctr;
110
111      IRExpr*      previous_rm;
112   }
113   ISelEnv;
114
115static HReg lookupIRTemp ( ISelEnv* env, IRTemp tmp )
116{
117   vassert(tmp >= 0);
118   vassert(tmp < env->n_vregmap);
119   return env->vregmap[tmp];
120}
121
122static void lookupIRTempPair ( HReg* vrHI, HReg* vrLO,
123                               ISelEnv* env, IRTemp tmp )
124{
125   vassert(tmp >= 0);
126   vassert(tmp < env->n_vregmap);
127   vassert(! hregIsInvalid(env->vregmapHI[tmp]));
128   *vrLO = env->vregmap[tmp];
129   *vrHI = env->vregmapHI[tmp];
130}
131
132static void addInstr ( ISelEnv* env, ARM64Instr* instr )
133{
134   addHInstr(env->code, instr);
135   if (vex_traceflags & VEX_TRACE_VCODE) {
136      ppARM64Instr(instr);
137      vex_printf("\n");
138   }
139}
140
141static HReg newVRegI ( ISelEnv* env )
142{
143   HReg reg = mkHReg(True/*virtual reg*/, HRcInt64, 0, env->vreg_ctr);
144   env->vreg_ctr++;
145   return reg;
146}
147
148static HReg newVRegD ( ISelEnv* env )
149{
150   HReg reg = mkHReg(True/*virtual reg*/, HRcFlt64, 0, env->vreg_ctr);
151   env->vreg_ctr++;
152   return reg;
153}
154
155static HReg newVRegV ( ISelEnv* env )
156{
157   HReg reg = mkHReg(True/*virtual reg*/, HRcVec128, 0, env->vreg_ctr);
158   env->vreg_ctr++;
159   return reg;
160}
161
162
163/*---------------------------------------------------------*/
164/*--- ISEL: Forward declarations                        ---*/
165/*---------------------------------------------------------*/
166
167/* These are organised as iselXXX and iselXXX_wrk pairs.  The
168   iselXXX_wrk do the real work, but are not to be called directly.
169   For each XXX, iselXXX calls its iselXXX_wrk counterpart, then
170   checks that all returned registers are virtual.  You should not
171   call the _wrk version directly.
172
173   Because some forms of ARM64 memory amodes are implicitly scaled by
174   the access size, iselIntExpr_AMode takes an IRType which tells it
175   the type of the access for which the amode is to be used.  This
176   type needs to be correct, else you'll get incorrect code.
177*/
178static ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env,
179                                           IRExpr* e, IRType dty );
180static ARM64AMode* iselIntExpr_AMode     ( ISelEnv* env,
181                                           IRExpr* e, IRType dty );
182
183static ARM64RIA*   iselIntExpr_RIA_wrk   ( ISelEnv* env, IRExpr* e );
184static ARM64RIA*   iselIntExpr_RIA       ( ISelEnv* env, IRExpr* e );
185
186static ARM64RIL*   iselIntExpr_RIL_wrk   ( ISelEnv* env, IRExpr* e );
187static ARM64RIL*   iselIntExpr_RIL       ( ISelEnv* env, IRExpr* e );
188
189static ARM64RI6*   iselIntExpr_RI6_wrk   ( ISelEnv* env, IRExpr* e );
190static ARM64RI6*   iselIntExpr_RI6       ( ISelEnv* env, IRExpr* e );
191
192static ARM64CondCode iselCondCode_wrk    ( ISelEnv* env, IRExpr* e );
193static ARM64CondCode iselCondCode        ( ISelEnv* env, IRExpr* e );
194
195static HReg        iselIntExpr_R_wrk     ( ISelEnv* env, IRExpr* e );
196static HReg        iselIntExpr_R         ( ISelEnv* env, IRExpr* e );
197
198static void        iselInt128Expr_wrk    ( /*OUT*/HReg* rHi, HReg* rLo,
199                                           ISelEnv* env, IRExpr* e );
200static void        iselInt128Expr        ( /*OUT*/HReg* rHi, HReg* rLo,
201                                           ISelEnv* env, IRExpr* e );
202
203static HReg        iselDblExpr_wrk        ( ISelEnv* env, IRExpr* e );
204static HReg        iselDblExpr            ( ISelEnv* env, IRExpr* e );
205
206static HReg        iselFltExpr_wrk        ( ISelEnv* env, IRExpr* e );
207static HReg        iselFltExpr            ( ISelEnv* env, IRExpr* e );
208
209static HReg        iselF16Expr_wrk        ( ISelEnv* env, IRExpr* e );
210static HReg        iselF16Expr            ( ISelEnv* env, IRExpr* e );
211
212static HReg        iselV128Expr_wrk       ( ISelEnv* env, IRExpr* e );
213static HReg        iselV128Expr           ( ISelEnv* env, IRExpr* e );
214
215static void        iselV256Expr_wrk       ( /*OUT*/HReg* rHi, HReg* rLo,
216                                            ISelEnv* env, IRExpr* e );
217static void        iselV256Expr           ( /*OUT*/HReg* rHi, HReg* rLo,
218                                            ISelEnv* env, IRExpr* e );
219
220static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 );
221
222
223/*---------------------------------------------------------*/
224/*--- ISEL: Misc helpers                                ---*/
225/*---------------------------------------------------------*/
226
227/* Generate an amode suitable for a 64-bit sized access relative to
228   the baseblock register (X21).  This generates an RI12 amode, which
229   means its scaled by the access size, which is why the access size
230   -- 64 bit -- is stated explicitly here.  Consequently |off| needs
231   to be divisible by 8. */
232static ARM64AMode* mk_baseblock_64bit_access_amode ( UInt off )
233{
234   vassert(off < (8 << 12)); /* otherwise it's unrepresentable */
235   vassert((off & 7) == 0);  /* ditto */
236   return ARM64AMode_RI12(hregARM64_X21(), off >> 3, 8/*scale*/);
237}
238
239/* Ditto, for 32 bit accesses. */
240static ARM64AMode* mk_baseblock_32bit_access_amode ( UInt off )
241{
242   vassert(off < (4 << 12)); /* otherwise it's unrepresentable */
243   vassert((off & 3) == 0);  /* ditto */
244   return ARM64AMode_RI12(hregARM64_X21(), off >> 2, 4/*scale*/);
245}
246
247/* Ditto, for 16 bit accesses. */
248static ARM64AMode* mk_baseblock_16bit_access_amode ( UInt off )
249{
250   vassert(off < (2 << 12)); /* otherwise it's unrepresentable */
251   vassert((off & 1) == 0);  /* ditto */
252   return ARM64AMode_RI12(hregARM64_X21(), off >> 1, 2/*scale*/);
253}
254
255/* Ditto, for 8 bit accesses. */
256static ARM64AMode* mk_baseblock_8bit_access_amode ( UInt off )
257{
258   vassert(off < (1 << 12)); /* otherwise it's unrepresentable */
259   return ARM64AMode_RI12(hregARM64_X21(), off >> 0, 1/*scale*/);
260}
261
262static HReg mk_baseblock_128bit_access_addr ( ISelEnv* env, UInt off )
263{
264   vassert(off < (1<<12));
265   HReg r = newVRegI(env);
266   addInstr(env, ARM64Instr_Arith(r, hregARM64_X21(),
267                                     ARM64RIA_I12(off,0), True/*isAdd*/));
268   return r;
269}
270
271static HReg get_baseblock_register ( void )
272{
273   return hregARM64_X21();
274}
275
276/* Generate code to zero extend a 32 bit value in 'src' to 64 bits, in
277   a new register, and return the new register. */
278static HReg widen_z_32_to_64 ( ISelEnv* env, HReg src )
279{
280   HReg      dst  = newVRegI(env);
281   ARM64RIL* mask = ARM64RIL_I13(1, 0, 31); /* encodes 0xFFFFFFFF */
282   addInstr(env, ARM64Instr_Logic(dst, src, mask, ARM64lo_AND));
283   return dst;
284}
285
286/* Generate code to sign extend a 16 bit value in 'src' to 64 bits, in
287   a new register, and return the new register. */
288static HReg widen_s_16_to_64 ( ISelEnv* env, HReg src )
289{
290   HReg      dst = newVRegI(env);
291   ARM64RI6* n48 = ARM64RI6_I6(48);
292   addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
293   addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SAR));
294   return dst;
295}
296
297/* Generate code to zero extend a 16 bit value in 'src' to 64 bits, in
298   a new register, and return the new register. */
299static HReg widen_z_16_to_64 ( ISelEnv* env, HReg src )
300{
301   HReg      dst = newVRegI(env);
302   ARM64RI6* n48 = ARM64RI6_I6(48);
303   addInstr(env, ARM64Instr_Shift(dst, src, n48, ARM64sh_SHL));
304   addInstr(env, ARM64Instr_Shift(dst, dst, n48, ARM64sh_SHR));
305   return dst;
306}
307
308/* Generate code to sign extend a 32 bit value in 'src' to 64 bits, in
309   a new register, and return the new register. */
310static HReg widen_s_32_to_64 ( ISelEnv* env, HReg src )
311{
312   HReg      dst = newVRegI(env);
313   ARM64RI6* n32 = ARM64RI6_I6(32);
314   addInstr(env, ARM64Instr_Shift(dst, src, n32, ARM64sh_SHL));
315   addInstr(env, ARM64Instr_Shift(dst, dst, n32, ARM64sh_SAR));
316   return dst;
317}
318
319/* Generate code to sign extend a 8 bit value in 'src' to 64 bits, in
320   a new register, and return the new register. */
321static HReg widen_s_8_to_64 ( ISelEnv* env, HReg src )
322{
323   HReg      dst = newVRegI(env);
324   ARM64RI6* n56 = ARM64RI6_I6(56);
325   addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
326   addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SAR));
327   return dst;
328}
329
330static HReg widen_z_8_to_64 ( ISelEnv* env, HReg src )
331{
332   HReg      dst = newVRegI(env);
333   ARM64RI6* n56 = ARM64RI6_I6(56);
334   addInstr(env, ARM64Instr_Shift(dst, src, n56, ARM64sh_SHL));
335   addInstr(env, ARM64Instr_Shift(dst, dst, n56, ARM64sh_SHR));
336   return dst;
337}
338
339/* Is this IRExpr_Const(IRConst_U64(0)) ? */
340static Bool isZeroU64 ( IRExpr* e ) {
341   if (e->tag != Iex_Const) return False;
342   IRConst* con = e->Iex.Const.con;
343   vassert(con->tag == Ico_U64);
344   return con->Ico.U64 == 0;
345}
346
347
348/*---------------------------------------------------------*/
349/*--- ISEL: FP rounding mode helpers                    ---*/
350/*---------------------------------------------------------*/
351
352/* Set the FP rounding mode: 'mode' is an I32-typed expression
353   denoting a value in the range 0 .. 3, indicating a round mode
354   encoded as per type IRRoundingMode -- the first four values only
355   (Irrm_NEAREST, Irrm_NegINF, Irrm_PosINF, Irrm_ZERO).  Set the ARM64
356   FSCR to have the same rounding.
357
358   For speed & simplicity, we're setting the *entire* FPCR here.
359
360   Setting the rounding mode is expensive.  So this function tries to
361   avoid repeatedly setting the rounding mode to the same thing by
362   first comparing 'mode' to the 'mode' tree supplied in the previous
363   call to this function, if any.  (The previous value is stored in
364   env->previous_rm.)  If 'mode' is a single IR temporary 't' and
365   env->previous_rm is also just 't', then the setting is skipped.
366
367   This is safe because of the SSA property of IR: an IR temporary can
368   only be defined once and so will have the same value regardless of
369   where it appears in the block.  Cool stuff, SSA.
370
371   A safety condition: all attempts to set the RM must be aware of
372   this mechanism - by being routed through the functions here.
373
374   Of course this only helps if blocks where the RM is set more than
375   once and it is set to the same value each time, *and* that value is
376   held in the same IR temporary each time.  In order to assure the
377   latter as much as possible, the IR optimiser takes care to do CSE
378   on any block with any sign of floating point activity.
379*/
380static
381void set_FPCR_rounding_mode ( ISelEnv* env, IRExpr* mode )
382{
383   vassert(typeOfIRExpr(env->type_env,mode) == Ity_I32);
384
385   /* Do we need to do anything? */
386   if (env->previous_rm
387       && env->previous_rm->tag == Iex_RdTmp
388       && mode->tag == Iex_RdTmp
389       && env->previous_rm->Iex.RdTmp.tmp == mode->Iex.RdTmp.tmp) {
390      /* no - setting it to what it was before.  */
391      vassert(typeOfIRExpr(env->type_env, env->previous_rm) == Ity_I32);
392      return;
393   }
394
395   /* No luck - we better set it, and remember what we set it to. */
396   env->previous_rm = mode;
397
398   /* Only supporting the rounding-mode bits - the rest of FPCR is set
399      to zero - so we can set the whole register at once (faster). */
400
401   /* This isn't simple, because 'mode' carries an IR rounding
402      encoding, and we need to translate that to an ARM64 FP one:
403      The IR encoding:
404         00  to nearest (the default)
405         10  to +infinity
406         01  to -infinity
407         11  to zero
408      The ARM64 FP encoding:
409         00  to nearest
410         01  to +infinity
411         10  to -infinity
412         11  to zero
413      Easy enough to do; just swap the two bits.
414   */
415   HReg irrm = iselIntExpr_R(env, mode);
416   HReg tL   = newVRegI(env);
417   HReg tR   = newVRegI(env);
418   HReg t3   = newVRegI(env);
419   /* tL = irrm << 1;
420      tR = irrm >> 1;  if we're lucky, these will issue together
421      tL &= 2;
422      tR &= 1;         ditto
423      t3 = tL | tR;
424      t3 <<= 22;
425      fmxr fpscr, t3
426   */
427   ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
428   ARM64RIL* ril_two = mb_mkARM64RIL_I(2);
429   vassert(ril_one && ril_two);
430   addInstr(env, ARM64Instr_Shift(tL, irrm, ARM64RI6_I6(1), ARM64sh_SHL));
431   addInstr(env, ARM64Instr_Shift(tR, irrm, ARM64RI6_I6(1), ARM64sh_SHR));
432   addInstr(env, ARM64Instr_Logic(tL, tL, ril_two, ARM64lo_AND));
433   addInstr(env, ARM64Instr_Logic(tR, tR, ril_one, ARM64lo_AND));
434   addInstr(env, ARM64Instr_Logic(t3, tL, ARM64RIL_R(tR), ARM64lo_OR));
435   addInstr(env, ARM64Instr_Shift(t3, t3, ARM64RI6_I6(22), ARM64sh_SHL));
436   addInstr(env, ARM64Instr_FPCR(True/*toFPCR*/, t3));
437}
438
439
440/*---------------------------------------------------------*/
441/*--- ISEL: Function call helpers                       ---*/
442/*---------------------------------------------------------*/
443
444/* Used only in doHelperCall.  See big comment in doHelperCall re
445   handling of register-parameter args.  This function figures out
446   whether evaluation of an expression might require use of a fixed
447   register.  If in doubt return True (safe but suboptimal).
448*/
449static
450Bool mightRequireFixedRegs ( IRExpr* e )
451{
452   if (UNLIKELY(is_IRExpr_VECRET_or_BBPTR(e))) {
453      // These are always "safe" -- either a copy of SP in some
454      // arbitrary vreg, or a copy of x21, respectively.
455      return False;
456   }
457   /* Else it's a "normal" expression. */
458   switch (e->tag) {
459      case Iex_RdTmp: case Iex_Const: case Iex_Get:
460         return False;
461      default:
462         return True;
463   }
464}
465
466
467/* Do a complete function call.  |guard| is a Ity_Bit expression
468   indicating whether or not the call happens.  If guard==NULL, the
469   call is unconditional.  |retloc| is set to indicate where the
470   return value is after the call.  The caller (of this fn) must
471   generate code to add |stackAdjustAfterCall| to the stack pointer
472   after the call is done.  Returns True iff it managed to handle this
473   combination of arg/return types, else returns False. */
474
475static
476Bool doHelperCall ( /*OUT*/UInt*   stackAdjustAfterCall,
477                    /*OUT*/RetLoc* retloc,
478                    ISelEnv* env,
479                    IRExpr* guard,
480                    IRCallee* cee, IRType retTy, IRExpr** args )
481{
482   ARM64CondCode cc;
483   HReg          argregs[ARM64_N_ARGREGS];
484   HReg          tmpregs[ARM64_N_ARGREGS];
485   Bool          go_fast;
486   Int           n_args, i, nextArgReg;
487   Addr64        target;
488
489   vassert(ARM64_N_ARGREGS == 8);
490
491   /* Set default returns.  We'll update them later if needed. */
492   *stackAdjustAfterCall = 0;
493   *retloc               = mk_RetLoc_INVALID();
494
495   /* These are used for cross-checking that IR-level constraints on
496      the use of IRExpr_VECRET() and IRExpr_BBPTR() are observed. */
497   UInt nVECRETs = 0;
498   UInt nBBPTRs  = 0;
499
500   /* Marshal args for a call and do the call.
501
502      This function only deals with a tiny set of possibilities, which
503      cover all helpers in practice.  The restrictions are that only
504      arguments in registers are supported, hence only
505      ARM64_N_REGPARMS x 64 integer bits in total can be passed.  In
506      fact the only supported arg type is I64.
507
508      The return type can be I{64,32} or V128.  In the V128 case, it
509      is expected that |args| will contain the special node
510      IRExpr_VECRET(), in which case this routine generates code to
511      allocate space on the stack for the vector return value.  Since
512      we are not passing any scalars on the stack, it is enough to
513      preallocate the return space before marshalling any arguments,
514      in this case.
515
516      |args| may also contain IRExpr_BBPTR(), in which case the
517      value in x21 is passed as the corresponding argument.
518
519      Generating code which is both efficient and correct when
520      parameters are to be passed in registers is difficult, for the
521      reasons elaborated in detail in comments attached to
522      doHelperCall() in priv/host-x86/isel.c.  Here, we use a variant
523      of the method described in those comments.
524
525      The problem is split into two cases: the fast scheme and the
526      slow scheme.  In the fast scheme, arguments are computed
527      directly into the target (real) registers.  This is only safe
528      when we can be sure that computation of each argument will not
529      trash any real registers set by computation of any other
530      argument.
531
532      In the slow scheme, all args are first computed into vregs, and
533      once they are all done, they are moved to the relevant real
534      regs.  This always gives correct code, but it also gives a bunch
535      of vreg-to-rreg moves which are usually redundant but are hard
536      for the register allocator to get rid of.
537
538      To decide which scheme to use, all argument expressions are
539      first examined.  If they are all so simple that it is clear they
540      will be evaluated without use of any fixed registers, use the
541      fast scheme, else use the slow scheme.  Note also that only
542      unconditional calls may use the fast scheme, since having to
543      compute a condition expression could itself trash real
544      registers.
545
546      Note this requires being able to examine an expression and
547      determine whether or not evaluation of it might use a fixed
548      register.  That requires knowledge of how the rest of this insn
549      selector works.  Currently just the following 3 are regarded as
550      safe -- hopefully they cover the majority of arguments in
551      practice: IRExpr_Tmp IRExpr_Const IRExpr_Get.
552   */
553
554   /* Note that the cee->regparms field is meaningless on ARM64 hosts
555      (since there is only one calling convention) and so we always
556      ignore it. */
557
558   n_args = 0;
559   for (i = 0; args[i]; i++) {
560      IRExpr* arg = args[i];
561      if (UNLIKELY(arg->tag == Iex_VECRET)) {
562         nVECRETs++;
563      } else if (UNLIKELY(arg->tag == Iex_BBPTR)) {
564         nBBPTRs++;
565      }
566      n_args++;
567   }
568
569   /* If this fails, the IR is ill-formed */
570   vassert(nBBPTRs == 0 || nBBPTRs == 1);
571
572   /* If we have a VECRET, allocate space on the stack for the return
573      value, and record the stack pointer after that. */
574   HReg r_vecRetAddr = INVALID_HREG;
575   if (nVECRETs == 1) {
576      vassert(retTy == Ity_V128 || retTy == Ity_V256);
577      vassert(retTy != Ity_V256); // we don't handle that yet (if ever)
578      r_vecRetAddr = newVRegI(env);
579      addInstr(env, ARM64Instr_AddToSP(-16));
580      addInstr(env, ARM64Instr_FromSP(r_vecRetAddr));
581   } else {
582      // If either of these fail, the IR is ill-formed
583      vassert(retTy != Ity_V128 && retTy != Ity_V256);
584      vassert(nVECRETs == 0);
585   }
586
587   argregs[0] = hregARM64_X0();
588   argregs[1] = hregARM64_X1();
589   argregs[2] = hregARM64_X2();
590   argregs[3] = hregARM64_X3();
591   argregs[4] = hregARM64_X4();
592   argregs[5] = hregARM64_X5();
593   argregs[6] = hregARM64_X6();
594   argregs[7] = hregARM64_X7();
595
596   tmpregs[0] = tmpregs[1] = tmpregs[2] = tmpregs[3] = INVALID_HREG;
597   tmpregs[4] = tmpregs[5] = tmpregs[6] = tmpregs[7] = INVALID_HREG;
598
599   /* First decide which scheme (slow or fast) is to be used.  First
600      assume the fast scheme, and select slow if any contraindications
601      (wow) appear. */
602
603   go_fast = True;
604
605   if (guard) {
606      if (guard->tag == Iex_Const
607          && guard->Iex.Const.con->tag == Ico_U1
608          && guard->Iex.Const.con->Ico.U1 == True) {
609         /* unconditional */
610      } else {
611         /* Not manifestly unconditional -- be conservative. */
612         go_fast = False;
613      }
614   }
615
616   if (go_fast) {
617      for (i = 0; i < n_args; i++) {
618         if (mightRequireFixedRegs(args[i])) {
619            go_fast = False;
620            break;
621         }
622      }
623   }
624
625   if (go_fast) {
626      if (retTy == Ity_V128 || retTy == Ity_V256)
627         go_fast = False;
628   }
629
630   /* At this point the scheme to use has been established.  Generate
631      code to get the arg values into the argument rregs.  If we run
632      out of arg regs, give up. */
633
634   if (go_fast) {
635
636      /* FAST SCHEME */
637      nextArgReg = 0;
638
639      for (i = 0; i < n_args; i++) {
640         IRExpr* arg = args[i];
641
642         IRType  aTy = Ity_INVALID;
643         if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
644            aTy = typeOfIRExpr(env->type_env, args[i]);
645
646         if (nextArgReg >= ARM64_N_ARGREGS)
647            return False; /* out of argregs */
648
649         if (aTy == Ity_I64) {
650            addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
651                                           iselIntExpr_R(env, args[i]) ));
652            nextArgReg++;
653         }
654         else if (arg->tag == Iex_BBPTR) {
655            vassert(0); //ATC
656            addInstr(env, ARM64Instr_MovI( argregs[nextArgReg],
657                                           hregARM64_X21() ));
658            nextArgReg++;
659         }
660         else if (arg->tag == Iex_VECRET) {
661            // because of the go_fast logic above, we can't get here,
662            // since vector return values makes us use the slow path
663            // instead.
664            vassert(0);
665         }
666         else
667            return False; /* unhandled arg type */
668      }
669
670      /* Fast scheme only applies for unconditional calls.  Hence: */
671      cc = ARM64cc_AL;
672
673   } else {
674
675      /* SLOW SCHEME; move via temporaries */
676      nextArgReg = 0;
677
678      for (i = 0; i < n_args; i++) {
679         IRExpr* arg = args[i];
680
681         IRType  aTy = Ity_INVALID;
682         if (LIKELY(!is_IRExpr_VECRET_or_BBPTR(arg)))
683            aTy = typeOfIRExpr(env->type_env, args[i]);
684
685         if (nextArgReg >= ARM64_N_ARGREGS)
686            return False; /* out of argregs */
687
688         if (aTy == Ity_I64) {
689            tmpregs[nextArgReg] = iselIntExpr_R(env, args[i]);
690            nextArgReg++;
691         }
692         else if (arg->tag == Iex_BBPTR) {
693            vassert(0); //ATC
694            tmpregs[nextArgReg] = hregARM64_X21();
695            nextArgReg++;
696         }
697         else if (arg->tag == Iex_VECRET) {
698            vassert(!hregIsInvalid(r_vecRetAddr));
699            tmpregs[nextArgReg] = r_vecRetAddr;
700            nextArgReg++;
701         }
702         else
703            return False; /* unhandled arg type */
704      }
705
706      /* Now we can compute the condition.  We can't do it earlier
707         because the argument computations could trash the condition
708         codes.  Be a bit clever to handle the common case where the
709         guard is 1:Bit. */
710      cc = ARM64cc_AL;
711      if (guard) {
712         if (guard->tag == Iex_Const
713             && guard->Iex.Const.con->tag == Ico_U1
714             && guard->Iex.Const.con->Ico.U1 == True) {
715            /* unconditional -- do nothing */
716         } else {
717            cc = iselCondCode( env, guard );
718         }
719      }
720
721      /* Move the args to their final destinations. */
722      for (i = 0; i < nextArgReg; i++) {
723         vassert(!(hregIsInvalid(tmpregs[i])));
724         /* None of these insns, including any spill code that might
725            be generated, may alter the condition codes. */
726         addInstr( env, ARM64Instr_MovI( argregs[i], tmpregs[i] ) );
727      }
728
729   }
730
731   /* Should be assured by checks above */
732   vassert(nextArgReg <= ARM64_N_ARGREGS);
733
734   /* Do final checks, set the return values, and generate the call
735      instruction proper. */
736   vassert(nBBPTRs == 0 || nBBPTRs == 1);
737   vassert(nVECRETs == (retTy == Ity_V128 || retTy == Ity_V256) ? 1 : 0);
738   vassert(*stackAdjustAfterCall == 0);
739   vassert(is_RetLoc_INVALID(*retloc));
740   switch (retTy) {
741      case Ity_INVALID:
742         /* Function doesn't return a value. */
743         *retloc = mk_RetLoc_simple(RLPri_None);
744         break;
745      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
746         *retloc = mk_RetLoc_simple(RLPri_Int);
747         break;
748      case Ity_V128:
749         *retloc = mk_RetLoc_spRel(RLPri_V128SpRel, 0);
750         *stackAdjustAfterCall = 16;
751         break;
752      case Ity_V256:
753         vassert(0); // ATC
754         *retloc = mk_RetLoc_spRel(RLPri_V256SpRel, 0);
755         *stackAdjustAfterCall = 32;
756         break;
757      default:
758         /* IR can denote other possible return types, but we don't
759            handle those here. */
760         vassert(0);
761   }
762
763   /* Finally, generate the call itself.  This needs the *retloc value
764      set in the switch above, which is why it's at the end. */
765
766   /* nextArgReg doles out argument registers.  Since these are
767      assigned in the order x0 .. x7, its numeric value at this point,
768      which must be between 0 and 8 inclusive, is going to be equal to
769      the number of arg regs in use for the call.  Hence bake that
770      number into the call (we'll need to know it when doing register
771      allocation, to know what regs the call reads.) */
772
773   target = (Addr)cee->addr;
774   addInstr(env, ARM64Instr_Call( cc, target, nextArgReg, *retloc ));
775
776   return True; /* success */
777}
778
779
780/*---------------------------------------------------------*/
781/*--- ISEL: Integer expressions (64/32 bit)             ---*/
782/*---------------------------------------------------------*/
783
784/* Select insns for an integer-typed expression, and add them to the
785   code list.  Return a reg holding the result.  This reg will be a
786   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
787   want to modify it, ask for a new vreg, copy it in there, and modify
788   the copy.  The register allocator will do its best to map both
789   vregs to the same real register, so the copies will often disappear
790   later in the game.
791
792   This should handle expressions of 64- and 32-bit type.  All results
793   are returned in a 64-bit register.  For 32-bit expressions, the
794   upper 32 bits are arbitrary, so you should mask or sign extend
795   partial values if necessary.
796*/
797
798/* --------------------- AMode --------------------- */
799
800/* Return an AMode which computes the value of the specified
801   expression, possibly also adding insns to the code list as a
802   result.  The expression may only be a 64-bit one.
803*/
804
805static Bool isValidScale ( UChar scale )
806{
807   switch (scale) {
808      case 1: case 2: case 4: case 8: /* case 16: ??*/ return True;
809      default: return False;
810   }
811}
812
813static Bool sane_AMode ( ARM64AMode* am )
814{
815   switch (am->tag) {
816      case ARM64am_RI9:
817         return
818            toBool( hregClass(am->ARM64am.RI9.reg) == HRcInt64
819                    && (hregIsVirtual(am->ARM64am.RI9.reg)
820                        /* || sameHReg(am->ARM64am.RI9.reg,
821                                       hregARM64_X21()) */ )
822                    && am->ARM64am.RI9.simm9 >= -256
823                    && am->ARM64am.RI9.simm9 <= 255 );
824      case ARM64am_RI12:
825         return
826            toBool( hregClass(am->ARM64am.RI12.reg) == HRcInt64
827                    && (hregIsVirtual(am->ARM64am.RI12.reg)
828                        /* || sameHReg(am->ARM64am.RI12.reg,
829                                       hregARM64_X21()) */ )
830                    && am->ARM64am.RI12.uimm12 < 4096
831                    && isValidScale(am->ARM64am.RI12.szB) );
832      case ARM64am_RR:
833         return
834            toBool( hregClass(am->ARM64am.RR.base) == HRcInt64
835                    && hregIsVirtual(am->ARM64am.RR.base)
836                    && hregClass(am->ARM64am.RR.index) == HRcInt64
837                    && hregIsVirtual(am->ARM64am.RR.index) );
838      default:
839         vpanic("sane_AMode: unknown ARM64 AMode1 tag");
840   }
841}
842
843static
844ARM64AMode* iselIntExpr_AMode ( ISelEnv* env, IRExpr* e, IRType dty )
845{
846   ARM64AMode* am = iselIntExpr_AMode_wrk(env, e, dty);
847   vassert(sane_AMode(am));
848   return am;
849}
850
851static
852ARM64AMode* iselIntExpr_AMode_wrk ( ISelEnv* env, IRExpr* e, IRType dty )
853{
854   IRType ty = typeOfIRExpr(env->type_env,e);
855   vassert(ty == Ity_I64);
856
857   ULong szBbits = 0;
858   switch (dty) {
859      case Ity_I64: szBbits = 3; break;
860      case Ity_I32: szBbits = 2; break;
861      case Ity_I16: szBbits = 1; break;
862      case Ity_I8:  szBbits = 0; break;
863      default: vassert(0);
864   }
865
866   /* {Add64,Sub64}(expr,simm9).  We don't care about |dty| here since
867      we're going to create an amode suitable for LDU* or STU*
868      instructions, which use unscaled immediate offsets.  */
869   if (e->tag == Iex_Binop
870       && (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Sub64)
871       && e->Iex.Binop.arg2->tag == Iex_Const
872       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
873      Long simm = (Long)e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
874      if (simm >= -255 && simm <= 255) {
875         /* Although the gating condition might seem to be
876               simm >= -256 && simm <= 255
877            we will need to negate simm in the case where the op is Sub64.
878            Hence limit the lower value to -255 in order that its negation
879            is representable. */
880         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
881         if (e->Iex.Binop.op == Iop_Sub64) simm = -simm;
882         return ARM64AMode_RI9(reg, (Int)simm);
883      }
884   }
885
886   /* Add64(expr, uimm12 * transfer-size) */
887   if (e->tag == Iex_Binop
888       && e->Iex.Binop.op == Iop_Add64
889       && e->Iex.Binop.arg2->tag == Iex_Const
890       && e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U64) {
891      ULong uimm = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
892      ULong szB  = 1 << szBbits;
893      if (0 == (uimm & (szB-1)) /* "uimm is szB-aligned" */
894          && (uimm >> szBbits) < 4096) {
895         HReg reg = iselIntExpr_R(env, e->Iex.Binop.arg1);
896         return ARM64AMode_RI12(reg, (UInt)(uimm >> szBbits), (UChar)szB);
897      }
898   }
899
900   /* Add64(expr1, expr2) */
901   if (e->tag == Iex_Binop
902       && e->Iex.Binop.op == Iop_Add64) {
903      HReg reg1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
904      HReg reg2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
905      return ARM64AMode_RR(reg1, reg2);
906   }
907
908   /* Doesn't match anything in particular.  Generate it into
909      a register and use that. */
910   HReg reg = iselIntExpr_R(env, e);
911   return ARM64AMode_RI9(reg, 0);
912}
913
914
915/* --------------------- RIA --------------------- */
916
917/* Select instructions to generate 'e' into a RIA. */
918
919static ARM64RIA* iselIntExpr_RIA ( ISelEnv* env, IRExpr* e )
920{
921   ARM64RIA* ri = iselIntExpr_RIA_wrk(env, e);
922   /* sanity checks ... */
923   switch (ri->tag) {
924      case ARM64riA_I12:
925         vassert(ri->ARM64riA.I12.imm12 < 4096);
926         vassert(ri->ARM64riA.I12.shift == 0 || ri->ARM64riA.I12.shift == 12);
927         return ri;
928      case ARM64riA_R:
929         vassert(hregClass(ri->ARM64riA.R.reg) == HRcInt64);
930         vassert(hregIsVirtual(ri->ARM64riA.R.reg));
931         return ri;
932      default:
933         vpanic("iselIntExpr_RIA: unknown arm RIA tag");
934   }
935}
936
937/* DO NOT CALL THIS DIRECTLY ! */
938static ARM64RIA* iselIntExpr_RIA_wrk ( ISelEnv* env, IRExpr* e )
939{
940   IRType ty = typeOfIRExpr(env->type_env,e);
941   vassert(ty == Ity_I64 || ty == Ity_I32);
942
943   /* special case: immediate */
944   if (e->tag == Iex_Const) {
945      ULong u = 0xF000000ULL; /* invalid */
946      switch (e->Iex.Const.con->tag) {
947         case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
948         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
949         default: vpanic("iselIntExpr_RIA.Iex_Const(arm64)");
950      }
951      if (0 == (u & ~(0xFFFULL << 0)))
952         return ARM64RIA_I12((UShort)((u >> 0) & 0xFFFULL), 0);
953      if (0 == (u & ~(0xFFFULL << 12)))
954         return ARM64RIA_I12((UShort)((u >> 12) & 0xFFFULL), 12);
955      /* else fail, fall through to default case */
956   }
957
958   /* default case: calculate into a register and return that */
959   {
960      HReg r = iselIntExpr_R ( env, e );
961      return ARM64RIA_R(r);
962   }
963}
964
965
966/* --------------------- RIL --------------------- */
967
968/* Select instructions to generate 'e' into a RIL.  At this point we
969   have to deal with the strange bitfield-immediate encoding for logic
970   instructions. */
971
972
973// The following four functions
974//    CountLeadingZeros CountTrailingZeros CountSetBits isImmLogical
975// are copied, with modifications, from
976// https://github.com/armvixl/vixl/blob/master/src/a64/assembler-a64.cc
977// which has the following copyright notice:
978/*
979   Copyright 2013, ARM Limited
980   All rights reserved.
981
982   Redistribution and use in source and binary forms, with or without
983   modification, are permitted provided that the following conditions are met:
984
985   * Redistributions of source code must retain the above copyright notice,
986     this list of conditions and the following disclaimer.
987   * Redistributions in binary form must reproduce the above copyright notice,
988     this list of conditions and the following disclaimer in the documentation
989     and/or other materials provided with the distribution.
990   * Neither the name of ARM Limited nor the names of its contributors may be
991     used to endorse or promote products derived from this software without
992     specific prior written permission.
993
994   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
995   ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
996   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
997   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
998   FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
999   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1000   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
1001   CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
1002   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1003   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1004*/
1005
1006static Int CountLeadingZeros(ULong value, Int width)
1007{
1008   vassert(width == 32 || width == 64);
1009   Int count = 0;
1010   ULong bit_test = 1ULL << (width - 1);
1011   while ((count < width) && ((bit_test & value) == 0)) {
1012      count++;
1013      bit_test >>= 1;
1014   }
1015   return count;
1016}
1017
1018static Int CountTrailingZeros(ULong value, Int width)
1019{
1020   vassert(width == 32 || width == 64);
1021   Int count = 0;
1022   while ((count < width) && (((value >> count) & 1) == 0)) {
1023      count++;
1024   }
1025   return count;
1026}
1027
1028static Int CountSetBits(ULong value, Int width)
1029{
1030   // TODO: Other widths could be added here, as the implementation already
1031   // supports them.
1032   vassert(width == 32 || width == 64);
1033
1034   // Mask out unused bits to ensure that they are not counted.
1035   value &= (0xffffffffffffffffULL >> (64-width));
1036
1037   // Add up the set bits.
1038   // The algorithm works by adding pairs of bit fields together iteratively,
1039   // where the size of each bit field doubles each time.
1040   // An example for an 8-bit value:
1041   // Bits: h g f e d c b a
1042   // \ | \ | \ | \ |
1043   // value = h+g f+e d+c b+a
1044   // \ | \ |
1045   // value = h+g+f+e d+c+b+a
1046   // \ |
1047   // value = h+g+f+e+d+c+b+a
1048   value = ((value >>  1) & 0x5555555555555555ULL)
1049                 + (value & 0x5555555555555555ULL);
1050   value = ((value >>  2) & 0x3333333333333333ULL)
1051                 + (value & 0x3333333333333333ULL);
1052   value = ((value >>  4) & 0x0f0f0f0f0f0f0f0fULL)
1053                 + (value & 0x0f0f0f0f0f0f0f0fULL);
1054   value = ((value >>  8) & 0x00ff00ff00ff00ffULL)
1055                 + (value & 0x00ff00ff00ff00ffULL);
1056   value = ((value >> 16) & 0x0000ffff0000ffffULL)
1057                 + (value & 0x0000ffff0000ffffULL);
1058   value = ((value >> 32) & 0x00000000ffffffffULL)
1059                 + (value & 0x00000000ffffffffULL);
1060
1061   return value;
1062}
1063
1064static Bool isImmLogical ( /*OUT*/UInt* n,
1065                           /*OUT*/UInt* imm_s, /*OUT*/UInt* imm_r,
1066                           ULong value, UInt width )
1067{
1068  // Test if a given value can be encoded in the immediate field of a
1069  // logical instruction.
1070
1071  // If it can be encoded, the function returns true, and values
1072  // pointed to by n, imm_s and imm_r are updated with immediates
1073  // encoded in the format required by the corresponding fields in the
1074  // logical instruction.  If it can not be encoded, the function
1075  // returns false, and the values pointed to by n, imm_s and imm_r
1076  // are undefined.
1077  vassert(n != NULL && imm_s != NULL && imm_r != NULL);
1078  vassert(width == 32 || width == 64);
1079
1080  // Logical immediates are encoded using parameters n, imm_s and imm_r using
1081  // the following table:
1082  //
1083  // N imms immr size S R
1084  // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
1085  // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
1086  // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
1087  // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
1088  // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
1089  // 0 11110s xxxxxr 2 UInt(s) UInt(r)
1090  // (s bits must not be all set)
1091  //
1092  // A pattern is constructed of size bits, where the least significant S+1
1093  // bits are set. The pattern is rotated right by R, and repeated across a
1094  // 32 or 64-bit value, depending on destination register width.
1095  //
1096  // To test if an arbitrary immediate can be encoded using this scheme, an
1097  // iterative algorithm is used.
1098  //
1099  // TODO: This code does not consider using X/W register overlap to support
1100  // 64-bit immediates where the top 32-bits are zero, and the bottom 32-bits
1101  // are an encodable logical immediate.
1102
1103  // 1. If the value has all set or all clear bits, it can't be encoded.
1104  if ((value == 0) || (value == 0xffffffffffffffffULL) ||
1105      ((width == 32) && (value == 0xffffffff))) {
1106    return False;
1107  }
1108
1109  UInt lead_zero = CountLeadingZeros(value, width);
1110  UInt lead_one = CountLeadingZeros(~value, width);
1111  UInt trail_zero = CountTrailingZeros(value, width);
1112  UInt trail_one = CountTrailingZeros(~value, width);
1113  UInt set_bits = CountSetBits(value, width);
1114
1115  // The fixed bits in the immediate s field.
1116  // If width == 64 (X reg), start at 0xFFFFFF80.
1117  // If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
1118  // widths won't be executed.
1119  Int imm_s_fixed = (width == 64) ? -128 : -64;
1120  Int imm_s_mask = 0x3F;
1121
1122  for (;;) {
1123    // 2. If the value is two bits wide, it can be encoded.
1124    if (width == 2) {
1125      *n = 0;
1126      *imm_s = 0x3C;
1127      *imm_r = (value & 3) - 1;
1128      return True;
1129    }
1130
1131    *n = (width == 64) ? 1 : 0;
1132    *imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
1133    if ((lead_zero + set_bits) == width) {
1134      *imm_r = 0;
1135    } else {
1136      *imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
1137    }
1138
1139    // 3. If the sum of leading zeros, trailing zeros and set bits is equal to
1140    // the bit width of the value, it can be encoded.
1141    if (lead_zero + trail_zero + set_bits == width) {
1142      return True;
1143    }
1144
1145    // 4. If the sum of leading ones, trailing ones and unset bits in the
1146    // value is equal to the bit width of the value, it can be encoded.
1147    if (lead_one + trail_one + (width - set_bits) == width) {
1148      return True;
1149    }
1150
1151    // 5. If the most-significant half of the bitwise value is equal to the
1152    // least-significant half, return to step 2 using the least-significant
1153    // half of the value.
1154    ULong mask = (1ULL << (width >> 1)) - 1;
1155    if ((value & mask) == ((value >> (width >> 1)) & mask)) {
1156      width >>= 1;
1157      set_bits >>= 1;
1158      imm_s_fixed >>= 1;
1159      continue;
1160    }
1161
1162    // 6. Otherwise, the value can't be encoded.
1163    return False;
1164  }
1165}
1166
1167
1168/* Create a RIL for the given immediate, if it is representable, or
1169   return NULL if not. */
1170
1171static ARM64RIL* mb_mkARM64RIL_I ( ULong imm64 )
1172{
1173   UInt n = 0, imm_s = 0, imm_r = 0;
1174   Bool ok = isImmLogical(&n, &imm_s, &imm_r, imm64, 64);
1175   if (!ok) return NULL;
1176   vassert(n < 2 && imm_s < 64 && imm_r < 64);
1177   return ARM64RIL_I13(n, imm_r, imm_s);
1178}
1179
1180/* So, finally .. */
1181
1182static ARM64RIL* iselIntExpr_RIL ( ISelEnv* env, IRExpr* e )
1183{
1184   ARM64RIL* ri = iselIntExpr_RIL_wrk(env, e);
1185   /* sanity checks ... */
1186   switch (ri->tag) {
1187      case ARM64riL_I13:
1188         vassert(ri->ARM64riL.I13.bitN < 2);
1189         vassert(ri->ARM64riL.I13.immR < 64);
1190         vassert(ri->ARM64riL.I13.immS < 64);
1191         return ri;
1192      case ARM64riL_R:
1193         vassert(hregClass(ri->ARM64riL.R.reg) == HRcInt64);
1194         vassert(hregIsVirtual(ri->ARM64riL.R.reg));
1195         return ri;
1196      default:
1197         vpanic("iselIntExpr_RIL: unknown arm RIL tag");
1198   }
1199}
1200
1201/* DO NOT CALL THIS DIRECTLY ! */
1202static ARM64RIL* iselIntExpr_RIL_wrk ( ISelEnv* env, IRExpr* e )
1203{
1204   IRType ty = typeOfIRExpr(env->type_env,e);
1205   vassert(ty == Ity_I64 || ty == Ity_I32);
1206
1207   /* special case: immediate */
1208   if (e->tag == Iex_Const) {
1209      ARM64RIL* maybe = NULL;
1210      if (ty == Ity_I64) {
1211         vassert(e->Iex.Const.con->tag == Ico_U64);
1212         maybe = mb_mkARM64RIL_I(e->Iex.Const.con->Ico.U64);
1213      } else {
1214         vassert(ty == Ity_I32);
1215         vassert(e->Iex.Const.con->tag == Ico_U32);
1216         UInt  u32 = e->Iex.Const.con->Ico.U32;
1217         ULong u64 = (ULong)u32;
1218         /* First try with 32 leading zeroes. */
1219         maybe = mb_mkARM64RIL_I(u64);
1220         /* If that doesn't work, try with 2 copies, since it doesn't
1221            matter what winds up in the upper 32 bits. */
1222         if (!maybe) {
1223            maybe = mb_mkARM64RIL_I((u64 << 32) | u64);
1224         }
1225      }
1226      if (maybe) return maybe;
1227      /* else fail, fall through to default case */
1228   }
1229
1230   /* default case: calculate into a register and return that */
1231   {
1232      HReg r = iselIntExpr_R ( env, e );
1233      return ARM64RIL_R(r);
1234   }
1235}
1236
1237
1238/* --------------------- RI6 --------------------- */
1239
1240/* Select instructions to generate 'e' into a RI6. */
1241
1242static ARM64RI6* iselIntExpr_RI6 ( ISelEnv* env, IRExpr* e )
1243{
1244   ARM64RI6* ri = iselIntExpr_RI6_wrk(env, e);
1245   /* sanity checks ... */
1246   switch (ri->tag) {
1247      case ARM64ri6_I6:
1248         vassert(ri->ARM64ri6.I6.imm6 < 64);
1249         vassert(ri->ARM64ri6.I6.imm6 > 0);
1250         return ri;
1251      case ARM64ri6_R:
1252         vassert(hregClass(ri->ARM64ri6.R.reg) == HRcInt64);
1253         vassert(hregIsVirtual(ri->ARM64ri6.R.reg));
1254         return ri;
1255      default:
1256         vpanic("iselIntExpr_RI6: unknown arm RI6 tag");
1257   }
1258}
1259
1260/* DO NOT CALL THIS DIRECTLY ! */
1261static ARM64RI6* iselIntExpr_RI6_wrk ( ISelEnv* env, IRExpr* e )
1262{
1263   IRType ty = typeOfIRExpr(env->type_env,e);
1264   vassert(ty == Ity_I64 || ty == Ity_I8);
1265
1266   /* special case: immediate */
1267   if (e->tag == Iex_Const) {
1268      switch (e->Iex.Const.con->tag) {
1269         case Ico_U8: {
1270            UInt u = e->Iex.Const.con->Ico.U8;
1271            if (u > 0 && u < 64)
1272              return ARM64RI6_I6(u);
1273            break;
1274         default:
1275            break;
1276         }
1277      }
1278      /* else fail, fall through to default case */
1279   }
1280
1281   /* default case: calculate into a register and return that */
1282   {
1283      HReg r = iselIntExpr_R ( env, e );
1284      return ARM64RI6_R(r);
1285   }
1286}
1287
1288
1289/* ------------------- CondCode ------------------- */
1290
1291/* Generate code to evaluated a bit-typed expression, returning the
1292   condition code which would correspond when the expression would
1293   notionally have returned 1. */
1294
1295static ARM64CondCode iselCondCode ( ISelEnv* env, IRExpr* e )
1296{
1297   ARM64CondCode cc = iselCondCode_wrk(env,e);
1298   vassert(cc != ARM64cc_NV);
1299   return cc;
1300}
1301
1302static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
1303{
1304   vassert(e);
1305   vassert(typeOfIRExpr(env->type_env,e) == Ity_I1);
1306
1307   /* var */
1308   if (e->tag == Iex_RdTmp) {
1309      HReg rTmp = lookupIRTemp(env, e->Iex.RdTmp.tmp);
1310      /* Cmp doesn't modify rTmp; so this is OK. */
1311      ARM64RIL* one = mb_mkARM64RIL_I(1);
1312      vassert(one);
1313      addInstr(env, ARM64Instr_Test(rTmp, one));
1314      return ARM64cc_NE;
1315   }
1316
1317   /* Not1(e) */
1318   if (e->tag == Iex_Unop && e->Iex.Unop.op == Iop_Not1) {
1319      /* Generate code for the arg, and negate the test condition */
1320      ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
1321      if (cc == ARM64cc_AL || cc == ARM64cc_NV) {
1322        return ARM64cc_AL;
1323      } else {
1324        return 1 ^ cc;
1325      }
1326   }
1327
1328   /* --- patterns rooted at: 64to1 --- */
1329
1330   if (e->tag == Iex_Unop
1331       && e->Iex.Unop.op == Iop_64to1) {
1332      HReg      rTmp = iselIntExpr_R(env, e->Iex.Unop.arg);
1333      ARM64RIL* one  = mb_mkARM64RIL_I(1);
1334      vassert(one); /* '1' must be representable */
1335      addInstr(env, ARM64Instr_Test(rTmp, one));
1336      return ARM64cc_NE;
1337   }
1338
1339   /* --- patterns rooted at: CmpNEZ8 --- */
1340
1341   if (e->tag == Iex_Unop
1342       && e->Iex.Unop.op == Iop_CmpNEZ8) {
1343      HReg      r1  = iselIntExpr_R(env, e->Iex.Unop.arg);
1344      ARM64RIL* xFF = mb_mkARM64RIL_I(0xFF);
1345      addInstr(env, ARM64Instr_Test(r1, xFF));
1346      return ARM64cc_NE;
1347   }
1348
1349   /* --- patterns rooted at: CmpNEZ16 --- */
1350
1351   if (e->tag == Iex_Unop
1352       && e->Iex.Unop.op == Iop_CmpNEZ16) {
1353      HReg      r1    = iselIntExpr_R(env, e->Iex.Unop.arg);
1354      ARM64RIL* xFFFF = mb_mkARM64RIL_I(0xFFFF);
1355      addInstr(env, ARM64Instr_Test(r1, xFFFF));
1356      return ARM64cc_NE;
1357   }
1358
1359   /* --- patterns rooted at: CmpNEZ64 --- */
1360
1361   if (e->tag == Iex_Unop
1362       && e->Iex.Unop.op == Iop_CmpNEZ64) {
1363      HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1364      ARM64RIA* zero = ARM64RIA_I12(0,0);
1365      addInstr(env, ARM64Instr_Cmp(r1, zero, True/*is64*/));
1366      return ARM64cc_NE;
1367   }
1368
1369   /* --- patterns rooted at: CmpNEZ32 --- */
1370
1371   if (e->tag == Iex_Unop
1372       && e->Iex.Unop.op == Iop_CmpNEZ32) {
1373      HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
1374      ARM64RIA* zero = ARM64RIA_I12(0,0);
1375      addInstr(env, ARM64Instr_Cmp(r1, zero, False/*!is64*/));
1376      return ARM64cc_NE;
1377   }
1378
1379   /* --- Cmp*64*(x,y) --- */
1380   if (e->tag == Iex_Binop
1381       && (e->Iex.Binop.op == Iop_CmpEQ64
1382           || e->Iex.Binop.op == Iop_CmpNE64
1383           || e->Iex.Binop.op == Iop_CmpLT64S
1384           || e->Iex.Binop.op == Iop_CmpLT64U
1385           || e->Iex.Binop.op == Iop_CmpLE64S
1386           || e->Iex.Binop.op == Iop_CmpLE64U)) {
1387      HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1388      ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1389      addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/));
1390      switch (e->Iex.Binop.op) {
1391         case Iop_CmpEQ64:  return ARM64cc_EQ;
1392         case Iop_CmpNE64:  return ARM64cc_NE;
1393         case Iop_CmpLT64S: return ARM64cc_LT;
1394         case Iop_CmpLT64U: return ARM64cc_CC;
1395         case Iop_CmpLE64S: return ARM64cc_LE;
1396         case Iop_CmpLE64U: return ARM64cc_LS;
1397         default: vpanic("iselCondCode(arm64): CmpXX64");
1398      }
1399   }
1400
1401   /* --- Cmp*32*(x,y) --- */
1402   if (e->tag == Iex_Binop
1403       && (e->Iex.Binop.op == Iop_CmpEQ32
1404           || e->Iex.Binop.op == Iop_CmpNE32
1405           || e->Iex.Binop.op == Iop_CmpLT32S
1406           || e->Iex.Binop.op == Iop_CmpLT32U
1407           || e->Iex.Binop.op == Iop_CmpLE32S
1408           || e->Iex.Binop.op == Iop_CmpLE32U)) {
1409      HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1410      ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1411      addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/));
1412      switch (e->Iex.Binop.op) {
1413         case Iop_CmpEQ32:  return ARM64cc_EQ;
1414         case Iop_CmpNE32:  return ARM64cc_NE;
1415         case Iop_CmpLT32S: return ARM64cc_LT;
1416         case Iop_CmpLT32U: return ARM64cc_CC;
1417         case Iop_CmpLE32S: return ARM64cc_LE;
1418         case Iop_CmpLE32U: return ARM64cc_LS;
1419         default: vpanic("iselCondCode(arm64): CmpXX32");
1420      }
1421   }
1422
1423   ppIRExpr(e);
1424   vpanic("iselCondCode");
1425}
1426
1427
1428/* --------------------- Reg --------------------- */
1429
1430static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e )
1431{
1432   HReg r = iselIntExpr_R_wrk(env, e);
1433   /* sanity checks ... */
1434#  if 0
1435   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1436#  endif
1437   vassert(hregClass(r) == HRcInt64);
1438   vassert(hregIsVirtual(r));
1439   return r;
1440}
1441
1442/* DO NOT CALL THIS DIRECTLY ! */
1443static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
1444{
1445   IRType ty = typeOfIRExpr(env->type_env,e);
1446   vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8);
1447
1448   switch (e->tag) {
1449
1450   /* --------- TEMP --------- */
1451   case Iex_RdTmp: {
1452      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
1453   }
1454
1455   /* --------- LOAD --------- */
1456   case Iex_Load: {
1457      HReg dst  = newVRegI(env);
1458
1459      if (e->Iex.Load.end != Iend_LE)
1460         goto irreducible;
1461
1462      if (ty == Ity_I64) {
1463         ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1464         addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, amode));
1465         return dst;
1466      }
1467      if (ty == Ity_I32) {
1468         ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1469         addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, amode));
1470         return dst;
1471      }
1472      if (ty == Ity_I16) {
1473         ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1474         addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, amode));
1475         return dst;
1476      }
1477      if (ty == Ity_I8) {
1478         ARM64AMode* amode = iselIntExpr_AMode ( env, e->Iex.Load.addr, ty );
1479         addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, amode));
1480         return dst;
1481      }
1482      break;
1483   }
1484
1485   /* --------- BINARY OP --------- */
1486   case Iex_Binop: {
1487
1488      ARM64LogicOp lop = 0; /* invalid */
1489      ARM64ShiftOp sop = 0; /* invalid */
1490
1491      /* Special-case 0-x into a Neg instruction.  Not because it's
1492         particularly useful but more so as to give value flow using
1493         this instruction, so as to check its assembly correctness for
1494         implementation of Left32/Left64. */
1495      switch (e->Iex.Binop.op) {
1496         case Iop_Sub64:
1497            if (isZeroU64(e->Iex.Binop.arg1)) {
1498               HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1499               HReg dst  = newVRegI(env);
1500               addInstr(env, ARM64Instr_Unary(dst, argR, ARM64un_NEG));
1501               return dst;
1502            }
1503            break;
1504         default:
1505            break;
1506      }
1507
1508      /* ADD/SUB */
1509      switch (e->Iex.Binop.op) {
1510         case Iop_Add64: case Iop_Add32:
1511         case Iop_Sub64: case Iop_Sub32: {
1512            Bool      isAdd = e->Iex.Binop.op == Iop_Add64
1513                              || e->Iex.Binop.op == Iop_Add32;
1514            HReg      dst   = newVRegI(env);
1515            HReg      argL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
1516            ARM64RIA* argR  = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
1517            addInstr(env, ARM64Instr_Arith(dst, argL, argR, isAdd));
1518            return dst;
1519         }
1520         default:
1521            break;
1522      }
1523
1524      /* AND/OR/XOR */
1525      switch (e->Iex.Binop.op) {
1526         case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop;
1527         case Iop_Or64:  case Iop_Or32:  lop = ARM64lo_OR;  goto log_binop;
1528         case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop;
1529         log_binop: {
1530            HReg      dst  = newVRegI(env);
1531            HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1532            ARM64RIL* argR = iselIntExpr_RIL(env, e->Iex.Binop.arg2);
1533            addInstr(env, ARM64Instr_Logic(dst, argL, argR, lop));
1534            return dst;
1535         }
1536         default:
1537            break;
1538      }
1539
1540      /* SHL/SHR/SAR */
1541      switch (e->Iex.Binop.op) {
1542         case Iop_Shr64:                 sop = ARM64sh_SHR; goto sh_binop;
1543         case Iop_Sar64:                 sop = ARM64sh_SAR; goto sh_binop;
1544         case Iop_Shl64: case Iop_Shl32: sop = ARM64sh_SHL; goto sh_binop;
1545         sh_binop: {
1546            HReg      dst  = newVRegI(env);
1547            HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1548            ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1549            addInstr(env, ARM64Instr_Shift(dst, argL, argR, sop));
1550            return dst;
1551         }
1552         case Iop_Shr32:
1553         case Iop_Sar32: {
1554            Bool      zx   = e->Iex.Binop.op == Iop_Shr32;
1555            HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1556            ARM64RI6* argR = iselIntExpr_RI6(env, e->Iex.Binop.arg2);
1557            HReg      dst  = zx ? widen_z_32_to_64(env, argL)
1558                                : widen_s_32_to_64(env, argL);
1559            addInstr(env, ARM64Instr_Shift(dst, dst, argR, ARM64sh_SHR));
1560            return dst;
1561         }
1562         default: break;
1563      }
1564
1565      /* MUL */
1566      if (e->Iex.Binop.op == Iop_Mul64 || e->Iex.Binop.op == Iop_Mul32) {
1567         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1568         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1569         HReg dst  = newVRegI(env);
1570         addInstr(env, ARM64Instr_Mul(dst, argL, argR, ARM64mul_PLAIN));
1571         return dst;
1572      }
1573
1574      /* MULL */
1575      if (e->Iex.Binop.op == Iop_MullU32 || e->Iex.Binop.op == Iop_MullS32) {
1576         Bool isS  = e->Iex.Binop.op == Iop_MullS32;
1577         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1578         HReg extL = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argL);
1579         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1580         HReg extR = (isS ? widen_s_32_to_64 : widen_z_32_to_64)(env, argR);
1581         HReg dst  = newVRegI(env);
1582         addInstr(env, ARM64Instr_Mul(dst, extL, extR, ARM64mul_PLAIN));
1583         return dst;
1584      }
1585
1586      /* Handle misc other ops. */
1587
1588      if (e->Iex.Binop.op == Iop_Max32U) {
1589         HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1590         HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1591         HReg dst  = newVRegI(env);
1592         addInstr(env, ARM64Instr_Cmp(argL, ARM64RIA_R(argR), False/*!is64*/));
1593         addInstr(env, ARM64Instr_CSel(dst, argL, argR, ARM64cc_CS));
1594         return dst;
1595      }
1596
1597      if (e->Iex.Binop.op == Iop_32HLto64) {
1598         HReg hi32s = iselIntExpr_R(env, e->Iex.Binop.arg1);
1599         HReg lo32s = iselIntExpr_R(env, e->Iex.Binop.arg2);
1600         HReg lo32  = widen_z_32_to_64(env, lo32s);
1601         HReg hi32  = newVRegI(env);
1602         addInstr(env, ARM64Instr_Shift(hi32, hi32s, ARM64RI6_I6(32),
1603                                        ARM64sh_SHL));
1604         addInstr(env, ARM64Instr_Logic(hi32, hi32, ARM64RIL_R(lo32),
1605                                        ARM64lo_OR));
1606         return hi32;
1607      }
1608
1609      if (e->Iex.Binop.op == Iop_CmpF64 || e->Iex.Binop.op == Iop_CmpF32) {
1610         Bool isD = e->Iex.Binop.op == Iop_CmpF64;
1611         HReg dL  = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg1);
1612         HReg dR  = (isD ? iselDblExpr : iselFltExpr)(env, e->Iex.Binop.arg2);
1613         HReg dst = newVRegI(env);
1614         HReg imm = newVRegI(env);
1615         /* Do the compare (FCMP), which sets NZCV in PSTATE.  Then
1616            create in dst, the IRCmpF64Result encoded result. */
1617         addInstr(env, (isD ? ARM64Instr_VCmpD : ARM64Instr_VCmpS)(dL, dR));
1618         addInstr(env, ARM64Instr_Imm64(dst, 0));
1619         addInstr(env, ARM64Instr_Imm64(imm, 0x40)); // 0x40 = Ircr_EQ
1620         addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_EQ));
1621         addInstr(env, ARM64Instr_Imm64(imm, 0x01)); // 0x01 = Ircr_LT
1622         addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_MI));
1623         addInstr(env, ARM64Instr_Imm64(imm, 0x00)); // 0x00 = Ircr_GT
1624         addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_GT));
1625         addInstr(env, ARM64Instr_Imm64(imm, 0x45)); // 0x45 = Ircr_UN
1626         addInstr(env, ARM64Instr_CSel(dst, imm, dst, ARM64cc_VS));
1627         return dst;
1628      }
1629
1630      { /* local scope */
1631        ARM64CvtOp cvt_op = ARM64cvt_INVALID;
1632        Bool       srcIsD = False;
1633        switch (e->Iex.Binop.op) {
1634           case Iop_F64toI64S:
1635              cvt_op = ARM64cvt_F64_I64S; srcIsD = True; break;
1636           case Iop_F64toI64U:
1637              cvt_op = ARM64cvt_F64_I64U; srcIsD = True; break;
1638           case Iop_F64toI32S:
1639              cvt_op = ARM64cvt_F64_I32S; srcIsD = True; break;
1640           case Iop_F64toI32U:
1641              cvt_op = ARM64cvt_F64_I32U; srcIsD = True; break;
1642           case Iop_F32toI32S:
1643              cvt_op = ARM64cvt_F32_I32S; srcIsD = False; break;
1644           case Iop_F32toI32U:
1645              cvt_op = ARM64cvt_F32_I32U; srcIsD = False; break;
1646           case Iop_F32toI64S:
1647              cvt_op = ARM64cvt_F32_I64S; srcIsD = False; break;
1648           case Iop_F32toI64U:
1649              cvt_op = ARM64cvt_F32_I64U; srcIsD = False; break;
1650           default:
1651              break;
1652        }
1653        if (cvt_op != ARM64cvt_INVALID) {
1654           /* This is all a bit dodgy, because we can't handle a
1655              non-constant (not-known-at-JIT-time) rounding mode
1656              indication.  That's because there's no instruction
1657              AFAICS that does this conversion but rounds according to
1658              FPCR.RM, so we have to bake the rounding mode into the
1659              instruction right now.  But that should be OK because
1660              (1) the front end attaches a literal Irrm_ value to the
1661              conversion binop, and (2) iropt will never float that
1662              off via CSE, into a literal.  Hence we should always
1663              have an Irrm_ value as the first arg. */
1664           IRExpr* arg1 = e->Iex.Binop.arg1;
1665           if (arg1->tag != Iex_Const) goto irreducible;
1666           IRConst* arg1con = arg1->Iex.Const.con;
1667           vassert(arg1con->tag == Ico_U32); // else ill-typed IR
1668           UInt irrm = arg1con->Ico.U32;
1669           /* Find the ARM-encoded equivalent for |irrm|. */
1670           UInt armrm = 4; /* impossible */
1671           switch (irrm) {
1672              case Irrm_NEAREST: armrm = 0; break;
1673              case Irrm_NegINF:  armrm = 2; break;
1674              case Irrm_PosINF:  armrm = 1; break;
1675              case Irrm_ZERO:    armrm = 3; break;
1676              default: goto irreducible;
1677           }
1678           HReg src = (srcIsD ? iselDblExpr : iselFltExpr)
1679                         (env, e->Iex.Binop.arg2);
1680           HReg dst = newVRegI(env);
1681           addInstr(env, ARM64Instr_VCvtF2I(cvt_op, dst, src, armrm));
1682           return dst;
1683        }
1684      } /* local scope */
1685
1686      /* All cases involving host-side helper calls. */
1687      void* fn = NULL;
1688      switch (e->Iex.Binop.op) {
1689         case Iop_DivU32:
1690            fn = &h_calc_udiv32_w_arm_semantics; break;
1691         case Iop_DivS32:
1692            fn = &h_calc_sdiv32_w_arm_semantics; break;
1693         case Iop_DivU64:
1694            fn = &h_calc_udiv64_w_arm_semantics; break;
1695         case Iop_DivS64:
1696            fn = &h_calc_sdiv64_w_arm_semantics; break;
1697         default:
1698            break;
1699      }
1700
1701      if (fn) {
1702         HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
1703         HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
1704         HReg res  = newVRegI(env);
1705         addInstr(env, ARM64Instr_MovI(hregARM64_X0(), regL));
1706         addInstr(env, ARM64Instr_MovI(hregARM64_X1(), regR));
1707         addInstr(env, ARM64Instr_Call( ARM64cc_AL, (Addr)fn,
1708                                        2, mk_RetLoc_simple(RLPri_Int) ));
1709         addInstr(env, ARM64Instr_MovI(res, hregARM64_X0()));
1710         return res;
1711      }
1712
1713      break;
1714   }
1715
1716   /* --------- UNARY OP --------- */
1717   case Iex_Unop: {
1718
1719      switch (e->Iex.Unop.op) {
1720         case Iop_16Uto64: {
1721            /* This probably doesn't occur often enough to be worth
1722               rolling the extension into the load. */
1723            IRExpr* arg = e->Iex.Unop.arg;
1724            HReg    src = iselIntExpr_R(env, arg);
1725            HReg    dst = widen_z_16_to_64(env, src);
1726            return dst;
1727         }
1728         case Iop_32Uto64: {
1729            IRExpr* arg = e->Iex.Unop.arg;
1730            if (arg->tag == Iex_Load) {
1731               /* This correctly zero extends because _LdSt32 is
1732                  defined to do a zero extending load. */
1733               HReg dst = newVRegI(env);
1734               ARM64AMode* am
1735                  = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I32);
1736               addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
1737               return dst;
1738            }
1739            /* else be lame and mask it  */
1740            HReg src  = iselIntExpr_R(env, arg);
1741            HReg dst  = widen_z_32_to_64(env, src);
1742            return dst;
1743         }
1744         case Iop_8Uto32: /* Just freeload on the 8Uto64 case */
1745         case Iop_8Uto64: {
1746            IRExpr* arg = e->Iex.Unop.arg;
1747            if (arg->tag == Iex_Load) {
1748               /* This correctly zero extends because _LdSt8 is
1749                  defined to do a zero extending load. */
1750               HReg dst = newVRegI(env);
1751               ARM64AMode* am
1752                  = iselIntExpr_AMode(env, arg->Iex.Load.addr, Ity_I8);
1753               addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
1754               return dst;
1755            }
1756            /* else be lame and mask it  */
1757            HReg src = iselIntExpr_R(env, arg);
1758            HReg dst = widen_z_8_to_64(env, src);
1759            return dst;
1760         }
1761         case Iop_128HIto64: {
1762            HReg rHi, rLo;
1763            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
1764            return rHi; /* and abandon rLo */
1765         }
1766         case Iop_8Sto32: case Iop_8Sto64: {
1767            IRExpr* arg = e->Iex.Unop.arg;
1768            HReg    src = iselIntExpr_R(env, arg);
1769            HReg    dst = widen_s_8_to_64(env, src);
1770            return dst;
1771         }
1772         case Iop_16Sto32: case Iop_16Sto64: {
1773            IRExpr* arg = e->Iex.Unop.arg;
1774            HReg    src = iselIntExpr_R(env, arg);
1775            HReg    dst = widen_s_16_to_64(env, src);
1776            return dst;
1777         }
1778         case Iop_32Sto64: {
1779            IRExpr* arg = e->Iex.Unop.arg;
1780            HReg    src = iselIntExpr_R(env, arg);
1781            HReg    dst = widen_s_32_to_64(env, src);
1782            return dst;
1783         }
1784         case Iop_Not32:
1785         case Iop_Not64: {
1786            HReg dst = newVRegI(env);
1787            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1788            addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NOT));
1789            return dst;
1790         }
1791         case Iop_Clz64: {
1792            HReg dst = newVRegI(env);
1793            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1794            addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_CLZ));
1795            return dst;
1796         }
1797         case Iop_Left32:
1798         case Iop_Left64: {
1799            /* Left64(src) = src | -src.  Left32 can use the same
1800               implementation since in that case we don't care what
1801               the upper 32 bits become. */
1802            HReg dst = newVRegI(env);
1803            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1804            addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
1805            addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
1806                                           ARM64lo_OR));
1807            return dst;
1808         }
1809         case Iop_CmpwNEZ64: {
1810           /* CmpwNEZ64(src) = (src == 0) ? 0...0 : 1...1
1811                             = Left64(src) >>s 63 */
1812            HReg dst = newVRegI(env);
1813            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
1814            addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
1815            addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
1816                                           ARM64lo_OR));
1817            addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
1818                                           ARM64sh_SAR));
1819            return dst;
1820         }
1821         case Iop_CmpwNEZ32: {
1822            /* CmpwNEZ32(src) = CmpwNEZ64(src & 0xFFFFFFFF)
1823                              = Left64(src & 0xFFFFFFFF) >>s 63 */
1824            HReg dst = newVRegI(env);
1825            HReg pre = iselIntExpr_R(env, e->Iex.Unop.arg);
1826            HReg src = widen_z_32_to_64(env, pre);
1827            addInstr(env, ARM64Instr_Unary(dst, src, ARM64un_NEG));
1828            addInstr(env, ARM64Instr_Logic(dst, dst, ARM64RIL_R(src),
1829                                           ARM64lo_OR));
1830            addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
1831                                           ARM64sh_SAR));
1832            return dst;
1833         }
1834         case Iop_V128to64: case Iop_V128HIto64: {
1835            HReg dst    = newVRegI(env);
1836            HReg src    = iselV128Expr(env, e->Iex.Unop.arg);
1837            UInt laneNo = (e->Iex.Unop.op == Iop_V128HIto64) ? 1 : 0;
1838            addInstr(env, ARM64Instr_VXfromQ(dst, src, laneNo));
1839            return dst;
1840         }
1841         case Iop_ReinterpF64asI64: {
1842            HReg dst = newVRegI(env);
1843            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
1844            addInstr(env, ARM64Instr_VXfromDorS(dst, src, True/*fromD*/));
1845            return dst;
1846         }
1847         case Iop_ReinterpF32asI32: {
1848            HReg dst = newVRegI(env);
1849            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
1850            addInstr(env, ARM64Instr_VXfromDorS(dst, src, False/*!fromD*/));
1851            return dst;
1852         }
1853         case Iop_1Sto16:
1854         case Iop_1Sto32:
1855         case Iop_1Sto64: {
1856            /* As with the iselStmt case for 'tmp:I1 = expr', we could
1857               do a lot better here if it ever became necessary. */
1858            HReg zero = newVRegI(env);
1859            HReg one  = newVRegI(env);
1860            HReg dst  = newVRegI(env);
1861            addInstr(env, ARM64Instr_Imm64(zero, 0));
1862            addInstr(env, ARM64Instr_Imm64(one,  1));
1863            ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
1864            addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
1865            addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
1866                                           ARM64sh_SHL));
1867            addInstr(env, ARM64Instr_Shift(dst, dst, ARM64RI6_I6(63),
1868                                           ARM64sh_SAR));
1869            return dst;
1870         }
1871         case Iop_NarrowUn16to8x8:
1872         case Iop_NarrowUn32to16x4:
1873         case Iop_NarrowUn64to32x2:
1874         case Iop_QNarrowUn16Sto8Sx8:
1875         case Iop_QNarrowUn32Sto16Sx4:
1876         case Iop_QNarrowUn64Sto32Sx2:
1877         case Iop_QNarrowUn16Uto8Ux8:
1878         case Iop_QNarrowUn32Uto16Ux4:
1879         case Iop_QNarrowUn64Uto32Ux2:
1880         case Iop_QNarrowUn16Sto8Ux8:
1881         case Iop_QNarrowUn32Sto16Ux4:
1882         case Iop_QNarrowUn64Sto32Ux2:
1883         {
1884            HReg src = iselV128Expr(env, e->Iex.Unop.arg);
1885            HReg tmp = newVRegV(env);
1886            HReg dst = newVRegI(env);
1887            UInt dszBlg2 = 3; /* illegal */
1888            ARM64VecNarrowOp op = ARM64vecna_INVALID;
1889            switch (e->Iex.Unop.op) {
1890               case Iop_NarrowUn16to8x8:
1891                  dszBlg2 = 0; op = ARM64vecna_XTN; break;
1892               case Iop_NarrowUn32to16x4:
1893                  dszBlg2 = 1; op = ARM64vecna_XTN; break;
1894               case Iop_NarrowUn64to32x2:
1895                  dszBlg2 = 2; op = ARM64vecna_XTN; break;
1896               case Iop_QNarrowUn16Sto8Sx8:
1897                  dszBlg2 = 0; op = ARM64vecna_SQXTN; break;
1898               case Iop_QNarrowUn32Sto16Sx4:
1899                  dszBlg2 = 1; op = ARM64vecna_SQXTN; break;
1900               case Iop_QNarrowUn64Sto32Sx2:
1901                  dszBlg2 = 2; op = ARM64vecna_SQXTN; break;
1902               case Iop_QNarrowUn16Uto8Ux8:
1903                  dszBlg2 = 0; op = ARM64vecna_UQXTN; break;
1904               case Iop_QNarrowUn32Uto16Ux4:
1905                  dszBlg2 = 1; op = ARM64vecna_UQXTN; break;
1906               case Iop_QNarrowUn64Uto32Ux2:
1907                  dszBlg2 = 2; op = ARM64vecna_UQXTN; break;
1908               case Iop_QNarrowUn16Sto8Ux8:
1909                  dszBlg2 = 0; op = ARM64vecna_SQXTUN; break;
1910               case Iop_QNarrowUn32Sto16Ux4:
1911                  dszBlg2 = 1; op = ARM64vecna_SQXTUN; break;
1912               case Iop_QNarrowUn64Sto32Ux2:
1913                  dszBlg2 = 2; op = ARM64vecna_SQXTUN; break;
1914               default:
1915                  vassert(0);
1916            }
1917            addInstr(env, ARM64Instr_VNarrowV(op, dszBlg2, tmp, src));
1918            addInstr(env, ARM64Instr_VXfromQ(dst, tmp, 0/*laneNo*/));
1919            return dst;
1920         }
1921         case Iop_1Uto64: {
1922            /* 1Uto64(tmp). */
1923            HReg dst = newVRegI(env);
1924            if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
1925               ARM64RIL* one = mb_mkARM64RIL_I(1);
1926               HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
1927               vassert(one);
1928               addInstr(env, ARM64Instr_Logic(dst, src, one, ARM64lo_AND));
1929            } else {
1930               /* CLONE-01 */
1931               HReg zero = newVRegI(env);
1932               HReg one  = newVRegI(env);
1933               addInstr(env, ARM64Instr_Imm64(zero, 0));
1934               addInstr(env, ARM64Instr_Imm64(one,  1));
1935               ARM64CondCode cc = iselCondCode(env, e->Iex.Unop.arg);
1936               addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
1937            }
1938            return dst;
1939         }
1940         case Iop_64to32:
1941         case Iop_64to16:
1942         case Iop_64to8:
1943            /* These are no-ops. */
1944            return iselIntExpr_R(env, e->Iex.Unop.arg);
1945
1946         default:
1947            break;
1948      }
1949
1950      break;
1951   }
1952
1953   /* --------- GET --------- */
1954   case Iex_Get: {
1955      if (ty == Ity_I64
1956          && 0 == (e->Iex.Get.offset & 7) && e->Iex.Get.offset < (8<<12)-8) {
1957         HReg        dst = newVRegI(env);
1958         ARM64AMode* am
1959            = mk_baseblock_64bit_access_amode(e->Iex.Get.offset);
1960         addInstr(env, ARM64Instr_LdSt64(True/*isLoad*/, dst, am));
1961         return dst;
1962      }
1963      if (ty == Ity_I32
1964          && 0 == (e->Iex.Get.offset & 3) && e->Iex.Get.offset < (4<<12)-4) {
1965         HReg        dst = newVRegI(env);
1966         ARM64AMode* am
1967            = mk_baseblock_32bit_access_amode(e->Iex.Get.offset);
1968         addInstr(env, ARM64Instr_LdSt32(True/*isLoad*/, dst, am));
1969         return dst;
1970      }
1971      if (ty == Ity_I16
1972          && 0 == (e->Iex.Get.offset & 1) && e->Iex.Get.offset < (2<<12)-2) {
1973         HReg        dst = newVRegI(env);
1974         ARM64AMode* am
1975            = mk_baseblock_16bit_access_amode(e->Iex.Get.offset);
1976         addInstr(env, ARM64Instr_LdSt16(True/*isLoad*/, dst, am));
1977         return dst;
1978      }
1979      if (ty == Ity_I8
1980          /* && no alignment check */ && e->Iex.Get.offset < (1<<12)-1) {
1981         HReg        dst = newVRegI(env);
1982         ARM64AMode* am
1983            = mk_baseblock_8bit_access_amode(e->Iex.Get.offset);
1984         addInstr(env, ARM64Instr_LdSt8(True/*isLoad*/, dst, am));
1985         return dst;
1986      }
1987      break;
1988   }
1989
1990   /* --------- CCALL --------- */
1991   case Iex_CCall: {
1992      HReg    dst = newVRegI(env);
1993      vassert(ty == e->Iex.CCall.retty);
1994
1995      /* be very restrictive for now.  Only 64-bit ints allowed for
1996         args, and 64 bits for return type.  Don't forget to change
1997         the RetLoc if more types are allowed in future. */
1998      if (e->Iex.CCall.retty != Ity_I64)
1999         goto irreducible;
2000
2001      /* Marshal args, do the call, clear stack. */
2002      UInt   addToSp = 0;
2003      RetLoc rloc    = mk_RetLoc_INVALID();
2004      Bool   ok      = doHelperCall( &addToSp, &rloc, env, NULL/*guard*/,
2005                                     e->Iex.CCall.cee, e->Iex.CCall.retty,
2006                                     e->Iex.CCall.args );
2007      /* */
2008      if (ok) {
2009         vassert(is_sane_RetLoc(rloc));
2010         vassert(rloc.pri == RLPri_Int);
2011         vassert(addToSp == 0);
2012         addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()));
2013         return dst;
2014      }
2015      /* else fall through; will hit the irreducible: label */
2016   }
2017
2018   /* --------- LITERAL --------- */
2019   /* 64-bit literals */
2020   case Iex_Const: {
2021      ULong u   = 0;
2022      HReg  dst = newVRegI(env);
2023      switch (e->Iex.Const.con->tag) {
2024         case Ico_U64: u = e->Iex.Const.con->Ico.U64; break;
2025         case Ico_U32: u = e->Iex.Const.con->Ico.U32; break;
2026         case Ico_U16: u = e->Iex.Const.con->Ico.U16; break;
2027         case Ico_U8:  u = e->Iex.Const.con->Ico.U8;  break;
2028         default: ppIRExpr(e); vpanic("iselIntExpr_R.Iex_Const(arm64)");
2029      }
2030      addInstr(env, ARM64Instr_Imm64(dst, u));
2031      return dst;
2032   }
2033
2034   /* --------- MULTIPLEX --------- */
2035   case Iex_ITE: {
2036      /* ITE(ccexpr, iftrue, iffalse) */
2037      if (ty == Ity_I64 || ty == Ity_I32) {
2038         ARM64CondCode cc;
2039         HReg r1  = iselIntExpr_R(env, e->Iex.ITE.iftrue);
2040         HReg r0  = iselIntExpr_R(env, e->Iex.ITE.iffalse);
2041         HReg dst = newVRegI(env);
2042         cc = iselCondCode(env, e->Iex.ITE.cond);
2043         addInstr(env, ARM64Instr_CSel(dst, r1, r0, cc));
2044         return dst;
2045      }
2046      break;
2047   }
2048
2049   default:
2050   break;
2051   } /* switch (e->tag) */
2052
2053   /* We get here if no pattern matched. */
2054  irreducible:
2055   ppIRExpr(e);
2056   vpanic("iselIntExpr_R: cannot reduce tree");
2057}
2058
2059
2060/*---------------------------------------------------------*/
2061/*--- ISEL: Integer expressions (128 bit)               ---*/
2062/*---------------------------------------------------------*/
2063
2064/* Compute a 128-bit value into a register pair, which is returned as
2065   the first two parameters.  As with iselIntExpr_R, these may be
2066   either real or virtual regs; in any case they must not be changed
2067   by subsequent code emitted by the caller.  */
2068
2069static void iselInt128Expr ( HReg* rHi, HReg* rLo,
2070                             ISelEnv* env, IRExpr* e )
2071{
2072   iselInt128Expr_wrk(rHi, rLo, env, e);
2073#  if 0
2074   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2075#  endif
2076   vassert(hregClass(*rHi) == HRcInt64);
2077   vassert(hregIsVirtual(*rHi));
2078   vassert(hregClass(*rLo) == HRcInt64);
2079   vassert(hregIsVirtual(*rLo));
2080}
2081
2082/* DO NOT CALL THIS DIRECTLY ! */
2083static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
2084                                 ISelEnv* env, IRExpr* e )
2085{
2086   vassert(e);
2087   vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
2088
2089   /* --------- BINARY ops --------- */
2090   if (e->tag == Iex_Binop) {
2091      switch (e->Iex.Binop.op) {
2092         /* 64 x 64 -> 128 multiply */
2093         case Iop_MullU64:
2094         case Iop_MullS64: {
2095            Bool syned = toBool(e->Iex.Binop.op == Iop_MullS64);
2096            HReg argL  = iselIntExpr_R(env, e->Iex.Binop.arg1);
2097            HReg argR  = iselIntExpr_R(env, e->Iex.Binop.arg2);
2098            HReg dstLo = newVRegI(env);
2099            HReg dstHi = newVRegI(env);
2100            addInstr(env, ARM64Instr_Mul(dstLo, argL, argR,
2101                                         ARM64mul_PLAIN));
2102            addInstr(env, ARM64Instr_Mul(dstHi, argL, argR,
2103                                         syned ? ARM64mul_SX : ARM64mul_ZX));
2104            *rHi = dstHi;
2105            *rLo = dstLo;
2106            return;
2107         }
2108         /* 64HLto128(e1,e2) */
2109         case Iop_64HLto128:
2110            *rHi = iselIntExpr_R(env, e->Iex.Binop.arg1);
2111            *rLo = iselIntExpr_R(env, e->Iex.Binop.arg2);
2112            return;
2113         default:
2114            break;
2115      }
2116   } /* if (e->tag == Iex_Binop) */
2117
2118   ppIRExpr(e);
2119   vpanic("iselInt128Expr(arm64)");
2120}
2121
2122
2123/*---------------------------------------------------------*/
2124/*--- ISEL: Vector expressions (128 bit)                ---*/
2125/*---------------------------------------------------------*/
2126
2127static HReg iselV128Expr ( ISelEnv* env, IRExpr* e )
2128{
2129   HReg r = iselV128Expr_wrk( env, e );
2130   vassert(hregClass(r) == HRcVec128);
2131   vassert(hregIsVirtual(r));
2132   return r;
2133}
2134
2135/* DO NOT CALL THIS DIRECTLY */
2136static HReg iselV128Expr_wrk ( ISelEnv* env, IRExpr* e )
2137{
2138   IRType ty = typeOfIRExpr(env->type_env, e);
2139   vassert(e);
2140   vassert(ty == Ity_V128);
2141
2142   if (e->tag == Iex_RdTmp) {
2143      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
2144   }
2145
2146   if (e->tag == Iex_Const) {
2147      /* Only a very limited range of constants is handled. */
2148      vassert(e->Iex.Const.con->tag == Ico_V128);
2149      UShort con = e->Iex.Const.con->Ico.V128;
2150      HReg   res = newVRegV(env);
2151      switch (con) {
2152         case 0x0000: case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
2153            addInstr(env, ARM64Instr_VImmQ(res, con));
2154            return res;
2155         case 0x00F0:
2156            addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2157            addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
2158            return res;
2159         case 0x0F00:
2160            addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2161            addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
2162            return res;
2163         case 0x0FF0:
2164            addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
2165            addInstr(env, ARM64Instr_VExtV(res, res, res, 12));
2166            return res;
2167         case 0x0FFF:
2168            addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2169            addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
2170            addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
2171            return res;
2172         case 0xF000:
2173            addInstr(env, ARM64Instr_VImmQ(res, 0x000F));
2174            addInstr(env, ARM64Instr_VExtV(res, res, res, 4));
2175            return res;
2176         case 0xFF00:
2177            addInstr(env, ARM64Instr_VImmQ(res, 0x00FF));
2178            addInstr(env, ARM64Instr_VExtV(res, res, res, 8));
2179            return res;
2180         default:
2181            break;
2182      }
2183      /* Unhandled */
2184      goto v128_expr_bad;
2185   }
2186
2187   if (e->tag == Iex_Load) {
2188      HReg res = newVRegV(env);
2189      HReg rN  = iselIntExpr_R(env, e->Iex.Load.addr);
2190      vassert(ty == Ity_V128);
2191      addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, rN));
2192      return res;
2193   }
2194
2195   if (e->tag == Iex_Get) {
2196      UInt offs = (UInt)e->Iex.Get.offset;
2197      if (offs < (1<<12)) {
2198         HReg addr = mk_baseblock_128bit_access_addr(env, offs);
2199         HReg res  = newVRegV(env);
2200         vassert(ty == Ity_V128);
2201         addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, res, addr));
2202         return res;
2203      }
2204      goto v128_expr_bad;
2205   }
2206
2207   if (e->tag == Iex_Unop) {
2208
2209      /* Iop_ZeroHIXXofV128 cases */
2210      UShort imm16 = 0;
2211      switch (e->Iex.Unop.op) {
2212         case Iop_ZeroHI64ofV128:  imm16 = 0x00FF; break;
2213         case Iop_ZeroHI96ofV128:  imm16 = 0x000F; break;
2214         case Iop_ZeroHI112ofV128: imm16 = 0x0003; break;
2215         case Iop_ZeroHI120ofV128: imm16 = 0x0001; break;
2216         default: break;
2217      }
2218      if (imm16 != 0) {
2219         HReg src = iselV128Expr(env, e->Iex.Unop.arg);
2220         HReg imm = newVRegV(env);
2221         HReg res = newVRegV(env);
2222         addInstr(env, ARM64Instr_VImmQ(imm, imm16));
2223         addInstr(env, ARM64Instr_VBinV(ARM64vecb_AND, res, src, imm));
2224         return res;
2225      }
2226
2227      /* Other cases */
2228      switch (e->Iex.Unop.op) {
2229         case Iop_NotV128:
2230         case Iop_Abs64Fx2: case Iop_Abs32Fx4:
2231         case Iop_Neg64Fx2: case Iop_Neg32Fx4:
2232         case Iop_Abs64x2:  case Iop_Abs32x4:
2233         case Iop_Abs16x8:  case Iop_Abs8x16:
2234         case Iop_Cls32x4:  case Iop_Cls16x8:  case Iop_Cls8x16:
2235         case Iop_Clz32x4:  case Iop_Clz16x8:  case Iop_Clz8x16:
2236         case Iop_Cnt8x16:
2237         case Iop_Reverse1sIn8_x16:
2238         case Iop_Reverse8sIn16_x8:
2239         case Iop_Reverse8sIn32_x4: case Iop_Reverse16sIn32_x4:
2240         case Iop_Reverse8sIn64_x2: case Iop_Reverse16sIn64_x2:
2241         case Iop_Reverse32sIn64_x2:
2242         case Iop_RecipEst32Ux4:
2243         case Iop_RSqrtEst32Ux4:
2244         case Iop_RecipEst64Fx2: case Iop_RecipEst32Fx4:
2245         case Iop_RSqrtEst64Fx2: case Iop_RSqrtEst32Fx4:
2246         {
2247            HReg res   = newVRegV(env);
2248            HReg arg   = iselV128Expr(env, e->Iex.Unop.arg);
2249            Bool setRM = False;
2250            ARM64VecUnaryOp op = ARM64vecu_INVALID;
2251            switch (e->Iex.Unop.op) {
2252               case Iop_NotV128:           op = ARM64vecu_NOT;         break;
2253               case Iop_Abs64Fx2:          op = ARM64vecu_FABS64x2;    break;
2254               case Iop_Abs32Fx4:          op = ARM64vecu_FABS32x4;    break;
2255               case Iop_Neg64Fx2:          op = ARM64vecu_FNEG64x2;    break;
2256               case Iop_Neg32Fx4:          op = ARM64vecu_FNEG32x4;    break;
2257               case Iop_Abs64x2:           op = ARM64vecu_ABS64x2;     break;
2258               case Iop_Abs32x4:           op = ARM64vecu_ABS32x4;     break;
2259               case Iop_Abs16x8:           op = ARM64vecu_ABS16x8;     break;
2260               case Iop_Abs8x16:           op = ARM64vecu_ABS8x16;     break;
2261               case Iop_Cls32x4:           op = ARM64vecu_CLS32x4;     break;
2262               case Iop_Cls16x8:           op = ARM64vecu_CLS16x8;     break;
2263               case Iop_Cls8x16:           op = ARM64vecu_CLS8x16;     break;
2264               case Iop_Clz32x4:           op = ARM64vecu_CLZ32x4;     break;
2265               case Iop_Clz16x8:           op = ARM64vecu_CLZ16x8;     break;
2266               case Iop_Clz8x16:           op = ARM64vecu_CLZ8x16;     break;
2267               case Iop_Cnt8x16:           op = ARM64vecu_CNT8x16;     break;
2268               case Iop_Reverse1sIn8_x16:  op = ARM64vecu_RBIT;        break;
2269               case Iop_Reverse8sIn16_x8:  op = ARM64vecu_REV1616B;    break;
2270               case Iop_Reverse8sIn32_x4:  op = ARM64vecu_REV3216B;    break;
2271               case Iop_Reverse16sIn32_x4: op = ARM64vecu_REV328H;     break;
2272               case Iop_Reverse8sIn64_x2:  op = ARM64vecu_REV6416B;    break;
2273               case Iop_Reverse16sIn64_x2: op = ARM64vecu_REV648H;     break;
2274               case Iop_Reverse32sIn64_x2: op = ARM64vecu_REV644S;     break;
2275               case Iop_RecipEst32Ux4:     op = ARM64vecu_URECPE32x4;  break;
2276               case Iop_RSqrtEst32Ux4:     op = ARM64vecu_URSQRTE32x4; break;
2277               case Iop_RecipEst64Fx2:     setRM = True;
2278                                           op = ARM64vecu_FRECPE64x2;  break;
2279               case Iop_RecipEst32Fx4:     setRM = True;
2280                                           op = ARM64vecu_FRECPE32x4;  break;
2281               case Iop_RSqrtEst64Fx2:     setRM = True;
2282                                           op = ARM64vecu_FRSQRTE64x2; break;
2283               case Iop_RSqrtEst32Fx4:     setRM = True;
2284                                           op = ARM64vecu_FRSQRTE32x4; break;
2285               default: vassert(0);
2286            }
2287            if (setRM) {
2288               // This is a bit of a kludge.  We should do rm properly for
2289               // these recip-est insns, but that would require changing the
2290               // primop's type to take an rmode.
2291               set_FPCR_rounding_mode(env, IRExpr_Const(
2292                                              IRConst_U32(Irrm_NEAREST)));
2293            }
2294            addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
2295            return res;
2296         }
2297         case Iop_CmpNEZ8x16:
2298         case Iop_CmpNEZ16x8:
2299         case Iop_CmpNEZ32x4:
2300         case Iop_CmpNEZ64x2: {
2301            HReg arg  = iselV128Expr(env, e->Iex.Unop.arg);
2302            HReg zero = newVRegV(env);
2303            HReg res  = newVRegV(env);
2304            ARM64VecBinOp cmp = ARM64vecb_INVALID;
2305            switch (e->Iex.Unop.op) {
2306               case Iop_CmpNEZ64x2: cmp = ARM64vecb_CMEQ64x2; break;
2307               case Iop_CmpNEZ32x4: cmp = ARM64vecb_CMEQ32x4; break;
2308               case Iop_CmpNEZ16x8: cmp = ARM64vecb_CMEQ16x8; break;
2309               case Iop_CmpNEZ8x16: cmp = ARM64vecb_CMEQ8x16; break;
2310               default: vassert(0);
2311            }
2312            // This is pretty feeble.  Better: use CMP against zero
2313            // and avoid the extra instruction and extra register.
2314            addInstr(env, ARM64Instr_VImmQ(zero, 0x0000));
2315            addInstr(env, ARM64Instr_VBinV(cmp, res, arg, zero));
2316            addInstr(env, ARM64Instr_VUnaryV(ARM64vecu_NOT, res, res));
2317            return res;
2318         }
2319         case Iop_V256toV128_0:
2320         case Iop_V256toV128_1: {
2321            HReg vHi, vLo;
2322            iselV256Expr(&vHi, &vLo, env, e->Iex.Unop.arg);
2323            return (e->Iex.Unop.op == Iop_V256toV128_1) ? vHi : vLo;
2324         }
2325         case Iop_64UtoV128: {
2326            HReg res = newVRegV(env);
2327            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2328            addInstr(env, ARM64Instr_VQfromX(res, arg));
2329            return res;
2330         }
2331         case Iop_Widen8Sto16x8: {
2332            HReg res = newVRegV(env);
2333            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2334            addInstr(env, ARM64Instr_VQfromX(res, arg));
2335            addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP18x16, res, res, res));
2336            addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR16x8,
2337                                                res, res, 8));
2338            return res;
2339         }
2340         case Iop_Widen16Sto32x4: {
2341            HReg res = newVRegV(env);
2342            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2343            addInstr(env, ARM64Instr_VQfromX(res, arg));
2344            addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP116x8, res, res, res));
2345            addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR32x4,
2346                                                res, res, 16));
2347            return res;
2348         }
2349         case Iop_Widen32Sto64x2: {
2350            HReg res = newVRegV(env);
2351            HReg arg = iselIntExpr_R(env, e->Iex.Unop.arg);
2352            addInstr(env, ARM64Instr_VQfromX(res, arg));
2353            addInstr(env, ARM64Instr_VBinV(ARM64vecb_ZIP132x4, res, res, res));
2354            addInstr(env, ARM64Instr_VShiftImmV(ARM64vecshi_SSHR64x2,
2355                                                res, res, 32));
2356            return res;
2357         }
2358         /* ... */
2359         default:
2360            break;
2361      } /* switch on the unop */
2362   } /* if (e->tag == Iex_Unop) */
2363
2364   if (e->tag == Iex_Binop) {
2365      switch (e->Iex.Binop.op) {
2366         case Iop_Sqrt32Fx4:
2367         case Iop_Sqrt64Fx2: {
2368            HReg arg = iselV128Expr(env, e->Iex.Binop.arg2);
2369            HReg res = newVRegV(env);
2370            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
2371            ARM64VecUnaryOp op
2372               = e->Iex.Binop.op == Iop_Sqrt32Fx4
2373                    ? ARM64vecu_FSQRT32x4 : ARM64vecu_FSQRT64x2;
2374            addInstr(env, ARM64Instr_VUnaryV(op, res, arg));
2375            return res;
2376         }
2377         case Iop_64HLtoV128: {
2378            HReg res  = newVRegV(env);
2379            HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2380            HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2381            addInstr(env, ARM64Instr_VQfromXX(res, argL, argR));
2382            return res;
2383         }
2384         /* -- Cases where we can generate a simple three-reg instruction. -- */
2385         case Iop_AndV128:
2386         case Iop_OrV128:
2387         case Iop_XorV128:
2388         case Iop_Max32Ux4: case Iop_Max16Ux8: case Iop_Max8Ux16:
2389         case Iop_Min32Ux4: case Iop_Min16Ux8: case Iop_Min8Ux16:
2390         case Iop_Max32Sx4: case Iop_Max16Sx8: case Iop_Max8Sx16:
2391         case Iop_Min32Sx4: case Iop_Min16Sx8: case Iop_Min8Sx16:
2392         case Iop_Add64x2: case Iop_Add32x4:
2393         case Iop_Add16x8: case Iop_Add8x16:
2394         case Iop_Sub64x2: case Iop_Sub32x4:
2395         case Iop_Sub16x8: case Iop_Sub8x16:
2396         case Iop_Mul32x4: case Iop_Mul16x8: case Iop_Mul8x16:
2397         case Iop_CmpEQ64x2: case Iop_CmpEQ32x4:
2398         case Iop_CmpEQ16x8:  case Iop_CmpEQ8x16:
2399         case Iop_CmpGT64Ux2: case Iop_CmpGT32Ux4:
2400         case Iop_CmpGT16Ux8: case Iop_CmpGT8Ux16:
2401         case Iop_CmpGT64Sx2: case Iop_CmpGT32Sx4:
2402         case Iop_CmpGT16Sx8: case Iop_CmpGT8Sx16:
2403         case Iop_CmpEQ64Fx2: case Iop_CmpEQ32Fx4:
2404         case Iop_CmpLE64Fx2: case Iop_CmpLE32Fx4:
2405         case Iop_CmpLT64Fx2: case Iop_CmpLT32Fx4:
2406         case Iop_Perm8x16:
2407         case Iop_InterleaveLO64x2: case Iop_CatEvenLanes32x4:
2408         case Iop_CatEvenLanes16x8: case Iop_CatEvenLanes8x16:
2409         case Iop_InterleaveHI64x2: case Iop_CatOddLanes32x4:
2410         case Iop_CatOddLanes16x8:  case Iop_CatOddLanes8x16:
2411         case Iop_InterleaveHI32x4:
2412         case Iop_InterleaveHI16x8: case Iop_InterleaveHI8x16:
2413         case Iop_InterleaveLO32x4:
2414         case Iop_InterleaveLO16x8: case Iop_InterleaveLO8x16:
2415         case Iop_PolynomialMul8x16:
2416         case Iop_QAdd64Sx2: case Iop_QAdd32Sx4:
2417         case Iop_QAdd16Sx8: case Iop_QAdd8Sx16:
2418         case Iop_QAdd64Ux2: case Iop_QAdd32Ux4:
2419         case Iop_QAdd16Ux8: case Iop_QAdd8Ux16:
2420         case Iop_QSub64Sx2: case Iop_QSub32Sx4:
2421         case Iop_QSub16Sx8: case Iop_QSub8Sx16:
2422         case Iop_QSub64Ux2: case Iop_QSub32Ux4:
2423         case Iop_QSub16Ux8: case Iop_QSub8Ux16:
2424         case Iop_QDMulHi32Sx4:  case Iop_QDMulHi16Sx8:
2425         case Iop_QRDMulHi32Sx4: case Iop_QRDMulHi16Sx8:
2426         case Iop_Sh8Sx16:  case Iop_Sh16Sx8:
2427         case Iop_Sh32Sx4:  case Iop_Sh64Sx2:
2428         case Iop_Sh8Ux16:  case Iop_Sh16Ux8:
2429         case Iop_Sh32Ux4:  case Iop_Sh64Ux2:
2430         case Iop_Rsh8Sx16: case Iop_Rsh16Sx8:
2431         case Iop_Rsh32Sx4: case Iop_Rsh64Sx2:
2432         case Iop_Rsh8Ux16: case Iop_Rsh16Ux8:
2433         case Iop_Rsh32Ux4: case Iop_Rsh64Ux2:
2434         case Iop_Max64Fx2: case Iop_Max32Fx4:
2435         case Iop_Min64Fx2: case Iop_Min32Fx4:
2436         case Iop_RecipStep64Fx2: case Iop_RecipStep32Fx4:
2437         case Iop_RSqrtStep64Fx2: case Iop_RSqrtStep32Fx4:
2438         {
2439            HReg res   = newVRegV(env);
2440            HReg argL  = iselV128Expr(env, e->Iex.Binop.arg1);
2441            HReg argR  = iselV128Expr(env, e->Iex.Binop.arg2);
2442            Bool sw    = False;
2443            Bool setRM = False;
2444            ARM64VecBinOp op = ARM64vecb_INVALID;
2445            switch (e->Iex.Binop.op) {
2446               case Iop_AndV128:    op = ARM64vecb_AND; break;
2447               case Iop_OrV128:     op = ARM64vecb_ORR; break;
2448               case Iop_XorV128:    op = ARM64vecb_XOR; break;
2449               case Iop_Max32Ux4:   op = ARM64vecb_UMAX32x4; break;
2450               case Iop_Max16Ux8:   op = ARM64vecb_UMAX16x8; break;
2451               case Iop_Max8Ux16:   op = ARM64vecb_UMAX8x16; break;
2452               case Iop_Min32Ux4:   op = ARM64vecb_UMIN32x4; break;
2453               case Iop_Min16Ux8:   op = ARM64vecb_UMIN16x8; break;
2454               case Iop_Min8Ux16:   op = ARM64vecb_UMIN8x16; break;
2455               case Iop_Max32Sx4:   op = ARM64vecb_SMAX32x4; break;
2456               case Iop_Max16Sx8:   op = ARM64vecb_SMAX16x8; break;
2457               case Iop_Max8Sx16:   op = ARM64vecb_SMAX8x16; break;
2458               case Iop_Min32Sx4:   op = ARM64vecb_SMIN32x4; break;
2459               case Iop_Min16Sx8:   op = ARM64vecb_SMIN16x8; break;
2460               case Iop_Min8Sx16:   op = ARM64vecb_SMIN8x16; break;
2461               case Iop_Add64x2:    op = ARM64vecb_ADD64x2; break;
2462               case Iop_Add32x4:    op = ARM64vecb_ADD32x4; break;
2463               case Iop_Add16x8:    op = ARM64vecb_ADD16x8; break;
2464               case Iop_Add8x16:    op = ARM64vecb_ADD8x16; break;
2465               case Iop_Sub64x2:    op = ARM64vecb_SUB64x2; break;
2466               case Iop_Sub32x4:    op = ARM64vecb_SUB32x4; break;
2467               case Iop_Sub16x8:    op = ARM64vecb_SUB16x8; break;
2468               case Iop_Sub8x16:    op = ARM64vecb_SUB8x16; break;
2469               case Iop_Mul32x4:    op = ARM64vecb_MUL32x4; break;
2470               case Iop_Mul16x8:    op = ARM64vecb_MUL16x8; break;
2471               case Iop_Mul8x16:    op = ARM64vecb_MUL8x16; break;
2472               case Iop_CmpEQ64x2:  op = ARM64vecb_CMEQ64x2; break;
2473               case Iop_CmpEQ32x4:  op = ARM64vecb_CMEQ32x4; break;
2474               case Iop_CmpEQ16x8:  op = ARM64vecb_CMEQ16x8; break;
2475               case Iop_CmpEQ8x16:  op = ARM64vecb_CMEQ8x16; break;
2476               case Iop_CmpGT64Ux2: op = ARM64vecb_CMHI64x2; break;
2477               case Iop_CmpGT32Ux4: op = ARM64vecb_CMHI32x4; break;
2478               case Iop_CmpGT16Ux8: op = ARM64vecb_CMHI16x8; break;
2479               case Iop_CmpGT8Ux16: op = ARM64vecb_CMHI8x16; break;
2480               case Iop_CmpGT64Sx2: op = ARM64vecb_CMGT64x2; break;
2481               case Iop_CmpGT32Sx4: op = ARM64vecb_CMGT32x4; break;
2482               case Iop_CmpGT16Sx8: op = ARM64vecb_CMGT16x8; break;
2483               case Iop_CmpGT8Sx16: op = ARM64vecb_CMGT8x16; break;
2484               case Iop_CmpEQ64Fx2: op = ARM64vecb_FCMEQ64x2; break;
2485               case Iop_CmpEQ32Fx4: op = ARM64vecb_FCMEQ32x4; break;
2486               case Iop_CmpLE64Fx2: op = ARM64vecb_FCMGE64x2; sw = True; break;
2487               case Iop_CmpLE32Fx4: op = ARM64vecb_FCMGE32x4; sw = True; break;
2488               case Iop_CmpLT64Fx2: op = ARM64vecb_FCMGT64x2; sw = True; break;
2489               case Iop_CmpLT32Fx4: op = ARM64vecb_FCMGT32x4; sw = True; break;
2490               case Iop_Perm8x16:   op = ARM64vecb_TBL1; break;
2491               case Iop_InterleaveLO64x2: op = ARM64vecb_UZP164x2; sw = True;
2492                                          break;
2493               case Iop_CatEvenLanes32x4: op = ARM64vecb_UZP132x4; sw = True;
2494                                          break;
2495               case Iop_CatEvenLanes16x8: op = ARM64vecb_UZP116x8; sw = True;
2496                                          break;
2497               case Iop_CatEvenLanes8x16: op = ARM64vecb_UZP18x16; sw = True;
2498                                          break;
2499               case Iop_InterleaveHI64x2: op = ARM64vecb_UZP264x2; sw = True;
2500                                          break;
2501               case Iop_CatOddLanes32x4:  op = ARM64vecb_UZP232x4; sw = True;
2502                                          break;
2503               case Iop_CatOddLanes16x8:  op = ARM64vecb_UZP216x8; sw = True;
2504                                          break;
2505               case Iop_CatOddLanes8x16:  op = ARM64vecb_UZP28x16; sw = True;
2506                                          break;
2507               case Iop_InterleaveHI32x4: op = ARM64vecb_ZIP232x4; sw = True;
2508                                          break;
2509               case Iop_InterleaveHI16x8: op = ARM64vecb_ZIP216x8; sw = True;
2510                                          break;
2511               case Iop_InterleaveHI8x16: op = ARM64vecb_ZIP28x16; sw = True;
2512                                          break;
2513               case Iop_InterleaveLO32x4: op = ARM64vecb_ZIP132x4; sw = True;
2514                                          break;
2515               case Iop_InterleaveLO16x8: op = ARM64vecb_ZIP116x8; sw = True;
2516                                          break;
2517               case Iop_InterleaveLO8x16: op = ARM64vecb_ZIP18x16; sw = True;
2518                                          break;
2519               case Iop_PolynomialMul8x16: op = ARM64vecb_PMUL8x16; break;
2520               case Iop_QAdd64Sx2:      op = ARM64vecb_SQADD64x2; break;
2521               case Iop_QAdd32Sx4:      op = ARM64vecb_SQADD32x4; break;
2522               case Iop_QAdd16Sx8:      op = ARM64vecb_SQADD16x8; break;
2523               case Iop_QAdd8Sx16:      op = ARM64vecb_SQADD8x16; break;
2524               case Iop_QAdd64Ux2:      op = ARM64vecb_UQADD64x2; break;
2525               case Iop_QAdd32Ux4:      op = ARM64vecb_UQADD32x4; break;
2526               case Iop_QAdd16Ux8:      op = ARM64vecb_UQADD16x8; break;
2527               case Iop_QAdd8Ux16:      op = ARM64vecb_UQADD8x16; break;
2528               case Iop_QSub64Sx2:      op = ARM64vecb_SQSUB64x2; break;
2529               case Iop_QSub32Sx4:      op = ARM64vecb_SQSUB32x4; break;
2530               case Iop_QSub16Sx8:      op = ARM64vecb_SQSUB16x8; break;
2531               case Iop_QSub8Sx16:      op = ARM64vecb_SQSUB8x16; break;
2532               case Iop_QSub64Ux2:      op = ARM64vecb_UQSUB64x2; break;
2533               case Iop_QSub32Ux4:      op = ARM64vecb_UQSUB32x4; break;
2534               case Iop_QSub16Ux8:      op = ARM64vecb_UQSUB16x8; break;
2535               case Iop_QSub8Ux16:      op = ARM64vecb_UQSUB8x16; break;
2536               case Iop_QDMulHi32Sx4:   op = ARM64vecb_SQDMULH32x4; break;
2537               case Iop_QDMulHi16Sx8:   op = ARM64vecb_SQDMULH16x8; break;
2538               case Iop_QRDMulHi32Sx4:  op = ARM64vecb_SQRDMULH32x4; break;
2539               case Iop_QRDMulHi16Sx8:  op = ARM64vecb_SQRDMULH16x8; break;
2540               case Iop_Sh8Sx16:        op = ARM64vecb_SSHL8x16; break;
2541               case Iop_Sh16Sx8:        op = ARM64vecb_SSHL16x8; break;
2542               case Iop_Sh32Sx4:        op = ARM64vecb_SSHL32x4; break;
2543               case Iop_Sh64Sx2:        op = ARM64vecb_SSHL64x2; break;
2544               case Iop_Sh8Ux16:        op = ARM64vecb_USHL8x16; break;
2545               case Iop_Sh16Ux8:        op = ARM64vecb_USHL16x8; break;
2546               case Iop_Sh32Ux4:        op = ARM64vecb_USHL32x4; break;
2547               case Iop_Sh64Ux2:        op = ARM64vecb_USHL64x2; break;
2548               case Iop_Rsh8Sx16:       op = ARM64vecb_SRSHL8x16; break;
2549               case Iop_Rsh16Sx8:       op = ARM64vecb_SRSHL16x8; break;
2550               case Iop_Rsh32Sx4:       op = ARM64vecb_SRSHL32x4; break;
2551               case Iop_Rsh64Sx2:       op = ARM64vecb_SRSHL64x2; break;
2552               case Iop_Rsh8Ux16:       op = ARM64vecb_URSHL8x16; break;
2553               case Iop_Rsh16Ux8:       op = ARM64vecb_URSHL16x8; break;
2554               case Iop_Rsh32Ux4:       op = ARM64vecb_URSHL32x4; break;
2555               case Iop_Rsh64Ux2:       op = ARM64vecb_URSHL64x2; break;
2556               case Iop_Max64Fx2:       op = ARM64vecb_FMAX64x2; break;
2557               case Iop_Max32Fx4:       op = ARM64vecb_FMAX32x4; break;
2558               case Iop_Min64Fx2:       op = ARM64vecb_FMIN64x2; break;
2559               case Iop_Min32Fx4:       op = ARM64vecb_FMIN32x4; break;
2560               case Iop_RecipStep64Fx2: setRM = True;
2561                                        op = ARM64vecb_FRECPS64x2; break;
2562               case Iop_RecipStep32Fx4: setRM = True;
2563                                        op = ARM64vecb_FRECPS32x4; break;
2564               case Iop_RSqrtStep64Fx2: setRM = True;
2565                                        op = ARM64vecb_FRSQRTS64x2; break;
2566               case Iop_RSqrtStep32Fx4: setRM = True;
2567                                        op = ARM64vecb_FRSQRTS32x4; break;
2568               default: vassert(0);
2569            }
2570            if (setRM) {
2571               // This is a bit of a kludge.  We should do rm properly for
2572               // these recip-step insns, but that would require changing the
2573               // primop's type to take an rmode.
2574               set_FPCR_rounding_mode(env, IRExpr_Const(
2575                                              IRConst_U32(Irrm_NEAREST)));
2576            }
2577            if (sw) {
2578               addInstr(env, ARM64Instr_VBinV(op, res, argR, argL));
2579            } else {
2580               addInstr(env, ARM64Instr_VBinV(op, res, argL, argR));
2581            }
2582            return res;
2583         }
2584         /* -- These only have 2 operand instructions, so we have to first move
2585            the first argument into a new register, for modification. -- */
2586         case Iop_QAddExtUSsatSS8x16: case Iop_QAddExtUSsatSS16x8:
2587         case Iop_QAddExtUSsatSS32x4: case Iop_QAddExtUSsatSS64x2:
2588         case Iop_QAddExtSUsatUU8x16: case Iop_QAddExtSUsatUU16x8:
2589         case Iop_QAddExtSUsatUU32x4: case Iop_QAddExtSUsatUU64x2:
2590         {
2591            HReg res  = newVRegV(env);
2592            HReg argL = iselV128Expr(env, e->Iex.Binop.arg1);
2593            HReg argR = iselV128Expr(env, e->Iex.Binop.arg2);
2594            ARM64VecModifyOp op = ARM64vecmo_INVALID;
2595            switch (e->Iex.Binop.op) {
2596               /* In the following 8 cases, the US - SU switching is intended.
2597                  See comments on the libvex_ir.h for details.  Also in the
2598                  ARM64 front end, where used these primops are generated. */
2599               case Iop_QAddExtUSsatSS8x16: op = ARM64vecmo_SUQADD8x16; break;
2600               case Iop_QAddExtUSsatSS16x8: op = ARM64vecmo_SUQADD16x8; break;
2601               case Iop_QAddExtUSsatSS32x4: op = ARM64vecmo_SUQADD32x4; break;
2602               case Iop_QAddExtUSsatSS64x2: op = ARM64vecmo_SUQADD64x2; break;
2603               case Iop_QAddExtSUsatUU8x16: op = ARM64vecmo_USQADD8x16; break;
2604               case Iop_QAddExtSUsatUU16x8: op = ARM64vecmo_USQADD16x8; break;
2605               case Iop_QAddExtSUsatUU32x4: op = ARM64vecmo_USQADD32x4; break;
2606               case Iop_QAddExtSUsatUU64x2: op = ARM64vecmo_USQADD64x2; break;
2607               default: vassert(0);
2608            }
2609            /* The order of the operands is important.  Although this is
2610               basically addition, the two operands are extended differently,
2611               making it important to get them into the correct registers in
2612               the instruction. */
2613            addInstr(env, ARM64Instr_VMov(16, res, argR));
2614            addInstr(env, ARM64Instr_VModifyV(op, res, argL));
2615            return res;
2616         }
2617         /* -- Shifts by an immediate. -- */
2618         case Iop_ShrN64x2: case Iop_ShrN32x4:
2619         case Iop_ShrN16x8: case Iop_ShrN8x16:
2620         case Iop_SarN64x2: case Iop_SarN32x4:
2621         case Iop_SarN16x8: case Iop_SarN8x16:
2622         case Iop_ShlN64x2: case Iop_ShlN32x4:
2623         case Iop_ShlN16x8: case Iop_ShlN8x16:
2624         case Iop_QShlNsatUU64x2: case Iop_QShlNsatUU32x4:
2625         case Iop_QShlNsatUU16x8: case Iop_QShlNsatUU8x16:
2626         case Iop_QShlNsatSS64x2: case Iop_QShlNsatSS32x4:
2627         case Iop_QShlNsatSS16x8: case Iop_QShlNsatSS8x16:
2628         case Iop_QShlNsatSU64x2: case Iop_QShlNsatSU32x4:
2629         case Iop_QShlNsatSU16x8: case Iop_QShlNsatSU8x16:
2630         {
2631            IRExpr* argL = e->Iex.Binop.arg1;
2632            IRExpr* argR = e->Iex.Binop.arg2;
2633            if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2634               UInt amt   = argR->Iex.Const.con->Ico.U8;
2635               UInt limLo = 0;
2636               UInt limHi = 0;
2637               ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
2638               /* Establish the instruction to use. */
2639               switch (e->Iex.Binop.op) {
2640                  case Iop_ShrN64x2:       op = ARM64vecshi_USHR64x2;   break;
2641                  case Iop_ShrN32x4:       op = ARM64vecshi_USHR32x4;   break;
2642                  case Iop_ShrN16x8:       op = ARM64vecshi_USHR16x8;   break;
2643                  case Iop_ShrN8x16:       op = ARM64vecshi_USHR8x16;   break;
2644                  case Iop_SarN64x2:       op = ARM64vecshi_SSHR64x2;   break;
2645                  case Iop_SarN32x4:       op = ARM64vecshi_SSHR32x4;   break;
2646                  case Iop_SarN16x8:       op = ARM64vecshi_SSHR16x8;   break;
2647                  case Iop_SarN8x16:       op = ARM64vecshi_SSHR8x16;   break;
2648                  case Iop_ShlN64x2:       op = ARM64vecshi_SHL64x2;    break;
2649                  case Iop_ShlN32x4:       op = ARM64vecshi_SHL32x4;    break;
2650                  case Iop_ShlN16x8:       op = ARM64vecshi_SHL16x8;    break;
2651                  case Iop_ShlN8x16:       op = ARM64vecshi_SHL8x16;    break;
2652                  case Iop_QShlNsatUU64x2: op = ARM64vecshi_UQSHL64x2;  break;
2653                  case Iop_QShlNsatUU32x4: op = ARM64vecshi_UQSHL32x4;  break;
2654                  case Iop_QShlNsatUU16x8: op = ARM64vecshi_UQSHL16x8;  break;
2655                  case Iop_QShlNsatUU8x16: op = ARM64vecshi_UQSHL8x16;  break;
2656                  case Iop_QShlNsatSS64x2: op = ARM64vecshi_SQSHL64x2;  break;
2657                  case Iop_QShlNsatSS32x4: op = ARM64vecshi_SQSHL32x4;  break;
2658                  case Iop_QShlNsatSS16x8: op = ARM64vecshi_SQSHL16x8;  break;
2659                  case Iop_QShlNsatSS8x16: op = ARM64vecshi_SQSHL8x16;  break;
2660                  case Iop_QShlNsatSU64x2: op = ARM64vecshi_SQSHLU64x2; break;
2661                  case Iop_QShlNsatSU32x4: op = ARM64vecshi_SQSHLU32x4; break;
2662                  case Iop_QShlNsatSU16x8: op = ARM64vecshi_SQSHLU16x8; break;
2663                  case Iop_QShlNsatSU8x16: op = ARM64vecshi_SQSHLU8x16; break;
2664                  default: vassert(0);
2665               }
2666               /* Establish the shift limits, for sanity check purposes only. */
2667               switch (e->Iex.Binop.op) {
2668                  case Iop_ShrN64x2:       limLo = 1; limHi = 64; break;
2669                  case Iop_ShrN32x4:       limLo = 1; limHi = 32; break;
2670                  case Iop_ShrN16x8:       limLo = 1; limHi = 16; break;
2671                  case Iop_ShrN8x16:       limLo = 1; limHi = 8;  break;
2672                  case Iop_SarN64x2:       limLo = 1; limHi = 64; break;
2673                  case Iop_SarN32x4:       limLo = 1; limHi = 32; break;
2674                  case Iop_SarN16x8:       limLo = 1; limHi = 16; break;
2675                  case Iop_SarN8x16:       limLo = 1; limHi = 8;  break;
2676                  case Iop_ShlN64x2:       limLo = 0; limHi = 63; break;
2677                  case Iop_ShlN32x4:       limLo = 0; limHi = 31; break;
2678                  case Iop_ShlN16x8:       limLo = 0; limHi = 15; break;
2679                  case Iop_ShlN8x16:       limLo = 0; limHi = 7;  break;
2680                  case Iop_QShlNsatUU64x2: limLo = 0; limHi = 63; break;
2681                  case Iop_QShlNsatUU32x4: limLo = 0; limHi = 31; break;
2682                  case Iop_QShlNsatUU16x8: limLo = 0; limHi = 15; break;
2683                  case Iop_QShlNsatUU8x16: limLo = 0; limHi = 7;  break;
2684                  case Iop_QShlNsatSS64x2: limLo = 0; limHi = 63; break;
2685                  case Iop_QShlNsatSS32x4: limLo = 0; limHi = 31; break;
2686                  case Iop_QShlNsatSS16x8: limLo = 0; limHi = 15; break;
2687                  case Iop_QShlNsatSS8x16: limLo = 0; limHi = 7;  break;
2688                  case Iop_QShlNsatSU64x2: limLo = 0; limHi = 63; break;
2689                  case Iop_QShlNsatSU32x4: limLo = 0; limHi = 31; break;
2690                  case Iop_QShlNsatSU16x8: limLo = 0; limHi = 15; break;
2691                  case Iop_QShlNsatSU8x16: limLo = 0; limHi = 7;  break;
2692                  default: vassert(0);
2693               }
2694               /* For left shifts, the allowable amt values are
2695                  0 .. lane_bits-1.  For right shifts the allowable
2696                  values are 1 .. lane_bits. */
2697               if (op != ARM64vecshi_INVALID && amt >= limLo && amt <= limHi) {
2698                  HReg src = iselV128Expr(env, argL);
2699                  HReg dst = newVRegV(env);
2700                  addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
2701                  return dst;
2702               }
2703               /* Special case some no-op shifts that the arm64 front end
2704                  throws at us.  We can't generate any instructions for these,
2705                  but we don't need to either. */
2706               switch (e->Iex.Binop.op) {
2707                  case Iop_ShrN64x2: case Iop_ShrN32x4:
2708                  case Iop_ShrN16x8: case Iop_ShrN8x16:
2709                     if (amt == 0) {
2710                        return iselV128Expr(env, argL);
2711                     }
2712                     break;
2713                  default:
2714                     break;
2715               }
2716               /* otherwise unhandled */
2717            }
2718            /* else fall out; this is unhandled */
2719            break;
2720         }
2721         /* -- Saturating narrowing by an immediate -- */
2722         /* uu */
2723         case Iop_QandQShrNnarrow16Uto8Ux8:
2724         case Iop_QandQShrNnarrow32Uto16Ux4:
2725         case Iop_QandQShrNnarrow64Uto32Ux2:
2726         /* ss */
2727         case Iop_QandQSarNnarrow16Sto8Sx8:
2728         case Iop_QandQSarNnarrow32Sto16Sx4:
2729         case Iop_QandQSarNnarrow64Sto32Sx2:
2730         /* su */
2731         case Iop_QandQSarNnarrow16Sto8Ux8:
2732         case Iop_QandQSarNnarrow32Sto16Ux4:
2733         case Iop_QandQSarNnarrow64Sto32Ux2:
2734         /* ruu */
2735         case Iop_QandQRShrNnarrow16Uto8Ux8:
2736         case Iop_QandQRShrNnarrow32Uto16Ux4:
2737         case Iop_QandQRShrNnarrow64Uto32Ux2:
2738         /* rss */
2739         case Iop_QandQRSarNnarrow16Sto8Sx8:
2740         case Iop_QandQRSarNnarrow32Sto16Sx4:
2741         case Iop_QandQRSarNnarrow64Sto32Sx2:
2742         /* rsu */
2743         case Iop_QandQRSarNnarrow16Sto8Ux8:
2744         case Iop_QandQRSarNnarrow32Sto16Ux4:
2745         case Iop_QandQRSarNnarrow64Sto32Ux2:
2746         {
2747            IRExpr* argL = e->Iex.Binop.arg1;
2748            IRExpr* argR = e->Iex.Binop.arg2;
2749            if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2750               UInt amt   = argR->Iex.Const.con->Ico.U8;
2751               UInt limit = 0;
2752               ARM64VecShiftImmOp op = ARM64vecshi_INVALID;
2753               switch (e->Iex.Binop.op) {
2754                  /* uu */
2755                  case Iop_QandQShrNnarrow64Uto32Ux2:
2756                     op = ARM64vecshi_UQSHRN2SD; limit = 64; break;
2757                  case Iop_QandQShrNnarrow32Uto16Ux4:
2758                     op = ARM64vecshi_UQSHRN4HS; limit = 32; break;
2759                  case Iop_QandQShrNnarrow16Uto8Ux8:
2760                     op = ARM64vecshi_UQSHRN8BH; limit = 16; break;
2761                  /* ss */
2762                  case Iop_QandQSarNnarrow64Sto32Sx2:
2763                     op = ARM64vecshi_SQSHRN2SD; limit = 64; break;
2764                  case Iop_QandQSarNnarrow32Sto16Sx4:
2765                     op = ARM64vecshi_SQSHRN4HS; limit = 32; break;
2766                  case Iop_QandQSarNnarrow16Sto8Sx8:
2767                     op = ARM64vecshi_SQSHRN8BH; limit = 16; break;
2768                  /* su */
2769                  case Iop_QandQSarNnarrow64Sto32Ux2:
2770                     op = ARM64vecshi_SQSHRUN2SD; limit = 64; break;
2771                  case Iop_QandQSarNnarrow32Sto16Ux4:
2772                     op = ARM64vecshi_SQSHRUN4HS; limit = 32; break;
2773                  case Iop_QandQSarNnarrow16Sto8Ux8:
2774                     op = ARM64vecshi_SQSHRUN8BH; limit = 16; break;
2775                  /* ruu */
2776                  case Iop_QandQRShrNnarrow64Uto32Ux2:
2777                     op = ARM64vecshi_UQRSHRN2SD; limit = 64; break;
2778                  case Iop_QandQRShrNnarrow32Uto16Ux4:
2779                     op = ARM64vecshi_UQRSHRN4HS; limit = 32; break;
2780                  case Iop_QandQRShrNnarrow16Uto8Ux8:
2781                     op = ARM64vecshi_UQRSHRN8BH; limit = 16; break;
2782                  /* rss */
2783                  case Iop_QandQRSarNnarrow64Sto32Sx2:
2784                     op = ARM64vecshi_SQRSHRN2SD; limit = 64; break;
2785                  case Iop_QandQRSarNnarrow32Sto16Sx4:
2786                     op = ARM64vecshi_SQRSHRN4HS; limit = 32; break;
2787                  case Iop_QandQRSarNnarrow16Sto8Sx8:
2788                     op = ARM64vecshi_SQRSHRN8BH; limit = 16; break;
2789                  /* rsu */
2790                  case Iop_QandQRSarNnarrow64Sto32Ux2:
2791                     op = ARM64vecshi_SQRSHRUN2SD; limit = 64; break;
2792                  case Iop_QandQRSarNnarrow32Sto16Ux4:
2793                     op = ARM64vecshi_SQRSHRUN4HS; limit = 32; break;
2794                  case Iop_QandQRSarNnarrow16Sto8Ux8:
2795                     op = ARM64vecshi_SQRSHRUN8BH; limit = 16; break;
2796                  /**/
2797                  default:
2798                     vassert(0);
2799               }
2800               if (op != ARM64vecshi_INVALID && amt >= 1 && amt <= limit) {
2801                  HReg src  = iselV128Expr(env, argL);
2802                  HReg dst  = newVRegV(env);
2803                  HReg fpsr = newVRegI(env);
2804                  /* Clear FPSR.Q, do the operation, and return both its
2805                     result and the new value of FPSR.Q.  We can simply
2806                     zero out FPSR since all the other bits have no relevance
2807                     in VEX generated code. */
2808                  addInstr(env, ARM64Instr_Imm64(fpsr, 0));
2809                  addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
2810                  addInstr(env, ARM64Instr_VShiftImmV(op, dst, src, amt));
2811                  addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
2812                  addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
2813                                                             ARM64sh_SHR));
2814                  ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
2815                  vassert(ril_one);
2816                  addInstr(env, ARM64Instr_Logic(fpsr,
2817                                                 fpsr, ril_one, ARM64lo_AND));
2818                  /* Now we have: the main (shift) result in the bottom half
2819                     of |dst|, and the Q bit at the bottom of |fpsr|.
2820                     Combining them with a "InterleaveLO64x2" style operation
2821                     produces a 128 bit value, dst[63:0]:fpsr[63:0],
2822                     which is what we want. */
2823                  HReg scratch = newVRegV(env);
2824                  addInstr(env, ARM64Instr_VQfromX(scratch, fpsr));
2825                  addInstr(env, ARM64Instr_VBinV(ARM64vecb_UZP164x2,
2826                                                 dst, dst, scratch));
2827                  return dst;
2828               }
2829            }
2830            /* else fall out; this is unhandled */
2831            break;
2832         }
2833
2834         // Use Iop_SliceV128 in preference to Iop_ShlV128 and Iop_ShrV128,
2835         // as it is in some ways more general and often leads to better
2836         // code overall.
2837         case Iop_ShlV128:
2838         case Iop_ShrV128: {
2839            Bool isSHR = e->Iex.Binop.op == Iop_ShrV128;
2840            /* This is tricky.  Generate an EXT instruction with zeroes in
2841               the high operand (shift right) or low operand (shift left).
2842               Note that we can only slice in the EXT instruction at a byte
2843               level of granularity, so the shift amount needs careful
2844               checking. */
2845            IRExpr* argL = e->Iex.Binop.arg1;
2846            IRExpr* argR = e->Iex.Binop.arg2;
2847            if (argR->tag == Iex_Const && argR->Iex.Const.con->tag == Ico_U8) {
2848               UInt amt   = argR->Iex.Const.con->Ico.U8;
2849               Bool amtOK = False;
2850               switch (amt) {
2851                  case 0x08: case 0x10: case 0x18: case 0x20: case 0x28:
2852                  case 0x30: case 0x38: case 0x40: case 0x48: case 0x50:
2853                  case 0x58: case 0x60: case 0x68: case 0x70: case 0x78:
2854                     amtOK = True; break;
2855               }
2856               /* We could also deal with amt==0 by copying the source to
2857                  the destination, but there's no need for that so far. */
2858               if (amtOK) {
2859                  HReg src  = iselV128Expr(env, argL);
2860                  HReg srcZ = newVRegV(env);
2861                  addInstr(env, ARM64Instr_VImmQ(srcZ, 0x0000));
2862                  UInt immB = amt / 8;
2863                  vassert(immB >= 1 && immB <= 15);
2864                  HReg dst = newVRegV(env);
2865                  if (isSHR) {
2866                    addInstr(env, ARM64Instr_VExtV(dst, src/*lo*/, srcZ/*hi*/,
2867                                                         immB));
2868                  } else {
2869                    addInstr(env, ARM64Instr_VExtV(dst, srcZ/*lo*/, src/*hi*/,
2870                                                         16 - immB));
2871                  }
2872                  return dst;
2873               }
2874            }
2875            /* else fall out; this is unhandled */
2876            break;
2877         }
2878
2879         case Iop_PolynomialMull8x8:
2880         case Iop_Mull32Ux2:
2881         case Iop_Mull16Ux4:
2882         case Iop_Mull8Ux8:
2883         case Iop_Mull32Sx2:
2884         case Iop_Mull16Sx4:
2885         case Iop_Mull8Sx8:
2886         case Iop_QDMull32Sx2:
2887         case Iop_QDMull16Sx4:
2888         {
2889            HReg iSrcL = iselIntExpr_R(env, e->Iex.Binop.arg1);
2890            HReg iSrcR = iselIntExpr_R(env, e->Iex.Binop.arg2);
2891            HReg vSrcL = newVRegV(env);
2892            HReg vSrcR = newVRegV(env);
2893            HReg dst   = newVRegV(env);
2894            ARM64VecBinOp op = ARM64vecb_INVALID;
2895            switch (e->Iex.Binop.op) {
2896               case Iop_PolynomialMull8x8: op = ARM64vecb_PMULL8x8;    break;
2897               case Iop_Mull32Ux2:         op = ARM64vecb_UMULL2DSS;   break;
2898               case Iop_Mull16Ux4:         op = ARM64vecb_UMULL4SHH;   break;
2899               case Iop_Mull8Ux8:          op = ARM64vecb_UMULL8HBB;   break;
2900               case Iop_Mull32Sx2:         op = ARM64vecb_SMULL2DSS;   break;
2901               case Iop_Mull16Sx4:         op = ARM64vecb_SMULL4SHH;   break;
2902               case Iop_Mull8Sx8:          op = ARM64vecb_SMULL8HBB;   break;
2903               case Iop_QDMull32Sx2:       op = ARM64vecb_SQDMULL2DSS; break;
2904               case Iop_QDMull16Sx4:       op = ARM64vecb_SQDMULL4SHH; break;
2905               default: vassert(0);
2906            }
2907            addInstr(env, ARM64Instr_VQfromXX(vSrcL, iSrcL, iSrcL));
2908            addInstr(env, ARM64Instr_VQfromXX(vSrcR, iSrcR, iSrcR));
2909            addInstr(env, ARM64Instr_VBinV(op, dst, vSrcL, vSrcR));
2910            return dst;
2911         }
2912
2913         /* ... */
2914         default:
2915            break;
2916      } /* switch on the binop */
2917   } /* if (e->tag == Iex_Binop) */
2918
2919   if (e->tag == Iex_Triop) {
2920      IRTriop*      triop  = e->Iex.Triop.details;
2921      ARM64VecBinOp vecbop = ARM64vecb_INVALID;
2922      switch (triop->op) {
2923         case Iop_Add64Fx2: vecbop = ARM64vecb_FADD64x2; break;
2924         case Iop_Sub64Fx2: vecbop = ARM64vecb_FSUB64x2; break;
2925         case Iop_Mul64Fx2: vecbop = ARM64vecb_FMUL64x2; break;
2926         case Iop_Div64Fx2: vecbop = ARM64vecb_FDIV64x2; break;
2927         case Iop_Add32Fx4: vecbop = ARM64vecb_FADD32x4; break;
2928         case Iop_Sub32Fx4: vecbop = ARM64vecb_FSUB32x4; break;
2929         case Iop_Mul32Fx4: vecbop = ARM64vecb_FMUL32x4; break;
2930         case Iop_Div32Fx4: vecbop = ARM64vecb_FDIV32x4; break;
2931         default: break;
2932      }
2933      if (vecbop != ARM64vecb_INVALID) {
2934         HReg argL = iselV128Expr(env, triop->arg2);
2935         HReg argR = iselV128Expr(env, triop->arg3);
2936         HReg dst  = newVRegV(env);
2937         set_FPCR_rounding_mode(env, triop->arg1);
2938         addInstr(env, ARM64Instr_VBinV(vecbop, dst, argL, argR));
2939         return dst;
2940      }
2941
2942      if (triop->op == Iop_SliceV128) {
2943         /* Note that, compared to ShlV128/ShrV128 just above, the shift
2944            amount here is in bytes, not bits. */
2945         IRExpr* argHi  = triop->arg1;
2946         IRExpr* argLo  = triop->arg2;
2947         IRExpr* argAmt = triop->arg3;
2948         if (argAmt->tag == Iex_Const && argAmt->Iex.Const.con->tag == Ico_U8) {
2949            UInt amt   = argAmt->Iex.Const.con->Ico.U8;
2950            Bool amtOK = amt >= 1 && amt <= 15;
2951            /* We could also deal with amt==0 by copying argLO to
2952               the destination, but there's no need for that so far. */
2953            if (amtOK) {
2954               HReg srcHi = iselV128Expr(env, argHi);
2955               HReg srcLo = iselV128Expr(env, argLo);
2956               HReg dst = newVRegV(env);
2957              addInstr(env, ARM64Instr_VExtV(dst, srcLo, srcHi, amt));
2958               return dst;
2959            }
2960         }
2961         /* else fall out; this is unhandled */
2962      }
2963
2964   } /* if (e->tag == Iex_Triop) */
2965
2966  v128_expr_bad:
2967   ppIRExpr(e);
2968   vpanic("iselV128Expr_wrk");
2969}
2970
2971
2972/*---------------------------------------------------------*/
2973/*--- ISEL: Floating point expressions (64 bit)         ---*/
2974/*---------------------------------------------------------*/
2975
2976/* Compute a 64-bit floating point value into a register, the identity
2977   of which is returned.  As with iselIntExpr_R, the reg may be either
2978   real or virtual; in any case it must not be changed by subsequent
2979   code emitted by the caller.  */
2980
2981static HReg iselDblExpr ( ISelEnv* env, IRExpr* e )
2982{
2983   HReg r = iselDblExpr_wrk( env, e );
2984#  if 0
2985   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2986#  endif
2987   vassert(hregClass(r) == HRcFlt64);
2988   vassert(hregIsVirtual(r));
2989   return r;
2990}
2991
2992/* DO NOT CALL THIS DIRECTLY */
2993static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
2994{
2995   IRType ty = typeOfIRExpr(env->type_env,e);
2996   vassert(e);
2997   vassert(ty == Ity_F64);
2998
2999   if (e->tag == Iex_RdTmp) {
3000      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3001   }
3002
3003   if (e->tag == Iex_Const) {
3004      IRConst* con = e->Iex.Const.con;
3005      if (con->tag == Ico_F64i) {
3006         HReg src = newVRegI(env);
3007         HReg dst = newVRegD(env);
3008         addInstr(env, ARM64Instr_Imm64(src, con->Ico.F64i));
3009         addInstr(env, ARM64Instr_VDfromX(dst, src));
3010         return dst;
3011      }
3012      if (con->tag == Ico_F64) {
3013         HReg src = newVRegI(env);
3014         HReg dst = newVRegD(env);
3015         union { Double d64; ULong u64; } u;
3016         vassert(sizeof(u) == 8);
3017         u.d64 = con->Ico.F64;
3018         addInstr(env, ARM64Instr_Imm64(src, u.u64));
3019         addInstr(env, ARM64Instr_VDfromX(dst, src));
3020         return dst;
3021      }
3022   }
3023
3024   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3025      vassert(e->Iex.Load.ty == Ity_F64);
3026      HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
3027      HReg res  = newVRegD(env);
3028      addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, res, addr, 0));
3029      return res;
3030   }
3031
3032   if (e->tag == Iex_Get) {
3033      Int offs = e->Iex.Get.offset;
3034      if (offs >= 0 && offs < 32768 && 0 == (offs & 7)) {
3035         HReg rD = newVRegD(env);
3036         HReg rN = get_baseblock_register();
3037         addInstr(env, ARM64Instr_VLdStD(True/*isLoad*/, rD, rN, offs));
3038         return rD;
3039      }
3040   }
3041
3042   if (e->tag == Iex_Unop) {
3043      switch (e->Iex.Unop.op) {
3044         case Iop_NegF64: {
3045            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3046            HReg dst = newVRegD(env);
3047            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_NEG, dst, src));
3048            return dst;
3049         }
3050         case Iop_AbsF64: {
3051            HReg src = iselDblExpr(env, e->Iex.Unop.arg);
3052            HReg dst = newVRegD(env);
3053            addInstr(env, ARM64Instr_VUnaryD(ARM64fpu_ABS, dst, src));
3054            return dst;
3055         }
3056         case Iop_F32toF64: {
3057            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3058            HReg dst = newVRegD(env);
3059            addInstr(env, ARM64Instr_VCvtSD(True/*sToD*/, dst, src));
3060            return dst;
3061         }
3062         case Iop_F16toF64: {
3063            HReg src = iselF16Expr(env, e->Iex.Unop.arg);
3064            HReg dst = newVRegD(env);
3065            addInstr(env, ARM64Instr_VCvtHD(True/*hToD*/, dst, src));
3066            return dst;
3067         }
3068         case Iop_I32UtoF64:
3069         case Iop_I32StoF64: {
3070            /* Rounding mode is not involved here, since the
3071               conversion can always be done without loss of
3072               precision. */
3073            HReg src   = iselIntExpr_R(env, e->Iex.Unop.arg);
3074            HReg dst   = newVRegD(env);
3075            Bool syned = e->Iex.Unop.op == Iop_I32StoF64;
3076            ARM64CvtOp cvt_op = syned ? ARM64cvt_F64_I32S : ARM64cvt_F64_I32U;
3077            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dst, src));
3078            return dst;
3079         }
3080         default:
3081            break;
3082      }
3083   }
3084
3085   if (e->tag == Iex_Binop) {
3086      switch (e->Iex.Binop.op) {
3087         case Iop_RoundF64toInt:
3088         case Iop_SqrtF64:
3089         case Iop_RecpExpF64: {
3090            HReg src = iselDblExpr(env, e->Iex.Binop.arg2);
3091            HReg dst = newVRegD(env);
3092            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3093            ARM64FpUnaryOp op = ARM64fpu_INVALID;
3094            switch (e->Iex.Binop.op) {
3095               case Iop_RoundF64toInt: op = ARM64fpu_RINT;  break;
3096               case Iop_SqrtF64:       op = ARM64fpu_SQRT;  break;
3097               case Iop_RecpExpF64:    op = ARM64fpu_RECPX; break;
3098               default: vassert(0);
3099            }
3100            addInstr(env, ARM64Instr_VUnaryD(op, dst, src));
3101            return dst;
3102         }
3103         case Iop_I64StoF64:
3104         case Iop_I64UtoF64: {
3105            ARM64CvtOp cvt_op = e->Iex.Binop.op == Iop_I64StoF64
3106                                   ? ARM64cvt_F64_I64S : ARM64cvt_F64_I64U;
3107            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3108            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3109            HReg dstS = newVRegD(env);
3110            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
3111            return dstS;
3112         }
3113         default:
3114            break;
3115      }
3116   }
3117
3118   if (e->tag == Iex_Triop) {
3119      IRTriop*     triop = e->Iex.Triop.details;
3120      ARM64FpBinOp dblop = ARM64fpb_INVALID;
3121      switch (triop->op) {
3122         case Iop_DivF64: dblop = ARM64fpb_DIV; break;
3123         case Iop_MulF64: dblop = ARM64fpb_MUL; break;
3124         case Iop_SubF64: dblop = ARM64fpb_SUB; break;
3125         case Iop_AddF64: dblop = ARM64fpb_ADD; break;
3126         default: break;
3127      }
3128      if (dblop != ARM64fpb_INVALID) {
3129         HReg argL = iselDblExpr(env, triop->arg2);
3130         HReg argR = iselDblExpr(env, triop->arg3);
3131         HReg dst  = newVRegD(env);
3132         set_FPCR_rounding_mode(env, triop->arg1);
3133         addInstr(env, ARM64Instr_VBinD(dblop, dst, argL, argR));
3134         return dst;
3135      }
3136   }
3137
3138   if (e->tag == Iex_ITE) {
3139      /* ITE(ccexpr, iftrue, iffalse) */
3140      ARM64CondCode cc;
3141      HReg r1  = iselDblExpr(env, e->Iex.ITE.iftrue);
3142      HReg r0  = iselDblExpr(env, e->Iex.ITE.iffalse);
3143      HReg dst = newVRegD(env);
3144      cc = iselCondCode(env, e->Iex.ITE.cond);
3145      addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, True/*64-bit*/));
3146      return dst;
3147   }
3148
3149   ppIRExpr(e);
3150   vpanic("iselDblExpr_wrk");
3151}
3152
3153
3154/*---------------------------------------------------------*/
3155/*--- ISEL: Floating point expressions (32 bit)         ---*/
3156/*---------------------------------------------------------*/
3157
3158/* Compute a 32-bit floating point value into a register, the identity
3159   of which is returned.  As with iselIntExpr_R, the reg may be either
3160   real or virtual; in any case it must not be changed by subsequent
3161   code emitted by the caller.  Values are generated into HRcFlt64
3162   registers despite the values themselves being Ity_F32s. */
3163
3164static HReg iselFltExpr ( ISelEnv* env, IRExpr* e )
3165{
3166   HReg r = iselFltExpr_wrk( env, e );
3167#  if 0
3168   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3169#  endif
3170   vassert(hregClass(r) == HRcFlt64);
3171   vassert(hregIsVirtual(r));
3172   return r;
3173}
3174
3175/* DO NOT CALL THIS DIRECTLY */
3176static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
3177{
3178   IRType ty = typeOfIRExpr(env->type_env,e);
3179   vassert(e);
3180   vassert(ty == Ity_F32);
3181
3182   if (e->tag == Iex_RdTmp) {
3183      return lookupIRTemp(env, e->Iex.RdTmp.tmp);
3184   }
3185
3186   if (e->tag == Iex_Const) {
3187      /* This is something of a kludge.  Since a 32 bit floating point
3188         zero is just .. all zeroes, just create a 64 bit zero word
3189         and transfer it.  This avoids having to create a SfromW
3190         instruction for this specific case. */
3191      IRConst* con = e->Iex.Const.con;
3192      if (con->tag == Ico_F32i && con->Ico.F32i == 0) {
3193         HReg src = newVRegI(env);
3194         HReg dst = newVRegD(env);
3195         addInstr(env, ARM64Instr_Imm64(src, 0));
3196         addInstr(env, ARM64Instr_VDfromX(dst, src));
3197         return dst;
3198      }
3199      if (con->tag == Ico_F32) {
3200         HReg src = newVRegI(env);
3201         HReg dst = newVRegD(env);
3202         union { Float f32; UInt u32; } u;
3203         vassert(sizeof(u) == 4);
3204         u.f32 = con->Ico.F32;
3205         addInstr(env, ARM64Instr_Imm64(src, (ULong)u.u32));
3206         addInstr(env, ARM64Instr_VDfromX(dst, src));
3207         return dst;
3208      }
3209   }
3210
3211   if (e->tag == Iex_Load && e->Iex.Load.end == Iend_LE) {
3212      vassert(e->Iex.Load.ty == Ity_F32);
3213      HReg addr = iselIntExpr_R(env, e->Iex.Load.addr);
3214      HReg res  = newVRegD(env);
3215      addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, res, addr, 0));
3216      return res;
3217   }
3218
3219   if (e->tag == Iex_Get) {
3220      Int offs = e->Iex.Get.offset;
3221      if (offs >= 0 && offs < 16384 && 0 == (offs & 3)) {
3222         HReg rD = newVRegD(env);
3223         HReg rN = get_baseblock_register();
3224         addInstr(env, ARM64Instr_VLdStS(True/*isLoad*/, rD, rN, offs));
3225         return rD;
3226      }
3227   }
3228
3229   if (e->tag == Iex_Unop) {
3230      switch (e->Iex.Unop.op) {
3231         case Iop_NegF32: {
3232            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3233            HReg dst = newVRegD(env);
3234            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_NEG, dst, src));
3235            return dst;
3236         }
3237         case Iop_AbsF32: {
3238            HReg src = iselFltExpr(env, e->Iex.Unop.arg);
3239            HReg dst = newVRegD(env);
3240            addInstr(env, ARM64Instr_VUnaryS(ARM64fpu_ABS, dst, src));
3241            return dst;
3242         }
3243         case Iop_F16toF32: {
3244            HReg src = iselF16Expr(env, e->Iex.Unop.arg);
3245            HReg dst = newVRegD(env);
3246            addInstr(env, ARM64Instr_VCvtHS(True/*hToS*/, dst, src));
3247            return dst;
3248         }
3249         default:
3250            break;
3251      }
3252   }
3253
3254   if (e->tag == Iex_Binop) {
3255      switch (e->Iex.Binop.op) {
3256         case Iop_RoundF32toInt:
3257         case Iop_SqrtF32:
3258         case Iop_RecpExpF32: {
3259            HReg src = iselFltExpr(env, e->Iex.Binop.arg2);
3260            HReg dst = newVRegD(env);
3261            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3262            ARM64FpUnaryOp op = ARM64fpu_INVALID;
3263            switch (e->Iex.Binop.op) {
3264               case Iop_RoundF32toInt: op = ARM64fpu_RINT;  break;
3265               case Iop_SqrtF32:       op = ARM64fpu_SQRT;  break;
3266               case Iop_RecpExpF32:    op = ARM64fpu_RECPX; break;
3267               default: vassert(0);
3268            }
3269            addInstr(env, ARM64Instr_VUnaryS(op, dst, src));
3270            return dst;
3271         }
3272         case Iop_F64toF32: {
3273            HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
3274            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3275            HReg dstS = newVRegD(env);
3276            addInstr(env, ARM64Instr_VCvtSD(False/*!sToD*/, dstS, srcD));
3277            return dstS;
3278         }
3279         case Iop_I32UtoF32:
3280         case Iop_I32StoF32:
3281         case Iop_I64UtoF32:
3282         case Iop_I64StoF32: {
3283            ARM64CvtOp cvt_op = ARM64cvt_INVALID;
3284            switch (e->Iex.Binop.op) {
3285               case Iop_I32UtoF32: cvt_op = ARM64cvt_F32_I32U; break;
3286               case Iop_I32StoF32: cvt_op = ARM64cvt_F32_I32S; break;
3287               case Iop_I64UtoF32: cvt_op = ARM64cvt_F32_I64U; break;
3288               case Iop_I64StoF32: cvt_op = ARM64cvt_F32_I64S; break;
3289               default: vassert(0);
3290            }
3291            HReg srcI = iselIntExpr_R(env, e->Iex.Binop.arg2);
3292            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3293            HReg dstS = newVRegD(env);
3294            addInstr(env, ARM64Instr_VCvtI2F(cvt_op, dstS, srcI));
3295            return dstS;
3296         }
3297         default:
3298            break;
3299      }
3300   }
3301
3302   if (e->tag == Iex_Triop) {
3303      IRTriop*     triop = e->Iex.Triop.details;
3304      ARM64FpBinOp sglop = ARM64fpb_INVALID;
3305      switch (triop->op) {
3306         case Iop_DivF32: sglop = ARM64fpb_DIV; break;
3307         case Iop_MulF32: sglop = ARM64fpb_MUL; break;
3308         case Iop_SubF32: sglop = ARM64fpb_SUB; break;
3309         case Iop_AddF32: sglop = ARM64fpb_ADD; break;
3310         default: break;
3311      }
3312      if (sglop != ARM64fpb_INVALID) {
3313         HReg argL = iselFltExpr(env, triop->arg2);
3314         HReg argR = iselFltExpr(env, triop->arg3);
3315         HReg dst  = newVRegD(env);
3316         set_FPCR_rounding_mode(env, triop->arg1);
3317         addInstr(env, ARM64Instr_VBinS(sglop, dst, argL, argR));
3318         return dst;
3319      }
3320   }
3321
3322   if (e->tag == Iex_ITE) {
3323      /* ITE(ccexpr, iftrue, iffalse) */
3324      ARM64CondCode cc;
3325      HReg r1  = iselFltExpr(env, e->Iex.ITE.iftrue);
3326      HReg r0  = iselFltExpr(env, e->Iex.ITE.iffalse);
3327      HReg dst = newVRegD(env);
3328      cc = iselCondCode(env, e->Iex.ITE.cond);
3329      addInstr(env, ARM64Instr_VFCSel(dst, r1, r0, cc, False/*!64-bit*/));
3330      return dst;
3331   }
3332
3333   ppIRExpr(e);
3334   vpanic("iselFltExpr_wrk");
3335}
3336
3337
3338/*---------------------------------------------------------*/
3339/*--- ISEL: Floating point expressions (16 bit)         ---*/
3340/*---------------------------------------------------------*/
3341
3342/* Compute a 16-bit floating point value into a register, the identity
3343   of which is returned.  As with iselIntExpr_R, the reg may be either
3344   real or virtual; in any case it must not be changed by subsequent
3345   code emitted by the caller.  Values are generated into HRcFlt64
3346   registers despite the values themselves being Ity_F16s. */
3347
3348static HReg iselF16Expr ( ISelEnv* env, IRExpr* e )
3349{
3350   HReg r = iselF16Expr_wrk( env, e );
3351#  if 0
3352   vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3353#  endif
3354   vassert(hregClass(r) == HRcFlt64);
3355   vassert(hregIsVirtual(r));
3356   return r;
3357}
3358
3359/* DO NOT CALL THIS DIRECTLY */
3360static HReg iselF16Expr_wrk ( ISelEnv* env, IRExpr* e )
3361{
3362   IRType ty = typeOfIRExpr(env->type_env,e);
3363   vassert(e);
3364   vassert(ty == Ity_F16);
3365
3366   if (e->tag == Iex_Get) {
3367      Int offs = e->Iex.Get.offset;
3368      if (offs >= 0 && offs < 8192 && 0 == (offs & 1)) {
3369         HReg rD = newVRegD(env);
3370         HReg rN = get_baseblock_register();
3371         addInstr(env, ARM64Instr_VLdStH(True/*isLoad*/, rD, rN, offs));
3372         return rD;
3373      }
3374   }
3375
3376   if (e->tag == Iex_Binop) {
3377      switch (e->Iex.Binop.op) {
3378         case Iop_F32toF16: {
3379            HReg srcS = iselFltExpr(env, e->Iex.Binop.arg2);
3380            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3381            HReg dstH = newVRegD(env);
3382            addInstr(env, ARM64Instr_VCvtHS(False/*!hToS*/, dstH, srcS));
3383            return dstH;
3384         }
3385         case Iop_F64toF16: {
3386            HReg srcD = iselDblExpr(env, e->Iex.Binop.arg2);
3387            set_FPCR_rounding_mode(env, e->Iex.Binop.arg1);
3388            HReg dstH = newVRegD(env);
3389            addInstr(env, ARM64Instr_VCvtHD(False/*!hToD*/, dstH, srcD));
3390            return dstH;
3391         }
3392         default:
3393            break;
3394      }
3395   }
3396
3397   ppIRExpr(e);
3398   vpanic("iselF16Expr_wrk");
3399}
3400
3401
3402/*---------------------------------------------------------*/
3403/*--- ISEL: Vector expressions (256 bit)                ---*/
3404/*---------------------------------------------------------*/
3405
3406static void iselV256Expr ( /*OUT*/HReg* rHi, HReg* rLo,
3407                           ISelEnv* env, IRExpr* e )
3408{
3409   iselV256Expr_wrk( rHi, rLo, env, e );
3410   vassert(hregClass(*rHi) == HRcVec128);
3411   vassert(hregClass(*rLo) == HRcVec128);
3412   vassert(hregIsVirtual(*rHi));
3413   vassert(hregIsVirtual(*rLo));
3414}
3415
3416/* DO NOT CALL THIS DIRECTLY */
3417static void iselV256Expr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
3418                               ISelEnv* env, IRExpr* e )
3419{
3420   vassert(e);
3421   IRType ty = typeOfIRExpr(env->type_env,e);
3422   vassert(ty == Ity_V256);
3423
3424   /* read 256-bit IRTemp */
3425   if (e->tag == Iex_RdTmp) {
3426      lookupIRTempPair( rHi, rLo, env, e->Iex.RdTmp.tmp);
3427      return;
3428   }
3429
3430   if (e->tag == Iex_Binop) {
3431      switch (e->Iex.Binop.op) {
3432         case Iop_V128HLtoV256: {
3433            *rHi = iselV128Expr(env, e->Iex.Binop.arg1);
3434            *rLo = iselV128Expr(env, e->Iex.Binop.arg2);
3435            return;
3436         }
3437         case Iop_QandSQsh64x2:
3438         case Iop_QandSQsh32x4:
3439         case Iop_QandSQsh16x8:
3440         case Iop_QandSQsh8x16:
3441         case Iop_QandUQsh64x2:
3442         case Iop_QandUQsh32x4:
3443         case Iop_QandUQsh16x8:
3444         case Iop_QandUQsh8x16:
3445         case Iop_QandSQRsh64x2:
3446         case Iop_QandSQRsh32x4:
3447         case Iop_QandSQRsh16x8:
3448         case Iop_QandSQRsh8x16:
3449         case Iop_QandUQRsh64x2:
3450         case Iop_QandUQRsh32x4:
3451         case Iop_QandUQRsh16x8:
3452         case Iop_QandUQRsh8x16:
3453         {
3454            HReg argL  = iselV128Expr(env, e->Iex.Binop.arg1);
3455            HReg argR  = iselV128Expr(env, e->Iex.Binop.arg2);
3456            HReg fpsr  = newVRegI(env);
3457            HReg resHi = newVRegV(env);
3458            HReg resLo = newVRegV(env);
3459            ARM64VecBinOp op = ARM64vecb_INVALID;
3460            switch (e->Iex.Binop.op) {
3461               case Iop_QandSQsh64x2:  op = ARM64vecb_SQSHL64x2;  break;
3462               case Iop_QandSQsh32x4:  op = ARM64vecb_SQSHL32x4;  break;
3463               case Iop_QandSQsh16x8:  op = ARM64vecb_SQSHL16x8;  break;
3464               case Iop_QandSQsh8x16:  op = ARM64vecb_SQSHL8x16;  break;
3465               case Iop_QandUQsh64x2:  op = ARM64vecb_UQSHL64x2;  break;
3466               case Iop_QandUQsh32x4:  op = ARM64vecb_UQSHL32x4;  break;
3467               case Iop_QandUQsh16x8:  op = ARM64vecb_UQSHL16x8;  break;
3468               case Iop_QandUQsh8x16:  op = ARM64vecb_UQSHL8x16;  break;
3469               case Iop_QandSQRsh64x2: op = ARM64vecb_SQRSHL64x2; break;
3470               case Iop_QandSQRsh32x4: op = ARM64vecb_SQRSHL32x4; break;
3471               case Iop_QandSQRsh16x8: op = ARM64vecb_SQRSHL16x8; break;
3472               case Iop_QandSQRsh8x16: op = ARM64vecb_SQRSHL8x16; break;
3473               case Iop_QandUQRsh64x2: op = ARM64vecb_UQRSHL64x2; break;
3474               case Iop_QandUQRsh32x4: op = ARM64vecb_UQRSHL32x4; break;
3475               case Iop_QandUQRsh16x8: op = ARM64vecb_UQRSHL16x8; break;
3476               case Iop_QandUQRsh8x16: op = ARM64vecb_UQRSHL8x16; break;
3477               default: vassert(0);
3478            }
3479            /* Clear FPSR.Q, do the operation, and return both its result
3480               and the new value of FPSR.Q.  We can simply zero out FPSR
3481               since all the other bits have no relevance in VEX generated
3482               code. */
3483            addInstr(env, ARM64Instr_Imm64(fpsr, 0));
3484            addInstr(env, ARM64Instr_FPSR(True/*toFPSR*/, fpsr));
3485            addInstr(env, ARM64Instr_VBinV(op, resLo, argL, argR));
3486            addInstr(env, ARM64Instr_FPSR(False/*!toFPSR*/, fpsr));
3487            addInstr(env, ARM64Instr_Shift(fpsr, fpsr, ARM64RI6_I6(27),
3488                                                       ARM64sh_SHR));
3489            ARM64RIL* ril_one = mb_mkARM64RIL_I(1);
3490            vassert(ril_one);
3491            addInstr(env, ARM64Instr_Logic(fpsr, fpsr, ril_one, ARM64lo_AND));
3492            /* Now we have: the main (shift) result in |resLo|, and the
3493               Q bit at the bottom of |fpsr|. */
3494            addInstr(env, ARM64Instr_VQfromX(resHi, fpsr));
3495            *rHi = resHi;
3496            *rLo = resLo;
3497            return;
3498         }
3499
3500         /* ... */
3501         default:
3502            break;
3503      } /* switch on the binop */
3504   } /* if (e->tag == Iex_Binop) */
3505
3506   ppIRExpr(e);
3507   vpanic("iselV256Expr_wrk");
3508}
3509
3510
3511/*---------------------------------------------------------*/
3512/*--- ISEL: Statements                                  ---*/
3513/*---------------------------------------------------------*/
3514
3515static void iselStmt ( ISelEnv* env, IRStmt* stmt )
3516{
3517   if (vex_traceflags & VEX_TRACE_VCODE) {
3518      vex_printf("\n-- ");
3519      ppIRStmt(stmt);
3520      vex_printf("\n");
3521   }
3522   switch (stmt->tag) {
3523
3524   /* --------- STORE --------- */
3525   /* little-endian write to memory */
3526   case Ist_Store: {
3527      IRType    tya  = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
3528      IRType    tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
3529      IREndness end  = stmt->Ist.Store.end;
3530
3531      if (tya != Ity_I64 || end != Iend_LE)
3532         goto stmt_fail;
3533
3534      if (tyd == Ity_I64) {
3535         HReg        rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3536         ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3537         addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
3538         return;
3539      }
3540      if (tyd == Ity_I32) {
3541         HReg        rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3542         ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3543         addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
3544         return;
3545      }
3546      if (tyd == Ity_I16) {
3547         HReg        rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3548         ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3549         addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
3550         return;
3551      }
3552      if (tyd == Ity_I8) {
3553         HReg        rD = iselIntExpr_R(env, stmt->Ist.Store.data);
3554         ARM64AMode* am = iselIntExpr_AMode(env, stmt->Ist.Store.addr, tyd);
3555         addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
3556         return;
3557      }
3558      if (tyd == Ity_V128) {
3559         HReg qD   = iselV128Expr(env, stmt->Ist.Store.data);
3560         HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3561         addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
3562         return;
3563      }
3564      if (tyd == Ity_F64) {
3565         HReg dD   = iselDblExpr(env, stmt->Ist.Store.data);
3566         HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3567         addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, addr, 0));
3568         return;
3569      }
3570      if (tyd == Ity_F32) {
3571         HReg sD   = iselFltExpr(env, stmt->Ist.Store.data);
3572         HReg addr = iselIntExpr_R(env, stmt->Ist.Store.addr);
3573         addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, addr, 0));
3574         return;
3575      }
3576      break;
3577   }
3578
3579   /* --------- PUT --------- */
3580   /* write guest state, fixed offset */
3581   case Ist_Put: {
3582      IRType tyd  = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
3583      UInt   offs = (UInt)stmt->Ist.Put.offset;
3584      if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) {
3585         HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3586         ARM64AMode* am = mk_baseblock_64bit_access_amode(offs);
3587         addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am));
3588         return;
3589      }
3590      if (tyd == Ity_I32 && 0 == (offs & 3) && offs < (4<<12)) {
3591         HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3592         ARM64AMode* am = mk_baseblock_32bit_access_amode(offs);
3593         addInstr(env, ARM64Instr_LdSt32(False/*!isLoad*/, rD, am));
3594         return;
3595      }
3596      if (tyd == Ity_I16 && 0 == (offs & 1) && offs < (2<<12)) {
3597         HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3598         ARM64AMode* am = mk_baseblock_16bit_access_amode(offs);
3599         addInstr(env, ARM64Instr_LdSt16(False/*!isLoad*/, rD, am));
3600         return;
3601      }
3602      if (tyd == Ity_I8 && offs < (1<<12)) {
3603         HReg        rD = iselIntExpr_R(env, stmt->Ist.Put.data);
3604         ARM64AMode* am = mk_baseblock_8bit_access_amode(offs);
3605         addInstr(env, ARM64Instr_LdSt8(False/*!isLoad*/, rD, am));
3606         return;
3607      }
3608      if (tyd == Ity_V128 && offs < (1<<12)) {
3609         HReg qD   = iselV128Expr(env, stmt->Ist.Put.data);
3610         HReg addr = mk_baseblock_128bit_access_addr(env, offs);
3611         addInstr(env, ARM64Instr_VLdStQ(False/*!isLoad*/, qD, addr));
3612         return;
3613      }
3614      if (tyd == Ity_F64 && 0 == (offs & 7) && offs < (8<<12)) {
3615         HReg dD   = iselDblExpr(env, stmt->Ist.Put.data);
3616         HReg bbp  = get_baseblock_register();
3617         addInstr(env, ARM64Instr_VLdStD(False/*!isLoad*/, dD, bbp, offs));
3618         return;
3619      }
3620      if (tyd == Ity_F32 && 0 == (offs & 3) && offs < (4<<12)) {
3621         HReg sD   = iselFltExpr(env, stmt->Ist.Put.data);
3622         HReg bbp  = get_baseblock_register();
3623         addInstr(env, ARM64Instr_VLdStS(False/*!isLoad*/, sD, bbp, offs));
3624         return;
3625      }
3626      if (tyd == Ity_F16 && 0 == (offs & 1) && offs < (2<<12)) {
3627         HReg hD   = iselF16Expr(env, stmt->Ist.Put.data);
3628         HReg bbp  = get_baseblock_register();
3629         addInstr(env, ARM64Instr_VLdStH(False/*!isLoad*/, hD, bbp, offs));
3630         return;
3631      }
3632
3633      break;
3634   }
3635
3636   /* --------- TMP --------- */
3637   /* assign value to temporary */
3638   case Ist_WrTmp: {
3639      IRTemp tmp = stmt->Ist.WrTmp.tmp;
3640      IRType ty  = typeOfIRTemp(env->type_env, tmp);
3641
3642      if (ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
3643         /* We could do a lot better here.  But for the time being: */
3644         HReg dst = lookupIRTemp(env, tmp);
3645         HReg rD  = iselIntExpr_R(env, stmt->Ist.WrTmp.data);
3646         addInstr(env, ARM64Instr_MovI(dst, rD));
3647         return;
3648      }
3649      if (ty == Ity_I1) {
3650         /* Here, we are generating a I1 value into a 64 bit register.
3651            Make sure the value in the register is only zero or one,
3652            but no other.  This allows optimisation of the
3653            1Uto64(tmp:I1) case, by making it simply a copy of the
3654            register holding 'tmp'.  The point being that the value in
3655            the register holding 'tmp' can only have been created
3656            here.  LATER: that seems dangerous; safer to do 'tmp & 1'
3657            in that case.  Also, could do this just with a single CINC
3658            insn. */
3659         /* CLONE-01 */
3660         HReg zero = newVRegI(env);
3661         HReg one  = newVRegI(env);
3662         HReg dst  = lookupIRTemp(env, tmp);
3663         addInstr(env, ARM64Instr_Imm64(zero, 0));
3664         addInstr(env, ARM64Instr_Imm64(one,  1));
3665         ARM64CondCode cc = iselCondCode(env, stmt->Ist.WrTmp.data);
3666         addInstr(env, ARM64Instr_CSel(dst, one, zero, cc));
3667         return;
3668      }
3669      if (ty == Ity_F64) {
3670         HReg src = iselDblExpr(env, stmt->Ist.WrTmp.data);
3671         HReg dst = lookupIRTemp(env, tmp);
3672         addInstr(env, ARM64Instr_VMov(8, dst, src));
3673         return;
3674      }
3675      if (ty == Ity_F32) {
3676         HReg src = iselFltExpr(env, stmt->Ist.WrTmp.data);
3677         HReg dst = lookupIRTemp(env, tmp);
3678         addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
3679         return;
3680      }
3681      if (ty == Ity_V128) {
3682         HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data);
3683         HReg dst = lookupIRTemp(env, tmp);
3684         addInstr(env, ARM64Instr_VMov(16, dst, src));
3685         return;
3686      }
3687      if (ty == Ity_V256) {
3688         HReg srcHi, srcLo, dstHi, dstLo;
3689         iselV256Expr(&srcHi,&srcLo, env, stmt->Ist.WrTmp.data);
3690         lookupIRTempPair( &dstHi, &dstLo, env, tmp);
3691         addInstr(env, ARM64Instr_VMov(16, dstHi, srcHi));
3692         addInstr(env, ARM64Instr_VMov(16, dstLo, srcLo));
3693         return;
3694      }
3695      break;
3696   }
3697
3698   /* --------- Call to DIRTY helper --------- */
3699   /* call complex ("dirty") helper function */
3700   case Ist_Dirty: {
3701      IRDirty* d = stmt->Ist.Dirty.details;
3702
3703      /* Figure out the return type, if any. */
3704      IRType retty = Ity_INVALID;
3705      if (d->tmp != IRTemp_INVALID)
3706         retty = typeOfIRTemp(env->type_env, d->tmp);
3707
3708      Bool retty_ok = False;
3709      switch (retty) {
3710         case Ity_INVALID: /* function doesn't return anything */
3711         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
3712         case Ity_V128:
3713            retty_ok = True; break;
3714         default:
3715            break;
3716      }
3717      if (!retty_ok)
3718         break; /* will go to stmt_fail: */
3719
3720      /* Marshal args, do the call, and set the return value to 0x555..555
3721         if this is a conditional call that returns a value and the
3722         call is skipped. */
3723      UInt   addToSp = 0;
3724      RetLoc rloc    = mk_RetLoc_INVALID();
3725      doHelperCall( &addToSp, &rloc, env, d->guard, d->cee, retty, d->args );
3726      vassert(is_sane_RetLoc(rloc));
3727
3728      /* Now figure out what to do with the returned value, if any. */
3729      switch (retty) {
3730         case Ity_INVALID: {
3731            /* No return value.  Nothing to do. */
3732            vassert(d->tmp == IRTemp_INVALID);
3733            vassert(rloc.pri == RLPri_None);
3734            vassert(addToSp == 0);
3735            return;
3736         }
3737         case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: {
3738            vassert(rloc.pri == RLPri_Int);
3739            vassert(addToSp == 0);
3740            /* The returned value is in x0.  Park it in the register
3741               associated with tmp. */
3742            HReg dst = lookupIRTemp(env, d->tmp);
3743            addInstr(env, ARM64Instr_MovI(dst, hregARM64_X0()) );
3744            return;
3745         }
3746         case Ity_V128: {
3747            /* The returned value is on the stack, and *retloc tells
3748               us where.  Fish it off the stack and then move the
3749               stack pointer upwards to clear it, as directed by
3750               doHelperCall. */
3751            vassert(rloc.pri == RLPri_V128SpRel);
3752            vassert(rloc.spOff < 256); // stay sane
3753            vassert(addToSp >= 16); // ditto
3754            vassert(addToSp < 256); // ditto
3755            HReg dst = lookupIRTemp(env, d->tmp);
3756            HReg tmp = newVRegI(env); // the address of the returned value
3757            addInstr(env, ARM64Instr_FromSP(tmp)); // tmp = SP
3758            addInstr(env, ARM64Instr_Arith(tmp, tmp,
3759                                           ARM64RIA_I12((UShort)rloc.spOff, 0),
3760                                           True/*isAdd*/ ));
3761            addInstr(env, ARM64Instr_VLdStQ(True/*isLoad*/, dst, tmp));
3762            addInstr(env, ARM64Instr_AddToSP(addToSp));
3763            return;
3764         }
3765         default:
3766            /*NOTREACHED*/
3767            vassert(0);
3768      }
3769      break;
3770   }
3771
3772   /* --------- Load Linked and Store Conditional --------- */
3773   case Ist_LLSC: {
3774      if (stmt->Ist.LLSC.storedata == NULL) {
3775         /* LL */
3776         IRTemp res = stmt->Ist.LLSC.result;
3777         IRType ty  = typeOfIRTemp(env->type_env, res);
3778         if (ty == Ity_I64 || ty == Ity_I32
3779             || ty == Ity_I16 || ty == Ity_I8) {
3780            Int  szB   = 0;
3781            HReg r_dst = lookupIRTemp(env, res);
3782            HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
3783            switch (ty) {
3784               case Ity_I8:  szB = 1; break;
3785               case Ity_I16: szB = 2; break;
3786               case Ity_I32: szB = 4; break;
3787               case Ity_I64: szB = 8; break;
3788               default:      vassert(0);
3789            }
3790            addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
3791            addInstr(env, ARM64Instr_LdrEX(szB));
3792            addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2()));
3793            return;
3794         }
3795         goto stmt_fail;
3796      } else {
3797         /* SC */
3798         IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
3799         if (tyd == Ity_I64 || tyd == Ity_I32
3800             || tyd == Ity_I16 || tyd == Ity_I8) {
3801            Int  szB = 0;
3802            HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
3803            HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
3804            switch (tyd) {
3805               case Ity_I8:  szB = 1; break;
3806               case Ity_I16: szB = 2; break;
3807               case Ity_I32: szB = 4; break;
3808               case Ity_I64: szB = 8; break;
3809               default:      vassert(0);
3810            }
3811            addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD));
3812            addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
3813            addInstr(env, ARM64Instr_StrEX(szB));
3814         } else {
3815            goto stmt_fail;
3816         }
3817         /* now r0 is 1 if failed, 0 if success.  Change to IR
3818            conventions (0 is fail, 1 is success).  Also transfer
3819            result to r_res. */
3820         IRTemp    res   = stmt->Ist.LLSC.result;
3821         IRType    ty    = typeOfIRTemp(env->type_env, res);
3822         HReg      r_res = lookupIRTemp(env, res);
3823         ARM64RIL* one   = mb_mkARM64RIL_I(1);
3824         vassert(ty == Ity_I1);
3825         vassert(one);
3826         addInstr(env, ARM64Instr_Logic(r_res, hregARM64_X0(), one,
3827                                        ARM64lo_XOR));
3828         /* And be conservative -- mask off all but the lowest bit. */
3829         addInstr(env, ARM64Instr_Logic(r_res, r_res, one,
3830                                        ARM64lo_AND));
3831         return;
3832      }
3833      break;
3834   }
3835
3836   /* --------- MEM FENCE --------- */
3837   case Ist_MBE:
3838      switch (stmt->Ist.MBE.event) {
3839         case Imbe_Fence:
3840            addInstr(env, ARM64Instr_MFence());
3841            return;
3842         default:
3843            break;
3844      }
3845      break;
3846
3847   /* --------- INSTR MARK --------- */
3848   /* Doesn't generate any executable code ... */
3849   case Ist_IMark:
3850       return;
3851
3852   /* --------- ABI HINT --------- */
3853   /* These have no meaning (denotation in the IR) and so we ignore
3854      them ... if any actually made it this far. */
3855   case Ist_AbiHint:
3856       return;
3857
3858   /* --------- NO-OP --------- */
3859   case Ist_NoOp:
3860       return;
3861
3862   /* --------- EXIT --------- */
3863   case Ist_Exit: {
3864      if (stmt->Ist.Exit.dst->tag != Ico_U64)
3865         vpanic("isel_arm: Ist_Exit: dst is not a 64-bit value");
3866
3867      ARM64CondCode cc
3868         = iselCondCode(env, stmt->Ist.Exit.guard);
3869      ARM64AMode* amPC
3870         = mk_baseblock_64bit_access_amode(stmt->Ist.Exit.offsIP);
3871
3872      /* Case: boring transfer to known address */
3873      if (stmt->Ist.Exit.jk == Ijk_Boring) {
3874         if (env->chainingAllowed) {
3875            /* .. almost always true .. */
3876            /* Skip the event check at the dst if this is a forwards
3877               edge. */
3878            Bool toFastEP
3879               = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
3880            if (0) vex_printf("%s", toFastEP ? "Y" : ",");
3881            addInstr(env, ARM64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
3882                                             amPC, cc, toFastEP));
3883         } else {
3884            /* .. very occasionally .. */
3885            /* We can't use chaining, so ask for an assisted transfer,
3886               as that's the only alternative that is allowable. */
3887            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
3888            addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, Ijk_Boring));
3889         }
3890         return;
3891      }
3892
3893      /* Case: assisted transfer to arbitrary address */
3894      switch (stmt->Ist.Exit.jk) {
3895         /* Keep this list in sync with that for iselNext below */
3896         case Ijk_ClientReq:
3897         case Ijk_NoDecode:
3898         case Ijk_NoRedir:
3899         case Ijk_Sys_syscall:
3900         case Ijk_InvalICache:
3901         case Ijk_FlushDCache:
3902         case Ijk_SigTRAP:
3903         case Ijk_Yield: {
3904            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
3905            addInstr(env, ARM64Instr_XAssisted(r, amPC, cc,
3906                                               stmt->Ist.Exit.jk));
3907            return;
3908         }
3909         default:
3910            break;
3911      }
3912
3913      /* Do we ever expect to see any other kind? */
3914      goto stmt_fail;
3915   }
3916
3917   default: break;
3918   }
3919  stmt_fail:
3920   ppIRStmt(stmt);
3921   vpanic("iselStmt");
3922}
3923
3924
3925/*---------------------------------------------------------*/
3926/*--- ISEL: Basic block terminators (Nexts)             ---*/
3927/*---------------------------------------------------------*/
3928
3929static void iselNext ( ISelEnv* env,
3930                       IRExpr* next, IRJumpKind jk, Int offsIP )
3931{
3932   if (vex_traceflags & VEX_TRACE_VCODE) {
3933      vex_printf( "\n-- PUT(%d) = ", offsIP);
3934      ppIRExpr( next );
3935      vex_printf( "; exit-");
3936      ppIRJumpKind(jk);
3937      vex_printf( "\n");
3938   }
3939
3940   /* Case: boring transfer to known address */
3941   if (next->tag == Iex_Const) {
3942      IRConst* cdst = next->Iex.Const.con;
3943      vassert(cdst->tag == Ico_U64);
3944      if (jk == Ijk_Boring || jk == Ijk_Call) {
3945         /* Boring transfer to known address */
3946         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
3947         if (env->chainingAllowed) {
3948            /* .. almost always true .. */
3949            /* Skip the event check at the dst if this is a forwards
3950               edge. */
3951            Bool toFastEP
3952               = ((Addr64)cdst->Ico.U64) > env->max_ga;
3953            if (0) vex_printf("%s", toFastEP ? "X" : ".");
3954            addInstr(env, ARM64Instr_XDirect(cdst->Ico.U64,
3955                                             amPC, ARM64cc_AL,
3956                                             toFastEP));
3957         } else {
3958            /* .. very occasionally .. */
3959            /* We can't use chaining, so ask for an assisted transfer,
3960               as that's the only alternative that is allowable. */
3961            HReg r = iselIntExpr_R(env, next);
3962            addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
3963                                               Ijk_Boring));
3964         }
3965         return;
3966      }
3967   }
3968
3969   /* Case: call/return (==boring) transfer to any address */
3970   switch (jk) {
3971      case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
3972         HReg        r    = iselIntExpr_R(env, next);
3973         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
3974         if (env->chainingAllowed) {
3975            addInstr(env, ARM64Instr_XIndir(r, amPC, ARM64cc_AL));
3976         } else {
3977            addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL,
3978                                               Ijk_Boring));
3979         }
3980         return;
3981      }
3982      default:
3983         break;
3984   }
3985
3986   /* Case: assisted transfer to arbitrary address */
3987   switch (jk) {
3988      /* Keep this list in sync with that for Ist_Exit above */
3989      case Ijk_ClientReq:
3990      case Ijk_NoDecode:
3991      case Ijk_NoRedir:
3992      case Ijk_Sys_syscall:
3993      case Ijk_InvalICache:
3994      case Ijk_FlushDCache:
3995      case Ijk_SigTRAP:
3996      case Ijk_Yield:
3997      {
3998         HReg        r    = iselIntExpr_R(env, next);
3999         ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
4000         addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk));
4001         return;
4002      }
4003      default:
4004         break;
4005   }
4006
4007   vex_printf( "\n-- PUT(%d) = ", offsIP);
4008   ppIRExpr( next );
4009   vex_printf( "; exit-");
4010   ppIRJumpKind(jk);
4011   vex_printf( "\n");
4012   vassert(0); // are we expecting any other kind?
4013}
4014
4015
4016/*---------------------------------------------------------*/
4017/*--- Insn selector top-level                           ---*/
4018/*---------------------------------------------------------*/
4019
4020/* Translate an entire SB to arm64 code. */
4021
4022HInstrArray* iselSB_ARM64 ( const IRSB* bb,
4023                            VexArch      arch_host,
4024                            const VexArchInfo* archinfo_host,
4025                            const VexAbiInfo*  vbi/*UNUSED*/,
4026                            Int offs_Host_EvC_Counter,
4027                            Int offs_Host_EvC_FailAddr,
4028                            Bool chainingAllowed,
4029                            Bool addProfInc,
4030                            Addr max_ga )
4031{
4032   Int        i, j;
4033   HReg       hreg, hregHI;
4034   ISelEnv*   env;
4035   UInt       hwcaps_host = archinfo_host->hwcaps;
4036   ARM64AMode *amCounter, *amFailAddr;
4037
4038   /* sanity ... */
4039   vassert(arch_host == VexArchARM64);
4040
4041   /* Check that the host's endianness is as expected. */
4042   vassert(archinfo_host->endness == VexEndnessLE);
4043
4044   /* guard against unexpected space regressions */
4045   vassert(sizeof(ARM64Instr) <= 32);
4046
4047   /* Make up an initial environment to use. */
4048   env = LibVEX_Alloc_inline(sizeof(ISelEnv));
4049   env->vreg_ctr = 0;
4050
4051   /* Set up output code array. */
4052   env->code = newHInstrArray();
4053
4054   /* Copy BB's type env. */
4055   env->type_env = bb->tyenv;
4056
4057   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
4058      change as we go along. */
4059   env->n_vregmap = bb->tyenv->types_used;
4060   env->vregmap   = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4061   env->vregmapHI = LibVEX_Alloc_inline(env->n_vregmap * sizeof(HReg));
4062
4063   /* and finally ... */
4064   env->chainingAllowed = chainingAllowed;
4065   env->hwcaps          = hwcaps_host;
4066   env->previous_rm     = NULL;
4067   env->max_ga          = max_ga;
4068
4069   /* For each IR temporary, allocate a suitably-kinded virtual
4070      register. */
4071   j = 0;
4072   for (i = 0; i < env->n_vregmap; i++) {
4073      hregHI = hreg = INVALID_HREG;
4074      switch (bb->tyenv->types[i]) {
4075         case Ity_I1:
4076         case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
4077            hreg = mkHReg(True, HRcInt64, 0, j++);
4078            break;
4079         case Ity_I128:
4080            hreg   = mkHReg(True, HRcInt64, 0, j++);
4081            hregHI = mkHReg(True, HRcInt64, 0, j++);
4082            break;
4083         case Ity_F16: // we'll use HRcFlt64 regs for F16 too
4084         case Ity_F32: // we'll use HRcFlt64 regs for F32 too
4085         case Ity_F64:
4086            hreg = mkHReg(True, HRcFlt64, 0, j++);
4087            break;
4088         case Ity_V128:
4089            hreg = mkHReg(True, HRcVec128, 0, j++);
4090            break;
4091         case Ity_V256:
4092            hreg   = mkHReg(True, HRcVec128, 0, j++);
4093            hregHI = mkHReg(True, HRcVec128, 0, j++);
4094            break;
4095         default:
4096            ppIRType(bb->tyenv->types[i]);
4097            vpanic("iselBB(arm64): IRTemp type");
4098      }
4099      env->vregmap[i]   = hreg;
4100      env->vregmapHI[i] = hregHI;
4101   }
4102   env->vreg_ctr = j;
4103
4104   /* The very first instruction must be an event check. */
4105   amCounter  = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_Counter);
4106   amFailAddr = ARM64AMode_RI9(hregARM64_X21(), offs_Host_EvC_FailAddr);
4107   addInstr(env, ARM64Instr_EvCheck(amCounter, amFailAddr));
4108
4109   /* Possibly a block counter increment (for profiling).  At this
4110      point we don't know the address of the counter, so just pretend
4111      it is zero.  It will have to be patched later, but before this
4112      translation is used, by a call to LibVEX_patchProfCtr. */
4113   if (addProfInc) {
4114      addInstr(env, ARM64Instr_ProfInc());
4115   }
4116
4117   /* Ok, finally we can iterate over the statements. */
4118   for (i = 0; i < bb->stmts_used; i++)
4119      iselStmt(env, bb->stmts[i]);
4120
4121   iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
4122
4123   /* record the number of vregs we used. */
4124   env->code->n_vregs = env->vreg_ctr;
4125   return env->code;
4126}
4127
4128
4129/*---------------------------------------------------------------*/
4130/*--- end                                   host_arm64_isel.c ---*/
4131/*---------------------------------------------------------------*/
4132