1
2/*---------------------------------------------------------------*/
3/*--- begin                                 host_arm64_defs.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2013-2015 OpenWorks
11      info@open-works.net
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26   02110-1301, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31#include "libvex_basictypes.h"
32#include "libvex.h"
33#include "libvex_trc_values.h"
34
35#include "main_util.h"
36#include "host_generic_regs.h"
37#include "host_arm64_defs.h"
38
39
40/* --------- Registers. --------- */
41
42/* The usual HReg abstraction.  We use the following classes only:
43     X regs (64 bit int)
44     D regs (64 bit float, also used for 32 bit float)
45     Q regs (128 bit vector)
46*/
47
48const RRegUniverse* getRRegUniverse_ARM64 ( void )
49{
50   /* The real-register universe is a big constant, so we just want to
51      initialise it once. */
52   static RRegUniverse rRegUniverse_ARM64;
53   static Bool         rRegUniverse_ARM64_initted = False;
54
55   /* Handy shorthand, nothing more */
56   RRegUniverse* ru = &rRegUniverse_ARM64;
57
58   /* This isn't thread-safe.  Sigh. */
59   if (LIKELY(rRegUniverse_ARM64_initted))
60      return ru;
61
62   RRegUniverse__init(ru);
63
64   /* Add the registers.  The initial segment of this array must be
65      those available for allocation by reg-alloc, and those that
66      follow are not available for allocation. */
67
68   ru->regs[ru->size++] = hregARM64_X22();
69   ru->regs[ru->size++] = hregARM64_X23();
70   ru->regs[ru->size++] = hregARM64_X24();
71   ru->regs[ru->size++] = hregARM64_X25();
72   ru->regs[ru->size++] = hregARM64_X26();
73   ru->regs[ru->size++] = hregARM64_X27();
74   ru->regs[ru->size++] = hregARM64_X28();
75
76   ru->regs[ru->size++] = hregARM64_X0();
77   ru->regs[ru->size++] = hregARM64_X1();
78   ru->regs[ru->size++] = hregARM64_X2();
79   ru->regs[ru->size++] = hregARM64_X3();
80   ru->regs[ru->size++] = hregARM64_X4();
81   ru->regs[ru->size++] = hregARM64_X5();
82   ru->regs[ru->size++] = hregARM64_X6();
83   ru->regs[ru->size++] = hregARM64_X7();
84   // X8 is used as a ProfInc temporary, not available to regalloc.
85   // X9 is a chaining/spill temporary, not available to regalloc.
86
87   // Do we really need all these?
88   //ru->regs[ru->size++] = hregARM64_X10();
89   //ru->regs[ru->size++] = hregARM64_X11();
90   //ru->regs[ru->size++] = hregARM64_X12();
91   //ru->regs[ru->size++] = hregARM64_X13();
92   //ru->regs[ru->size++] = hregARM64_X14();
93   //ru->regs[ru->size++] = hregARM64_X15();
94   // X21 is the guest state pointer, not available to regalloc.
95
96   // vector regs.  Unfortunately not callee-saved.
97   ru->regs[ru->size++] = hregARM64_Q16();
98   ru->regs[ru->size++] = hregARM64_Q17();
99   ru->regs[ru->size++] = hregARM64_Q18();
100   ru->regs[ru->size++] = hregARM64_Q19();
101   ru->regs[ru->size++] = hregARM64_Q20();
102
103   // F64 regs, all of which are callee-saved
104   ru->regs[ru->size++] = hregARM64_D8();
105   ru->regs[ru->size++] = hregARM64_D9();
106   ru->regs[ru->size++] = hregARM64_D10();
107   ru->regs[ru->size++] = hregARM64_D11();
108   ru->regs[ru->size++] = hregARM64_D12();
109   ru->regs[ru->size++] = hregARM64_D13();
110
111   ru->allocable = ru->size;
112   /* And other regs, not available to the allocator. */
113
114   // unavail: x21 as GSP
115   // x8 is used as a ProfInc temporary
116   // x9 is used as a spill/reload/chaining/call temporary
117   // x30 as LR
118   // x31 because dealing with the SP-vs-ZR overloading is too
119   // confusing, and we don't need to do so, so let's just avoid
120   // the problem
121   //
122   // Currently, we have 15 allocatable integer registers:
123   // 0 1 2 3 4 5 6 7 22 23 24 25 26 27 28
124   //
125   // Hence for the allocatable integer registers we have:
126   //
127   // callee-saved: 22 23 24 25 26 27 28
128   // caller-saved: 0 1 2 3 4 5 6 7
129   //
130   // If the set of available registers changes or if the e/r status
131   // changes, be sure to re-check/sync the definition of
132   // getRegUsage for ARM64Instr_Call too.
133
134   ru->regs[ru->size++] = hregARM64_X8();
135   ru->regs[ru->size++] = hregARM64_X9();
136   ru->regs[ru->size++] = hregARM64_X21();
137
138   rRegUniverse_ARM64_initted = True;
139
140   RRegUniverse__check_is_sane(ru);
141   return ru;
142}
143
144
145void ppHRegARM64 ( HReg reg )  {
146   Int r;
147   /* Be generic for all virtual regs. */
148   if (hregIsVirtual(reg)) {
149      ppHReg(reg);
150      return;
151   }
152   /* But specific for real regs. */
153   switch (hregClass(reg)) {
154      case HRcInt64:
155         r = hregEncoding(reg);
156         vassert(r >= 0 && r < 31);
157         vex_printf("x%d", r);
158         return;
159      case HRcFlt64:
160         r = hregEncoding(reg);
161         vassert(r >= 0 && r < 32);
162         vex_printf("d%d", r);
163         return;
164      case HRcVec128:
165         r = hregEncoding(reg);
166         vassert(r >= 0 && r < 32);
167         vex_printf("q%d", r);
168         return;
169      default:
170         vpanic("ppHRegARM64");
171   }
172}
173
174static void ppHRegARM64asSreg ( HReg reg ) {
175   ppHRegARM64(reg);
176   vex_printf("(S-reg)");
177}
178
179static void ppHRegARM64asHreg ( HReg reg ) {
180   ppHRegARM64(reg);
181   vex_printf("(H-reg)");
182}
183
184
185/* --------- Condition codes, ARM64 encoding. --------- */
186
187static const HChar* showARM64CondCode ( ARM64CondCode cond ) {
188   switch (cond) {
189       case ARM64cc_EQ:  return "eq";
190       case ARM64cc_NE:  return "ne";
191       case ARM64cc_CS:  return "cs";
192       case ARM64cc_CC:  return "cc";
193       case ARM64cc_MI:  return "mi";
194       case ARM64cc_PL:  return "pl";
195       case ARM64cc_VS:  return "vs";
196       case ARM64cc_VC:  return "vc";
197       case ARM64cc_HI:  return "hi";
198       case ARM64cc_LS:  return "ls";
199       case ARM64cc_GE:  return "ge";
200       case ARM64cc_LT:  return "lt";
201       case ARM64cc_GT:  return "gt";
202       case ARM64cc_LE:  return "le";
203       case ARM64cc_AL:  return "al"; // default
204       case ARM64cc_NV:  return "nv";
205       default: vpanic("showARM64CondCode");
206   }
207}
208
209
210/* --------- Memory address expressions (amodes). --------- */
211
212ARM64AMode* ARM64AMode_RI9  ( HReg reg, Int simm9 ) {
213   ARM64AMode* am        = LibVEX_Alloc_inline(sizeof(ARM64AMode));
214   am->tag               = ARM64am_RI9;
215   am->ARM64am.RI9.reg   = reg;
216   am->ARM64am.RI9.simm9 = simm9;
217   vassert(-256 <= simm9 && simm9 <= 255);
218   return am;
219}
220
221ARM64AMode* ARM64AMode_RI12 ( HReg reg, Int uimm12, UChar szB ) {
222   ARM64AMode* am          = LibVEX_Alloc_inline(sizeof(ARM64AMode));
223   am->tag                 = ARM64am_RI12;
224   am->ARM64am.RI12.reg    = reg;
225   am->ARM64am.RI12.uimm12 = uimm12;
226   am->ARM64am.RI12.szB    = szB;
227   vassert(uimm12 >= 0 && uimm12 <= 4095);
228   switch (szB) {
229      case 1: case 2: case 4: case 8: break;
230      default: vassert(0);
231   }
232   return am;
233}
234
235ARM64AMode* ARM64AMode_RR ( HReg base, HReg index ) {
236   ARM64AMode* am       = LibVEX_Alloc_inline(sizeof(ARM64AMode));
237   am->tag              = ARM64am_RR;
238   am->ARM64am.RR.base  = base;
239   am->ARM64am.RR.index = index;
240   return am;
241}
242
243static void ppARM64AMode ( ARM64AMode* am ) {
244   switch (am->tag) {
245      case ARM64am_RI9:
246         vex_printf("%d(", am->ARM64am.RI9.simm9);
247         ppHRegARM64(am->ARM64am.RI9.reg);
248         vex_printf(")");
249         break;
250      case ARM64am_RI12:
251         vex_printf("%u(", (UInt)am->ARM64am.RI12.szB
252                           * (UInt)am->ARM64am.RI12.uimm12);
253         ppHRegARM64(am->ARM64am.RI12.reg);
254         vex_printf(")");
255         break;
256      case ARM64am_RR:
257         vex_printf("(");
258         ppHRegARM64(am->ARM64am.RR.base);
259         vex_printf(",");
260         ppHRegARM64(am->ARM64am.RR.index);
261         vex_printf(")");
262         break;
263      default:
264         vassert(0);
265   }
266}
267
268static void addRegUsage_ARM64AMode ( HRegUsage* u, ARM64AMode* am ) {
269   switch (am->tag) {
270      case ARM64am_RI9:
271         addHRegUse(u, HRmRead, am->ARM64am.RI9.reg);
272         return;
273      case ARM64am_RI12:
274         addHRegUse(u, HRmRead, am->ARM64am.RI12.reg);
275         return;
276      case ARM64am_RR:
277         addHRegUse(u, HRmRead, am->ARM64am.RR.base);
278         addHRegUse(u, HRmRead, am->ARM64am.RR.index);
279         return;
280      default:
281         vpanic("addRegUsage_ARM64Amode");
282   }
283}
284
285static void mapRegs_ARM64AMode ( HRegRemap* m, ARM64AMode* am ) {
286   switch (am->tag) {
287      case ARM64am_RI9:
288         am->ARM64am.RI9.reg = lookupHRegRemap(m, am->ARM64am.RI9.reg);
289         return;
290      case ARM64am_RI12:
291         am->ARM64am.RI12.reg = lookupHRegRemap(m, am->ARM64am.RI12.reg);
292         return;
293      case ARM64am_RR:
294         am->ARM64am.RR.base  = lookupHRegRemap(m, am->ARM64am.RR.base);
295         am->ARM64am.RR.index = lookupHRegRemap(m, am->ARM64am.RR.index);
296         return;
297      default:
298         vpanic("mapRegs_ARM64Amode");
299   }
300}
301
302
303/* --------- Reg or uimm12<<{0,12} operands --------- */
304
305ARM64RIA* ARM64RIA_I12 ( UShort imm12, UChar shift ) {
306   ARM64RIA* riA           = LibVEX_Alloc_inline(sizeof(ARM64RIA));
307   riA->tag                = ARM64riA_I12;
308   riA->ARM64riA.I12.imm12 = imm12;
309   riA->ARM64riA.I12.shift = shift;
310   vassert(imm12 < 4096);
311   vassert(shift == 0 || shift == 12);
312   return riA;
313}
314ARM64RIA* ARM64RIA_R ( HReg reg ) {
315   ARM64RIA* riA       = LibVEX_Alloc_inline(sizeof(ARM64RIA));
316   riA->tag            = ARM64riA_R;
317   riA->ARM64riA.R.reg = reg;
318   return riA;
319}
320
321static void ppARM64RIA ( ARM64RIA* riA ) {
322   switch (riA->tag) {
323      case ARM64riA_I12:
324         vex_printf("#%u",(UInt)(riA->ARM64riA.I12.imm12
325                                 << riA->ARM64riA.I12.shift));
326         break;
327      case ARM64riA_R:
328         ppHRegARM64(riA->ARM64riA.R.reg);
329         break;
330      default:
331         vassert(0);
332   }
333}
334
335static void addRegUsage_ARM64RIA ( HRegUsage* u, ARM64RIA* riA ) {
336   switch (riA->tag) {
337      case ARM64riA_I12:
338         return;
339      case ARM64riA_R:
340         addHRegUse(u, HRmRead, riA->ARM64riA.R.reg);
341         return;
342      default:
343         vpanic("addRegUsage_ARM64RIA");
344   }
345}
346
347static void mapRegs_ARM64RIA ( HRegRemap* m, ARM64RIA* riA ) {
348   switch (riA->tag) {
349      case ARM64riA_I12:
350         return;
351      case ARM64riA_R:
352         riA->ARM64riA.R.reg = lookupHRegRemap(m, riA->ARM64riA.R.reg);
353         return;
354      default:
355         vpanic("mapRegs_ARM64RIA");
356   }
357}
358
359
360/* --------- Reg or "bitfield" (logic immediate) operands --------- */
361
362ARM64RIL* ARM64RIL_I13 ( UChar bitN, UChar immR, UChar immS ) {
363   ARM64RIL* riL          = LibVEX_Alloc_inline(sizeof(ARM64RIL));
364   riL->tag               = ARM64riL_I13;
365   riL->ARM64riL.I13.bitN = bitN;
366   riL->ARM64riL.I13.immR = immR;
367   riL->ARM64riL.I13.immS = immS;
368   vassert(bitN < 2);
369   vassert(immR < 64);
370   vassert(immS < 64);
371   return riL;
372}
373ARM64RIL* ARM64RIL_R ( HReg reg ) {
374   ARM64RIL* riL       = LibVEX_Alloc_inline(sizeof(ARM64RIL));
375   riL->tag            = ARM64riL_R;
376   riL->ARM64riL.R.reg = reg;
377   return riL;
378}
379
380static void ppARM64RIL ( ARM64RIL* riL ) {
381   switch (riL->tag) {
382      case ARM64riL_I13:
383         vex_printf("#nrs(%u,%u,%u)",
384                     (UInt)riL->ARM64riL.I13.bitN,
385                     (UInt)riL->ARM64riL.I13.immR,
386                     (UInt)riL->ARM64riL.I13.immS);
387         break;
388      case ARM64riL_R:
389         ppHRegARM64(riL->ARM64riL.R.reg);
390         break;
391      default:
392         vassert(0);
393   }
394}
395
396static void addRegUsage_ARM64RIL ( HRegUsage* u, ARM64RIL* riL ) {
397   switch (riL->tag) {
398      case ARM64riL_I13:
399         return;
400      case ARM64riL_R:
401         addHRegUse(u, HRmRead, riL->ARM64riL.R.reg);
402         return;
403      default:
404         vpanic("addRegUsage_ARM64RIL");
405   }
406}
407
408static void mapRegs_ARM64RIL ( HRegRemap* m, ARM64RIL* riL ) {
409   switch (riL->tag) {
410      case ARM64riL_I13:
411         return;
412      case ARM64riL_R:
413         riL->ARM64riL.R.reg = lookupHRegRemap(m, riL->ARM64riL.R.reg);
414         return;
415      default:
416         vpanic("mapRegs_ARM64RIL");
417   }
418}
419
420
421/* --------------- Reg or uimm6 operands --------------- */
422
423ARM64RI6* ARM64RI6_I6 ( UInt imm6 ) {
424   ARM64RI6* ri6         = LibVEX_Alloc_inline(sizeof(ARM64RI6));
425   ri6->tag              = ARM64ri6_I6;
426   ri6->ARM64ri6.I6.imm6 = imm6;
427   vassert(imm6 > 0 && imm6 < 64);
428   return ri6;
429}
430ARM64RI6* ARM64RI6_R ( HReg reg ) {
431   ARM64RI6* ri6       = LibVEX_Alloc_inline(sizeof(ARM64RI6));
432   ri6->tag            = ARM64ri6_R;
433   ri6->ARM64ri6.R.reg = reg;
434   return ri6;
435}
436
437static void ppARM64RI6 ( ARM64RI6* ri6 ) {
438   switch (ri6->tag) {
439      case ARM64ri6_I6:
440         vex_printf("#%u", ri6->ARM64ri6.I6.imm6);
441         break;
442      case ARM64ri6_R:
443         ppHRegARM64(ri6->ARM64ri6.R.reg);
444         break;
445      default:
446         vassert(0);
447   }
448}
449
450static void addRegUsage_ARM64RI6 ( HRegUsage* u, ARM64RI6* ri6 ) {
451   switch (ri6->tag) {
452      case ARM64ri6_I6:
453         return;
454      case ARM64ri6_R:
455         addHRegUse(u, HRmRead, ri6->ARM64ri6.R.reg);
456         return;
457      default:
458         vpanic("addRegUsage_ARM64RI6");
459   }
460}
461
462static void mapRegs_ARM64RI6 ( HRegRemap* m, ARM64RI6* ri6 ) {
463   switch (ri6->tag) {
464      case ARM64ri6_I6:
465         return;
466      case ARM64ri6_R:
467         ri6->ARM64ri6.R.reg = lookupHRegRemap(m, ri6->ARM64ri6.R.reg);
468         return;
469      default:
470         vpanic("mapRegs_ARM64RI6");
471   }
472}
473
474
475/* --------- Instructions. --------- */
476
477static const HChar* showARM64LogicOp ( ARM64LogicOp op ) {
478   switch (op) {
479      case ARM64lo_AND: return "and";
480      case ARM64lo_OR:  return "orr";
481      case ARM64lo_XOR: return "eor";
482      default: vpanic("showARM64LogicOp");
483   }
484}
485
486static const HChar* showARM64ShiftOp ( ARM64ShiftOp op ) {
487   switch (op) {
488      case ARM64sh_SHL: return "lsl";
489      case ARM64sh_SHR: return "lsr";
490      case ARM64sh_SAR: return "asr";
491      default: vpanic("showARM64ShiftOp");
492   }
493}
494
495static const HChar* showARM64UnaryOp ( ARM64UnaryOp op ) {
496   switch (op) {
497      case ARM64un_NEG: return "neg";
498      case ARM64un_NOT: return "not";
499      case ARM64un_CLZ: return "clz";
500      default: vpanic("showARM64UnaryOp");
501   }
502}
503
504static const HChar* showARM64MulOp ( ARM64MulOp op ) {
505   switch (op) {
506      case ARM64mul_PLAIN: return "mul  ";
507      case ARM64mul_ZX:    return "umulh";
508      case ARM64mul_SX:    return "smulh";
509      default: vpanic("showARM64MulOp");
510   }
511}
512
513static void characteriseARM64CvtOp ( /*OUT*/HChar* syn,
514                                     /*OUT*/UInt* fszB, /*OUT*/UInt* iszB,
515                                     ARM64CvtOp op ) {
516   switch (op) {
517      case ARM64cvt_F32_I32S:
518         *syn = 's'; *fszB = 4; *iszB = 4; break;
519      case ARM64cvt_F64_I32S:
520         *syn = 's'; *fszB = 8; *iszB = 4; break;
521      case ARM64cvt_F32_I64S:
522         *syn = 's'; *fszB = 4; *iszB = 8; break;
523      case ARM64cvt_F64_I64S:
524         *syn = 's'; *fszB = 8; *iszB = 8; break;
525      case ARM64cvt_F32_I32U:
526         *syn = 'u'; *fszB = 4; *iszB = 4; break;
527      case ARM64cvt_F64_I32U:
528         *syn = 'u'; *fszB = 8; *iszB = 4; break;
529      case ARM64cvt_F32_I64U:
530         *syn = 'u'; *fszB = 4; *iszB = 8; break;
531      case ARM64cvt_F64_I64U:
532         *syn = 'u'; *fszB = 8; *iszB = 8; break;
533      default:
534         vpanic("characteriseARM64CvtOp");
535  }
536}
537
538static const HChar* showARM64FpBinOp ( ARM64FpBinOp op ) {
539   switch (op) {
540      case ARM64fpb_ADD: return "add";
541      case ARM64fpb_SUB: return "sub";
542      case ARM64fpb_MUL: return "mul";
543      case ARM64fpb_DIV: return "div";
544      default: vpanic("showARM64FpBinOp");
545   }
546}
547
548static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) {
549   switch (op) {
550      case ARM64fpu_NEG:   return "neg  ";
551      case ARM64fpu_ABS:   return "abs  ";
552      case ARM64fpu_SQRT:  return "sqrt ";
553      case ARM64fpu_RINT:  return "rinti";
554      case ARM64fpu_RECPX: return "recpx";
555      default: vpanic("showARM64FpUnaryOp");
556   }
557}
558
559static void showARM64VecBinOp(/*OUT*/const HChar** nm,
560                              /*OUT*/const HChar** ar, ARM64VecBinOp op ) {
561   switch (op) {
562      case ARM64vecb_ADD64x2:      *nm = "add   ";    *ar = "2d";   return;
563      case ARM64vecb_ADD32x4:      *nm = "add   ";    *ar = "4s";   return;
564      case ARM64vecb_ADD16x8:      *nm = "add   ";    *ar = "8h";   return;
565      case ARM64vecb_ADD8x16:      *nm = "add   ";    *ar = "16b";  return;
566      case ARM64vecb_SUB64x2:      *nm = "sub   ";    *ar = "2d";   return;
567      case ARM64vecb_SUB32x4:      *nm = "sub   ";    *ar = "4s";   return;
568      case ARM64vecb_SUB16x8:      *nm = "sub   ";    *ar = "8h";   return;
569      case ARM64vecb_SUB8x16:      *nm = "sub   ";    *ar = "16b";  return;
570      case ARM64vecb_MUL32x4:      *nm = "mul   ";    *ar = "4s";   return;
571      case ARM64vecb_MUL16x8:      *nm = "mul   ";    *ar = "8h";   return;
572      case ARM64vecb_MUL8x16:      *nm = "mul   ";    *ar = "16b";  return;
573      case ARM64vecb_FADD64x2:     *nm = "fadd  ";    *ar = "2d";   return;
574      case ARM64vecb_FSUB64x2:     *nm = "fsub  ";    *ar = "2d";   return;
575      case ARM64vecb_FMUL64x2:     *nm = "fmul  ";    *ar = "2d";   return;
576      case ARM64vecb_FDIV64x2:     *nm = "fdiv  ";    *ar = "2d";   return;
577      case ARM64vecb_FADD32x4:     *nm = "fadd  ";    *ar = "4s";   return;
578      case ARM64vecb_FSUB32x4:     *nm = "fsub  ";    *ar = "4s";   return;
579      case ARM64vecb_FMUL32x4:     *nm = "fmul  ";    *ar = "4s";   return;
580      case ARM64vecb_FDIV32x4:     *nm = "fdiv  ";    *ar = "4s";   return;
581      case ARM64vecb_FMAX64x2:     *nm = "fmax  ";    *ar = "2d";   return;
582      case ARM64vecb_FMAX32x4:     *nm = "fmax  ";    *ar = "4s";   return;
583      case ARM64vecb_FMIN64x2:     *nm = "fmin  ";    *ar = "2d";   return;
584      case ARM64vecb_FMIN32x4:     *nm = "fmin  ";    *ar = "4s";   return;
585      case ARM64vecb_UMAX32x4:     *nm = "umax  ";    *ar = "4s";   return;
586      case ARM64vecb_UMAX16x8:     *nm = "umax  ";    *ar = "8h";   return;
587      case ARM64vecb_UMAX8x16:     *nm = "umax  ";    *ar = "16b";  return;
588      case ARM64vecb_UMIN32x4:     *nm = "umin  ";    *ar = "4s";   return;
589      case ARM64vecb_UMIN16x8:     *nm = "umin  ";    *ar = "8h";   return;
590      case ARM64vecb_UMIN8x16:     *nm = "umin  ";    *ar = "16b";  return;
591      case ARM64vecb_SMAX32x4:     *nm = "smax  ";    *ar = "4s";   return;
592      case ARM64vecb_SMAX16x8:     *nm = "smax  ";    *ar = "8h";   return;
593      case ARM64vecb_SMAX8x16:     *nm = "smax  ";    *ar = "16b";  return;
594      case ARM64vecb_SMIN32x4:     *nm = "smin  ";    *ar = "4s";   return;
595      case ARM64vecb_SMIN16x8:     *nm = "smin  ";    *ar = "8h";   return;
596      case ARM64vecb_SMIN8x16:     *nm = "smin  ";    *ar = "16b";  return;
597      case ARM64vecb_AND:          *nm = "and   ";    *ar = "16b";  return;
598      case ARM64vecb_ORR:          *nm = "orr   ";    *ar = "16b";  return;
599      case ARM64vecb_XOR:          *nm = "eor   ";    *ar = "16b";  return;
600      case ARM64vecb_CMEQ64x2:     *nm = "cmeq  ";    *ar = "2d";   return;
601      case ARM64vecb_CMEQ32x4:     *nm = "cmeq  ";    *ar = "4s";   return;
602      case ARM64vecb_CMEQ16x8:     *nm = "cmeq  ";    *ar = "8h";   return;
603      case ARM64vecb_CMEQ8x16:     *nm = "cmeq  ";    *ar = "16b";  return;
604      case ARM64vecb_CMHI64x2:     *nm = "cmhi  ";    *ar = "2d";   return;
605      case ARM64vecb_CMHI32x4:     *nm = "cmhi  ";    *ar = "4s";   return;
606      case ARM64vecb_CMHI16x8:     *nm = "cmhi  ";    *ar = "8h";   return;
607      case ARM64vecb_CMHI8x16:     *nm = "cmhi  ";    *ar = "16b";  return;
608      case ARM64vecb_CMGT64x2:     *nm = "cmgt  ";    *ar = "2d";   return;
609      case ARM64vecb_CMGT32x4:     *nm = "cmgt  ";    *ar = "4s";   return;
610      case ARM64vecb_CMGT16x8:     *nm = "cmgt  ";    *ar = "8h";   return;
611      case ARM64vecb_CMGT8x16:     *nm = "cmgt  ";    *ar = "16b";  return;
612      case ARM64vecb_FCMEQ64x2:    *nm = "fcmeq ";    *ar = "2d";   return;
613      case ARM64vecb_FCMEQ32x4:    *nm = "fcmeq ";    *ar = "4s";   return;
614      case ARM64vecb_FCMGE64x2:    *nm = "fcmge ";    *ar = "2d";   return;
615      case ARM64vecb_FCMGE32x4:    *nm = "fcmge ";    *ar = "4s";   return;
616      case ARM64vecb_FCMGT64x2:    *nm = "fcmgt ";    *ar = "2d";   return;
617      case ARM64vecb_FCMGT32x4:    *nm = "fcmgt ";    *ar = "4s";   return;
618      case ARM64vecb_TBL1:         *nm = "tbl   ";    *ar = "16b";  return;
619      case ARM64vecb_UZP164x2:     *nm = "uzp1  ";    *ar = "2d";   return;
620      case ARM64vecb_UZP132x4:     *nm = "uzp1  ";    *ar = "4s";   return;
621      case ARM64vecb_UZP116x8:     *nm = "uzp1  ";    *ar = "8h";   return;
622      case ARM64vecb_UZP18x16:     *nm = "uzp1  ";    *ar = "16b";  return;
623      case ARM64vecb_UZP264x2:     *nm = "uzp2  ";    *ar = "2d";   return;
624      case ARM64vecb_UZP232x4:     *nm = "uzp2  ";    *ar = "4s";   return;
625      case ARM64vecb_UZP216x8:     *nm = "uzp2  ";    *ar = "8h";   return;
626      case ARM64vecb_UZP28x16:     *nm = "uzp2  ";    *ar = "16b";  return;
627      case ARM64vecb_ZIP132x4:     *nm = "zip1  ";    *ar = "4s";   return;
628      case ARM64vecb_ZIP116x8:     *nm = "zip1  ";    *ar = "8h";   return;
629      case ARM64vecb_ZIP18x16:     *nm = "zip1  ";    *ar = "16b";  return;
630      case ARM64vecb_ZIP232x4:     *nm = "zip2  ";    *ar = "4s";   return;
631      case ARM64vecb_ZIP216x8:     *nm = "zip2  ";    *ar = "8h";   return;
632      case ARM64vecb_ZIP28x16:     *nm = "zip2  ";    *ar = "16b";  return;
633      case ARM64vecb_PMUL8x16:     *nm = "pmul  ";    *ar = "16b";  return;
634      case ARM64vecb_PMULL8x8:     *nm = "pmull ";    *ar = "8hbb"; return;
635      case ARM64vecb_UMULL2DSS:    *nm = "umull ";    *ar = "2dss"; return;
636      case ARM64vecb_UMULL4SHH:    *nm = "umull ";    *ar = "4shh"; return;
637      case ARM64vecb_UMULL8HBB:    *nm = "umull ";    *ar = "8hbb"; return;
638      case ARM64vecb_SMULL2DSS:    *nm = "smull ";    *ar = "2dss"; return;
639      case ARM64vecb_SMULL4SHH:    *nm = "smull ";    *ar = "4shh"; return;
640      case ARM64vecb_SMULL8HBB:    *nm = "smull ";    *ar = "8hbb"; return;
641      case ARM64vecb_SQADD64x2:    *nm = "sqadd ";    *ar = "2d";   return;
642      case ARM64vecb_SQADD32x4:    *nm = "sqadd ";    *ar = "4s";   return;
643      case ARM64vecb_SQADD16x8:    *nm = "sqadd ";    *ar = "8h";   return;
644      case ARM64vecb_SQADD8x16:    *nm = "sqadd ";    *ar = "16b";  return;
645      case ARM64vecb_UQADD64x2:    *nm = "uqadd ";    *ar = "2d";   return;
646      case ARM64vecb_UQADD32x4:    *nm = "uqadd ";    *ar = "4s";   return;
647      case ARM64vecb_UQADD16x8:    *nm = "uqadd ";    *ar = "8h";   return;
648      case ARM64vecb_UQADD8x16:    *nm = "uqadd ";    *ar = "16b";  return;
649      case ARM64vecb_SQSUB64x2:    *nm = "sqsub ";    *ar = "2d";   return;
650      case ARM64vecb_SQSUB32x4:    *nm = "sqsub ";    *ar = "4s";   return;
651      case ARM64vecb_SQSUB16x8:    *nm = "sqsub ";    *ar = "8h";   return;
652      case ARM64vecb_SQSUB8x16:    *nm = "sqsub ";    *ar = "16b";  return;
653      case ARM64vecb_UQSUB64x2:    *nm = "uqsub ";    *ar = "2d";   return;
654      case ARM64vecb_UQSUB32x4:    *nm = "uqsub ";    *ar = "4s";   return;
655      case ARM64vecb_UQSUB16x8:    *nm = "uqsub ";    *ar = "8h";   return;
656      case ARM64vecb_UQSUB8x16:    *nm = "uqsub ";    *ar = "16b";  return;
657      case ARM64vecb_SQDMULL2DSS:  *nm = "sqdmull";   *ar = "2dss"; return;
658      case ARM64vecb_SQDMULL4SHH:  *nm = "sqdmull";   *ar = "4shh"; return;
659      case ARM64vecb_SQDMULH32x4:  *nm = "sqdmulh";   *ar = "4s";   return;
660      case ARM64vecb_SQDMULH16x8:  *nm = "sqdmulh";   *ar = "8h";   return;
661      case ARM64vecb_SQRDMULH32x4: *nm = "sqrdmulh";  *ar = "4s";   return;
662      case ARM64vecb_SQRDMULH16x8: *nm = "sqrdmulh";  *ar = "8h";   return;
663      case ARM64vecb_SQSHL64x2:    *nm = "sqshl ";    *ar = "2d";   return;
664      case ARM64vecb_SQSHL32x4:    *nm = "sqshl ";    *ar = "4s";   return;
665      case ARM64vecb_SQSHL16x8:    *nm = "sqshl ";    *ar = "8h";   return;
666      case ARM64vecb_SQSHL8x16:    *nm = "sqshl ";    *ar = "16b";  return;
667      case ARM64vecb_UQSHL64x2:    *nm = "uqshl ";    *ar = "2d";   return;
668      case ARM64vecb_UQSHL32x4:    *nm = "uqshl ";    *ar = "4s";   return;
669      case ARM64vecb_UQSHL16x8:    *nm = "uqshl ";    *ar = "8h";   return;
670      case ARM64vecb_UQSHL8x16:    *nm = "uqshl ";    *ar = "16b";  return;
671      case ARM64vecb_SQRSHL64x2:   *nm = "sqrshl";    *ar = "2d";   return;
672      case ARM64vecb_SQRSHL32x4:   *nm = "sqrshl";    *ar = "4s";   return;
673      case ARM64vecb_SQRSHL16x8:   *nm = "sqrshl";    *ar = "8h";   return;
674      case ARM64vecb_SQRSHL8x16:   *nm = "sqrshl";    *ar = "16b";  return;
675      case ARM64vecb_UQRSHL64x2:   *nm = "uqrshl";    *ar = "2d";   return;
676      case ARM64vecb_UQRSHL32x4:   *nm = "uqrshl";    *ar = "4s";   return;
677      case ARM64vecb_UQRSHL16x8:   *nm = "uqrshl";    *ar = "8h";   return;
678      case ARM64vecb_UQRSHL8x16:   *nm = "uqrshl";    *ar = "16b";  return;
679      case ARM64vecb_SSHL64x2:     *nm = "sshl  ";    *ar = "2d";   return;
680      case ARM64vecb_SSHL32x4:     *nm = "sshl  ";    *ar = "4s";   return;
681      case ARM64vecb_SSHL16x8:     *nm = "sshl  ";    *ar = "8h";   return;
682      case ARM64vecb_SSHL8x16:     *nm = "sshl  ";    *ar = "16b";  return;
683      case ARM64vecb_USHL64x2:     *nm = "ushl  ";    *ar = "2d";   return;
684      case ARM64vecb_USHL32x4:     *nm = "ushl  ";    *ar = "4s";   return;
685      case ARM64vecb_USHL16x8:     *nm = "ushl  ";    *ar = "8h";   return;
686      case ARM64vecb_USHL8x16:     *nm = "ushl  ";    *ar = "16b";  return;
687      case ARM64vecb_SRSHL64x2:    *nm = "srshl ";    *ar = "2d";   return;
688      case ARM64vecb_SRSHL32x4:    *nm = "srshl ";    *ar = "4s";   return;
689      case ARM64vecb_SRSHL16x8:    *nm = "srshl ";    *ar = "8h";   return;
690      case ARM64vecb_SRSHL8x16:    *nm = "srshl ";    *ar = "16b";  return;
691      case ARM64vecb_URSHL64x2:    *nm = "urshl ";    *ar = "2d";   return;
692      case ARM64vecb_URSHL32x4:    *nm = "urshl ";    *ar = "4s";   return;
693      case ARM64vecb_URSHL16x8:    *nm = "urshl ";    *ar = "8h";   return;
694      case ARM64vecb_URSHL8x16:    *nm = "urshl ";    *ar = "16b";  return;
695      case ARM64vecb_FRECPS64x2:   *nm = "frecps";    *ar = "2d";   return;
696      case ARM64vecb_FRECPS32x4:   *nm = "frecps";    *ar = "4s";   return;
697      case ARM64vecb_FRSQRTS64x2:  *nm = "frsqrts";   *ar = "2d";   return;
698      case ARM64vecb_FRSQRTS32x4:  *nm = "frsqrts";   *ar = "4s";   return;
699      default: vpanic("showARM64VecBinOp");
700   }
701}
702
703static void showARM64VecModifyOp(/*OUT*/const HChar** nm,
704                                 /*OUT*/const HChar** ar,
705                                 ARM64VecModifyOp op ) {
706   switch (op) {
707      case ARM64vecmo_SUQADD64x2:   *nm = "suqadd";    *ar = "2d";   return;
708      case ARM64vecmo_SUQADD32x4:   *nm = "suqadd";    *ar = "4s";   return;
709      case ARM64vecmo_SUQADD16x8:   *nm = "suqadd";    *ar = "8h";   return;
710      case ARM64vecmo_SUQADD8x16:   *nm = "suqadd";    *ar = "16b";  return;
711      case ARM64vecmo_USQADD64x2:   *nm = "usqadd";    *ar = "2d";   return;
712      case ARM64vecmo_USQADD32x4:   *nm = "usqadd";    *ar = "4s";   return;
713      case ARM64vecmo_USQADD16x8:   *nm = "usqadd";    *ar = "8h";   return;
714      case ARM64vecmo_USQADD8x16:   *nm = "usqadd";    *ar = "16b";  return;
715      default: vpanic("showARM64VecModifyOp");
716   }
717}
718
719static void showARM64VecUnaryOp(/*OUT*/const HChar** nm,
720                                /*OUT*/const HChar** ar, ARM64VecUnaryOp op )
721{
722   switch (op) {
723      case ARM64vecu_FNEG64x2:    *nm = "fneg ";   *ar = "2d";  return;
724      case ARM64vecu_FNEG32x4:    *nm = "fneg ";   *ar = "4s";  return;
725      case ARM64vecu_FABS64x2:    *nm = "fabs ";   *ar = "2d";  return;
726      case ARM64vecu_FABS32x4:    *nm = "fabs ";   *ar = "4s";  return;
727      case ARM64vecu_NOT:         *nm = "not  ";   *ar = "all"; return;
728      case ARM64vecu_ABS64x2:     *nm = "abs  ";   *ar = "2d";  return;
729      case ARM64vecu_ABS32x4:     *nm = "abs  ";   *ar = "4s";  return;
730      case ARM64vecu_ABS16x8:     *nm = "abs  ";   *ar = "8h";  return;
731      case ARM64vecu_ABS8x16:     *nm = "abs  ";   *ar = "16b"; return;
732      case ARM64vecu_CLS32x4:     *nm = "cls  ";   *ar = "4s";  return;
733      case ARM64vecu_CLS16x8:     *nm = "cls  ";   *ar = "8h";  return;
734      case ARM64vecu_CLS8x16:     *nm = "cls  ";   *ar = "16b"; return;
735      case ARM64vecu_CLZ32x4:     *nm = "clz  ";   *ar = "4s";  return;
736      case ARM64vecu_CLZ16x8:     *nm = "clz  ";   *ar = "8h";  return;
737      case ARM64vecu_CLZ8x16:     *nm = "clz  ";   *ar = "16b"; return;
738      case ARM64vecu_CNT8x16:     *nm = "cnt  ";   *ar = "16b"; return;
739      case ARM64vecu_RBIT:        *nm = "rbit ";   *ar = "16b"; return;
740      case ARM64vecu_REV1616B:    *nm = "rev16";   *ar = "16b"; return;
741      case ARM64vecu_REV3216B:    *nm = "rev32";   *ar = "16b"; return;
742      case ARM64vecu_REV328H:     *nm = "rev32";   *ar = "8h";  return;
743      case ARM64vecu_REV6416B:    *nm = "rev64";   *ar = "16b"; return;
744      case ARM64vecu_REV648H:     *nm = "rev64";   *ar = "8h";  return;
745      case ARM64vecu_REV644S:     *nm = "rev64";   *ar = "4s";  return;
746      case ARM64vecu_URECPE32x4:  *nm = "urecpe";  *ar = "4s";  return;
747      case ARM64vecu_URSQRTE32x4: *nm = "ursqrte"; *ar = "4s";  return;
748      case ARM64vecu_FRECPE64x2:  *nm = "frecpe";  *ar = "2d";  return;
749      case ARM64vecu_FRECPE32x4:  *nm = "frecpe";  *ar = "4s";  return;
750      case ARM64vecu_FRSQRTE64x2: *nm = "frsqrte"; *ar = "2d";  return;
751      case ARM64vecu_FRSQRTE32x4: *nm = "frsqrte"; *ar = "4s";  return;
752      case ARM64vecu_FSQRT64x2:   *nm = "fsqrt";   *ar = "2d";  return;
753      case ARM64vecu_FSQRT32x4:   *nm = "fsqrt";   *ar = "4s";  return;
754      default: vpanic("showARM64VecUnaryOp");
755   }
756}
757
758static void showARM64VecShiftImmOp(/*OUT*/const HChar** nm,
759                                   /*OUT*/const HChar** ar,
760                                   ARM64VecShiftImmOp op )
761{
762   switch (op) {
763      case ARM64vecshi_USHR64x2:    *nm = "ushr  ";   *ar = "2d";  return;
764      case ARM64vecshi_USHR32x4:    *nm = "ushr  ";   *ar = "4s";  return;
765      case ARM64vecshi_USHR16x8:    *nm = "ushr  ";   *ar = "8h";  return;
766      case ARM64vecshi_USHR8x16:    *nm = "ushr  ";   *ar = "16b"; return;
767      case ARM64vecshi_SSHR64x2:    *nm = "sshr  ";   *ar = "2d";  return;
768      case ARM64vecshi_SSHR32x4:    *nm = "sshr  ";   *ar = "4s";  return;
769      case ARM64vecshi_SSHR16x8:    *nm = "sshr  ";   *ar = "8h";  return;
770      case ARM64vecshi_SSHR8x16:    *nm = "sshr  ";   *ar = "16b"; return;
771      case ARM64vecshi_SHL64x2:     *nm = "shl   ";   *ar = "2d";  return;
772      case ARM64vecshi_SHL32x4:     *nm = "shl   ";   *ar = "4s";  return;
773      case ARM64vecshi_SHL16x8:     *nm = "shl   ";   *ar = "8h";  return;
774      case ARM64vecshi_SHL8x16:     *nm = "shl   ";   *ar = "16b"; return;
775      case ARM64vecshi_SQSHRN2SD:   *nm = "sqshrn";   *ar = "2sd"; return;
776      case ARM64vecshi_SQSHRN4HS:   *nm = "sqshrn";   *ar = "4hs"; return;
777      case ARM64vecshi_SQSHRN8BH:   *nm = "sqshrn";   *ar = "8bh"; return;
778      case ARM64vecshi_UQSHRN2SD:   *nm = "uqshrn";   *ar = "2sd"; return;
779      case ARM64vecshi_UQSHRN4HS:   *nm = "uqshrn";   *ar = "4hs"; return;
780      case ARM64vecshi_UQSHRN8BH:   *nm = "uqshrn";   *ar = "8bh"; return;
781      case ARM64vecshi_SQSHRUN2SD:  *nm = "sqshrun";  *ar = "2sd"; return;
782      case ARM64vecshi_SQSHRUN4HS:  *nm = "sqshrun";  *ar = "4hs"; return;
783      case ARM64vecshi_SQSHRUN8BH:  *nm = "sqshrun";  *ar = "8bh"; return;
784      case ARM64vecshi_SQRSHRN2SD:  *nm = "sqrshrn";  *ar = "2sd"; return;
785      case ARM64vecshi_SQRSHRN4HS:  *nm = "sqrshrn";  *ar = "4hs"; return;
786      case ARM64vecshi_SQRSHRN8BH:  *nm = "sqrshrn";  *ar = "8bh"; return;
787      case ARM64vecshi_UQRSHRN2SD:  *nm = "uqrshrn";  *ar = "2sd"; return;
788      case ARM64vecshi_UQRSHRN4HS:  *nm = "uqrshrn";  *ar = "4hs"; return;
789      case ARM64vecshi_UQRSHRN8BH:  *nm = "uqrshrn";  *ar = "8bh"; return;
790      case ARM64vecshi_SQRSHRUN2SD: *nm = "sqrshrun"; *ar = "2sd"; return;
791      case ARM64vecshi_SQRSHRUN4HS: *nm = "sqrshrun"; *ar = "4hs"; return;
792      case ARM64vecshi_SQRSHRUN8BH: *nm = "sqrshrun"; *ar = "8bh"; return;
793      case ARM64vecshi_UQSHL64x2:   *nm = "uqshl ";   *ar = "2d";  return;
794      case ARM64vecshi_UQSHL32x4:   *nm = "uqshl ";   *ar = "4s";  return;
795      case ARM64vecshi_UQSHL16x8:   *nm = "uqshl ";   *ar = "8h";  return;
796      case ARM64vecshi_UQSHL8x16:   *nm = "uqshl ";   *ar = "16b"; return;
797      case ARM64vecshi_SQSHL64x2:   *nm = "sqshl ";   *ar = "2d";  return;
798      case ARM64vecshi_SQSHL32x4:   *nm = "sqshl ";   *ar = "4s";  return;
799      case ARM64vecshi_SQSHL16x8:   *nm = "sqshl ";   *ar = "8h";  return;
800      case ARM64vecshi_SQSHL8x16:   *nm = "sqshl ";   *ar = "16b"; return;
801      case ARM64vecshi_SQSHLU64x2:  *nm = "sqshlu";   *ar = "2d";  return;
802      case ARM64vecshi_SQSHLU32x4:  *nm = "sqshlu";   *ar = "4s";  return;
803      case ARM64vecshi_SQSHLU16x8:  *nm = "sqshlu";   *ar = "8h";  return;
804      case ARM64vecshi_SQSHLU8x16:  *nm = "sqshlu";   *ar = "16b"; return;
805      default: vpanic("showARM64VecShiftImmOp");
806   }
807}
808
809static const HChar* showARM64VecNarrowOp(ARM64VecNarrowOp op) {
810   switch (op) {
811      case ARM64vecna_XTN:    return "xtn   ";
812      case ARM64vecna_SQXTN:  return "sqxtn ";
813      case ARM64vecna_UQXTN:  return "uqxtn ";
814      case ARM64vecna_SQXTUN: return "sqxtun";
815      default: vpanic("showARM64VecNarrowOp");
816   }
817}
818
819ARM64Instr* ARM64Instr_Arith ( HReg dst,
820                               HReg argL, ARM64RIA* argR, Bool isAdd ) {
821   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
822   i->tag                 = ARM64in_Arith;
823   i->ARM64in.Arith.dst   = dst;
824   i->ARM64in.Arith.argL  = argL;
825   i->ARM64in.Arith.argR  = argR;
826   i->ARM64in.Arith.isAdd = isAdd;
827   return i;
828}
829ARM64Instr* ARM64Instr_Cmp ( HReg argL, ARM64RIA* argR, Bool is64 ) {
830   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
831   i->tag              = ARM64in_Cmp;
832   i->ARM64in.Cmp.argL = argL;
833   i->ARM64in.Cmp.argR = argR;
834   i->ARM64in.Cmp.is64 = is64;
835   return i;
836}
837ARM64Instr* ARM64Instr_Logic ( HReg dst,
838                               HReg argL, ARM64RIL* argR, ARM64LogicOp op ) {
839   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
840   i->tag                 = ARM64in_Logic;
841   i->ARM64in.Logic.dst   = dst;
842   i->ARM64in.Logic.argL  = argL;
843   i->ARM64in.Logic.argR  = argR;
844   i->ARM64in.Logic.op    = op;
845   return i;
846}
847ARM64Instr* ARM64Instr_Test ( HReg argL, ARM64RIL* argR ) {
848   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
849   i->tag               = ARM64in_Test;
850   i->ARM64in.Test.argL = argL;
851   i->ARM64in.Test.argR = argR;
852   return i;
853}
854ARM64Instr* ARM64Instr_Shift ( HReg dst,
855                               HReg argL, ARM64RI6* argR, ARM64ShiftOp op ) {
856   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
857   i->tag                = ARM64in_Shift;
858   i->ARM64in.Shift.dst  = dst;
859   i->ARM64in.Shift.argL = argL;
860   i->ARM64in.Shift.argR = argR;
861   i->ARM64in.Shift.op   = op;
862   return i;
863}
864ARM64Instr* ARM64Instr_Unary ( HReg dst, HReg src, ARM64UnaryOp op ) {
865   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
866   i->tag               = ARM64in_Unary;
867   i->ARM64in.Unary.dst = dst;
868   i->ARM64in.Unary.src = src;
869   i->ARM64in.Unary.op  = op;
870   return i;
871}
872ARM64Instr* ARM64Instr_MovI ( HReg dst, HReg src ) {
873   ARM64Instr* i      = LibVEX_Alloc_inline(sizeof(ARM64Instr));
874   i->tag             = ARM64in_MovI;
875   i->ARM64in.MovI.dst = dst;
876   i->ARM64in.MovI.src = src;
877   vassert(hregClass(src) == HRcInt64);
878   vassert(hregClass(dst) == HRcInt64);
879   return i;
880}
881ARM64Instr* ARM64Instr_Imm64 ( HReg dst, ULong imm64 ) {
882   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
883   i->tag                 = ARM64in_Imm64;
884   i->ARM64in.Imm64.dst   = dst;
885   i->ARM64in.Imm64.imm64 = imm64;
886   return i;
887}
888ARM64Instr* ARM64Instr_LdSt64 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
889   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
890   i->tag                   = ARM64in_LdSt64;
891   i->ARM64in.LdSt64.isLoad = isLoad;
892   i->ARM64in.LdSt64.rD     = rD;
893   i->ARM64in.LdSt64.amode  = amode;
894   return i;
895}
896ARM64Instr* ARM64Instr_LdSt32 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
897   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
898   i->tag                   = ARM64in_LdSt32;
899   i->ARM64in.LdSt32.isLoad = isLoad;
900   i->ARM64in.LdSt32.rD     = rD;
901   i->ARM64in.LdSt32.amode  = amode;
902   return i;
903}
904ARM64Instr* ARM64Instr_LdSt16 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
905   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
906   i->tag                   = ARM64in_LdSt16;
907   i->ARM64in.LdSt16.isLoad = isLoad;
908   i->ARM64in.LdSt16.rD     = rD;
909   i->ARM64in.LdSt16.amode  = amode;
910   return i;
911}
912ARM64Instr* ARM64Instr_LdSt8 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
913   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
914   i->tag                  = ARM64in_LdSt8;
915   i->ARM64in.LdSt8.isLoad = isLoad;
916   i->ARM64in.LdSt8.rD     = rD;
917   i->ARM64in.LdSt8.amode  = amode;
918   return i;
919}
920ARM64Instr* ARM64Instr_XDirect ( Addr64 dstGA, ARM64AMode* amPC,
921                                 ARM64CondCode cond, Bool toFastEP ) {
922   ARM64Instr* i               = LibVEX_Alloc_inline(sizeof(ARM64Instr));
923   i->tag                      = ARM64in_XDirect;
924   i->ARM64in.XDirect.dstGA    = dstGA;
925   i->ARM64in.XDirect.amPC     = amPC;
926   i->ARM64in.XDirect.cond     = cond;
927   i->ARM64in.XDirect.toFastEP = toFastEP;
928   return i;
929}
930ARM64Instr* ARM64Instr_XIndir ( HReg dstGA, ARM64AMode* amPC,
931                                ARM64CondCode cond ) {
932   ARM64Instr* i           = LibVEX_Alloc_inline(sizeof(ARM64Instr));
933   i->tag                  = ARM64in_XIndir;
934   i->ARM64in.XIndir.dstGA = dstGA;
935   i->ARM64in.XIndir.amPC  = amPC;
936   i->ARM64in.XIndir.cond  = cond;
937   return i;
938}
939ARM64Instr* ARM64Instr_XAssisted ( HReg dstGA, ARM64AMode* amPC,
940                                   ARM64CondCode cond, IRJumpKind jk ) {
941   ARM64Instr* i              = LibVEX_Alloc_inline(sizeof(ARM64Instr));
942   i->tag                     = ARM64in_XAssisted;
943   i->ARM64in.XAssisted.dstGA = dstGA;
944   i->ARM64in.XAssisted.amPC  = amPC;
945   i->ARM64in.XAssisted.cond  = cond;
946   i->ARM64in.XAssisted.jk    = jk;
947   return i;
948}
949ARM64Instr* ARM64Instr_CSel ( HReg dst, HReg argL, HReg argR,
950                              ARM64CondCode cond ) {
951   ARM64Instr* i        = LibVEX_Alloc_inline(sizeof(ARM64Instr));
952   i->tag               = ARM64in_CSel;
953   i->ARM64in.CSel.dst  = dst;
954   i->ARM64in.CSel.argL = argL;
955   i->ARM64in.CSel.argR = argR;
956   i->ARM64in.CSel.cond = cond;
957   return i;
958}
959ARM64Instr* ARM64Instr_Call ( ARM64CondCode cond, Addr64 target, Int nArgRegs,
960                              RetLoc rloc ) {
961   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
962   i->tag                   = ARM64in_Call;
963   i->ARM64in.Call.cond     = cond;
964   i->ARM64in.Call.target   = target;
965   i->ARM64in.Call.nArgRegs = nArgRegs;
966   i->ARM64in.Call.rloc     = rloc;
967   vassert(is_sane_RetLoc(rloc));
968   return i;
969}
970extern ARM64Instr* ARM64Instr_AddToSP ( Int simm ) {
971   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
972   i->tag                  = ARM64in_AddToSP;
973   i->ARM64in.AddToSP.simm = simm;
974   vassert(-4096 < simm && simm < 4096);
975   vassert(0 == (simm & 0xF));
976   return i;
977}
978extern ARM64Instr* ARM64Instr_FromSP  ( HReg dst ) {
979   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
980   i->tag                = ARM64in_FromSP;
981   i->ARM64in.FromSP.dst = dst;
982   return i;
983}
984ARM64Instr* ARM64Instr_Mul ( HReg dst, HReg argL, HReg argR,
985                             ARM64MulOp op ) {
986   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
987   i->tag              = ARM64in_Mul;
988   i->ARM64in.Mul.dst  = dst;
989   i->ARM64in.Mul.argL = argL;
990   i->ARM64in.Mul.argR = argR;
991   i->ARM64in.Mul.op   = op;
992   return i;
993}
994ARM64Instr* ARM64Instr_LdrEX ( Int szB ) {
995   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
996   i->tag               = ARM64in_LdrEX;
997   i->ARM64in.LdrEX.szB = szB;
998   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
999   return i;
1000}
1001ARM64Instr* ARM64Instr_StrEX ( Int szB ) {
1002   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1003   i->tag               = ARM64in_StrEX;
1004   i->ARM64in.StrEX.szB = szB;
1005   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1006   return i;
1007}
1008ARM64Instr* ARM64Instr_MFence ( void ) {
1009   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1010   i->tag        = ARM64in_MFence;
1011   return i;
1012}
1013ARM64Instr* ARM64Instr_VLdStH ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
1014   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1015   i->tag                   = ARM64in_VLdStH;
1016   i->ARM64in.VLdStH.isLoad = isLoad;
1017   i->ARM64in.VLdStH.hD     = sD;
1018   i->ARM64in.VLdStH.rN     = rN;
1019   i->ARM64in.VLdStH.uimm12 = uimm12;
1020   vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
1021   return i;
1022}
1023ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
1024   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1025   i->tag                   = ARM64in_VLdStS;
1026   i->ARM64in.VLdStS.isLoad = isLoad;
1027   i->ARM64in.VLdStS.sD     = sD;
1028   i->ARM64in.VLdStS.rN     = rN;
1029   i->ARM64in.VLdStS.uimm12 = uimm12;
1030   vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
1031   return i;
1032}
1033ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN, UInt uimm12 ) {
1034   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1035   i->tag                   = ARM64in_VLdStD;
1036   i->ARM64in.VLdStD.isLoad = isLoad;
1037   i->ARM64in.VLdStD.dD     = dD;
1038   i->ARM64in.VLdStD.rN     = rN;
1039   i->ARM64in.VLdStD.uimm12 = uimm12;
1040   vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
1041   return i;
1042}
1043ARM64Instr* ARM64Instr_VLdStQ ( Bool isLoad, HReg rQ, HReg rN ) {
1044   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1045   i->tag                   = ARM64in_VLdStQ;
1046   i->ARM64in.VLdStQ.isLoad = isLoad;
1047   i->ARM64in.VLdStQ.rQ     = rQ;
1048   i->ARM64in.VLdStQ.rN     = rN;
1049   return i;
1050}
1051ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS ) {
1052   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1053   i->tag                 = ARM64in_VCvtI2F;
1054   i->ARM64in.VCvtI2F.how = how;
1055   i->ARM64in.VCvtI2F.rD  = rD;
1056   i->ARM64in.VCvtI2F.rS  = rS;
1057   return i;
1058}
1059ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS,
1060                                 UChar armRM ) {
1061   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1062   i->tag                   = ARM64in_VCvtF2I;
1063   i->ARM64in.VCvtF2I.how   = how;
1064   i->ARM64in.VCvtF2I.rD    = rD;
1065   i->ARM64in.VCvtF2I.rS    = rS;
1066   i->ARM64in.VCvtF2I.armRM = armRM;
1067   vassert(armRM <= 3);
1068   return i;
1069}
1070ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
1071   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1072   i->tag                 = ARM64in_VCvtSD;
1073   i->ARM64in.VCvtSD.sToD = sToD;
1074   i->ARM64in.VCvtSD.dst  = dst;
1075   i->ARM64in.VCvtSD.src  = src;
1076   return i;
1077}
1078ARM64Instr* ARM64Instr_VCvtHS ( Bool hToS, HReg dst, HReg src ) {
1079   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1080   i->tag                 = ARM64in_VCvtHS;
1081   i->ARM64in.VCvtHS.hToS = hToS;
1082   i->ARM64in.VCvtHS.dst  = dst;
1083   i->ARM64in.VCvtHS.src  = src;
1084   return i;
1085}
1086ARM64Instr* ARM64Instr_VCvtHD ( Bool hToD, HReg dst, HReg src ) {
1087   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1088   i->tag                 = ARM64in_VCvtHD;
1089   i->ARM64in.VCvtHD.hToD = hToD;
1090   i->ARM64in.VCvtHD.dst  = dst;
1091   i->ARM64in.VCvtHD.src  = src;
1092   return i;
1093}
1094ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
1095   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1096   i->tag                 = ARM64in_VUnaryD;
1097   i->ARM64in.VUnaryD.op  = op;
1098   i->ARM64in.VUnaryD.dst = dst;
1099   i->ARM64in.VUnaryD.src = src;
1100   return i;
1101}
1102ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
1103   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1104   i->tag                 = ARM64in_VUnaryS;
1105   i->ARM64in.VUnaryS.op  = op;
1106   i->ARM64in.VUnaryS.dst = dst;
1107   i->ARM64in.VUnaryS.src = src;
1108   return i;
1109}
1110ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op,
1111                               HReg dst, HReg argL, HReg argR ) {
1112   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1113   i->tag                = ARM64in_VBinD;
1114   i->ARM64in.VBinD.op   = op;
1115   i->ARM64in.VBinD.dst  = dst;
1116   i->ARM64in.VBinD.argL = argL;
1117   i->ARM64in.VBinD.argR = argR;
1118   return i;
1119}
1120ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op,
1121                               HReg dst, HReg argL, HReg argR ) {
1122   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1123   i->tag                = ARM64in_VBinS;
1124   i->ARM64in.VBinS.op   = op;
1125   i->ARM64in.VBinS.dst  = dst;
1126   i->ARM64in.VBinS.argL = argL;
1127   i->ARM64in.VBinS.argR = argR;
1128   return i;
1129}
1130ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR ) {
1131   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1132   i->tag                = ARM64in_VCmpD;
1133   i->ARM64in.VCmpD.argL = argL;
1134   i->ARM64in.VCmpD.argR = argR;
1135   return i;
1136}
1137ARM64Instr* ARM64Instr_VCmpS ( HReg argL, HReg argR ) {
1138   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1139   i->tag                = ARM64in_VCmpS;
1140   i->ARM64in.VCmpS.argL = argL;
1141   i->ARM64in.VCmpS.argR = argR;
1142   return i;
1143}
1144ARM64Instr* ARM64Instr_VFCSel ( HReg dst, HReg argL, HReg argR,
1145                                ARM64CondCode cond, Bool isD ) {
1146   ARM64Instr* i          = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1147   i->tag                 = ARM64in_VFCSel;
1148   i->ARM64in.VFCSel.dst  = dst;
1149   i->ARM64in.VFCSel.argL = argL;
1150   i->ARM64in.VFCSel.argR = argR;
1151   i->ARM64in.VFCSel.cond = cond;
1152   i->ARM64in.VFCSel.isD  = isD;
1153   return i;
1154}
1155ARM64Instr* ARM64Instr_FPCR ( Bool toFPCR, HReg iReg ) {
1156   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1157   i->tag                 = ARM64in_FPCR;
1158   i->ARM64in.FPCR.toFPCR = toFPCR;
1159   i->ARM64in.FPCR.iReg   = iReg;
1160   return i;
1161}
1162ARM64Instr* ARM64Instr_FPSR ( Bool toFPSR, HReg iReg ) {
1163   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1164   i->tag                 = ARM64in_FPSR;
1165   i->ARM64in.FPSR.toFPSR = toFPSR;
1166   i->ARM64in.FPSR.iReg   = iReg;
1167   return i;
1168}
1169ARM64Instr* ARM64Instr_VBinV ( ARM64VecBinOp op,
1170                               HReg dst, HReg argL, HReg argR ) {
1171   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1172   i->tag                = ARM64in_VBinV;
1173   i->ARM64in.VBinV.op   = op;
1174   i->ARM64in.VBinV.dst  = dst;
1175   i->ARM64in.VBinV.argL = argL;
1176   i->ARM64in.VBinV.argR = argR;
1177   return i;
1178}
1179ARM64Instr* ARM64Instr_VModifyV ( ARM64VecModifyOp op, HReg mod, HReg arg ) {
1180   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1181   i->tag                  = ARM64in_VModifyV;
1182   i->ARM64in.VModifyV.op  = op;
1183   i->ARM64in.VModifyV.mod = mod;
1184   i->ARM64in.VModifyV.arg = arg;
1185   return i;
1186}
1187ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg dst, HReg arg ) {
1188   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1189   i->tag                 = ARM64in_VUnaryV;
1190   i->ARM64in.VUnaryV.op  = op;
1191   i->ARM64in.VUnaryV.dst = dst;
1192   i->ARM64in.VUnaryV.arg = arg;
1193   return i;
1194}
1195ARM64Instr* ARM64Instr_VNarrowV ( ARM64VecNarrowOp op,
1196                                  UInt dszBlg2, HReg dst, HReg src ) {
1197   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1198   i->tag                      = ARM64in_VNarrowV;
1199   i->ARM64in.VNarrowV.op      = op;
1200   i->ARM64in.VNarrowV.dszBlg2 = dszBlg2;
1201   i->ARM64in.VNarrowV.dst     = dst;
1202   i->ARM64in.VNarrowV.src     = src;
1203   vassert(dszBlg2 == 0 || dszBlg2 == 1 || dszBlg2 == 2);
1204   return i;
1205}
1206ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftImmOp op,
1207                                    HReg dst, HReg src, UInt amt ) {
1208   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1209   i->tag                    = ARM64in_VShiftImmV;
1210   i->ARM64in.VShiftImmV.op  = op;
1211   i->ARM64in.VShiftImmV.dst = dst;
1212   i->ARM64in.VShiftImmV.src = src;
1213   i->ARM64in.VShiftImmV.amt = amt;
1214   UInt minSh = 0;
1215   UInt maxSh = 0;
1216   switch (op) {
1217      /* For right shifts, the allowed shift amounts are 1 .. lane_size.
1218         For left shifts,  the allowed shift amounts are 0 .. lane_size-1.
1219      */
1220      case ARM64vecshi_USHR64x2: case ARM64vecshi_SSHR64x2:
1221      case ARM64vecshi_UQSHRN2SD: case ARM64vecshi_SQSHRN2SD:
1222      case ARM64vecshi_SQSHRUN2SD:
1223      case ARM64vecshi_UQRSHRN2SD: case ARM64vecshi_SQRSHRN2SD:
1224      case ARM64vecshi_SQRSHRUN2SD:
1225         minSh = 1; maxSh = 64; break;
1226      case ARM64vecshi_SHL64x2:
1227      case ARM64vecshi_UQSHL64x2: case ARM64vecshi_SQSHL64x2:
1228      case ARM64vecshi_SQSHLU64x2:
1229         minSh = 0; maxSh = 63; break;
1230      case ARM64vecshi_USHR32x4: case ARM64vecshi_SSHR32x4:
1231      case ARM64vecshi_UQSHRN4HS: case ARM64vecshi_SQSHRN4HS:
1232      case ARM64vecshi_SQSHRUN4HS:
1233      case ARM64vecshi_UQRSHRN4HS: case ARM64vecshi_SQRSHRN4HS:
1234      case ARM64vecshi_SQRSHRUN4HS:
1235         minSh = 1; maxSh = 32; break;
1236      case ARM64vecshi_SHL32x4:
1237      case ARM64vecshi_UQSHL32x4: case ARM64vecshi_SQSHL32x4:
1238      case ARM64vecshi_SQSHLU32x4:
1239         minSh = 0; maxSh = 31; break;
1240      case ARM64vecshi_USHR16x8: case ARM64vecshi_SSHR16x8:
1241      case ARM64vecshi_UQSHRN8BH: case ARM64vecshi_SQSHRN8BH:
1242      case ARM64vecshi_SQSHRUN8BH:
1243      case ARM64vecshi_UQRSHRN8BH: case ARM64vecshi_SQRSHRN8BH:
1244      case ARM64vecshi_SQRSHRUN8BH:
1245         minSh = 1; maxSh = 16; break;
1246      case ARM64vecshi_SHL16x8:
1247      case ARM64vecshi_UQSHL16x8: case ARM64vecshi_SQSHL16x8:
1248      case ARM64vecshi_SQSHLU16x8:
1249         minSh = 0; maxSh = 15; break;
1250      case ARM64vecshi_USHR8x16: case ARM64vecshi_SSHR8x16:
1251         minSh = 1; maxSh = 8; break;
1252      case ARM64vecshi_SHL8x16:
1253      case ARM64vecshi_UQSHL8x16: case ARM64vecshi_SQSHL8x16:
1254      case ARM64vecshi_SQSHLU8x16:
1255         minSh = 0; maxSh = 7; break;
1256      default:
1257         vassert(0);
1258   }
1259   vassert(maxSh > 0);
1260   vassert(amt >= minSh && amt <= maxSh);
1261   return i;
1262}
1263ARM64Instr* ARM64Instr_VExtV ( HReg dst, HReg srcLo, HReg srcHi, UInt amtB ) {
1264   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1265   i->tag                 = ARM64in_VExtV;
1266   i->ARM64in.VExtV.dst   = dst;
1267   i->ARM64in.VExtV.srcLo = srcLo;
1268   i->ARM64in.VExtV.srcHi = srcHi;
1269   i->ARM64in.VExtV.amtB  = amtB;
1270   vassert(amtB >= 1 && amtB <= 15);
1271   return i;
1272}
1273ARM64Instr* ARM64Instr_VImmQ (HReg rQ, UShort imm) {
1274   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1275   i->tag               = ARM64in_VImmQ;
1276   i->ARM64in.VImmQ.rQ  = rQ;
1277   i->ARM64in.VImmQ.imm = imm;
1278   /* Check that this is something that can actually be emitted. */
1279   switch (imm) {
1280      case 0x0000: case 0x0001: case 0x0003:
1281      case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
1282         break;
1283      default:
1284         vassert(0);
1285   }
1286   return i;
1287}
1288ARM64Instr* ARM64Instr_VDfromX ( HReg rD, HReg rX ) {
1289   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1290   i->tag                = ARM64in_VDfromX;
1291   i->ARM64in.VDfromX.rD = rD;
1292   i->ARM64in.VDfromX.rX = rX;
1293   return i;
1294}
1295ARM64Instr* ARM64Instr_VQfromX ( HReg rQ, HReg rXlo ) {
1296   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1297   i->tag                  = ARM64in_VQfromX;
1298   i->ARM64in.VQfromX.rQ   = rQ;
1299   i->ARM64in.VQfromX.rXlo = rXlo;
1300   return i;
1301}
1302ARM64Instr* ARM64Instr_VQfromXX ( HReg rQ, HReg rXhi, HReg rXlo ) {
1303   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1304   i->tag                   = ARM64in_VQfromXX;
1305   i->ARM64in.VQfromXX.rQ   = rQ;
1306   i->ARM64in.VQfromXX.rXhi = rXhi;
1307   i->ARM64in.VQfromXX.rXlo = rXlo;
1308   return i;
1309}
1310ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo ) {
1311   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1312   i->tag                    = ARM64in_VXfromQ;
1313   i->ARM64in.VXfromQ.rX     = rX;
1314   i->ARM64in.VXfromQ.rQ     = rQ;
1315   i->ARM64in.VXfromQ.laneNo = laneNo;
1316   vassert(laneNo <= 1);
1317   return i;
1318}
1319ARM64Instr* ARM64Instr_VXfromDorS ( HReg rX, HReg rDorS, Bool fromD ) {
1320   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1321   i->tag                      = ARM64in_VXfromDorS;
1322   i->ARM64in.VXfromDorS.rX    = rX;
1323   i->ARM64in.VXfromDorS.rDorS = rDorS;
1324   i->ARM64in.VXfromDorS.fromD = fromD;
1325   return i;
1326}
1327ARM64Instr* ARM64Instr_VMov ( UInt szB, HReg dst, HReg src ) {
1328   ARM64Instr* i       = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1329   i->tag              = ARM64in_VMov;
1330   i->ARM64in.VMov.szB = szB;
1331   i->ARM64in.VMov.dst = dst;
1332   i->ARM64in.VMov.src = src;
1333   switch (szB) {
1334      case 16:
1335        vassert(hregClass(src) == HRcVec128);
1336        vassert(hregClass(dst) == HRcVec128);
1337        break;
1338      case 8:
1339        vassert(hregClass(src) == HRcFlt64);
1340        vassert(hregClass(dst) == HRcFlt64);
1341        break;
1342      default:
1343        vpanic("ARM64Instr_VMov");
1344   }
1345   return i;
1346}
1347ARM64Instr* ARM64Instr_EvCheck ( ARM64AMode* amCounter,
1348                                 ARM64AMode* amFailAddr ) {
1349   ARM64Instr* i                 = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1350   i->tag                        = ARM64in_EvCheck;
1351   i->ARM64in.EvCheck.amCounter  = amCounter;
1352   i->ARM64in.EvCheck.amFailAddr = amFailAddr;
1353   return i;
1354}
1355ARM64Instr* ARM64Instr_ProfInc ( void ) {
1356   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1357   i->tag        = ARM64in_ProfInc;
1358   return i;
1359}
1360
1361/* ... */
1362
1363void ppARM64Instr ( const ARM64Instr* i ) {
1364   switch (i->tag) {
1365      case ARM64in_Arith:
1366         vex_printf("%s    ", i->ARM64in.Arith.isAdd ? "add" : "sub");
1367         ppHRegARM64(i->ARM64in.Arith.dst);
1368         vex_printf(", ");
1369         ppHRegARM64(i->ARM64in.Arith.argL);
1370         vex_printf(", ");
1371         ppARM64RIA(i->ARM64in.Arith.argR);
1372         return;
1373      case ARM64in_Cmp:
1374         vex_printf("cmp%s ", i->ARM64in.Cmp.is64 ? "   " : "(w)" );
1375         ppHRegARM64(i->ARM64in.Cmp.argL);
1376         vex_printf(", ");
1377         ppARM64RIA(i->ARM64in.Cmp.argR);
1378         return;
1379      case ARM64in_Logic:
1380         vex_printf("%s    ", showARM64LogicOp(i->ARM64in.Logic.op));
1381         ppHRegARM64(i->ARM64in.Logic.dst);
1382         vex_printf(", ");
1383         ppHRegARM64(i->ARM64in.Logic.argL);
1384         vex_printf(", ");
1385         ppARM64RIL(i->ARM64in.Logic.argR);
1386         return;
1387      case ARM64in_Test:
1388         vex_printf("tst    ");
1389         ppHRegARM64(i->ARM64in.Test.argL);
1390         vex_printf(", ");
1391         ppARM64RIL(i->ARM64in.Test.argR);
1392         return;
1393      case ARM64in_Shift:
1394         vex_printf("%s    ", showARM64ShiftOp(i->ARM64in.Shift.op));
1395         ppHRegARM64(i->ARM64in.Shift.dst);
1396         vex_printf(", ");
1397         ppHRegARM64(i->ARM64in.Shift.argL);
1398         vex_printf(", ");
1399         ppARM64RI6(i->ARM64in.Shift.argR);
1400         return;
1401      case ARM64in_Unary:
1402         vex_printf("%s    ", showARM64UnaryOp(i->ARM64in.Unary.op));
1403         ppHRegARM64(i->ARM64in.Unary.dst);
1404         vex_printf(", ");
1405         ppHRegARM64(i->ARM64in.Unary.src);
1406         return;
1407      case ARM64in_MovI:
1408         vex_printf("mov    ");
1409         ppHRegARM64(i->ARM64in.MovI.dst);
1410         vex_printf(", ");
1411         ppHRegARM64(i->ARM64in.MovI.src);
1412         return;
1413      case ARM64in_Imm64:
1414         vex_printf("imm64  ");
1415         ppHRegARM64(i->ARM64in.Imm64.dst);
1416         vex_printf(", 0x%llx", i->ARM64in.Imm64.imm64);
1417         return;
1418      case ARM64in_LdSt64:
1419         if (i->ARM64in.LdSt64.isLoad) {
1420            vex_printf("ldr    ");
1421            ppHRegARM64(i->ARM64in.LdSt64.rD);
1422            vex_printf(", ");
1423            ppARM64AMode(i->ARM64in.LdSt64.amode);
1424         } else {
1425            vex_printf("str    ");
1426            ppARM64AMode(i->ARM64in.LdSt64.amode);
1427            vex_printf(", ");
1428            ppHRegARM64(i->ARM64in.LdSt64.rD);
1429         }
1430         return;
1431      case ARM64in_LdSt32:
1432         if (i->ARM64in.LdSt32.isLoad) {
1433            vex_printf("ldruw  ");
1434            ppHRegARM64(i->ARM64in.LdSt32.rD);
1435            vex_printf(", ");
1436            ppARM64AMode(i->ARM64in.LdSt32.amode);
1437         } else {
1438            vex_printf("strw   ");
1439            ppARM64AMode(i->ARM64in.LdSt32.amode);
1440            vex_printf(", ");
1441            ppHRegARM64(i->ARM64in.LdSt32.rD);
1442         }
1443         return;
1444      case ARM64in_LdSt16:
1445         if (i->ARM64in.LdSt16.isLoad) {
1446            vex_printf("ldruh  ");
1447            ppHRegARM64(i->ARM64in.LdSt16.rD);
1448            vex_printf(", ");
1449            ppARM64AMode(i->ARM64in.LdSt16.amode);
1450         } else {
1451            vex_printf("strh   ");
1452            ppARM64AMode(i->ARM64in.LdSt16.amode);
1453            vex_printf(", ");
1454            ppHRegARM64(i->ARM64in.LdSt16.rD);
1455         }
1456         return;
1457      case ARM64in_LdSt8:
1458         if (i->ARM64in.LdSt8.isLoad) {
1459            vex_printf("ldrub  ");
1460            ppHRegARM64(i->ARM64in.LdSt8.rD);
1461            vex_printf(", ");
1462            ppARM64AMode(i->ARM64in.LdSt8.amode);
1463         } else {
1464            vex_printf("strb   ");
1465            ppARM64AMode(i->ARM64in.LdSt8.amode);
1466            vex_printf(", ");
1467            ppHRegARM64(i->ARM64in.LdSt8.rD);
1468         }
1469         return;
1470      case ARM64in_XDirect:
1471         vex_printf("(xDirect) ");
1472         vex_printf("if (%%pstate.%s) { ",
1473                    showARM64CondCode(i->ARM64in.XDirect.cond));
1474         vex_printf("imm64 x9,0x%llx; ", i->ARM64in.XDirect.dstGA);
1475         vex_printf("str x9,");
1476         ppARM64AMode(i->ARM64in.XDirect.amPC);
1477         vex_printf("; imm64-exactly4 x9,$disp_cp_chain_me_to_%sEP; ",
1478                    i->ARM64in.XDirect.toFastEP ? "fast" : "slow");
1479         vex_printf("blr x9 }");
1480         return;
1481      case ARM64in_XIndir:
1482         vex_printf("(xIndir) ");
1483         vex_printf("if (%%pstate.%s) { ",
1484                    showARM64CondCode(i->ARM64in.XIndir.cond));
1485         vex_printf("str ");
1486         ppHRegARM64(i->ARM64in.XIndir.dstGA);
1487         vex_printf(",");
1488         ppARM64AMode(i->ARM64in.XIndir.amPC);
1489         vex_printf("; imm64 x9,$disp_cp_xindir; ");
1490         vex_printf("br x9 }");
1491         return;
1492      case ARM64in_XAssisted:
1493         vex_printf("(xAssisted) ");
1494         vex_printf("if (%%pstate.%s) { ",
1495                    showARM64CondCode(i->ARM64in.XAssisted.cond));
1496         vex_printf("str ");
1497         ppHRegARM64(i->ARM64in.XAssisted.dstGA);
1498         vex_printf(",");
1499         ppARM64AMode(i->ARM64in.XAssisted.amPC);
1500         vex_printf("; movw x21,$IRJumpKind_to_TRCVAL(%d); ",
1501                    (Int)i->ARM64in.XAssisted.jk);
1502         vex_printf("imm64 x9,$disp_cp_xassisted; ");
1503         vex_printf("br x9 }");
1504         return;
1505      case ARM64in_CSel:
1506         vex_printf("csel   ");
1507         ppHRegARM64(i->ARM64in.CSel.dst);
1508         vex_printf(", ");
1509         ppHRegARM64(i->ARM64in.CSel.argL);
1510         vex_printf(", ");
1511         ppHRegARM64(i->ARM64in.CSel.argR);
1512         vex_printf(", %s", showARM64CondCode(i->ARM64in.CSel.cond));
1513         return;
1514      case ARM64in_Call:
1515         vex_printf("call%s ",
1516                    i->ARM64in.Call.cond==ARM64cc_AL
1517                       ? "  " : showARM64CondCode(i->ARM64in.Call.cond));
1518         vex_printf("0x%llx [nArgRegs=%d, ",
1519                    i->ARM64in.Call.target, i->ARM64in.Call.nArgRegs);
1520         ppRetLoc(i->ARM64in.Call.rloc);
1521         vex_printf("]");
1522         return;
1523      case ARM64in_AddToSP: {
1524         Int simm = i->ARM64in.AddToSP.simm;
1525         vex_printf("%s    xsp, xsp, #%d", simm < 0 ? "sub" : "add",
1526                                           simm < 0 ? -simm : simm);
1527         return;
1528      }
1529      case ARM64in_FromSP:
1530         vex_printf("mov    ");
1531         ppHRegARM64(i->ARM64in.FromSP.dst);
1532         vex_printf(", xsp");
1533         return;
1534      case ARM64in_Mul:
1535         vex_printf("%s  ", showARM64MulOp(i->ARM64in.Mul.op));
1536         ppHRegARM64(i->ARM64in.Mul.dst);
1537         vex_printf(", ");
1538         ppHRegARM64(i->ARM64in.Mul.argL);
1539         vex_printf(", ");
1540         ppHRegARM64(i->ARM64in.Mul.argR);
1541         return;
1542
1543      case ARM64in_LdrEX: {
1544         const HChar* sz = " ";
1545         switch (i->ARM64in.LdrEX.szB) {
1546            case 1: sz = "b"; break;
1547            case 2: sz = "h"; break;
1548            case 4: case 8: break;
1549            default: vassert(0);
1550         }
1551         vex_printf("ldxr%s  %c2, [x4]",
1552                    sz, i->ARM64in.LdrEX.szB == 8 ? 'x' : 'w');
1553         return;
1554      }
1555      case ARM64in_StrEX: {
1556         const HChar* sz = " ";
1557         switch (i->ARM64in.StrEX.szB) {
1558            case 1: sz = "b"; break;
1559            case 2: sz = "h"; break;
1560            case 4: case 8: break;
1561            default: vassert(0);
1562         }
1563         vex_printf("stxr%s  w0, %c2, [x4]",
1564                    sz, i->ARM64in.StrEX.szB == 8 ? 'x' : 'w');
1565         return;
1566      }
1567      case ARM64in_MFence:
1568         vex_printf("(mfence) dsb sy; dmb sy; isb");
1569         return;
1570      case ARM64in_VLdStH:
1571         if (i->ARM64in.VLdStH.isLoad) {
1572            vex_printf("ldr    ");
1573            ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
1574            vex_printf(", %u(", i->ARM64in.VLdStH.uimm12);
1575            ppHRegARM64(i->ARM64in.VLdStH.rN);
1576            vex_printf(")");
1577         } else {
1578            vex_printf("str    ");
1579            vex_printf("%u(", i->ARM64in.VLdStH.uimm12);
1580            ppHRegARM64(i->ARM64in.VLdStH.rN);
1581            vex_printf("), ");
1582            ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
1583         }
1584         return;
1585      case ARM64in_VLdStS:
1586         if (i->ARM64in.VLdStS.isLoad) {
1587            vex_printf("ldr    ");
1588            ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
1589            vex_printf(", %u(", i->ARM64in.VLdStS.uimm12);
1590            ppHRegARM64(i->ARM64in.VLdStS.rN);
1591            vex_printf(")");
1592         } else {
1593            vex_printf("str    ");
1594            vex_printf("%u(", i->ARM64in.VLdStS.uimm12);
1595            ppHRegARM64(i->ARM64in.VLdStS.rN);
1596            vex_printf("), ");
1597            ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
1598         }
1599         return;
1600      case ARM64in_VLdStD:
1601         if (i->ARM64in.VLdStD.isLoad) {
1602            vex_printf("ldr    ");
1603            ppHRegARM64(i->ARM64in.VLdStD.dD);
1604            vex_printf(", %u(", i->ARM64in.VLdStD.uimm12);
1605            ppHRegARM64(i->ARM64in.VLdStD.rN);
1606            vex_printf(")");
1607         } else {
1608            vex_printf("str    ");
1609            vex_printf("%u(", i->ARM64in.VLdStD.uimm12);
1610            ppHRegARM64(i->ARM64in.VLdStD.rN);
1611            vex_printf("), ");
1612            ppHRegARM64(i->ARM64in.VLdStD.dD);
1613         }
1614         return;
1615      case ARM64in_VLdStQ:
1616         if (i->ARM64in.VLdStQ.isLoad)
1617            vex_printf("ld1.2d {");
1618         else
1619            vex_printf("st1.2d {");
1620         ppHRegARM64(i->ARM64in.VLdStQ.rQ);
1621         vex_printf("}, [");
1622         ppHRegARM64(i->ARM64in.VLdStQ.rN);
1623         vex_printf("]");
1624         return;
1625      case ARM64in_VCvtI2F: {
1626         HChar syn  = '?';
1627         UInt  fszB = 0;
1628         UInt  iszB = 0;
1629         characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtI2F.how);
1630         vex_printf("%ccvtf  ", syn);
1631         ppHRegARM64(i->ARM64in.VCvtI2F.rD);
1632         vex_printf("(%c-reg), ", fszB == 4 ? 'S' : 'D');
1633         ppHRegARM64(i->ARM64in.VCvtI2F.rS);
1634         vex_printf("(%c-reg)", iszB == 4 ? 'W' : 'X');
1635         return;
1636      }
1637      case ARM64in_VCvtF2I: {
1638         HChar syn  = '?';
1639         UInt  fszB = 0;
1640         UInt  iszB = 0;
1641         HChar rmo  = '?';
1642         characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtF2I.how);
1643         UChar armRM = i->ARM64in.VCvtF2I.armRM;
1644         if (armRM < 4) rmo = "npmz"[armRM];
1645         vex_printf("fcvt%c%c ", rmo, syn);
1646         ppHRegARM64(i->ARM64in.VCvtF2I.rD);
1647         vex_printf("(%c-reg), ", iszB == 4 ? 'W' : 'X');
1648         ppHRegARM64(i->ARM64in.VCvtF2I.rS);
1649         vex_printf("(%c-reg)", fszB == 4 ? 'S' : 'D');
1650         return;
1651      }
1652      case ARM64in_VCvtSD:
1653         vex_printf("fcvt%s ", i->ARM64in.VCvtSD.sToD ? "s2d" : "d2s");
1654         if (i->ARM64in.VCvtSD.sToD) {
1655            ppHRegARM64(i->ARM64in.VCvtSD.dst);
1656            vex_printf(", ");
1657            ppHRegARM64asSreg(i->ARM64in.VCvtSD.src);
1658         } else {
1659            ppHRegARM64asSreg(i->ARM64in.VCvtSD.dst);
1660            vex_printf(", ");
1661            ppHRegARM64(i->ARM64in.VCvtSD.src);
1662         }
1663         return;
1664      case ARM64in_VCvtHS:
1665         vex_printf("fcvt%s ", i->ARM64in.VCvtHS.hToS ? "h2s" : "s2h");
1666         if (i->ARM64in.VCvtHS.hToS) {
1667            ppHRegARM64asSreg(i->ARM64in.VCvtHS.dst);
1668            vex_printf(", ");
1669            ppHRegARM64asHreg(i->ARM64in.VCvtHS.src);
1670         } else {
1671            ppHRegARM64asHreg(i->ARM64in.VCvtHS.dst);
1672            vex_printf(", ");
1673            ppHRegARM64asSreg(i->ARM64in.VCvtHS.src);
1674         }
1675         return;
1676      case ARM64in_VCvtHD:
1677         vex_printf("fcvt%s ", i->ARM64in.VCvtHD.hToD ? "h2d" : "d2h");
1678         if (i->ARM64in.VCvtHD.hToD) {
1679            ppHRegARM64(i->ARM64in.VCvtHD.dst);
1680            vex_printf(", ");
1681            ppHRegARM64asHreg(i->ARM64in.VCvtHD.src);
1682         } else {
1683            ppHRegARM64asHreg(i->ARM64in.VCvtHD.dst);
1684            vex_printf(", ");
1685            ppHRegARM64(i->ARM64in.VCvtHD.src);
1686         }
1687         return;
1688      case ARM64in_VUnaryD:
1689         vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryD.op));
1690         ppHRegARM64(i->ARM64in.VUnaryD.dst);
1691         vex_printf(", ");
1692         ppHRegARM64(i->ARM64in.VUnaryD.src);
1693         return;
1694      case ARM64in_VUnaryS:
1695         vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryS.op));
1696         ppHRegARM64asSreg(i->ARM64in.VUnaryS.dst);
1697         vex_printf(", ");
1698         ppHRegARM64asSreg(i->ARM64in.VUnaryS.src);
1699         return;
1700      case ARM64in_VBinD:
1701         vex_printf("f%s   ", showARM64FpBinOp(i->ARM64in.VBinD.op));
1702         ppHRegARM64(i->ARM64in.VBinD.dst);
1703         vex_printf(", ");
1704         ppHRegARM64(i->ARM64in.VBinD.argL);
1705         vex_printf(", ");
1706         ppHRegARM64(i->ARM64in.VBinD.argR);
1707         return;
1708      case ARM64in_VBinS:
1709         vex_printf("f%s   ", showARM64FpBinOp(i->ARM64in.VBinS.op));
1710         ppHRegARM64asSreg(i->ARM64in.VBinS.dst);
1711         vex_printf(", ");
1712         ppHRegARM64asSreg(i->ARM64in.VBinS.argL);
1713         vex_printf(", ");
1714         ppHRegARM64asSreg(i->ARM64in.VBinS.argR);
1715         return;
1716      case ARM64in_VCmpD:
1717         vex_printf("fcmp   ");
1718         ppHRegARM64(i->ARM64in.VCmpD.argL);
1719         vex_printf(", ");
1720         ppHRegARM64(i->ARM64in.VCmpD.argR);
1721         return;
1722      case ARM64in_VCmpS:
1723         vex_printf("fcmp   ");
1724         ppHRegARM64asSreg(i->ARM64in.VCmpS.argL);
1725         vex_printf(", ");
1726         ppHRegARM64asSreg(i->ARM64in.VCmpS.argR);
1727         return;
1728      case ARM64in_VFCSel: {
1729         void (*ppHRegARM64fp)(HReg)
1730            = (i->ARM64in.VFCSel.isD ? ppHRegARM64 : ppHRegARM64asSreg);
1731         vex_printf("fcsel  ");
1732         ppHRegARM64fp(i->ARM64in.VFCSel.dst);
1733         vex_printf(", ");
1734         ppHRegARM64fp(i->ARM64in.VFCSel.argL);
1735         vex_printf(", ");
1736         ppHRegARM64fp(i->ARM64in.VFCSel.argR);
1737         vex_printf(", %s", showARM64CondCode(i->ARM64in.VFCSel.cond));
1738         return;
1739      }
1740      case ARM64in_FPCR:
1741         if (i->ARM64in.FPCR.toFPCR) {
1742            vex_printf("msr    fpcr, ");
1743            ppHRegARM64(i->ARM64in.FPCR.iReg);
1744         } else {
1745            vex_printf("mrs    ");
1746            ppHRegARM64(i->ARM64in.FPCR.iReg);
1747            vex_printf(", fpcr");
1748         }
1749         return;
1750      case ARM64in_FPSR:
1751         if (i->ARM64in.FPSR.toFPSR) {
1752            vex_printf("msr    fpsr, ");
1753            ppHRegARM64(i->ARM64in.FPSR.iReg);
1754         } else {
1755            vex_printf("mrs    ");
1756            ppHRegARM64(i->ARM64in.FPSR.iReg);
1757            vex_printf(", fpsr");
1758         }
1759         return;
1760      case ARM64in_VBinV: {
1761         const HChar* nm = "??";
1762         const HChar* ar = "??";
1763         showARM64VecBinOp(&nm, &ar, i->ARM64in.VBinV.op);
1764         vex_printf("%s ", nm);
1765         ppHRegARM64(i->ARM64in.VBinV.dst);
1766         vex_printf(".%s, ", ar);
1767         ppHRegARM64(i->ARM64in.VBinV.argL);
1768         vex_printf(".%s, ", ar);
1769         ppHRegARM64(i->ARM64in.VBinV.argR);
1770         vex_printf(".%s", ar);
1771         return;
1772      }
1773      case ARM64in_VModifyV: {
1774         const HChar* nm = "??";
1775         const HChar* ar = "??";
1776         showARM64VecModifyOp(&nm, &ar, i->ARM64in.VModifyV.op);
1777         vex_printf("%s ", nm);
1778         ppHRegARM64(i->ARM64in.VModifyV.mod);
1779         vex_printf(".%s, ", ar);
1780         ppHRegARM64(i->ARM64in.VModifyV.arg);
1781         vex_printf(".%s", ar);
1782         return;
1783      }
1784      case ARM64in_VUnaryV: {
1785         const HChar* nm = "??";
1786         const HChar* ar = "??";
1787         showARM64VecUnaryOp(&nm, &ar, i->ARM64in.VUnaryV.op);
1788         vex_printf("%s  ", nm);
1789         ppHRegARM64(i->ARM64in.VUnaryV.dst);
1790         vex_printf(".%s, ", ar);
1791         ppHRegARM64(i->ARM64in.VUnaryV.arg);
1792         vex_printf(".%s", ar);
1793         return;
1794      }
1795      case ARM64in_VNarrowV: {
1796         UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
1797         const HChar* darr[3] = { "8b", "4h", "2s" };
1798         const HChar* sarr[3] = { "8h", "4s", "2d" };
1799         const HChar* nm = showARM64VecNarrowOp(i->ARM64in.VNarrowV.op);
1800         vex_printf("%s ", nm);
1801         ppHRegARM64(i->ARM64in.VNarrowV.dst);
1802         vex_printf(".%s, ", dszBlg2 < 3 ? darr[dszBlg2] : "??");
1803         ppHRegARM64(i->ARM64in.VNarrowV.src);
1804         vex_printf(".%s", dszBlg2 < 3 ? sarr[dszBlg2] : "??");
1805         return;
1806      }
1807      case ARM64in_VShiftImmV: {
1808         const HChar* nm = "??";
1809         const HChar* ar = "??";
1810         showARM64VecShiftImmOp(&nm, &ar, i->ARM64in.VShiftImmV.op);
1811         vex_printf("%s ", nm);
1812         ppHRegARM64(i->ARM64in.VShiftImmV.dst);
1813         vex_printf(".%s, ", ar);
1814         ppHRegARM64(i->ARM64in.VShiftImmV.src);
1815         vex_printf(".%s, #%u", ar, i->ARM64in.VShiftImmV.amt);
1816         return;
1817      }
1818      case ARM64in_VExtV: {
1819         vex_printf("ext    ");
1820         ppHRegARM64(i->ARM64in.VExtV.dst);
1821         vex_printf(".16b, ");
1822         ppHRegARM64(i->ARM64in.VExtV.srcLo);
1823         vex_printf(".16b, ");
1824         ppHRegARM64(i->ARM64in.VExtV.srcHi);
1825         vex_printf(".16b, #%u", i->ARM64in.VExtV.amtB);
1826         return;
1827      }
1828      case ARM64in_VImmQ:
1829         vex_printf("qimm   ");
1830         ppHRegARM64(i->ARM64in.VImmQ.rQ);
1831         vex_printf(", Bits16toBytes16(0x%x)", (UInt)i->ARM64in.VImmQ.imm);
1832         return;
1833      case ARM64in_VDfromX:
1834         vex_printf("fmov   ");
1835         ppHRegARM64(i->ARM64in.VDfromX.rD);
1836         vex_printf(", ");
1837         ppHRegARM64(i->ARM64in.VDfromX.rX);
1838         return;
1839      case ARM64in_VQfromX:
1840         vex_printf("fmov   ");
1841         ppHRegARM64(i->ARM64in.VQfromX.rQ);
1842         vex_printf(".d[0], ");
1843         ppHRegARM64(i->ARM64in.VQfromX.rXlo);
1844         return;
1845      case ARM64in_VQfromXX:
1846         vex_printf("qFromXX ");
1847         ppHRegARM64(i->ARM64in.VQfromXX.rQ);
1848         vex_printf(", ");
1849         ppHRegARM64(i->ARM64in.VQfromXX.rXhi);
1850         vex_printf(", ");
1851         ppHRegARM64(i->ARM64in.VQfromXX.rXlo);
1852         return;
1853      case ARM64in_VXfromQ:
1854         vex_printf("fmov   ");
1855         ppHRegARM64(i->ARM64in.VXfromQ.rX);
1856         vex_printf(", ");
1857         ppHRegARM64(i->ARM64in.VXfromQ.rQ);
1858         vex_printf(".d[%u]", i->ARM64in.VXfromQ.laneNo);
1859         return;
1860      case ARM64in_VXfromDorS:
1861         vex_printf("fmov   ");
1862         ppHRegARM64(i->ARM64in.VXfromDorS.rX);
1863         vex_printf("(%c-reg), ", i->ARM64in.VXfromDorS.fromD ? 'X':'W');
1864         ppHRegARM64(i->ARM64in.VXfromDorS.rDorS);
1865         vex_printf("(%c-reg)", i->ARM64in.VXfromDorS.fromD ? 'D' : 'S');
1866         return;
1867      case ARM64in_VMov: {
1868         UChar aux = '?';
1869         switch (i->ARM64in.VMov.szB) {
1870            case 16: aux = 'q'; break;
1871            case 8:  aux = 'd'; break;
1872            case 4:  aux = 's'; break;
1873            default: break;
1874         }
1875         vex_printf("mov(%c) ", aux);
1876         ppHRegARM64(i->ARM64in.VMov.dst);
1877         vex_printf(", ");
1878         ppHRegARM64(i->ARM64in.VMov.src);
1879         return;
1880      }
1881      case ARM64in_EvCheck:
1882         vex_printf("(evCheck) ldr w9,");
1883         ppARM64AMode(i->ARM64in.EvCheck.amCounter);
1884         vex_printf("; subs w9,w9,$1; str w9,");
1885         ppARM64AMode(i->ARM64in.EvCheck.amCounter);
1886         vex_printf("; bpl nofail; ldr x9,");
1887         ppARM64AMode(i->ARM64in.EvCheck.amFailAddr);
1888         vex_printf("; br x9; nofail:");
1889         return;
1890      case ARM64in_ProfInc:
1891         vex_printf("(profInc) imm64-fixed4 x9,$NotKnownYet; "
1892                    "ldr x8,[x9]; add x8,x8,#1, str x8,[x9]");
1893         return;
1894      default:
1895         vex_printf("ppARM64Instr: unhandled case (tag %d)", (Int)i->tag);
1896         vpanic("ppARM64Instr(1)");
1897         return;
1898   }
1899}
1900
1901
1902/* --------- Helpers for register allocation. --------- */
1903
1904void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
1905{
1906   vassert(mode64 == True);
1907   initHRegUsage(u);
1908   switch (i->tag) {
1909      case ARM64in_Arith:
1910         addHRegUse(u, HRmWrite, i->ARM64in.Arith.dst);
1911         addHRegUse(u, HRmRead, i->ARM64in.Arith.argL);
1912         addRegUsage_ARM64RIA(u, i->ARM64in.Arith.argR);
1913         return;
1914      case ARM64in_Cmp:
1915         addHRegUse(u, HRmRead, i->ARM64in.Cmp.argL);
1916         addRegUsage_ARM64RIA(u, i->ARM64in.Cmp.argR);
1917         return;
1918      case ARM64in_Logic:
1919         addHRegUse(u, HRmWrite, i->ARM64in.Logic.dst);
1920         addHRegUse(u, HRmRead, i->ARM64in.Logic.argL);
1921         addRegUsage_ARM64RIL(u, i->ARM64in.Logic.argR);
1922         return;
1923      case ARM64in_Test:
1924         addHRegUse(u, HRmRead, i->ARM64in.Test.argL);
1925         addRegUsage_ARM64RIL(u, i->ARM64in.Test.argR);
1926         return;
1927      case ARM64in_Shift:
1928         addHRegUse(u, HRmWrite, i->ARM64in.Shift.dst);
1929         addHRegUse(u, HRmRead, i->ARM64in.Shift.argL);
1930         addRegUsage_ARM64RI6(u, i->ARM64in.Shift.argR);
1931         return;
1932      case ARM64in_Unary:
1933         addHRegUse(u, HRmWrite, i->ARM64in.Unary.dst);
1934         addHRegUse(u, HRmRead, i->ARM64in.Unary.src);
1935         return;
1936      case ARM64in_MovI:
1937         addHRegUse(u, HRmWrite, i->ARM64in.MovI.dst);
1938         addHRegUse(u, HRmRead,  i->ARM64in.MovI.src);
1939         return;
1940      case ARM64in_Imm64:
1941         addHRegUse(u, HRmWrite, i->ARM64in.Imm64.dst);
1942         return;
1943      case ARM64in_LdSt64:
1944         addRegUsage_ARM64AMode(u, i->ARM64in.LdSt64.amode);
1945         if (i->ARM64in.LdSt64.isLoad) {
1946            addHRegUse(u, HRmWrite, i->ARM64in.LdSt64.rD);
1947         } else {
1948            addHRegUse(u, HRmRead, i->ARM64in.LdSt64.rD);
1949         }
1950         return;
1951      case ARM64in_LdSt32:
1952         addRegUsage_ARM64AMode(u, i->ARM64in.LdSt32.amode);
1953         if (i->ARM64in.LdSt32.isLoad) {
1954            addHRegUse(u, HRmWrite, i->ARM64in.LdSt32.rD);
1955         } else {
1956            addHRegUse(u, HRmRead, i->ARM64in.LdSt32.rD);
1957         }
1958         return;
1959      case ARM64in_LdSt16:
1960         addRegUsage_ARM64AMode(u, i->ARM64in.LdSt16.amode);
1961         if (i->ARM64in.LdSt16.isLoad) {
1962            addHRegUse(u, HRmWrite, i->ARM64in.LdSt16.rD);
1963         } else {
1964            addHRegUse(u, HRmRead, i->ARM64in.LdSt16.rD);
1965         }
1966         return;
1967      case ARM64in_LdSt8:
1968         addRegUsage_ARM64AMode(u, i->ARM64in.LdSt8.amode);
1969         if (i->ARM64in.LdSt8.isLoad) {
1970            addHRegUse(u, HRmWrite, i->ARM64in.LdSt8.rD);
1971         } else {
1972            addHRegUse(u, HRmRead, i->ARM64in.LdSt8.rD);
1973         }
1974         return;
1975      /* XDirect/XIndir/XAssisted are also a bit subtle.  They
1976         conditionally exit the block.  Hence we only need to list (1)
1977         the registers that they read, and (2) the registers that they
1978         write in the case where the block is not exited.  (2) is
1979         empty, hence only (1) is relevant here. */
1980      case ARM64in_XDirect:
1981         addRegUsage_ARM64AMode(u, i->ARM64in.XDirect.amPC);
1982         return;
1983      case ARM64in_XIndir:
1984         addHRegUse(u, HRmRead, i->ARM64in.XIndir.dstGA);
1985         addRegUsage_ARM64AMode(u, i->ARM64in.XIndir.amPC);
1986         return;
1987      case ARM64in_XAssisted:
1988         addHRegUse(u, HRmRead, i->ARM64in.XAssisted.dstGA);
1989         addRegUsage_ARM64AMode(u, i->ARM64in.XAssisted.amPC);
1990         return;
1991      case ARM64in_CSel:
1992         addHRegUse(u, HRmWrite, i->ARM64in.CSel.dst);
1993         addHRegUse(u, HRmRead,  i->ARM64in.CSel.argL);
1994         addHRegUse(u, HRmRead,  i->ARM64in.CSel.argR);
1995         return;
1996      case ARM64in_Call:
1997         /* logic and comments copied/modified from x86 back end */
1998         /* This is a bit subtle. */
1999         /* First off, claim it trashes all the caller-saved regs
2000            which fall within the register allocator's jurisdiction.
2001            These I believe to be x0 to x7 and the 128-bit vector
2002            registers in use, q16 .. q20. */
2003         addHRegUse(u, HRmWrite, hregARM64_X0());
2004         addHRegUse(u, HRmWrite, hregARM64_X1());
2005         addHRegUse(u, HRmWrite, hregARM64_X2());
2006         addHRegUse(u, HRmWrite, hregARM64_X3());
2007         addHRegUse(u, HRmWrite, hregARM64_X4());
2008         addHRegUse(u, HRmWrite, hregARM64_X5());
2009         addHRegUse(u, HRmWrite, hregARM64_X6());
2010         addHRegUse(u, HRmWrite, hregARM64_X7());
2011         addHRegUse(u, HRmWrite, hregARM64_Q16());
2012         addHRegUse(u, HRmWrite, hregARM64_Q17());
2013         addHRegUse(u, HRmWrite, hregARM64_Q18());
2014         addHRegUse(u, HRmWrite, hregARM64_Q19());
2015         addHRegUse(u, HRmWrite, hregARM64_Q20());
2016         /* Now we have to state any parameter-carrying registers
2017            which might be read.  This depends on nArgRegs. */
2018            switch (i->ARM64in.Call.nArgRegs) {
2019            case 8: addHRegUse(u, HRmRead, hregARM64_X7()); /*fallthru*/
2020            case 7: addHRegUse(u, HRmRead, hregARM64_X6()); /*fallthru*/
2021            case 6: addHRegUse(u, HRmRead, hregARM64_X5()); /*fallthru*/
2022            case 5: addHRegUse(u, HRmRead, hregARM64_X4()); /*fallthru*/
2023            case 4: addHRegUse(u, HRmRead, hregARM64_X3()); /*fallthru*/
2024            case 3: addHRegUse(u, HRmRead, hregARM64_X2()); /*fallthru*/
2025            case 2: addHRegUse(u, HRmRead, hregARM64_X1()); /*fallthru*/
2026            case 1: addHRegUse(u, HRmRead, hregARM64_X0()); break;
2027            case 0: break;
2028            default: vpanic("getRegUsage_ARM64:Call:regparms");
2029         }
2030         /* Finally, there is the issue that the insn trashes a
2031            register because the literal target address has to be
2032            loaded into a register.  However, we reserve x9 for that
2033            purpose so there's no further complexity here.  Stating x9
2034            as trashed is pointless since it's not under the control
2035            of the allocator, but what the hell. */
2036         addHRegUse(u, HRmWrite, hregARM64_X9());
2037         return;
2038      case ARM64in_AddToSP:
2039         /* Only changes SP, but regalloc doesn't control that, hence
2040            we don't care. */
2041         return;
2042      case ARM64in_FromSP:
2043         addHRegUse(u, HRmWrite, i->ARM64in.FromSP.dst);
2044         return;
2045      case ARM64in_Mul:
2046         addHRegUse(u, HRmWrite, i->ARM64in.Mul.dst);
2047         addHRegUse(u, HRmRead,  i->ARM64in.Mul.argL);
2048         addHRegUse(u, HRmRead,  i->ARM64in.Mul.argR);
2049         return;
2050      case ARM64in_LdrEX:
2051         addHRegUse(u, HRmRead, hregARM64_X4());
2052         addHRegUse(u, HRmWrite, hregARM64_X2());
2053         return;
2054      case ARM64in_StrEX:
2055         addHRegUse(u, HRmRead, hregARM64_X4());
2056         addHRegUse(u, HRmWrite, hregARM64_X0());
2057         addHRegUse(u, HRmRead, hregARM64_X2());
2058         return;
2059      case ARM64in_MFence:
2060         return;
2061      case ARM64in_VLdStH:
2062         addHRegUse(u, HRmRead, i->ARM64in.VLdStH.rN);
2063         if (i->ARM64in.VLdStH.isLoad) {
2064            addHRegUse(u, HRmWrite, i->ARM64in.VLdStH.hD);
2065         } else {
2066            addHRegUse(u, HRmRead, i->ARM64in.VLdStH.hD);
2067         }
2068         return;
2069      case ARM64in_VLdStS:
2070         addHRegUse(u, HRmRead, i->ARM64in.VLdStS.rN);
2071         if (i->ARM64in.VLdStS.isLoad) {
2072            addHRegUse(u, HRmWrite, i->ARM64in.VLdStS.sD);
2073         } else {
2074            addHRegUse(u, HRmRead, i->ARM64in.VLdStS.sD);
2075         }
2076         return;
2077      case ARM64in_VLdStD:
2078         addHRegUse(u, HRmRead, i->ARM64in.VLdStD.rN);
2079         if (i->ARM64in.VLdStD.isLoad) {
2080            addHRegUse(u, HRmWrite, i->ARM64in.VLdStD.dD);
2081         } else {
2082            addHRegUse(u, HRmRead, i->ARM64in.VLdStD.dD);
2083         }
2084         return;
2085      case ARM64in_VLdStQ:
2086         addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rN);
2087         if (i->ARM64in.VLdStQ.isLoad)
2088            addHRegUse(u, HRmWrite, i->ARM64in.VLdStQ.rQ);
2089         else
2090            addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rQ);
2091         return;
2092      case ARM64in_VCvtI2F:
2093         addHRegUse(u, HRmRead, i->ARM64in.VCvtI2F.rS);
2094         addHRegUse(u, HRmWrite, i->ARM64in.VCvtI2F.rD);
2095         return;
2096      case ARM64in_VCvtF2I:
2097         addHRegUse(u, HRmRead, i->ARM64in.VCvtF2I.rS);
2098         addHRegUse(u, HRmWrite, i->ARM64in.VCvtF2I.rD);
2099         return;
2100      case ARM64in_VCvtSD:
2101         addHRegUse(u, HRmWrite, i->ARM64in.VCvtSD.dst);
2102         addHRegUse(u, HRmRead,  i->ARM64in.VCvtSD.src);
2103         return;
2104      case ARM64in_VCvtHS:
2105         addHRegUse(u, HRmWrite, i->ARM64in.VCvtHS.dst);
2106         addHRegUse(u, HRmRead,  i->ARM64in.VCvtHS.src);
2107         return;
2108      case ARM64in_VCvtHD:
2109         addHRegUse(u, HRmWrite, i->ARM64in.VCvtHD.dst);
2110         addHRegUse(u, HRmRead,  i->ARM64in.VCvtHD.src);
2111         return;
2112      case ARM64in_VUnaryD:
2113         addHRegUse(u, HRmWrite, i->ARM64in.VUnaryD.dst);
2114         addHRegUse(u, HRmRead, i->ARM64in.VUnaryD.src);
2115         return;
2116      case ARM64in_VUnaryS:
2117         addHRegUse(u, HRmWrite, i->ARM64in.VUnaryS.dst);
2118         addHRegUse(u, HRmRead, i->ARM64in.VUnaryS.src);
2119         return;
2120      case ARM64in_VBinD:
2121         addHRegUse(u, HRmWrite, i->ARM64in.VBinD.dst);
2122         addHRegUse(u, HRmRead, i->ARM64in.VBinD.argL);
2123         addHRegUse(u, HRmRead, i->ARM64in.VBinD.argR);
2124         return;
2125      case ARM64in_VBinS:
2126         addHRegUse(u, HRmWrite, i->ARM64in.VBinS.dst);
2127         addHRegUse(u, HRmRead, i->ARM64in.VBinS.argL);
2128         addHRegUse(u, HRmRead, i->ARM64in.VBinS.argR);
2129         return;
2130      case ARM64in_VCmpD:
2131         addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argL);
2132         addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argR);
2133         return;
2134      case ARM64in_VCmpS:
2135         addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argL);
2136         addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argR);
2137         return;
2138      case ARM64in_VFCSel:
2139         addHRegUse(u, HRmRead, i->ARM64in.VFCSel.argL);
2140         addHRegUse(u, HRmRead, i->ARM64in.VFCSel.argR);
2141         addHRegUse(u, HRmWrite, i->ARM64in.VFCSel.dst);
2142         return;
2143      case ARM64in_FPCR:
2144         if (i->ARM64in.FPCR.toFPCR)
2145            addHRegUse(u, HRmRead, i->ARM64in.FPCR.iReg);
2146         else
2147            addHRegUse(u, HRmWrite, i->ARM64in.FPCR.iReg);
2148         return;
2149      case ARM64in_FPSR:
2150         if (i->ARM64in.FPSR.toFPSR)
2151            addHRegUse(u, HRmRead, i->ARM64in.FPSR.iReg);
2152         else
2153            addHRegUse(u, HRmWrite, i->ARM64in.FPSR.iReg);
2154         return;
2155      case ARM64in_VBinV:
2156         addHRegUse(u, HRmWrite, i->ARM64in.VBinV.dst);
2157         addHRegUse(u, HRmRead, i->ARM64in.VBinV.argL);
2158         addHRegUse(u, HRmRead, i->ARM64in.VBinV.argR);
2159         return;
2160      case ARM64in_VModifyV:
2161         addHRegUse(u, HRmWrite, i->ARM64in.VModifyV.mod);
2162         addHRegUse(u, HRmRead, i->ARM64in.VModifyV.mod);
2163         addHRegUse(u, HRmRead, i->ARM64in.VModifyV.arg);
2164         return;
2165      case ARM64in_VUnaryV:
2166         addHRegUse(u, HRmWrite, i->ARM64in.VUnaryV.dst);
2167         addHRegUse(u, HRmRead, i->ARM64in.VUnaryV.arg);
2168         return;
2169      case ARM64in_VNarrowV:
2170         addHRegUse(u, HRmWrite, i->ARM64in.VNarrowV.dst);
2171         addHRegUse(u, HRmRead, i->ARM64in.VNarrowV.src);
2172         return;
2173      case ARM64in_VShiftImmV:
2174         addHRegUse(u, HRmWrite, i->ARM64in.VShiftImmV.dst);
2175         addHRegUse(u, HRmRead, i->ARM64in.VShiftImmV.src);
2176         return;
2177      case ARM64in_VExtV:
2178         addHRegUse(u, HRmWrite, i->ARM64in.VExtV.dst);
2179         addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcLo);
2180         addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcHi);
2181         return;
2182      case ARM64in_VImmQ:
2183         addHRegUse(u, HRmWrite, i->ARM64in.VImmQ.rQ);
2184         return;
2185      case ARM64in_VDfromX:
2186         addHRegUse(u, HRmWrite, i->ARM64in.VDfromX.rD);
2187         addHRegUse(u, HRmRead,  i->ARM64in.VDfromX.rX);
2188         return;
2189      case ARM64in_VQfromX:
2190         addHRegUse(u, HRmWrite, i->ARM64in.VQfromX.rQ);
2191         addHRegUse(u, HRmRead,  i->ARM64in.VQfromX.rXlo);
2192         return;
2193      case ARM64in_VQfromXX:
2194         addHRegUse(u, HRmWrite, i->ARM64in.VQfromXX.rQ);
2195         addHRegUse(u, HRmRead,  i->ARM64in.VQfromXX.rXhi);
2196         addHRegUse(u, HRmRead,  i->ARM64in.VQfromXX.rXlo);
2197         return;
2198      case ARM64in_VXfromQ:
2199         addHRegUse(u, HRmWrite, i->ARM64in.VXfromQ.rX);
2200         addHRegUse(u, HRmRead,  i->ARM64in.VXfromQ.rQ);
2201         return;
2202      case ARM64in_VXfromDorS:
2203         addHRegUse(u, HRmWrite, i->ARM64in.VXfromDorS.rX);
2204         addHRegUse(u, HRmRead,  i->ARM64in.VXfromDorS.rDorS);
2205         return;
2206      case ARM64in_VMov:
2207         addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst);
2208         addHRegUse(u, HRmRead,  i->ARM64in.VMov.src);
2209         return;
2210      case ARM64in_EvCheck:
2211         /* We expect both amodes only to mention x21, so this is in
2212            fact pointless, since x21 isn't allocatable, but
2213            anyway.. */
2214         addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amCounter);
2215         addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amFailAddr);
2216         addHRegUse(u, HRmWrite, hregARM64_X9()); /* also unavail to RA */
2217         return;
2218      case ARM64in_ProfInc:
2219         /* Again, pointless to actually state these since neither
2220            is available to RA. */
2221         addHRegUse(u, HRmWrite, hregARM64_X9()); /* unavail to RA */
2222         addHRegUse(u, HRmWrite, hregARM64_X8()); /* unavail to RA */
2223         return;
2224      default:
2225         ppARM64Instr(i);
2226         vpanic("getRegUsage_ARM64Instr");
2227   }
2228}
2229
2230
2231void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
2232{
2233   vassert(mode64 == True);
2234   switch (i->tag) {
2235      case ARM64in_Arith:
2236         i->ARM64in.Arith.dst = lookupHRegRemap(m, i->ARM64in.Arith.dst);
2237         i->ARM64in.Arith.argL = lookupHRegRemap(m, i->ARM64in.Arith.argL);
2238         mapRegs_ARM64RIA(m, i->ARM64in.Arith.argR);
2239         return;
2240      case ARM64in_Cmp:
2241         i->ARM64in.Cmp.argL = lookupHRegRemap(m, i->ARM64in.Cmp.argL);
2242         mapRegs_ARM64RIA(m, i->ARM64in.Cmp.argR);
2243         return;
2244      case ARM64in_Logic:
2245         i->ARM64in.Logic.dst = lookupHRegRemap(m, i->ARM64in.Logic.dst);
2246         i->ARM64in.Logic.argL = lookupHRegRemap(m, i->ARM64in.Logic.argL);
2247         mapRegs_ARM64RIL(m, i->ARM64in.Logic.argR);
2248         return;
2249      case ARM64in_Test:
2250         i->ARM64in.Test.argL = lookupHRegRemap(m, i->ARM64in.Test.argL);
2251         mapRegs_ARM64RIL(m, i->ARM64in.Logic.argR);
2252         return;
2253      case ARM64in_Shift:
2254         i->ARM64in.Shift.dst = lookupHRegRemap(m, i->ARM64in.Shift.dst);
2255         i->ARM64in.Shift.argL = lookupHRegRemap(m, i->ARM64in.Shift.argL);
2256         mapRegs_ARM64RI6(m, i->ARM64in.Shift.argR);
2257         return;
2258      case ARM64in_Unary:
2259         i->ARM64in.Unary.dst = lookupHRegRemap(m, i->ARM64in.Unary.dst);
2260         i->ARM64in.Unary.src = lookupHRegRemap(m, i->ARM64in.Unary.src);
2261         return;
2262      case ARM64in_MovI:
2263         i->ARM64in.MovI.dst = lookupHRegRemap(m, i->ARM64in.MovI.dst);
2264         i->ARM64in.MovI.src = lookupHRegRemap(m, i->ARM64in.MovI.src);
2265         return;
2266      case ARM64in_Imm64:
2267         i->ARM64in.Imm64.dst = lookupHRegRemap(m, i->ARM64in.Imm64.dst);
2268         return;
2269      case ARM64in_LdSt64:
2270         i->ARM64in.LdSt64.rD = lookupHRegRemap(m, i->ARM64in.LdSt64.rD);
2271         mapRegs_ARM64AMode(m, i->ARM64in.LdSt64.amode);
2272         return;
2273      case ARM64in_LdSt32:
2274         i->ARM64in.LdSt32.rD = lookupHRegRemap(m, i->ARM64in.LdSt32.rD);
2275         mapRegs_ARM64AMode(m, i->ARM64in.LdSt32.amode);
2276         return;
2277      case ARM64in_LdSt16:
2278         i->ARM64in.LdSt16.rD = lookupHRegRemap(m, i->ARM64in.LdSt16.rD);
2279         mapRegs_ARM64AMode(m, i->ARM64in.LdSt16.amode);
2280         return;
2281      case ARM64in_LdSt8:
2282         i->ARM64in.LdSt8.rD = lookupHRegRemap(m, i->ARM64in.LdSt8.rD);
2283         mapRegs_ARM64AMode(m, i->ARM64in.LdSt8.amode);
2284         return;
2285      case ARM64in_XDirect:
2286         mapRegs_ARM64AMode(m, i->ARM64in.XDirect.amPC);
2287         return;
2288      case ARM64in_XIndir:
2289         i->ARM64in.XIndir.dstGA
2290            = lookupHRegRemap(m, i->ARM64in.XIndir.dstGA);
2291         mapRegs_ARM64AMode(m, i->ARM64in.XIndir.amPC);
2292         return;
2293      case ARM64in_XAssisted:
2294         i->ARM64in.XAssisted.dstGA
2295            = lookupHRegRemap(m, i->ARM64in.XAssisted.dstGA);
2296         mapRegs_ARM64AMode(m, i->ARM64in.XAssisted.amPC);
2297         return;
2298      case ARM64in_CSel:
2299         i->ARM64in.CSel.dst  = lookupHRegRemap(m, i->ARM64in.CSel.dst);
2300         i->ARM64in.CSel.argL = lookupHRegRemap(m, i->ARM64in.CSel.argL);
2301         i->ARM64in.CSel.argR = lookupHRegRemap(m, i->ARM64in.CSel.argR);
2302         return;
2303      case ARM64in_Call:
2304         return;
2305      case ARM64in_AddToSP:
2306         return;
2307      case ARM64in_FromSP:
2308         i->ARM64in.FromSP.dst = lookupHRegRemap(m, i->ARM64in.FromSP.dst);
2309         return;
2310      case ARM64in_Mul:
2311         i->ARM64in.Mul.dst  = lookupHRegRemap(m, i->ARM64in.Mul.dst);
2312         i->ARM64in.Mul.argL = lookupHRegRemap(m, i->ARM64in.Mul.argL);
2313         i->ARM64in.Mul.argR = lookupHRegRemap(m, i->ARM64in.Mul.argR);
2314         break;
2315      case ARM64in_LdrEX:
2316         return;
2317      case ARM64in_StrEX:
2318         return;
2319      case ARM64in_MFence:
2320         return;
2321      case ARM64in_VLdStH:
2322         i->ARM64in.VLdStH.hD = lookupHRegRemap(m, i->ARM64in.VLdStH.hD);
2323         i->ARM64in.VLdStH.rN = lookupHRegRemap(m, i->ARM64in.VLdStH.rN);
2324         return;
2325      case ARM64in_VLdStS:
2326         i->ARM64in.VLdStS.sD = lookupHRegRemap(m, i->ARM64in.VLdStS.sD);
2327         i->ARM64in.VLdStS.rN = lookupHRegRemap(m, i->ARM64in.VLdStS.rN);
2328         return;
2329      case ARM64in_VLdStD:
2330         i->ARM64in.VLdStD.dD = lookupHRegRemap(m, i->ARM64in.VLdStD.dD);
2331         i->ARM64in.VLdStD.rN = lookupHRegRemap(m, i->ARM64in.VLdStD.rN);
2332         return;
2333      case ARM64in_VLdStQ:
2334         i->ARM64in.VLdStQ.rQ = lookupHRegRemap(m, i->ARM64in.VLdStQ.rQ);
2335         i->ARM64in.VLdStQ.rN = lookupHRegRemap(m, i->ARM64in.VLdStQ.rN);
2336         return;
2337      case ARM64in_VCvtI2F:
2338         i->ARM64in.VCvtI2F.rS = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rS);
2339         i->ARM64in.VCvtI2F.rD = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rD);
2340         return;
2341      case ARM64in_VCvtF2I:
2342         i->ARM64in.VCvtF2I.rS = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rS);
2343         i->ARM64in.VCvtF2I.rD = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rD);
2344         return;
2345      case ARM64in_VCvtSD:
2346         i->ARM64in.VCvtSD.dst = lookupHRegRemap(m, i->ARM64in.VCvtSD.dst);
2347         i->ARM64in.VCvtSD.src = lookupHRegRemap(m, i->ARM64in.VCvtSD.src);
2348         return;
2349      case ARM64in_VCvtHS:
2350         i->ARM64in.VCvtHS.dst = lookupHRegRemap(m, i->ARM64in.VCvtHS.dst);
2351         i->ARM64in.VCvtHS.src = lookupHRegRemap(m, i->ARM64in.VCvtHS.src);
2352         return;
2353      case ARM64in_VCvtHD:
2354         i->ARM64in.VCvtHD.dst = lookupHRegRemap(m, i->ARM64in.VCvtHD.dst);
2355         i->ARM64in.VCvtHD.src = lookupHRegRemap(m, i->ARM64in.VCvtHD.src);
2356         return;
2357      case ARM64in_VUnaryD:
2358         i->ARM64in.VUnaryD.dst = lookupHRegRemap(m, i->ARM64in.VUnaryD.dst);
2359         i->ARM64in.VUnaryD.src = lookupHRegRemap(m, i->ARM64in.VUnaryD.src);
2360         return;
2361      case ARM64in_VUnaryS:
2362         i->ARM64in.VUnaryS.dst = lookupHRegRemap(m, i->ARM64in.VUnaryS.dst);
2363         i->ARM64in.VUnaryS.src = lookupHRegRemap(m, i->ARM64in.VUnaryS.src);
2364         return;
2365      case ARM64in_VBinD:
2366         i->ARM64in.VBinD.dst  = lookupHRegRemap(m, i->ARM64in.VBinD.dst);
2367         i->ARM64in.VBinD.argL = lookupHRegRemap(m, i->ARM64in.VBinD.argL);
2368         i->ARM64in.VBinD.argR = lookupHRegRemap(m, i->ARM64in.VBinD.argR);
2369         return;
2370      case ARM64in_VBinS:
2371         i->ARM64in.VBinS.dst  = lookupHRegRemap(m, i->ARM64in.VBinS.dst);
2372         i->ARM64in.VBinS.argL = lookupHRegRemap(m, i->ARM64in.VBinS.argL);
2373         i->ARM64in.VBinS.argR = lookupHRegRemap(m, i->ARM64in.VBinS.argR);
2374         return;
2375      case ARM64in_VCmpD:
2376         i->ARM64in.VCmpD.argL = lookupHRegRemap(m, i->ARM64in.VCmpD.argL);
2377         i->ARM64in.VCmpD.argR = lookupHRegRemap(m, i->ARM64in.VCmpD.argR);
2378         return;
2379      case ARM64in_VCmpS:
2380         i->ARM64in.VCmpS.argL = lookupHRegRemap(m, i->ARM64in.VCmpS.argL);
2381         i->ARM64in.VCmpS.argR = lookupHRegRemap(m, i->ARM64in.VCmpS.argR);
2382         return;
2383      case ARM64in_VFCSel:
2384         i->ARM64in.VFCSel.argL = lookupHRegRemap(m, i->ARM64in.VFCSel.argL);
2385         i->ARM64in.VFCSel.argR = lookupHRegRemap(m, i->ARM64in.VFCSel.argR);
2386         i->ARM64in.VFCSel.dst  = lookupHRegRemap(m, i->ARM64in.VFCSel.dst);
2387         return;
2388      case ARM64in_FPCR:
2389         i->ARM64in.FPCR.iReg = lookupHRegRemap(m, i->ARM64in.FPCR.iReg);
2390         return;
2391      case ARM64in_FPSR:
2392         i->ARM64in.FPSR.iReg = lookupHRegRemap(m, i->ARM64in.FPSR.iReg);
2393         return;
2394      case ARM64in_VBinV:
2395         i->ARM64in.VBinV.dst  = lookupHRegRemap(m, i->ARM64in.VBinV.dst);
2396         i->ARM64in.VBinV.argL = lookupHRegRemap(m, i->ARM64in.VBinV.argL);
2397         i->ARM64in.VBinV.argR = lookupHRegRemap(m, i->ARM64in.VBinV.argR);
2398         return;
2399      case ARM64in_VModifyV:
2400         i->ARM64in.VModifyV.mod = lookupHRegRemap(m, i->ARM64in.VModifyV.mod);
2401         i->ARM64in.VModifyV.arg = lookupHRegRemap(m, i->ARM64in.VModifyV.arg);
2402         return;
2403      case ARM64in_VUnaryV:
2404         i->ARM64in.VUnaryV.dst = lookupHRegRemap(m, i->ARM64in.VUnaryV.dst);
2405         i->ARM64in.VUnaryV.arg = lookupHRegRemap(m, i->ARM64in.VUnaryV.arg);
2406         return;
2407      case ARM64in_VNarrowV:
2408         i->ARM64in.VNarrowV.dst = lookupHRegRemap(m, i->ARM64in.VNarrowV.dst);
2409         i->ARM64in.VNarrowV.src = lookupHRegRemap(m, i->ARM64in.VNarrowV.src);
2410         return;
2411      case ARM64in_VShiftImmV:
2412         i->ARM64in.VShiftImmV.dst
2413            = lookupHRegRemap(m, i->ARM64in.VShiftImmV.dst);
2414         i->ARM64in.VShiftImmV.src
2415            = lookupHRegRemap(m, i->ARM64in.VShiftImmV.src);
2416         return;
2417      case ARM64in_VExtV:
2418         i->ARM64in.VExtV.dst = lookupHRegRemap(m, i->ARM64in.VExtV.dst);
2419         i->ARM64in.VExtV.srcLo = lookupHRegRemap(m, i->ARM64in.VExtV.srcLo);
2420         i->ARM64in.VExtV.srcHi = lookupHRegRemap(m, i->ARM64in.VExtV.srcHi);
2421         return;
2422      case ARM64in_VImmQ:
2423         i->ARM64in.VImmQ.rQ = lookupHRegRemap(m, i->ARM64in.VImmQ.rQ);
2424         return;
2425      case ARM64in_VDfromX:
2426         i->ARM64in.VDfromX.rD
2427            = lookupHRegRemap(m, i->ARM64in.VDfromX.rD);
2428         i->ARM64in.VDfromX.rX
2429            = lookupHRegRemap(m, i->ARM64in.VDfromX.rX);
2430         return;
2431      case ARM64in_VQfromX:
2432         i->ARM64in.VQfromX.rQ
2433            = lookupHRegRemap(m, i->ARM64in.VQfromX.rQ);
2434         i->ARM64in.VQfromX.rXlo
2435            = lookupHRegRemap(m, i->ARM64in.VQfromX.rXlo);
2436         return;
2437      case ARM64in_VQfromXX:
2438         i->ARM64in.VQfromXX.rQ
2439            = lookupHRegRemap(m, i->ARM64in.VQfromXX.rQ);
2440         i->ARM64in.VQfromXX.rXhi
2441            = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXhi);
2442         i->ARM64in.VQfromXX.rXlo
2443            = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXlo);
2444         return;
2445      case ARM64in_VXfromQ:
2446         i->ARM64in.VXfromQ.rX
2447            = lookupHRegRemap(m, i->ARM64in.VXfromQ.rX);
2448         i->ARM64in.VXfromQ.rQ
2449            = lookupHRegRemap(m, i->ARM64in.VXfromQ.rQ);
2450         return;
2451      case ARM64in_VXfromDorS:
2452         i->ARM64in.VXfromDorS.rX
2453            = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rX);
2454         i->ARM64in.VXfromDorS.rDorS
2455            = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rDorS);
2456         return;
2457      case ARM64in_VMov:
2458         i->ARM64in.VMov.dst = lookupHRegRemap(m, i->ARM64in.VMov.dst);
2459         i->ARM64in.VMov.src = lookupHRegRemap(m, i->ARM64in.VMov.src);
2460         return;
2461      case ARM64in_EvCheck:
2462         /* We expect both amodes only to mention x21, so this is in
2463            fact pointless, since x21 isn't allocatable, but
2464            anyway.. */
2465         mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amCounter);
2466         mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amFailAddr);
2467         return;
2468      case ARM64in_ProfInc:
2469         /* hardwires x8 and x9 -- nothing to modify. */
2470         return;
2471      default:
2472         ppARM64Instr(i);
2473         vpanic("mapRegs_ARM64Instr");
2474   }
2475}
2476
2477/* Figure out if i represents a reg-reg move, and if so assign the
2478   source and destination to *src and *dst.  If in doubt say No.  Used
2479   by the register allocator to do move coalescing.
2480*/
2481Bool isMove_ARM64Instr ( const ARM64Instr* i, HReg* src, HReg* dst )
2482{
2483   switch (i->tag) {
2484      case ARM64in_MovI:
2485         *src = i->ARM64in.MovI.src;
2486         *dst = i->ARM64in.MovI.dst;
2487         return True;
2488      case ARM64in_VMov:
2489         *src = i->ARM64in.VMov.src;
2490         *dst = i->ARM64in.VMov.dst;
2491         return True;
2492      default:
2493         break;
2494   }
2495
2496   return False;
2497}
2498
2499
2500/* Generate arm spill/reload instructions under the direction of the
2501   register allocator.  Note it's critical these don't write the
2502   condition codes. */
2503
2504void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2505                      HReg rreg, Int offsetB, Bool mode64 )
2506{
2507   HRegClass rclass;
2508   vassert(offsetB >= 0);
2509   vassert(!hregIsVirtual(rreg));
2510   vassert(mode64 == True);
2511   *i1 = *i2 = NULL;
2512   rclass = hregClass(rreg);
2513   switch (rclass) {
2514      case HRcInt64:
2515         vassert(0 == (offsetB & 7));
2516         offsetB >>= 3;
2517         vassert(offsetB < 4096);
2518         *i1 = ARM64Instr_LdSt64(
2519                  False/*!isLoad*/,
2520                  rreg,
2521                  ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
2522               );
2523         return;
2524      case HRcFlt64:
2525         vassert(0 == (offsetB & 7));
2526         vassert(offsetB >= 0 && offsetB < 32768);
2527         *i1 = ARM64Instr_VLdStD(False/*!isLoad*/,
2528                                 rreg, hregARM64_X21(), offsetB);
2529         return;
2530      case HRcVec128: {
2531         HReg x21  = hregARM64_X21();  // baseblock
2532         HReg x9   = hregARM64_X9();   // spill temporary
2533         vassert(0 == (offsetB & 15)); // check sane alignment
2534         vassert(offsetB < 4096);
2535         *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
2536         *i2 = ARM64Instr_VLdStQ(False/*!isLoad*/, rreg, x9);
2537         return;
2538      }
2539      default:
2540         ppHRegClass(rclass);
2541         vpanic("genSpill_ARM: unimplemented regclass");
2542   }
2543}
2544
2545void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2546                       HReg rreg, Int offsetB, Bool mode64 )
2547{
2548   HRegClass rclass;
2549   vassert(offsetB >= 0);
2550   vassert(!hregIsVirtual(rreg));
2551   vassert(mode64 == True);
2552   *i1 = *i2 = NULL;
2553   rclass = hregClass(rreg);
2554   switch (rclass) {
2555      case HRcInt64:
2556         vassert(0 == (offsetB & 7));
2557         offsetB >>= 3;
2558         vassert(offsetB < 4096);
2559         *i1 = ARM64Instr_LdSt64(
2560                  True/*isLoad*/,
2561                  rreg,
2562                  ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
2563               );
2564         return;
2565      case HRcFlt64:
2566         vassert(0 == (offsetB & 7));
2567         vassert(offsetB >= 0 && offsetB < 32768);
2568         *i1 = ARM64Instr_VLdStD(True/*isLoad*/,
2569                                 rreg, hregARM64_X21(), offsetB);
2570         return;
2571      case HRcVec128: {
2572         HReg x21  = hregARM64_X21();  // baseblock
2573         HReg x9   = hregARM64_X9();   // spill temporary
2574         vassert(0 == (offsetB & 15)); // check sane alignment
2575         vassert(offsetB < 4096);
2576         *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
2577         *i2 = ARM64Instr_VLdStQ(True/*isLoad*/, rreg, x9);
2578         return;
2579      }
2580      default:
2581         ppHRegClass(rclass);
2582         vpanic("genReload_ARM: unimplemented regclass");
2583   }
2584}
2585
2586
2587/* Emit an instruction into buf and return the number of bytes used.
2588   Note that buf is not the insn's final place, and therefore it is
2589   imperative to emit position-independent code. */
2590
2591static inline UInt iregEnc ( HReg r )
2592{
2593   UInt n;
2594   vassert(hregClass(r) == HRcInt64);
2595   vassert(!hregIsVirtual(r));
2596   n = hregEncoding(r);
2597   vassert(n <= 30);
2598   return n;
2599}
2600
2601static inline UInt dregEnc ( HReg r )
2602{
2603   UInt n;
2604   vassert(hregClass(r) == HRcFlt64);
2605   vassert(!hregIsVirtual(r));
2606   n = hregEncoding(r);
2607   vassert(n <= 31);
2608   return n;
2609}
2610
2611static inline UInt qregEnc ( HReg r )
2612{
2613   UInt n;
2614   vassert(hregClass(r) == HRcVec128);
2615   vassert(!hregIsVirtual(r));
2616   n = hregEncoding(r);
2617   vassert(n <= 31);
2618   return n;
2619}
2620
2621#define BITS4(zzb3,zzb2,zzb1,zzb0) \
2622   (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
2623
2624#define X00  BITS4(0,0, 0,0)
2625#define X01  BITS4(0,0, 0,1)
2626#define X10  BITS4(0,0, 1,0)
2627#define X11  BITS4(0,0, 1,1)
2628
2629#define X000 BITS4(0, 0,0,0)
2630#define X001 BITS4(0, 0,0,1)
2631#define X010 BITS4(0, 0,1,0)
2632#define X011 BITS4(0, 0,1,1)
2633#define X100 BITS4(0, 1,0,0)
2634#define X101 BITS4(0, 1,0,1)
2635#define X110 BITS4(0, 1,1,0)
2636#define X111 BITS4(0, 1,1,1)
2637
2638#define X0000 BITS4(0,0,0,0)
2639#define X0001 BITS4(0,0,0,1)
2640#define X0010 BITS4(0,0,1,0)
2641#define X0011 BITS4(0,0,1,1)
2642
2643#define BITS8(zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1,zzb0) \
2644  ((BITS4(zzb7,zzb6,zzb5,zzb4) << 4) | BITS4(zzb3,zzb2,zzb1,zzb0))
2645
2646#define X00000   BITS8(0,0,0, 0,0,0,0,0)
2647#define X00001   BITS8(0,0,0, 0,0,0,0,1)
2648#define X00110   BITS8(0,0,0, 0,0,1,1,0)
2649#define X00111   BITS8(0,0,0, 0,0,1,1,1)
2650#define X01000   BITS8(0,0,0, 0,1,0,0,0)
2651#define X10000   BITS8(0,0,0, 1,0,0,0,0)
2652#define X11000   BITS8(0,0,0, 1,1,0,0,0)
2653#define X11110   BITS8(0,0,0, 1,1,1,1,0)
2654#define X11111   BITS8(0,0,0, 1,1,1,1,1)
2655
2656#define X000000  BITS8(0,0, 0,0,0,0,0,0)
2657#define X000001  BITS8(0,0, 0,0,0,0,0,1)
2658#define X000010  BITS8(0,0, 0,0,0,0,1,0)
2659#define X000011  BITS8(0,0, 0,0,0,0,1,1)
2660#define X000100  BITS8(0,0, 0,0,0,1,0,0)
2661#define X000110  BITS8(0,0, 0,0,0,1,1,0)
2662#define X000111  BITS8(0,0, 0,0,0,1,1,1)
2663#define X001000  BITS8(0,0, 0,0,1,0,0,0)
2664#define X001001  BITS8(0,0, 0,0,1,0,0,1)
2665#define X001010  BITS8(0,0, 0,0,1,0,1,0)
2666#define X001011  BITS8(0,0, 0,0,1,0,1,1)
2667#define X001101  BITS8(0,0, 0,0,1,1,0,1)
2668#define X001110  BITS8(0,0, 0,0,1,1,1,0)
2669#define X001111  BITS8(0,0, 0,0,1,1,1,1)
2670#define X010000  BITS8(0,0, 0,1,0,0,0,0)
2671#define X010001  BITS8(0,0, 0,1,0,0,0,1)
2672#define X010010  BITS8(0,0, 0,1,0,0,1,0)
2673#define X010011  BITS8(0,0, 0,1,0,0,1,1)
2674#define X010101  BITS8(0,0, 0,1,0,1,0,1)
2675#define X010110  BITS8(0,0, 0,1,0,1,1,0)
2676#define X010111  BITS8(0,0, 0,1,0,1,1,1)
2677#define X011001  BITS8(0,0, 0,1,1,0,0,1)
2678#define X011010  BITS8(0,0, 0,1,1,0,1,0)
2679#define X011011  BITS8(0,0, 0,1,1,0,1,1)
2680#define X011101  BITS8(0,0, 0,1,1,1,0,1)
2681#define X011110  BITS8(0,0, 0,1,1,1,1,0)
2682#define X011111  BITS8(0,0, 0,1,1,1,1,1)
2683#define X100001  BITS8(0,0, 1,0,0,0,0,1)
2684#define X100011  BITS8(0,0, 1,0,0,0,1,1)
2685#define X100100  BITS8(0,0, 1,0,0,1,0,0)
2686#define X100101  BITS8(0,0, 1,0,0,1,0,1)
2687#define X100110  BITS8(0,0, 1,0,0,1,1,0)
2688#define X100111  BITS8(0,0, 1,0,0,1,1,1)
2689#define X101101  BITS8(0,0, 1,0,1,1,0,1)
2690#define X101110  BITS8(0,0, 1,0,1,1,1,0)
2691#define X110000  BITS8(0,0, 1,1,0,0,0,0)
2692#define X110001  BITS8(0,0, 1,1,0,0,0,1)
2693#define X110010  BITS8(0,0, 1,1,0,0,1,0)
2694#define X110100  BITS8(0,0, 1,1,0,1,0,0)
2695#define X110101  BITS8(0,0, 1,1,0,1,0,1)
2696#define X110110  BITS8(0,0, 1,1,0,1,1,0)
2697#define X110111  BITS8(0,0, 1,1,0,1,1,1)
2698#define X111000  BITS8(0,0, 1,1,1,0,0,0)
2699#define X111001  BITS8(0,0, 1,1,1,0,0,1)
2700#define X111101  BITS8(0,0, 1,1,1,1,0,1)
2701#define X111110  BITS8(0,0, 1,1,1,1,1,0)
2702#define X111111  BITS8(0,0, 1,1,1,1,1,1)
2703
2704#define X0001000  BITS8(0, 0,0,0,1,0,0,0)
2705#define X0010000  BITS8(0, 0,0,1,0,0,0,0)
2706#define X0100000  BITS8(0, 0,1,0,0,0,0,0)
2707#define X1000000  BITS8(0, 1,0,0,0,0,0,0)
2708
2709#define X00100000  BITS8(0,0,1,0,0,0,0,0)
2710#define X00100001  BITS8(0,0,1,0,0,0,0,1)
2711#define X00100010  BITS8(0,0,1,0,0,0,1,0)
2712#define X00100011  BITS8(0,0,1,0,0,0,1,1)
2713#define X01010000  BITS8(0,1,0,1,0,0,0,0)
2714#define X01010001  BITS8(0,1,0,1,0,0,0,1)
2715#define X01010100  BITS8(0,1,0,1,0,1,0,0)
2716#define X01011000  BITS8(0,1,0,1,1,0,0,0)
2717#define X01100000  BITS8(0,1,1,0,0,0,0,0)
2718#define X01100001  BITS8(0,1,1,0,0,0,0,1)
2719#define X01100010  BITS8(0,1,1,0,0,0,1,0)
2720#define X01100011  BITS8(0,1,1,0,0,0,1,1)
2721#define X01110000  BITS8(0,1,1,1,0,0,0,0)
2722#define X01110001  BITS8(0,1,1,1,0,0,0,1)
2723#define X01110010  BITS8(0,1,1,1,0,0,1,0)
2724#define X01110011  BITS8(0,1,1,1,0,0,1,1)
2725#define X01110100  BITS8(0,1,1,1,0,1,0,0)
2726#define X01110101  BITS8(0,1,1,1,0,1,0,1)
2727#define X01110110  BITS8(0,1,1,1,0,1,1,0)
2728#define X01110111  BITS8(0,1,1,1,0,1,1,1)
2729#define X11000001  BITS8(1,1,0,0,0,0,0,1)
2730#define X11000011  BITS8(1,1,0,0,0,0,1,1)
2731#define X11010100  BITS8(1,1,0,1,0,1,0,0)
2732#define X11010110  BITS8(1,1,0,1,0,1,1,0)
2733#define X11011000  BITS8(1,1,0,1,1,0,0,0)
2734#define X11011010  BITS8(1,1,0,1,1,0,1,0)
2735#define X11011110  BITS8(1,1,0,1,1,1,1,0)
2736#define X11100010  BITS8(1,1,1,0,0,0,1,0)
2737#define X11110001  BITS8(1,1,1,1,0,0,0,1)
2738#define X11110011  BITS8(1,1,1,1,0,0,1,1)
2739#define X11110101  BITS8(1,1,1,1,0,1,0,1)
2740#define X11110111  BITS8(1,1,1,1,0,1,1,1)
2741
2742
2743/* --- 4 fields --- */
2744
2745static inline UInt X_8_19_1_4 ( UInt f1, UInt f2, UInt f3, UInt f4 ) {
2746   vassert(8+19+1+4 == 32);
2747   vassert(f1 < (1<<8));
2748   vassert(f2 < (1<<19));
2749   vassert(f3 < (1<<1));
2750   vassert(f4 < (1<<4));
2751   UInt w = 0;
2752   w = (w <<  8) | f1;
2753   w = (w << 19) | f2;
2754   w = (w <<  1) | f3;
2755   w = (w <<  4) | f4;
2756   return w;
2757}
2758
2759/* --- 5 fields --- */
2760
2761static inline UInt X_3_6_2_16_5 ( UInt f1, UInt f2,
2762                                  UInt f3, UInt f4, UInt f5 ) {
2763   vassert(3+6+2+16+5 == 32);
2764   vassert(f1 < (1<<3));
2765   vassert(f2 < (1<<6));
2766   vassert(f3 < (1<<2));
2767   vassert(f4 < (1<<16));
2768   vassert(f5 < (1<<5));
2769   UInt w = 0;
2770   w = (w <<  3) | f1;
2771   w = (w <<  6) | f2;
2772   w = (w <<  2) | f3;
2773   w = (w << 16) | f4;
2774   w = (w <<  5) | f5;
2775   return w;
2776}
2777
2778/* --- 6 fields --- */
2779
2780static inline UInt X_2_6_2_12_5_5 ( UInt f1, UInt f2, UInt f3,
2781                                    UInt f4, UInt f5, UInt f6 ) {
2782   vassert(2+6+2+12+5+5 == 32);
2783   vassert(f1 < (1<<2));
2784   vassert(f2 < (1<<6));
2785   vassert(f3 < (1<<2));
2786   vassert(f4 < (1<<12));
2787   vassert(f5 < (1<<5));
2788   vassert(f6 < (1<<5));
2789   UInt w = 0;
2790   w = (w <<  2) | f1;
2791   w = (w <<  6) | f2;
2792   w = (w <<  2) | f3;
2793   w = (w << 12) | f4;
2794   w = (w <<  5) | f5;
2795   w = (w <<  5) | f6;
2796   return w;
2797}
2798
2799static inline UInt X_3_8_5_6_5_5 ( UInt f1, UInt f2, UInt f3,
2800                                   UInt f4, UInt f5, UInt f6 ) {
2801   vassert(3+8+5+6+5+5 == 32);
2802   vassert(f1 < (1<<3));
2803   vassert(f2 < (1<<8));
2804   vassert(f3 < (1<<5));
2805   vassert(f4 < (1<<6));
2806   vassert(f5 < (1<<5));
2807   vassert(f6 < (1<<5));
2808   UInt w = 0;
2809   w = (w <<  3) | f1;
2810   w = (w <<  8) | f2;
2811   w = (w <<  5) | f3;
2812   w = (w <<  6) | f4;
2813   w = (w <<  5) | f5;
2814   w = (w <<  5) | f6;
2815   return w;
2816}
2817
2818static inline UInt X_3_5_8_6_5_5 ( UInt f1, UInt f2, UInt f3,
2819                                   UInt f4, UInt f5, UInt f6 ) {
2820   vassert(3+8+5+6+5+5 == 32);
2821   vassert(f1 < (1<<3));
2822   vassert(f2 < (1<<5));
2823   vassert(f3 < (1<<8));
2824   vassert(f4 < (1<<6));
2825   vassert(f5 < (1<<5));
2826   vassert(f6 < (1<<5));
2827   UInt w = 0;
2828   w = (w <<  3) | f1;
2829   w = (w <<  5) | f2;
2830   w = (w <<  8) | f3;
2831   w = (w <<  6) | f4;
2832   w = (w <<  5) | f5;
2833   w = (w <<  5) | f6;
2834   return w;
2835}
2836
2837static inline UInt X_3_6_7_6_5_5 ( UInt f1, UInt f2, UInt f3,
2838                                   UInt f4, UInt f5, UInt f6 ) {
2839   vassert(3+6+7+6+5+5 == 32);
2840   vassert(f1 < (1<<3));
2841   vassert(f2 < (1<<6));
2842   vassert(f3 < (1<<7));
2843   vassert(f4 < (1<<6));
2844   vassert(f5 < (1<<5));
2845   vassert(f6 < (1<<5));
2846   UInt w = 0;
2847   w = (w <<  3) | f1;
2848   w = (w <<  6) | f2;
2849   w = (w <<  7) | f3;
2850   w = (w <<  6) | f4;
2851   w = (w <<  5) | f5;
2852   w = (w <<  5) | f6;
2853   return w;
2854}
2855
2856/* --- 7 fields --- */
2857
2858static inline UInt X_2_6_3_9_2_5_5 ( UInt f1, UInt f2, UInt f3,
2859                                     UInt f4, UInt f5, UInt f6, UInt f7 ) {
2860   vassert(2+6+3+9+2+5+5 == 32);
2861   vassert(f1 < (1<<2));
2862   vassert(f2 < (1<<6));
2863   vassert(f3 < (1<<3));
2864   vassert(f4 < (1<<9));
2865   vassert(f5 < (1<<2));
2866   vassert(f6 < (1<<5));
2867   vassert(f7 < (1<<5));
2868   UInt w = 0;
2869   w = (w << 2) | f1;
2870   w = (w << 6) | f2;
2871   w = (w << 3) | f3;
2872   w = (w << 9) | f4;
2873   w = (w << 2) | f5;
2874   w = (w << 5) | f6;
2875   w = (w << 5) | f7;
2876   return w;
2877}
2878
2879static inline UInt X_3_6_1_6_6_5_5 ( UInt f1, UInt f2, UInt f3,
2880                                     UInt f4, UInt f5, UInt f6, UInt f7 ) {
2881   vassert(3+6+1+6+6+5+5 == 32);
2882   vassert(f1 < (1<<3));
2883   vassert(f2 < (1<<6));
2884   vassert(f3 < (1<<1));
2885   vassert(f4 < (1<<6));
2886   vassert(f5 < (1<<6));
2887   vassert(f6 < (1<<5));
2888   vassert(f7 < (1<<5));
2889   UInt w = 0;
2890   w = (w << 3) | f1;
2891   w = (w << 6) | f2;
2892   w = (w << 1) | f3;
2893   w = (w << 6) | f4;
2894   w = (w << 6) | f5;
2895   w = (w << 5) | f6;
2896   w = (w << 5) | f7;
2897   return w;
2898}
2899
2900
2901//ZZ #define X0000  BITS4(0,0,0,0)
2902//ZZ #define X0001  BITS4(0,0,0,1)
2903//ZZ #define X0010  BITS4(0,0,1,0)
2904//ZZ #define X0011  BITS4(0,0,1,1)
2905//ZZ #define X0100  BITS4(0,1,0,0)
2906//ZZ #define X0101  BITS4(0,1,0,1)
2907//ZZ #define X0110  BITS4(0,1,1,0)
2908//ZZ #define X0111  BITS4(0,1,1,1)
2909//ZZ #define X1000  BITS4(1,0,0,0)
2910//ZZ #define X1001  BITS4(1,0,0,1)
2911//ZZ #define X1010  BITS4(1,0,1,0)
2912//ZZ #define X1011  BITS4(1,0,1,1)
2913//ZZ #define X1100  BITS4(1,1,0,0)
2914//ZZ #define X1101  BITS4(1,1,0,1)
2915//ZZ #define X1110  BITS4(1,1,1,0)
2916//ZZ #define X1111  BITS4(1,1,1,1)
2917/*
2918#define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
2919   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
2920    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
2921    (((zzx3) & 0xF) << 12))
2922
2923#define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2)        \
2924   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
2925    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
2926    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8))
2927
2928#define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0)        \
2929   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
2930    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
2931    (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) <<  0))
2932
2933#define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
2934  ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2935   (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
2936   (((zzx0) & 0xF) << 0))
2937
2938#define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0)  \
2939   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
2940    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
2941    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8) |  \
2942    (((zzx1) & 0xF) <<  4) | (((zzx0) & 0xF) <<  0))
2943
2944#define XX______(zzx7,zzx6) \
2945   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
2946*/
2947
2948
2949/* Get an immediate into a register, using only that register. */
2950static UInt* imm64_to_ireg ( UInt* p, Int xD, ULong imm64 )
2951{
2952   if (imm64 == 0) {
2953      // This has to be special-cased, since the logic below
2954      // will leave the register unchanged in this case.
2955      // MOVZ xD, #0, LSL #0
2956      *p++ = X_3_6_2_16_5(X110, X100101, X00, 0/*imm16*/, xD);
2957      return p;
2958   }
2959
2960   // There must be at least one non-zero halfword.  Find the
2961   // lowest nonzero such, and use MOVZ to install it and zero
2962   // out the rest of the register.
2963   UShort h[4];
2964   h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
2965   h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
2966   h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
2967   h[0] = (UShort)((imm64 >>  0) & 0xFFFF);
2968
2969   UInt i;
2970   for (i = 0; i < 4; i++) {
2971      if (h[i] != 0)
2972         break;
2973   }
2974   vassert(i < 4);
2975
2976   // MOVZ xD, h[i], LSL (16*i)
2977   *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
2978
2979   // Work on upwards through h[i], using MOVK to stuff in any
2980   // remaining nonzero elements.
2981   i++;
2982   for (; i < 4; i++) {
2983      if (h[i] == 0)
2984         continue;
2985      // MOVK xD, h[i], LSL (16*i)
2986      *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
2987   }
2988
2989   return p;
2990}
2991
2992/* Get an immediate into a register, using only that register, and
2993   generating exactly 4 instructions, regardless of the value of the
2994   immediate. This is used when generating sections of code that need
2995   to be patched later, so as to guarantee a specific size. */
2996static UInt* imm64_to_ireg_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
2997{
2998   UShort h[4];
2999   h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
3000   h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
3001   h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
3002   h[0] = (UShort)((imm64 >>  0) & 0xFFFF);
3003   // Work on upwards through h[i], using MOVK to stuff in the
3004   // remaining elements.
3005   UInt i;
3006   for (i = 0; i < 4; i++) {
3007      if (i == 0) {
3008         // MOVZ xD, h[0], LSL (16*0)
3009         *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
3010      } else {
3011         // MOVK xD, h[i], LSL (16*i)
3012         *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
3013      }
3014   }
3015   return p;
3016}
3017
3018/* Check whether p points at a 4-insn sequence cooked up by
3019   imm64_to_ireg_EXACTLY4(). */
3020static Bool is_imm64_to_ireg_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
3021{
3022   UShort h[4];
3023   h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
3024   h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
3025   h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
3026   h[0] = (UShort)((imm64 >>  0) & 0xFFFF);
3027   // Work on upwards through h[i], using MOVK to stuff in the
3028   // remaining elements.
3029   UInt i;
3030   for (i = 0; i < 4; i++) {
3031      UInt expected;
3032      if (i == 0) {
3033         // MOVZ xD, h[0], LSL (16*0)
3034         expected = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
3035      } else {
3036         // MOVK xD, h[i], LSL (16*i)
3037         expected = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
3038      }
3039      if (p[i] != expected)
3040         return False;
3041   }
3042   return True;
3043}
3044
3045
3046/* Generate a 8 bit store or 8-to-64 unsigned widening load from/to
3047   rD, using the given amode for the address. */
3048static UInt* do_load_or_store8 ( UInt* p,
3049                                 Bool isLoad, UInt wD, ARM64AMode* am )
3050{
3051   vassert(wD <= 30);
3052   if (am->tag == ARM64am_RI9) {
3053      /* STURB Wd, [Xn|SP + simm9]:  00 111000 000 simm9 00 n d
3054         LDURB Wd, [Xn|SP + simm9]:  00 111000 010 simm9 00 n d
3055      */
3056      Int simm9 = am->ARM64am.RI9.simm9;
3057      vassert(-256 <= simm9 && simm9 <= 255);
3058      UInt instr = X_2_6_3_9_2_5_5(X00, X111000, isLoad ? X010 : X000,
3059                                   simm9 & 0x1FF, X00,
3060                                   iregEnc(am->ARM64am.RI9.reg), wD);
3061      *p++ = instr;
3062      return p;
3063   }
3064   if (am->tag == ARM64am_RI12) {
3065      /* STRB Wd, [Xn|SP + uimm12 * 1]:  00 111 001 00 imm12 n d
3066         LDRB Wd, [Xn|SP + uimm12 * 1]:  00 111 001 01 imm12 n d
3067      */
3068      UInt uimm12 = am->ARM64am.RI12.uimm12;
3069      UInt scale  = am->ARM64am.RI12.szB;
3070      vassert(scale == 1); /* failure of this is serious.  Do not ignore. */
3071      UInt xN    = iregEnc(am->ARM64am.RI12.reg);
3072      vassert(xN <= 30);
3073      UInt instr = X_2_6_2_12_5_5(X00, X111001, isLoad ? X01 : X00,
3074                                  uimm12, xN, wD);
3075      *p++ = instr;
3076      return p;
3077   }
3078   if (am->tag == ARM64am_RR) {
3079      /* STRB Xd, [Xn|SP, Xm]: 00 111 000 001 m 011 0 10 n d
3080         LDRB Xd, [Xn|SP, Xm]: 00 111 000 011 m 011 0 10 n d
3081      */
3082      UInt xN = iregEnc(am->ARM64am.RR.base);
3083      UInt xM = iregEnc(am->ARM64am.RR.index);
3084      vassert(xN <= 30);
3085      UInt instr = X_3_8_5_6_5_5(X001, isLoad ? X11000011 : X11000001,
3086                                 xM, X011010, xN, wD);
3087      *p++ = instr;
3088      return p;
3089   }
3090   vpanic("do_load_or_store8");
3091   vassert(0);
3092}
3093
3094
3095/* Generate a 16 bit store or 16-to-64 unsigned widening load from/to
3096   rD, using the given amode for the address. */
3097static UInt* do_load_or_store16 ( UInt* p,
3098                                  Bool isLoad, UInt wD, ARM64AMode* am )
3099{
3100   vassert(wD <= 30);
3101   if (am->tag == ARM64am_RI9) {
3102      /* STURH Wd, [Xn|SP + simm9]:  01 111000 000 simm9 00 n d
3103         LDURH Wd, [Xn|SP + simm9]:  01 111000 010 simm9 00 n d
3104      */
3105      Int simm9 = am->ARM64am.RI9.simm9;
3106      vassert(-256 <= simm9 && simm9 <= 255);
3107      UInt instr = X_2_6_3_9_2_5_5(X01, X111000, isLoad ? X010 : X000,
3108                                   simm9 & 0x1FF, X00,
3109                                   iregEnc(am->ARM64am.RI9.reg), wD);
3110      *p++ = instr;
3111      return p;
3112   }
3113   if (am->tag == ARM64am_RI12) {
3114      /* STRH Wd, [Xn|SP + uimm12 * 2]:  01 111 001 00 imm12 n d
3115         LDRH Wd, [Xn|SP + uimm12 * 2]:  01 111 001 01 imm12 n d
3116      */
3117      UInt uimm12 = am->ARM64am.RI12.uimm12;
3118      UInt scale  = am->ARM64am.RI12.szB;
3119      vassert(scale == 2); /* failure of this is serious.  Do not ignore. */
3120      UInt xN    = iregEnc(am->ARM64am.RI12.reg);
3121      vassert(xN <= 30);
3122      UInt instr = X_2_6_2_12_5_5(X01, X111001, isLoad ? X01 : X00,
3123                                  uimm12, xN, wD);
3124      *p++ = instr;
3125      return p;
3126   }
3127   if (am->tag == ARM64am_RR) {
3128      /* STRH Xd, [Xn|SP, Xm]: 01 111 000 001 m 011 0 10 n d
3129         LDRH Xd, [Xn|SP, Xm]: 01 111 000 011 m 011 0 10 n d
3130      */
3131      UInt xN = iregEnc(am->ARM64am.RR.base);
3132      UInt xM = iregEnc(am->ARM64am.RR.index);
3133      vassert(xN <= 30);
3134      UInt instr = X_3_8_5_6_5_5(X011, isLoad ? X11000011 : X11000001,
3135                                 xM, X011010, xN, wD);
3136      *p++ = instr;
3137      return p;
3138   }
3139   vpanic("do_load_or_store16");
3140   vassert(0);
3141}
3142
3143
3144/* Generate a 32 bit store or 32-to-64 unsigned widening load from/to
3145   rD, using the given amode for the address. */
3146static UInt* do_load_or_store32 ( UInt* p,
3147                                  Bool isLoad, UInt wD, ARM64AMode* am )
3148{
3149   vassert(wD <= 30);
3150   if (am->tag == ARM64am_RI9) {
3151      /* STUR Wd, [Xn|SP + simm9]:  10 111000 000 simm9 00 n d
3152         LDUR Wd, [Xn|SP + simm9]:  10 111000 010 simm9 00 n d
3153      */
3154      Int simm9 = am->ARM64am.RI9.simm9;
3155      vassert(-256 <= simm9 && simm9 <= 255);
3156      UInt instr = X_2_6_3_9_2_5_5(X10, X111000, isLoad ? X010 : X000,
3157                                   simm9 & 0x1FF, X00,
3158                                   iregEnc(am->ARM64am.RI9.reg), wD);
3159      *p++ = instr;
3160      return p;
3161   }
3162   if (am->tag == ARM64am_RI12) {
3163      /* STR Wd, [Xn|SP + uimm12 * 4]:  10 111 001 00 imm12 n d
3164         LDR Wd, [Xn|SP + uimm12 * 4]:  10 111 001 01 imm12 n d
3165      */
3166      UInt uimm12 = am->ARM64am.RI12.uimm12;
3167      UInt scale  = am->ARM64am.RI12.szB;
3168      vassert(scale == 4); /* failure of this is serious.  Do not ignore. */
3169      UInt xN    = iregEnc(am->ARM64am.RI12.reg);
3170      vassert(xN <= 30);
3171      UInt instr = X_2_6_2_12_5_5(X10, X111001, isLoad ? X01 : X00,
3172                                  uimm12, xN, wD);
3173      *p++ = instr;
3174      return p;
3175   }
3176   if (am->tag == ARM64am_RR) {
3177      /* STR Wd, [Xn|SP, Xm]: 10 111 000 001 m 011 0 10 n d
3178         LDR Wd, [Xn|SP, Xm]: 10 111 000 011 m 011 0 10 n d
3179      */
3180      UInt xN = iregEnc(am->ARM64am.RR.base);
3181      UInt xM = iregEnc(am->ARM64am.RR.index);
3182      vassert(xN <= 30);
3183      UInt instr = X_3_8_5_6_5_5(X101, isLoad ? X11000011 : X11000001,
3184                                 xM, X011010, xN, wD);
3185      *p++ = instr;
3186      return p;
3187   }
3188   vpanic("do_load_or_store32");
3189   vassert(0);
3190}
3191
3192
3193/* Generate a 64 bit load or store to/from xD, using the given amode
3194   for the address. */
3195static UInt* do_load_or_store64 ( UInt* p,
3196                                  Bool isLoad, UInt xD, ARM64AMode* am )
3197{
3198   /* In all these cases, Rn can't be 31 since that means SP. */
3199   vassert(xD <= 30);
3200   if (am->tag == ARM64am_RI9) {
3201      /* STUR Xd, [Xn|SP + simm9]:  11 111000 000 simm9 00 n d
3202         LDUR Xd, [Xn|SP + simm9]:  11 111000 010 simm9 00 n d
3203      */
3204      Int simm9 = am->ARM64am.RI9.simm9;
3205      vassert(-256 <= simm9 && simm9 <= 255);
3206      UInt xN = iregEnc(am->ARM64am.RI9.reg);
3207      vassert(xN <= 30);
3208      UInt instr = X_2_6_3_9_2_5_5(X11, X111000, isLoad ? X010 : X000,
3209                                   simm9 & 0x1FF, X00, xN, xD);
3210      *p++ = instr;
3211      return p;
3212   }
3213   if (am->tag == ARM64am_RI12) {
3214      /* STR Xd, [Xn|SP + uimm12 * 8]:  11 111 001 00 imm12 n d
3215         LDR Xd, [Xn|SP + uimm12 * 8]:  11 111 001 01 imm12 n d
3216      */
3217      UInt uimm12 = am->ARM64am.RI12.uimm12;
3218      UInt scale  = am->ARM64am.RI12.szB;
3219      vassert(scale == 8); /* failure of this is serious.  Do not ignore. */
3220      UInt xN    = iregEnc(am->ARM64am.RI12.reg);
3221      vassert(xN <= 30);
3222      UInt instr = X_2_6_2_12_5_5(X11, X111001, isLoad ? X01 : X00,
3223                                  uimm12, xN, xD);
3224      *p++ = instr;
3225      return p;
3226   }
3227   if (am->tag == ARM64am_RR) {
3228      /* STR Xd, [Xn|SP, Xm]: 11 111 000 001 m 011 0 10 n d
3229         LDR Xd, [Xn|SP, Xm]: 11 111 000 011 m 011 0 10 n d
3230      */
3231      UInt xN = iregEnc(am->ARM64am.RR.base);
3232      UInt xM = iregEnc(am->ARM64am.RR.index);
3233      vassert(xN <= 30);
3234      UInt instr = X_3_8_5_6_5_5(X111, isLoad ? X11000011 : X11000001,
3235                                 xM, X011010, xN, xD);
3236      *p++ = instr;
3237      return p;
3238   }
3239   vpanic("do_load_or_store64");
3240   vassert(0);
3241}
3242
3243
3244/* Emit an instruction into buf and return the number of bytes used.
3245   Note that buf is not the insn's final place, and therefore it is
3246   imperative to emit position-independent code.  If the emitted
3247   instruction was a profiler inc, set *is_profInc to True, else
3248   leave it unchanged. */
3249
3250Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
3251                      UChar* buf, Int nbuf, const ARM64Instr* i,
3252                      Bool mode64, VexEndness endness_host,
3253                      const void* disp_cp_chain_me_to_slowEP,
3254                      const void* disp_cp_chain_me_to_fastEP,
3255                      const void* disp_cp_xindir,
3256                      const void* disp_cp_xassisted )
3257{
3258   UInt* p = (UInt*)buf;
3259   vassert(nbuf >= 32);
3260   vassert(mode64 == True);
3261   vassert(0 == (((HWord)buf) & 3));
3262
3263   switch (i->tag) {
3264      case ARM64in_Arith: {
3265         UInt      rD   = iregEnc(i->ARM64in.Arith.dst);
3266         UInt      rN   = iregEnc(i->ARM64in.Arith.argL);
3267         ARM64RIA* argR = i->ARM64in.Arith.argR;
3268         switch (argR->tag) {
3269            case ARM64riA_I12:
3270               *p++ = X_2_6_2_12_5_5(
3271                         i->ARM64in.Arith.isAdd ? X10 : X11,
3272                         X010001,
3273                         argR->ARM64riA.I12.shift == 12 ? X01 : X00,
3274                         argR->ARM64riA.I12.imm12, rN, rD
3275                      );
3276               break;
3277            case ARM64riA_R: {
3278               UInt rM = iregEnc(i->ARM64in.Arith.argR->ARM64riA.R.reg);
3279               *p++ = X_3_8_5_6_5_5(
3280                         i->ARM64in.Arith.isAdd ? X100 : X110,
3281                         X01011000, rM, X000000, rN, rD
3282                      );
3283               break;
3284            }
3285            default:
3286               goto bad;
3287         }
3288         goto done;
3289      }
3290      case ARM64in_Cmp: {
3291         UInt      rD   = 31; /* XZR, we are going to dump the result */
3292         UInt      rN   = iregEnc(i->ARM64in.Cmp.argL);
3293         ARM64RIA* argR = i->ARM64in.Cmp.argR;
3294         Bool      is64 = i->ARM64in.Cmp.is64;
3295         switch (argR->tag) {
3296            case ARM64riA_I12:
3297               /* 1 11 10001 sh imm12 Rn Rd = SUBS Xd, Xn, #imm */
3298               /* 0 11 10001 sh imm12 Rn Rd = SUBS Wd, Wn, #imm */
3299               *p++ = X_2_6_2_12_5_5(
3300                         is64 ? X11 : X01, X110001,
3301                         argR->ARM64riA.I12.shift == 12 ? X01 : X00,
3302                         argR->ARM64riA.I12.imm12, rN, rD);
3303               break;
3304            case ARM64riA_R: {
3305               /* 1 11 01011 00 0 Rm 000000 Rn Rd = SUBS Xd, Xn, Xm */
3306               /* 0 11 01011 00 0 Rm 000000 Rn Rd = SUBS Wd, Wn, Wm */
3307               UInt rM = iregEnc(i->ARM64in.Cmp.argR->ARM64riA.R.reg);
3308               *p++ = X_3_8_5_6_5_5(is64 ? X111 : X011,
3309                                    X01011000, rM, X000000, rN, rD);
3310               break;
3311            }
3312            default:
3313               goto bad;
3314         }
3315         goto done;
3316      }
3317      case ARM64in_Logic: {
3318         UInt      rD   = iregEnc(i->ARM64in.Logic.dst);
3319         UInt      rN   = iregEnc(i->ARM64in.Logic.argL);
3320         ARM64RIL* argR = i->ARM64in.Logic.argR;
3321         UInt      opc  = 0; /* invalid */
3322         vassert(rD < 31);
3323         vassert(rN < 31);
3324         switch (i->ARM64in.Logic.op) {
3325            case ARM64lo_OR:  opc = X101; break;
3326            case ARM64lo_AND: opc = X100; break;
3327            case ARM64lo_XOR: opc = X110; break;
3328            default: break;
3329         }
3330         vassert(opc != 0);
3331         switch (argR->tag) {
3332            case ARM64riL_I13: {
3333               /* 1 01 100100 N immR immS Rn Rd = ORR <Xd|Sp>, Xn, #imm */
3334               /* 1 00 100100 N immR immS Rn Rd = AND <Xd|Sp>, Xn, #imm */
3335               /* 1 10 100100 N immR immS Rn Rd = EOR <Xd|Sp>, Xn, #imm */
3336               *p++ = X_3_6_1_6_6_5_5(
3337                         opc, X100100, argR->ARM64riL.I13.bitN,
3338                         argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
3339                         rN, rD
3340                      );
3341               break;
3342            }
3343            case ARM64riL_R: {
3344               /* 1 01 01010 00 0 m 000000 n d = ORR Xd, Xn, Xm */
3345               /* 1 00 01010 00 0 m 000000 n d = AND Xd, Xn, Xm */
3346               /* 1 10 01010 00 0 m 000000 n d = EOR Xd, Xn, Xm */
3347               UInt rM = iregEnc(argR->ARM64riL.R.reg);
3348               vassert(rM < 31);
3349               *p++ = X_3_8_5_6_5_5(opc, X01010000, rM, X000000, rN, rD);
3350               break;
3351            }
3352            default:
3353               goto bad;
3354         }
3355         goto done;
3356      }
3357      case ARM64in_Test: {
3358         UInt      rD   = 31; /* XZR, we are going to dump the result */
3359         UInt      rN   = iregEnc(i->ARM64in.Test.argL);
3360         ARM64RIL* argR = i->ARM64in.Test.argR;
3361         switch (argR->tag) {
3362            case ARM64riL_I13: {
3363               /* 1 11 100100 N immR immS Rn Rd = ANDS Xd, Xn, #imm */
3364               *p++ = X_3_6_1_6_6_5_5(
3365                         X111, X100100, argR->ARM64riL.I13.bitN,
3366                         argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
3367                         rN, rD
3368                      );
3369               break;
3370            }
3371            default:
3372               goto bad;
3373         }
3374         goto done;
3375      }
3376      case ARM64in_Shift: {
3377         UInt      rD   = iregEnc(i->ARM64in.Shift.dst);
3378         UInt      rN   = iregEnc(i->ARM64in.Shift.argL);
3379         ARM64RI6* argR = i->ARM64in.Shift.argR;
3380         vassert(rD < 31);
3381         vassert(rN < 31);
3382         switch (argR->tag) {
3383            case ARM64ri6_I6: {
3384               /* 110 1001101 (63-sh) (64-sh) nn dd   LSL Xd, Xn, sh */
3385               /* 110 1001101 sh      63      nn dd   LSR Xd, Xn, sh */
3386               /* 100 1001101 sh      63      nn dd   ASR Xd, Xn, sh */
3387               UInt sh = argR->ARM64ri6.I6.imm6;
3388               vassert(sh > 0 && sh < 64);
3389               switch (i->ARM64in.Shift.op) {
3390                  case ARM64sh_SHL:
3391                     *p++ = X_3_6_1_6_6_5_5(X110, X100110,
3392                                            1, 64-sh, 63-sh, rN, rD);
3393                     break;
3394                  case ARM64sh_SHR:
3395                     *p++ = X_3_6_1_6_6_5_5(X110, X100110, 1, sh, 63, rN, rD);
3396                     break;
3397                  case ARM64sh_SAR:
3398                     *p++ = X_3_6_1_6_6_5_5(X100, X100110, 1, sh, 63, rN, rD);
3399                     break;
3400                  default:
3401                     vassert(0);
3402               }
3403               break;
3404            }
3405            case ARM64ri6_R: {
3406               /* 100 1101 0110 mm 001000 nn dd   LSL Xd, Xn, Xm */
3407               /* 100 1101 0110 mm 001001 nn dd   LSR Xd, Xn, Xm */
3408               /* 100 1101 0110 mm 001010 nn dd   ASR Xd, Xn, Xm */
3409               UInt rM = iregEnc(argR->ARM64ri6.R.reg);
3410               vassert(rM < 31);
3411               UInt subOpc = 0;
3412               switch (i->ARM64in.Shift.op) {
3413                  case ARM64sh_SHL: subOpc = X001000; break;
3414                  case ARM64sh_SHR: subOpc = X001001; break;
3415                  case ARM64sh_SAR: subOpc = X001010; break;
3416                  default: vassert(0);
3417               }
3418               *p++ = X_3_8_5_6_5_5(X100, X11010110, rM, subOpc, rN, rD);
3419               break;
3420            }
3421            default:
3422               vassert(0);
3423         }
3424         goto done;
3425      }
3426      case ARM64in_Unary: {
3427         UInt rDst = iregEnc(i->ARM64in.Unary.dst);
3428         UInt rSrc = iregEnc(i->ARM64in.Unary.src);
3429         switch (i->ARM64in.Unary.op) {
3430            case ARM64un_CLZ:
3431               /* 1 10 1101 0110 00000 00010 0 nn dd   CLZ Xd, Xn */
3432               /* 1 10 1101 0110 00000 00010 1 nn dd   CLS Xd, Xn (unimp) */
3433               *p++ = X_3_8_5_6_5_5(X110,
3434                                    X11010110, X00000, X000100, rSrc, rDst);
3435               goto done;
3436            case ARM64un_NEG:
3437               /* 1 10 01011 000 m 000000 11111 d  NEG Xd,Xm */
3438               /* 0 10 01011 000 m 000000 11111 d  NEG Wd,Wm (unimp) */
3439               *p++ = X_3_8_5_6_5_5(X110,
3440                                    X01011000, rSrc, X000000, X11111, rDst);
3441               goto done;
3442            case ARM64un_NOT: {
3443               /* 1 01 01010 00 1 m 000000 11111 d   MVN Xd,Xm */
3444               *p++ = X_3_8_5_6_5_5(X101,
3445                                    X01010001, rSrc, X000000, X11111, rDst);
3446               goto done;
3447            }
3448            default:
3449               break;
3450         }
3451         goto bad;
3452      }
3453      case ARM64in_MovI: {
3454         /* We generate the "preferred form", ORR Xd, XZR, Xm
3455            101 01010 00 0 m 000000 11111 d
3456         */
3457         UInt instr = 0xAA0003E0;
3458         UInt d     = iregEnc(i->ARM64in.MovI.dst);
3459         UInt m     = iregEnc(i->ARM64in.MovI.src);
3460         *p++ = instr | ((m & 31) << 16) | ((d & 31) << 0);
3461         goto done;
3462      }
3463      case ARM64in_Imm64: {
3464         p = imm64_to_ireg( p, iregEnc(i->ARM64in.Imm64.dst),
3465                               i->ARM64in.Imm64.imm64 );
3466         goto done;
3467      }
3468      case ARM64in_LdSt64: {
3469         p = do_load_or_store64( p, i->ARM64in.LdSt64.isLoad,
3470                                 iregEnc(i->ARM64in.LdSt64.rD),
3471                                 i->ARM64in.LdSt64.amode );
3472         goto done;
3473      }
3474      case ARM64in_LdSt32: {
3475         p = do_load_or_store32( p, i->ARM64in.LdSt32.isLoad,
3476                                 iregEnc(i->ARM64in.LdSt32.rD),
3477                                 i->ARM64in.LdSt32.amode );
3478         goto done;
3479      }
3480      case ARM64in_LdSt16: {
3481         p = do_load_or_store16( p, i->ARM64in.LdSt16.isLoad,
3482                                 iregEnc(i->ARM64in.LdSt16.rD),
3483                                 i->ARM64in.LdSt16.amode );
3484         goto done;
3485      }
3486      case ARM64in_LdSt8: {
3487         p = do_load_or_store8( p, i->ARM64in.LdSt8.isLoad,
3488                                iregEnc(i->ARM64in.LdSt8.rD),
3489                                i->ARM64in.LdSt8.amode );
3490         goto done;
3491      }
3492
3493      case ARM64in_XDirect: {
3494         /* NB: what goes on here has to be very closely coordinated
3495            with chainXDirect_ARM64 and unchainXDirect_ARM64 below. */
3496         /* We're generating chain-me requests here, so we need to be
3497            sure this is actually allowed -- no-redir translations
3498            can't use chain-me's.  Hence: */
3499         vassert(disp_cp_chain_me_to_slowEP != NULL);
3500         vassert(disp_cp_chain_me_to_fastEP != NULL);
3501
3502         /* Use ptmp for backpatching conditional jumps. */
3503         UInt* ptmp = NULL;
3504
3505         /* First off, if this is conditional, create a conditional
3506            jump over the rest of it.  Or at least, leave a space for
3507            it that we will shortly fill in. */
3508         if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
3509            vassert(i->ARM64in.XDirect.cond != ARM64cc_NV);
3510            ptmp = p;
3511            *p++ = 0;
3512         }
3513
3514         /* Update the guest PC. */
3515         /* imm64 x9, dstGA */
3516         /* str   x9, amPC */
3517         p = imm64_to_ireg(p, /*x*/9, i->ARM64in.XDirect.dstGA);
3518         p = do_load_or_store64(p, False/*!isLoad*/,
3519                                /*x*/9, i->ARM64in.XDirect.amPC);
3520
3521         /* --- FIRST PATCHABLE BYTE follows --- */
3522         /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
3523            calling to) backs up the return address, so as to find the
3524            address of the first patchable byte.  So: don't change the
3525            number of instructions (5) below. */
3526         /* movw x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[15:0] */
3527         /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[31:15], lsl 16 */
3528         /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[47:32], lsl 32 */
3529         /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[63:48], lsl 48 */
3530         /* blr  x9 */
3531         const void* disp_cp_chain_me
3532                  = i->ARM64in.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
3533                                                : disp_cp_chain_me_to_slowEP;
3534         p = imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)disp_cp_chain_me);
3535         *p++ = 0xD63F0120;
3536         /* --- END of PATCHABLE BYTES --- */
3537
3538         /* Fix up the conditional jump, if there was one. */
3539         if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
3540            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3541            vassert(delta > 0 && delta < 40);
3542            vassert((delta & 3) == 0);
3543            UInt notCond = 1 ^ (UInt)i->ARM64in.XDirect.cond;
3544            vassert(notCond <= 13); /* Neither AL nor NV */
3545            vassert(ptmp != NULL);
3546            delta = delta >> 2;
3547            *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
3548         }
3549         goto done;
3550      }
3551
3552      case ARM64in_XIndir: {
3553         // XIndir is more or less the same as XAssisted, except
3554         // we don't have a trc value to hand back, so there's no
3555         // write to r21
3556         /* Use ptmp for backpatching conditional jumps. */
3557         //UInt* ptmp = NULL;
3558
3559         /* First off, if this is conditional, create a conditional
3560            jump over the rest of it.  Or at least, leave a space for
3561            it that we will shortly fill in. */
3562         if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
3563            vassert(0); //ATC
3564//ZZ             vassert(i->ARMin.XIndir.cond != ARMcc_NV);
3565//ZZ             ptmp = p;
3566//ZZ             *p++ = 0;
3567         }
3568
3569         /* Update the guest PC. */
3570         /* str r-dstGA, amPC */
3571         p = do_load_or_store64(p, False/*!isLoad*/,
3572                                iregEnc(i->ARM64in.XIndir.dstGA),
3573                                i->ARM64in.XIndir.amPC);
3574
3575         /* imm64 x9, VG_(disp_cp_xindir) */
3576         /* br    x9 */
3577         p = imm64_to_ireg(p, /*x*/9, (Addr)disp_cp_xindir);
3578         *p++ = 0xD61F0120; /* br x9 */
3579
3580         /* Fix up the conditional jump, if there was one. */
3581         if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
3582            vassert(0); //ATC
3583//ZZ             Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3584//ZZ             vassert(delta > 0 && delta < 40);
3585//ZZ             vassert((delta & 3) == 0);
3586//ZZ             UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
3587//ZZ             vassert(notCond <= 13); /* Neither AL nor NV */
3588//ZZ             delta = (delta >> 2) - 2;
3589//ZZ             *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3590         }
3591         goto done;
3592      }
3593
3594      case ARM64in_XAssisted: {
3595         /* Use ptmp for backpatching conditional jumps. */
3596         UInt* ptmp = NULL;
3597
3598         /* First off, if this is conditional, create a conditional
3599            jump over the rest of it.  Or at least, leave a space for
3600            it that we will shortly fill in.  I think this can only
3601            ever happen when VEX is driven by the switchbacker. */
3602         if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
3603            vassert(i->ARM64in.XDirect.cond != ARM64cc_NV);
3604            ptmp = p;
3605            *p++ = 0;
3606         }
3607
3608         /* Update the guest PC. */
3609         /* str r-dstGA, amPC */
3610         p = do_load_or_store64(p, False/*!isLoad*/,
3611                                iregEnc(i->ARM64in.XAssisted.dstGA),
3612                                i->ARM64in.XAssisted.amPC);
3613
3614         /* movw r21,  $magic_number */
3615         UInt trcval = 0;
3616         switch (i->ARM64in.XAssisted.jk) {
3617            case Ijk_ClientReq:   trcval = VEX_TRC_JMP_CLIENTREQ;   break;
3618            case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
3619            //case Ijk_Sys_int128:  trcval = VEX_TRC_JMP_SYS_INT128;  break;
3620            case Ijk_Yield:       trcval = VEX_TRC_JMP_YIELD;       break;
3621            //case Ijk_EmWarn:      trcval = VEX_TRC_JMP_EMWARN;      break;
3622            //case Ijk_MapFail:     trcval = VEX_TRC_JMP_MAPFAIL;     break;
3623            case Ijk_NoDecode:    trcval = VEX_TRC_JMP_NODECODE;    break;
3624            case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
3625            case Ijk_FlushDCache: trcval = VEX_TRC_JMP_FLUSHDCACHE; break;
3626            case Ijk_NoRedir:     trcval = VEX_TRC_JMP_NOREDIR;     break;
3627            case Ijk_SigTRAP:     trcval = VEX_TRC_JMP_SIGTRAP;     break;
3628            //case Ijk_SigSEGV:     trcval = VEX_TRC_JMP_SIGSEGV;     break;
3629            case Ijk_Boring:      trcval = VEX_TRC_JMP_BORING;      break;
3630            /* We don't expect to see the following being assisted. */
3631            //case Ijk_Ret:
3632            //case Ijk_Call:
3633            /* fallthrough */
3634            default:
3635               ppIRJumpKind(i->ARM64in.XAssisted.jk);
3636               vpanic("emit_ARM64Instr.ARM64in_XAssisted: "
3637                      "unexpected jump kind");
3638         }
3639         vassert(trcval != 0);
3640         p = imm64_to_ireg(p, /*x*/21, (ULong)trcval);
3641
3642         /* imm64 x9, VG_(disp_cp_xassisted) */
3643         /* br    x9 */
3644         p = imm64_to_ireg(p, /*x*/9, (Addr)disp_cp_xassisted);
3645         *p++ = 0xD61F0120; /* br x9 */
3646
3647         /* Fix up the conditional jump, if there was one. */
3648         if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
3649            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3650            vassert(delta > 0 && delta < 40);
3651            vassert((delta & 3) == 0);
3652            UInt notCond = 1 ^ (UInt)i->ARM64in.XDirect.cond;
3653            vassert(notCond <= 13); /* Neither AL nor NV */
3654            vassert(ptmp != NULL);
3655            delta = delta >> 2;
3656            *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
3657         }
3658         goto done;
3659      }
3660
3661      case ARM64in_CSel: {
3662         /* 100 1101 0100 mm cond 00 nn dd = CSEL Xd, Xn, Xm, cond */
3663         UInt dd   = iregEnc(i->ARM64in.CSel.dst);
3664         UInt nn   = iregEnc(i->ARM64in.CSel.argL);
3665         UInt mm   = iregEnc(i->ARM64in.CSel.argR);
3666         UInt cond = (UInt)i->ARM64in.CSel.cond;
3667         vassert(dd < 31 && nn < 31 && mm < 31 && cond < 16);
3668         *p++ = X_3_8_5_6_5_5(X100, X11010100, mm, cond << 2, nn, dd);
3669         goto done;
3670      }
3671
3672      case ARM64in_Call: {
3673         /* We'll use x9 as a scratch register to put the target
3674            address in. */
3675         if (i->ARM64in.Call.cond != ARM64cc_AL
3676             && i->ARM64in.Call.rloc.pri != RLPri_None) {
3677            /* The call might not happen (it isn't unconditional) and
3678               it returns a result.  In this case we will need to
3679               generate a control flow diamond to put 0x555..555 in
3680               the return register(s) in the case where the call
3681               doesn't happen.  If this ever becomes necessary, maybe
3682               copy code from the 32-bit ARM equivalent.  Until that
3683               day, just give up. */
3684            goto bad;
3685         }
3686
3687         UInt* ptmp = NULL;
3688         if (i->ARM64in.Call.cond != ARM64cc_AL) {
3689            /* Create a hole to put a conditional branch in.  We'll
3690               patch it once we know the branch length. */
3691            ptmp = p;
3692            *p++ = 0;
3693         }
3694
3695         // x9 = &target
3696         p = imm64_to_ireg( (UInt*)p, /*x*/9, (ULong)i->ARM64in.Call.target );
3697         // blr x9
3698         *p++ = 0xD63F0120;
3699
3700         // Patch the hole if necessary
3701         if (i->ARM64in.Call.cond != ARM64cc_AL) {
3702            ULong dist = (ULong)(p - ptmp);
3703            /* imm64_to_ireg produces between 1 and 4 insns, and
3704               then there's the BLR itself.  Hence: */
3705            vassert(dist >= 2 && dist <= 5);
3706            vassert(ptmp != NULL);
3707            // 01010100 simm19 0 cond = B.cond (here + simm19 << 2)
3708            *ptmp = X_8_19_1_4(X01010100, dist, 0,
3709                               1 ^ (UInt)i->ARM64in.Call.cond);
3710         } else {
3711            vassert(ptmp == NULL);
3712         }
3713
3714         goto done;
3715      }
3716
3717      case ARM64in_AddToSP: {
3718         /* 10,0 10001 00 imm12 11111 11111  ADD xsp, xsp, #imm12
3719            11,0 10001 00 imm12 11111 11111  SUB xsp, xsp, #imm12
3720         */
3721         Int simm12 = i->ARM64in.AddToSP.simm;
3722         vassert(-4096 < simm12 && simm12 < 4096);
3723         vassert(0 == (simm12 & 0xF));
3724         if (simm12 >= 0) {
3725            *p++ = X_2_6_2_12_5_5(X10, X010001, X00, simm12, X11111, X11111);
3726         } else {
3727            *p++ = X_2_6_2_12_5_5(X11, X010001, X00, -simm12, X11111, X11111);
3728         }
3729         goto done;
3730      }
3731
3732      case ARM64in_FromSP: {
3733         /* 10,0 10001 00 0..(12)..0 11111 dd  MOV Xd, xsp */
3734         UInt dd = iregEnc(i->ARM64in.FromSP.dst);
3735         vassert(dd < 31);
3736         *p++ = X_2_6_2_12_5_5(X10, X010001, X00, 0, X11111, dd);
3737         goto done;
3738      }
3739
3740      case ARM64in_Mul: {
3741         /* 100 11011 110 mm 011111 nn dd   UMULH Xd, Xn,Xm
3742            100 11011 010 mm 011111 nn dd   SMULH Xd, Xn,Xm
3743            100 11011 000 mm 011111 nn dd   MUL   Xd, Xn,Xm
3744         */
3745         UInt dd = iregEnc(i->ARM64in.Mul.dst);
3746         UInt nn = iregEnc(i->ARM64in.Mul.argL);
3747         UInt mm = iregEnc(i->ARM64in.Mul.argR);
3748         vassert(dd < 31 && nn < 31 && mm < 31);
3749         switch (i->ARM64in.Mul.op) {
3750            case ARM64mul_ZX:
3751               *p++ = X_3_8_5_6_5_5(X100, X11011110, mm, X011111, nn, dd);
3752               goto done;
3753            case ARM64mul_SX:
3754               *p++ = X_3_8_5_6_5_5(X100, X11011010, mm, X011111, nn, dd);
3755               goto done;
3756            case ARM64mul_PLAIN:
3757               *p++ = X_3_8_5_6_5_5(X100, X11011000, mm, X011111, nn, dd);
3758               goto done;
3759            default:
3760               vassert(0);
3761         }
3762         goto bad;
3763      }
3764      case ARM64in_LdrEX: {
3765         /* 085F7C82   ldxrb w2, [x4]
3766            485F7C82   ldxrh w2, [x4]
3767            885F7C82   ldxr  w2, [x4]
3768            C85F7C82   ldxr  x2, [x4]
3769         */
3770         switch (i->ARM64in.LdrEX.szB) {
3771            case 1: *p++ = 0x085F7C82; goto done;
3772            case 2: *p++ = 0x485F7C82; goto done;
3773            case 4: *p++ = 0x885F7C82; goto done;
3774            case 8: *p++ = 0xC85F7C82; goto done;
3775            default: break;
3776         }
3777         goto bad;
3778      }
3779      case ARM64in_StrEX: {
3780         /* 08007C82   stxrb w0, w2, [x4]
3781            48007C82   stxrh w0, w2, [x4]
3782            88007C82   stxr  w0, w2, [x4]
3783            C8007C82   stxr  w0, x2, [x4]
3784         */
3785         switch (i->ARM64in.StrEX.szB) {
3786            case 1: *p++ = 0x08007C82; goto done;
3787            case 2: *p++ = 0x48007C82; goto done;
3788            case 4: *p++ = 0x88007C82; goto done;
3789            case 8: *p++ = 0xC8007C82; goto done;
3790            default: break;
3791         }
3792         goto bad;
3793      }
3794      case ARM64in_MFence: {
3795         *p++ = 0xD5033F9F; /* DSB sy */
3796         *p++ = 0xD5033FBF; /* DMB sy */
3797         *p++ = 0xD5033FDF; /* ISB */
3798         goto done;
3799      }
3800      //case ARM64in_CLREX: {
3801      //   //ATC, but believed to be correct
3802      //   goto bad;
3803      //   *p++ = 0xD5033F5F; /* clrex */
3804      //   goto done;
3805      //}
3806      case ARM64in_VLdStH: {
3807         /* 01 111101 01 imm12 n t   LDR Ht, [Xn|SP, #imm12 * 2]
3808            01 111101 00 imm12 n t   STR Ht, [Xn|SP, #imm12 * 2]
3809         */
3810         UInt hD     = dregEnc(i->ARM64in.VLdStH.hD);
3811         UInt rN     = iregEnc(i->ARM64in.VLdStH.rN);
3812         UInt uimm12 = i->ARM64in.VLdStH.uimm12;
3813         Bool isLD   = i->ARM64in.VLdStH.isLoad;
3814         vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
3815         uimm12 >>= 1;
3816         vassert(uimm12 < (1<<12));
3817         vassert(hD < 32);
3818         vassert(rN < 31);
3819         *p++ = X_2_6_2_12_5_5(X01, X111101, isLD ? X01 : X00,
3820                               uimm12, rN, hD);
3821         goto done;
3822      }
3823      case ARM64in_VLdStS: {
3824         /* 10 111101 01 imm12 n t   LDR St, [Xn|SP, #imm12 * 4]
3825            10 111101 00 imm12 n t   STR St, [Xn|SP, #imm12 * 4]
3826         */
3827         UInt sD     = dregEnc(i->ARM64in.VLdStS.sD);
3828         UInt rN     = iregEnc(i->ARM64in.VLdStS.rN);
3829         UInt uimm12 = i->ARM64in.VLdStS.uimm12;
3830         Bool isLD   = i->ARM64in.VLdStS.isLoad;
3831         vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
3832         uimm12 >>= 2;
3833         vassert(uimm12 < (1<<12));
3834         vassert(sD < 32);
3835         vassert(rN < 31);
3836         *p++ = X_2_6_2_12_5_5(X10, X111101, isLD ? X01 : X00,
3837                               uimm12, rN, sD);
3838         goto done;
3839      }
3840      case ARM64in_VLdStD: {
3841         /* 11 111101 01 imm12 n t   LDR Dt, [Xn|SP, #imm12 * 8]
3842            11 111101 00 imm12 n t   STR Dt, [Xn|SP, #imm12 * 8]
3843         */
3844         UInt dD     = dregEnc(i->ARM64in.VLdStD.dD);
3845         UInt rN     = iregEnc(i->ARM64in.VLdStD.rN);
3846         UInt uimm12 = i->ARM64in.VLdStD.uimm12;
3847         Bool isLD   = i->ARM64in.VLdStD.isLoad;
3848         vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
3849         uimm12 >>= 3;
3850         vassert(uimm12 < (1<<12));
3851         vassert(dD < 32);
3852         vassert(rN < 31);
3853         *p++ = X_2_6_2_12_5_5(X11, X111101, isLD ? X01 : X00,
3854                               uimm12, rN, dD);
3855         goto done;
3856      }
3857      case ARM64in_VLdStQ: {
3858         /* 0100 1100 0000 0000 0111 11 rN rQ   st1 {vQ.2d}, [<rN|SP>]
3859            0100 1100 0100 0000 0111 11 rN rQ   ld1 {vQ.2d}, [<rN|SP>]
3860         */
3861         UInt rQ = qregEnc(i->ARM64in.VLdStQ.rQ);
3862         UInt rN = iregEnc(i->ARM64in.VLdStQ.rN);
3863         vassert(rQ < 32);
3864         vassert(rN < 31);
3865         if (i->ARM64in.VLdStQ.isLoad) {
3866            *p++ = 0x4C407C00 | (rN << 5) | rQ;
3867         } else {
3868            *p++ = 0x4C007C00 | (rN << 5) | rQ;
3869         }
3870         goto done;
3871      }
3872      case ARM64in_VCvtI2F: {
3873         /* 31  28    23 21 20 18  15     9 4
3874            000 11110 00 1  00 010 000000 n d  SCVTF Sd, Wn
3875            000 11110 01 1  00 010 000000 n d  SCVTF Dd, Wn
3876            100 11110 00 1  00 010 000000 n d  SCVTF Sd, Xn
3877            100 11110 01 1  00 010 000000 n d  SCVTF Dd, Xn
3878            000 11110 00 1  00 011 000000 n d  UCVTF Sd, Wn
3879            000 11110 01 1  00 011 000000 n d  UCVTF Dd, Wn
3880            100 11110 00 1  00 011 000000 n d  UCVTF Sd, Xn
3881            100 11110 01 1  00 011 000000 n d  UCVTF Dd, Xn
3882         */
3883         UInt       rN = iregEnc(i->ARM64in.VCvtI2F.rS);
3884         UInt       rD = dregEnc(i->ARM64in.VCvtI2F.rD);
3885         ARM64CvtOp how = i->ARM64in.VCvtI2F.how;
3886         /* Just handle cases as they show up. */
3887         switch (how) {
3888            case ARM64cvt_F32_I32S: /* SCVTF Sd, Wn */
3889               *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X000000, rN, rD);
3890               break;
3891            case ARM64cvt_F64_I32S: /* SCVTF Dd, Wn */
3892               *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X000000, rN, rD);
3893               break;
3894            case ARM64cvt_F32_I64S: /* SCVTF Sd, Xn */
3895               *p++ = X_3_5_8_6_5_5(X100, X11110, X00100010, X000000, rN, rD);
3896               break;
3897            case ARM64cvt_F64_I64S: /* SCVTF Dd, Xn */
3898               *p++ = X_3_5_8_6_5_5(X100, X11110, X01100010, X000000, rN, rD);
3899               break;
3900            case ARM64cvt_F32_I32U: /* UCVTF Sd, Wn */
3901               *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X000000, rN, rD);
3902               break;
3903            case ARM64cvt_F64_I32U: /* UCVTF Dd, Wn */
3904               *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X000000, rN, rD);
3905               break;
3906            case ARM64cvt_F32_I64U: /* UCVTF Sd, Xn */
3907               *p++ = X_3_5_8_6_5_5(X100, X11110, X00100011, X000000, rN, rD);
3908               break;
3909            case ARM64cvt_F64_I64U: /* UCVTF Dd, Xn  */
3910               *p++ = X_3_5_8_6_5_5(X100, X11110, X01100011, X000000, rN, rD);
3911               break;
3912            default:
3913               goto bad; //ATC
3914         }
3915         goto done;
3916      }
3917      case ARM64in_VCvtF2I: {
3918         /*    30       23   20 18  15     9 4
3919            sf 00,11110,0x 1 00 000,000000 n d  FCVTNS Rd, Fn (round to
3920            sf 00,11110,0x 1 00 001,000000 n d  FCVTNU Rd, Fn  nearest)
3921            ---------------- 01 --------------  FCVTP-------- (round to +inf)
3922            ---------------- 10 --------------  FCVTM-------- (round to -inf)
3923            ---------------- 11 --------------  FCVTZ-------- (round to zero)
3924
3925            Rd is Xd when sf==1, Wd when sf==0
3926            Fn is Dn when x==1, Sn when x==0
3927            20:19 carry the rounding mode, using the same encoding as FPCR
3928         */
3929         UInt       rD    = iregEnc(i->ARM64in.VCvtF2I.rD);
3930         UInt       rN    = dregEnc(i->ARM64in.VCvtF2I.rS);
3931         ARM64CvtOp how   = i->ARM64in.VCvtF2I.how;
3932         UChar      armRM = i->ARM64in.VCvtF2I.armRM;
3933         /* Just handle cases as they show up. */
3934         switch (how) {
3935            case ARM64cvt_F64_I32S: /* FCVTxS Wd, Dn */
3936               *p++ = X_3_5_8_6_5_5(X000, X11110, X01100000 | (armRM << 3),
3937                                    X000000, rN, rD);
3938               break;
3939            case ARM64cvt_F64_I32U: /* FCVTxU Wd, Dn */
3940               *p++ = X_3_5_8_6_5_5(X000, X11110, X01100001 | (armRM << 3),
3941                                    X000000, rN, rD);
3942               break;
3943            case ARM64cvt_F64_I64S: /* FCVTxS Xd, Dn */
3944               *p++ = X_3_5_8_6_5_5(X100, X11110, X01100000 | (armRM << 3),
3945                                    X000000, rN, rD);
3946               break;
3947            case ARM64cvt_F64_I64U: /* FCVTxU Xd, Dn */
3948               *p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3),
3949                                    X000000, rN, rD);
3950               break;
3951            case ARM64cvt_F32_I32S: /* FCVTxS Wd, Sn */
3952               *p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3),
3953                                    X000000, rN, rD);
3954               break;
3955            case ARM64cvt_F32_I32U: /* FCVTxU Wd, Sn */
3956               *p++ = X_3_5_8_6_5_5(X000, X11110, X00100001 | (armRM << 3),
3957                                    X000000, rN, rD);
3958               break;
3959            case ARM64cvt_F32_I64S: /* FCVTxS Xd, Sn */
3960               *p++ = X_3_5_8_6_5_5(X100, X11110, X00100000 | (armRM << 3),
3961                                    X000000, rN, rD);
3962               break;
3963            case ARM64cvt_F32_I64U: /* FCVTxU Xd, Sn */
3964               *p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3),
3965                                    X000000, rN, rD);
3966               break;
3967            default:
3968               goto bad; //ATC
3969         }
3970         goto done;
3971      }
3972      case ARM64in_VCvtSD: {
3973         /* 31         23 21    16  14    9 4
3974            000,11110, 00 10001 0,1 10000 n d   FCVT Dd, Sn (S->D)
3975            ---------- 01 ----- 0,0 ---------   FCVT Sd, Dn (D->S)
3976            Rounding, when dst is smaller than src, is per the FPCR.
3977         */
3978         UInt dd = dregEnc(i->ARM64in.VCvtSD.dst);
3979         UInt nn = dregEnc(i->ARM64in.VCvtSD.src);
3980         if (i->ARM64in.VCvtSD.sToD) {
3981            *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X110000, nn, dd);
3982         } else {
3983            *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X010000, nn, dd);
3984         }
3985         goto done;
3986      }
3987      case ARM64in_VCvtHS: {
3988         /* 31         23 21    16  14    9 4
3989            000,11110, 11 10001 0,0 10000 n d   FCVT Sd, Hn (H->S)
3990            ---------- 00 ----- 1,1 ---------   FCVT Hd, Sn (S->H)
3991            Rounding, when dst is smaller than src, is per the FPCR.
3992         */
3993         UInt dd = dregEnc(i->ARM64in.VCvtHS.dst);
3994         UInt nn = dregEnc(i->ARM64in.VCvtHS.src);
3995         if (i->ARM64in.VCvtHS.hToS) {
3996            *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X010000, nn, dd);
3997         } else {
3998            *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X110000, nn, dd);
3999         }
4000         goto done;
4001      }
4002      case ARM64in_VCvtHD: {
4003         /* 31         23 21    16  14    9 4
4004            000,11110, 11 10001 0,1 10000 n d   FCVT Dd, Hn (H->D)
4005            ---------- 01 ----- 1,1 ---------   FCVT Hd, Dn (D->H)
4006            Rounding, when dst is smaller than src, is per the FPCR.
4007         */
4008         UInt dd = dregEnc(i->ARM64in.VCvtHD.dst);
4009         UInt nn = dregEnc(i->ARM64in.VCvtHD.src);
4010         if (i->ARM64in.VCvtHD.hToD) {
4011            *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X110000, nn, dd);
4012         } else {
4013            *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X110000, nn, dd);
4014         }
4015         goto done;
4016      }
4017      case ARM64in_VUnaryD: {
4018         /* 31        23 21     16 14    9 4
4019            000,11110 01 1,0000 0,0 10000 n d  FMOV Dd, Dn (not handled)
4020            ------------------- 0,1 ---------  FABS ------
4021            ------------------- 1,0 ---------  FNEG ------
4022            ------------------- 1,1 ---------  FSQRT -----
4023         */
4024         UInt dD  = dregEnc(i->ARM64in.VUnaryD.dst);
4025         UInt dN  = dregEnc(i->ARM64in.VUnaryD.src);
4026         UInt b16 = 2; /* impossible */
4027         UInt b15 = 2; /* impossible */
4028         switch (i->ARM64in.VUnaryD.op) {
4029            case ARM64fpu_NEG:  b16 = 1; b15 = 0; break;
4030            case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
4031            case ARM64fpu_ABS:  b16 = 0; b15 = 1; break;
4032            default: break;
4033         }
4034         if (b16 < 2 && b15 < 2) {
4035            *p++ = X_3_8_5_6_5_5(X000, X11110011, (X0000 << 1) | b16,
4036                                 (b15 << 5) | X10000, dN, dD);
4037            goto done;
4038         }
4039         /*
4040            000, 11110 01 1,001 11,1 10000 n d  FRINTI Dd, Dm (round per FPCR)
4041         */
4042         if (i->ARM64in.VUnaryD.op == ARM64fpu_RINT) {
4043           *p++ = X_3_8_5_6_5_5(X000, X11110011, X00111, X110000, dN, dD);
4044           goto done;
4045         }
4046         /*
4047            010, 11110 11 1,0000 1,1111 10 n d  FRECPX Dd, Dm
4048         */
4049         if (i->ARM64in.VUnaryD.op == ARM64fpu_RECPX) {
4050           *p++ = X_3_8_5_6_5_5(X010, X11110111, X00001, X111110, dN, dD);
4051           goto done;
4052         }
4053         goto bad;
4054      }
4055      case ARM64in_VUnaryS: {
4056         /* 31        23 21     16 14    9 4
4057            000,11110 00 1,0000 0,0 10000 n d  FMOV Sd, Sn (not handled)
4058            ------------------- 0,1 ---------  FABS ------
4059            ------------------- 1,0 ---------  FNEG ------
4060            ------------------- 1,1 ---------  FSQRT -----
4061         */
4062         UInt sD  = dregEnc(i->ARM64in.VUnaryS.dst);
4063         UInt sN  = dregEnc(i->ARM64in.VUnaryS.src);
4064         UInt b16 = 2; /* impossible */
4065         UInt b15 = 2; /* impossible */
4066         switch (i->ARM64in.VUnaryS.op) {
4067            case ARM64fpu_NEG:  b16 = 1; b15 = 0; break;
4068            case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
4069            case ARM64fpu_ABS:  b16 = 0; b15 = 1; break;
4070            default: break;
4071         }
4072         if (b16 < 2 && b15 < 2) {
4073            *p++ = X_3_8_5_6_5_5(X000, X11110001, (X0000 << 1) | b16,
4074                                 (b15 << 5) | X10000, sN, sD);
4075            goto done;
4076         }
4077         /*
4078            000, 11110 00 1,001 11,1 10000 n d  FRINTI Sd, Sm (round per FPCR)
4079         */
4080         if (i->ARM64in.VUnaryS.op == ARM64fpu_RINT) {
4081           *p++ = X_3_8_5_6_5_5(X000, X11110001, X00111, X110000, sN, sD);
4082           goto done;
4083         }
4084         /*
4085            010, 11110 10 1,0000 1,1111 10 n d  FRECPX Sd, Sm
4086         */
4087         if (i->ARM64in.VUnaryS.op == ARM64fpu_RECPX) {
4088           *p++ = X_3_8_5_6_5_5(X010, X11110101, X00001, X111110, sN, sD);
4089           goto done;
4090         }
4091         goto bad;
4092      }
4093      case ARM64in_VBinD: {
4094         /* 31        23  20 15   11 9 4
4095            ---------------- 0000 ------   FMUL  --------
4096            000 11110 011 m  0001 10 n d   FDIV  Dd,Dn,Dm
4097            ---------------- 0010 ------   FADD  --------
4098            ---------------- 0011 ------   FSUB  --------
4099         */
4100         UInt dD = dregEnc(i->ARM64in.VBinD.dst);
4101         UInt dN = dregEnc(i->ARM64in.VBinD.argL);
4102         UInt dM = dregEnc(i->ARM64in.VBinD.argR);
4103         UInt b1512 = 16; /* impossible */
4104         switch (i->ARM64in.VBinD.op) {
4105            case ARM64fpb_DIV: b1512 = X0001; break;
4106            case ARM64fpb_MUL: b1512 = X0000; break;
4107            case ARM64fpb_SUB: b1512 = X0011; break;
4108            case ARM64fpb_ADD: b1512 = X0010; break;
4109            default: goto bad;
4110         }
4111         vassert(b1512 < 16);
4112         *p++
4113            = X_3_8_5_6_5_5(X000, X11110011, dM, (b1512 << 2) | X10, dN, dD);
4114         goto done;
4115      }
4116      case ARM64in_VBinS: {
4117         /* 31        23  20 15   11 9 4
4118            ---------------- 0000 ------   FMUL  --------
4119            000 11110 001 m  0001 10 n d   FDIV  Dd,Dn,Dm
4120            ---------------- 0010 ------   FADD  --------
4121            ---------------- 0011 ------   FSUB  --------
4122         */
4123         UInt sD = dregEnc(i->ARM64in.VBinS.dst);
4124         UInt sN = dregEnc(i->ARM64in.VBinS.argL);
4125         UInt sM = dregEnc(i->ARM64in.VBinS.argR);
4126         UInt b1512 = 16; /* impossible */
4127         switch (i->ARM64in.VBinS.op) {
4128            case ARM64fpb_DIV: b1512 = X0001; break;
4129            case ARM64fpb_MUL: b1512 = X0000; break;
4130            case ARM64fpb_SUB: b1512 = X0011; break;
4131            case ARM64fpb_ADD: b1512 = X0010; break;
4132            default: goto bad;
4133         }
4134         vassert(b1512 < 16);
4135         *p++
4136            = X_3_8_5_6_5_5(X000, X11110001, sM, (b1512 << 2) | X10, sN, sD);
4137         goto done;
4138      }
4139      case ARM64in_VCmpD: {
4140         /* 000 11110 01 1 m 00 1000 n 00 000  FCMP Dn, Dm */
4141         UInt dN = dregEnc(i->ARM64in.VCmpD.argL);
4142         UInt dM = dregEnc(i->ARM64in.VCmpD.argR);
4143         *p++ = X_3_8_5_6_5_5(X000, X11110011, dM, X001000, dN, X00000);
4144         goto done;
4145      }
4146      case ARM64in_VCmpS: {
4147         /* 000 11110 00 1 m 00 1000 n 00 000  FCMP Sn, Sm */
4148         UInt sN = dregEnc(i->ARM64in.VCmpS.argL);
4149         UInt sM = dregEnc(i->ARM64in.VCmpS.argR);
4150         *p++ = X_3_8_5_6_5_5(X000, X11110001, sM, X001000, sN, X00000);
4151         goto done;
4152      }
4153      case ARM64in_VFCSel: {
4154         /* 31        23 21 20 15   11 9 5
4155            000 11110 00 1  m  cond 11 n d  FCSEL Sd,Sn,Sm,cond
4156            000 11110 01 1  m  cond 11 n d  FCSEL Dd,Dn,Dm,cond
4157         */
4158         Bool isD  = i->ARM64in.VFCSel.isD;
4159         UInt dd   = dregEnc(i->ARM64in.VFCSel.dst);
4160         UInt nn   = dregEnc(i->ARM64in.VFCSel.argL);
4161         UInt mm   = dregEnc(i->ARM64in.VFCSel.argR);
4162         UInt cond = (UInt)i->ARM64in.VFCSel.cond;
4163         vassert(cond < 16);
4164         *p++ = X_3_8_5_6_5_5(X000, isD ? X11110011 : X11110001,
4165                              mm, (cond << 2) | X000011, nn, dd);
4166         goto done;
4167      }
4168      case ARM64in_FPCR: {
4169         Bool toFPCR = i->ARM64in.FPCR.toFPCR;
4170         UInt iReg   = iregEnc(i->ARM64in.FPCR.iReg);
4171         if (toFPCR) {
4172            /* 0xD51B44 000 Rt  MSR fpcr, rT */
4173            *p++ = 0xD51B4400 | (iReg & 0x1F);
4174            goto done;
4175         }
4176         goto bad; // FPCR -> iReg case currently ATC
4177      }
4178      case ARM64in_FPSR: {
4179         Bool toFPSR = i->ARM64in.FPSR.toFPSR;
4180         UInt iReg   = iregEnc(i->ARM64in.FPSR.iReg);
4181         if (toFPSR) {
4182            /* 0xD51B44 001 Rt  MSR fpsr, rT */
4183            *p++ = 0xD51B4420 | (iReg & 0x1F);
4184         } else {
4185            /* 0xD53B44 001 Rt  MRS rT, fpsr */
4186            *p++ = 0xD53B4420 | (iReg & 0x1F);
4187         }
4188         goto done;
4189      }
4190      case ARM64in_VBinV: {
4191         /* 31        23   20 15     9 4
4192            010 01110 11 1 m  100001 n d   ADD Vd.2d,  Vn.2d,  Vm.2d
4193            010 01110 10 1 m  100001 n d   ADD Vd.4s,  Vn.4s,  Vm.4s
4194            010 01110 01 1 m  100001 n d   ADD Vd.8h,  Vn.8h,  Vm.8h
4195            010 01110 00 1 m  100001 n d   ADD Vd.16b, Vn.16b, Vm.16b
4196
4197            011 01110 11 1 m  100001 n d   SUB Vd.2d,  Vn.2d,  Vm.2d
4198            011 01110 10 1 m  100001 n d   SUB Vd.4s,  Vn.4s,  Vm.4s
4199            011 01110 01 1 m  100001 n d   SUB Vd.8h,  Vn.8h,  Vm.8h
4200            011 01110 00 1 m  100001 n d   SUB Vd.16b, Vn.16b, Vm.16b
4201
4202            010 01110 10 1 m  100111 n d   MUL Vd.4s,  Vn.4s,  Vm.4s
4203            010 01110 01 1 m  100111 n d   MUL Vd.8h,  Vn.8h,  Vm.8h
4204            010 01110 00 1 m  100111 n d   MUL Vd.16b, Vn.16b, Vm.16b
4205
4206            010 01110 01 1 m  110101 n d   FADD Vd.2d, Vn.2d, Vm.2d
4207            010 01110 00 1 m  110101 n d   FADD Vd.4s, Vn.4s, Vm.4s
4208            010 01110 11 1 m  110101 n d   FSUB Vd.2d, Vn.2d, Vm.2d
4209            010 01110 10 1 m  110101 n d   FSUB Vd.4s, Vn.4s, Vm.4s
4210
4211            011 01110 01 1 m  110111 n d   FMUL Vd.2d, Vn.2d, Vm.2d
4212            011 01110 00 1 m  110111 n d   FMUL Vd.4s, Vn.4s, Vm.4s
4213            011 01110 01 1 m  111111 n d   FDIV Vd.2d, Vn.2d, Vm.2d
4214            011 01110 00 1 m  111111 n d   FDIV Vd.4s, Vn.4s, Vm.4s
4215
4216            010 01110 01 1 m  111101 n d   FMAX Vd.2d, Vn.2d, Vm.2d
4217            010 01110 00 1 m  111101 n d   FMAX Vd.4s, Vn.4s, Vm.4s
4218            010 01110 11 1 m  111101 n d   FMIN Vd.2d, Vn.2d, Vm.2d
4219            010 01110 10 1 m  111101 n d   FMIN Vd.4s, Vn.4s, Vm.4s
4220
4221            011 01110 10 1 m  011001 n d   UMAX Vd.4s,  Vn.4s,  Vm.4s
4222            011 01110 01 1 m  011001 n d   UMAX Vd.8h,  Vn.8h,  Vm.8h
4223            011 01110 00 1 m  011001 n d   UMAX Vd.16b, Vn.16b, Vm.16b
4224
4225            011 01110 10 1 m  011011 n d   UMIN Vd.4s,  Vn.4s,  Vm.4s
4226            011 01110 01 1 m  011011 n d   UMIN Vd.8h,  Vn.8h,  Vm.8h
4227            011 01110 00 1 m  011011 n d   UMIN Vd.16b, Vn.16b, Vm.16b
4228
4229            010 01110 10 1 m  011001 n d   SMAX Vd.4s,  Vn.4s,  Vm.4s
4230            010 01110 01 1 m  011001 n d   SMAX Vd.8h,  Vn.8h,  Vm.8h
4231            010 01110 00 1 m  011001 n d   SMAX Vd.16b, Vn.16b, Vm.16b
4232
4233            010 01110 10 1 m  011011 n d   SMIN Vd.4s,  Vn.4s,  Vm.4s
4234            010 01110 01 1 m  011011 n d   SMIN Vd.8h,  Vn.8h,  Vm.8h
4235            010 01110 00 1 m  011011 n d   SMIN Vd.16b, Vn.16b, Vm.16b
4236
4237            010 01110 00 1 m  000111 n d   AND Vd, Vn, Vm
4238            010 01110 10 1 m  000111 n d   ORR Vd, Vn, Vm
4239            011 01110 00 1 m  000111 n d   EOR Vd, Vn, Vm
4240
4241            011 01110 11 1 m  100011 n d   CMEQ Vd.2d,  Vn.2d,  Vm.2d
4242            011 01110 10 1 m  100011 n d   CMEQ Vd.4s,  Vn.4s,  Vm.4s
4243            011 01110 01 1 m  100011 n d   CMEQ Vd.8h,  Vn.8h,  Vm.8h
4244            011 01110 00 1 m  100011 n d   CMEQ Vd.16b, Vn.16b, Vm.16b
4245
4246            011 01110 11 1 m  001101 n d   CMHI Vd.2d,  Vn.2d,  Vm.2d
4247            011 01110 10 1 m  001101 n d   CMHI Vd.4s,  Vn.4s,  Vm.4s
4248            011 01110 01 1 m  001101 n d   CMHI Vd.8h,  Vn.8h,  Vm.8h
4249            011 01110 00 1 m  001101 n d   CMHI Vd.16b, Vn.16b, Vm.16b
4250
4251            010 01110 11 1 m  001101 n d   CMGT Vd.2d,  Vn.2d,  Vm.2d
4252            010 01110 10 1 m  001101 n d   CMGT Vd.4s,  Vn.4s,  Vm.4s
4253            010 01110 01 1 m  001101 n d   CMGT Vd.8h,  Vn.8h,  Vm.8h
4254            010 01110 00 1 m  001101 n d   CMGT Vd.16b, Vn.16b, Vm.16b
4255
4256            010 01110 01 1 m  111001 n d   FCMEQ Vd.2d, Vn.2d, Vm.2d
4257            010 01110 00 1 m  111001 n d   FCMEQ Vd.4s, Vn.4s, Vm.4s
4258
4259            011 01110 01 1 m  111001 n d   FCMGE Vd.2d, Vn.2d, Vm.2d
4260            011 01110 00 1 m  111001 n d   FCMGE Vd.4s, Vn.4s, Vm.4s
4261
4262            011 01110 11 1 m  111001 n d   FCMGT Vd.2d, Vn.2d, Vm.2d
4263            011 01110 10 1 m  111001 n d   FCMGT Vd.4s, Vn.4s, Vm.4s
4264
4265            010 01110 00 0 m  000000 n d   TBL Vd.16b, {Vn.16b}, Vm.16b
4266
4267            010 01110 11 0 m  000110 n d   UZP1 Vd.2d,  Vn.2d,  Vm.2d
4268            010 01110 10 0 m  000110 n d   UZP1 Vd.4s,  Vn.4s,  Vm.4s
4269            010 01110 01 0 m  000110 n d   UZP1 Vd.8h,  Vn.8h,  Vm.8h
4270            010 01110 00 0 m  000110 n d   UZP1 Vd.16b, Vn.16b, Vm.16b
4271
4272            010 01110 11 0 m  010110 n d   UZP2 Vd.2d,  Vn.2d,  Vm.2d
4273            010 01110 10 0 m  010110 n d   UZP2 Vd.4s,  Vn.4s,  Vm.4s
4274            010 01110 01 0 m  010110 n d   UZP2 Vd.8h,  Vn.8h,  Vm.8h
4275            010 01110 00 0 m  010110 n d   UZP2 Vd.16b, Vn.16b, Vm.16b
4276
4277            010 01110 10 0 m  001110 n d   ZIP1 Vd.4s,  Vn.4s,  Vm.4s
4278            010 01110 01 0 m  001110 n d   ZIP1 Vd.8h,  Vn.8h,  Vm.8h
4279            010 01110 10 0 m  001110 n d   ZIP1 Vd.16b, Vn.16b, Vm.16b
4280
4281            010 01110 10 0 m  011110 n d   ZIP2 Vd.4s,  Vn.4s,  Vm.4s
4282            010 01110 01 0 m  011110 n d   ZIP2 Vd.8h,  Vn.8h,  Vm.8h
4283            010 01110 10 0 m  011110 n d   ZIP2 Vd.16b, Vn.16b, Vm.16b
4284
4285            011 01110 00 1 m  100111 n d   PMUL Vd.16b, Vn.16b, Vm.16b
4286
4287            000 01110 00 1 m  111000 n d   PMULL Vd.8h, Vn.8b, Vm.8b
4288
4289            001 01110 10 1 m  110000 n d   UMULL Vd.2d, Vn.2s, Vm.2s
4290            001 01110 01 1 m  110000 n d   UMULL Vd.4s, Vn.4h, Vm.4h
4291            001 01110 00 1 m  110000 n d   UMULL Vd.8h, Vn.8b, Vm.8b
4292
4293            000 01110 10 1 m  110000 n d   SMULL Vd.2d, Vn.2s, Vm.2s
4294            000 01110 01 1 m  110000 n d   SMULL Vd.4s, Vn.4h, Vm.4h
4295            000 01110 00 1 m  110000 n d   SMULL Vd.8h, Vn.8b, Vm.8b
4296
4297            010 01110 11 1 m  000011 n d   SQADD Vd.2d,  Vn.2d,  Vm.2d
4298            010 01110 10 1 m  000011 n d   SQADD Vd.4s,  Vn.4s,  Vm.4s
4299            010 01110 01 1 m  000011 n d   SQADD Vd.8h,  Vn.8h,  Vm.8h
4300            010 01110 00 1 m  000011 n d   SQADD Vd.16b, Vn.16b, Vm.16b
4301
4302            011 01110 11 1 m  000011 n d   UQADD Vd.2d,  Vn.2d,  Vm.2d
4303            011 01110 10 1 m  000011 n d   UQADD Vd.4s,  Vn.4s,  Vm.4s
4304            011 01110 01 1 m  000011 n d   UQADD Vd.8h,  Vn.8h,  Vm.8h
4305            011 01110 00 1 m  000011 n d   UQADD Vd.16b, Vn.16b, Vm.16b
4306
4307            010 01110 11 1 m  001011 n d   SQSUB Vd.2d,  Vn.2d,  Vm.2d
4308            010 01110 10 1 m  001011 n d   SQSUB Vd.4s,  Vn.4s,  Vm.4s
4309            010 01110 01 1 m  001011 n d   SQSUB Vd.8h,  Vn.8h,  Vm.8h
4310            010 01110 00 1 m  001011 n d   SQSUB Vd.16b, Vn.16b, Vm.16b
4311
4312            011 01110 11 1 m  001011 n d   UQSUB Vd.2d,  Vn.2d,  Vm.2d
4313            011 01110 10 1 m  001011 n d   UQSUB Vd.4s,  Vn.4s,  Vm.4s
4314            011 01110 01 1 m  001011 n d   UQSUB Vd.8h,  Vn.8h,  Vm.8h
4315            011 01110 00 1 m  001011 n d   UQSUB Vd.16b, Vn.16b, Vm.16b
4316
4317            000 01110 10 1 m  110100 n d   SQDMULL Vd.2d, Vn.2s, Vm.2s
4318            000 01110 01 1 m  110100 n d   SQDMULL Vd.4s, Vn.4h, Vm.4h
4319
4320            010 01110 10 1 m  101101 n d   SQDMULH   Vd.4s,  Vn.4s,  Vm.4s
4321            010 01110 01 1 m  101101 n d   SQDMULH   Vd.8h,  Vn.8h,  Vm.8h
4322            011 01110 10 1 m  101101 n d   SQRDMULH  Vd.4s,  Vn.4s,  Vm.4s
4323            011 01110 10 1 m  101101 n d   SQRDMULH  Vd.8h,  Vn.8h,  Vm.8h
4324
4325            010 01110 sz 1 m  010011 n d   SQSHL@sz   Vd, Vn, Vm
4326            010 01110 sz 1 m  010111 n d   SQRSHL@sz  Vd, Vn, Vm
4327            011 01110 sz 1 m  010011 n d   UQSHL@sz   Vd, Vn, Vm
4328            011 01110 sz 1 m  010111 n d   URQSHL@sz  Vd, Vn, Vm
4329
4330            010 01110 sz 1 m  010001 n d   SSHL@sz   Vd, Vn, Vm
4331            010 01110 sz 1 m  010101 n d   SRSHL@sz  Vd, Vn, Vm
4332            011 01110 sz 1 m  010001 n d   USHL@sz   Vd, Vn, Vm
4333            011 01110 sz 1 m  010101 n d   URSHL@sz  Vd, Vn, Vm
4334
4335            010 01110 01 1 m  111111 n d   FRECPS  Vd.2d, Vn.2d, Vm.2d
4336            010 01110 00 1 m  111111 n d   FRECPS  Vd.4s, Vn.4s, Vm.4s
4337            010 01110 11 1 m  111111 n d   FRSQRTS Vd.2d, Vn.2d, Vm.2d
4338            010 01110 10 1 m  111111 n d   FRSQRTS Vd.4s, Vn.4s, Vm.4s
4339         */
4340         UInt vD = qregEnc(i->ARM64in.VBinV.dst);
4341         UInt vN = qregEnc(i->ARM64in.VBinV.argL);
4342         UInt vM = qregEnc(i->ARM64in.VBinV.argR);
4343         switch (i->ARM64in.VBinV.op) {
4344            case ARM64vecb_ADD64x2:
4345               *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X100001, vN, vD);
4346               break;
4347            case ARM64vecb_ADD32x4:
4348               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100001, vN, vD);
4349               break;
4350            case ARM64vecb_ADD16x8:
4351               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100001, vN, vD);
4352               break;
4353            case ARM64vecb_ADD8x16:
4354               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100001, vN, vD);
4355               break;
4356            case ARM64vecb_SUB64x2:
4357               *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100001, vN, vD);
4358               break;
4359            case ARM64vecb_SUB32x4:
4360               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100001, vN, vD);
4361               break;
4362            case ARM64vecb_SUB16x8:
4363               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100001, vN, vD);
4364               break;
4365            case ARM64vecb_SUB8x16:
4366               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100001, vN, vD);
4367               break;
4368            case ARM64vecb_MUL32x4:
4369               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100111, vN, vD);
4370               break;
4371            case ARM64vecb_MUL16x8:
4372               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100111, vN, vD);
4373               break;
4374            case ARM64vecb_MUL8x16:
4375               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100111, vN, vD);
4376               break;
4377            case ARM64vecb_FADD64x2:
4378               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X110101, vN, vD);
4379               break;
4380            case ARM64vecb_FADD32x4:
4381               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X110101, vN, vD);
4382               break;
4383            case ARM64vecb_FSUB64x2:
4384               *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X110101, vN, vD);
4385               break;
4386            case ARM64vecb_FSUB32x4:
4387               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X110101, vN, vD);
4388               break;
4389            case ARM64vecb_FMUL64x2:
4390               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X110111, vN, vD);
4391               break;
4392            case ARM64vecb_FMUL32x4:
4393               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X110111, vN, vD);
4394               break;
4395            case ARM64vecb_FDIV64x2:
4396               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111111, vN, vD);
4397               break;
4398            case ARM64vecb_FDIV32x4:
4399               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111111, vN, vD);
4400               break;
4401
4402            case ARM64vecb_FMAX64x2:
4403               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111101, vN, vD);
4404               break;
4405            case ARM64vecb_FMAX32x4:
4406               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111101, vN, vD);
4407               break;
4408            case ARM64vecb_FMIN64x2:
4409               *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X111101, vN, vD);
4410               break;
4411            case ARM64vecb_FMIN32x4:
4412               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X111101, vN, vD);
4413               break;
4414
4415            case ARM64vecb_UMAX32x4:
4416               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011001, vN, vD);
4417               break;
4418            case ARM64vecb_UMAX16x8:
4419               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011001, vN, vD);
4420               break;
4421            case ARM64vecb_UMAX8x16:
4422               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011001, vN, vD);
4423               break;
4424
4425            case ARM64vecb_UMIN32x4:
4426               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011011, vN, vD);
4427               break;
4428            case ARM64vecb_UMIN16x8:
4429               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011011, vN, vD);
4430               break;
4431            case ARM64vecb_UMIN8x16:
4432               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011011, vN, vD);
4433               break;
4434
4435            case ARM64vecb_SMAX32x4:
4436               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011001, vN, vD);
4437               break;
4438            case ARM64vecb_SMAX16x8:
4439               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011001, vN, vD);
4440               break;
4441            case ARM64vecb_SMAX8x16:
4442               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011001, vN, vD);
4443               break;
4444
4445            case ARM64vecb_SMIN32x4:
4446               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011011, vN, vD);
4447               break;
4448            case ARM64vecb_SMIN16x8:
4449               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011011, vN, vD);
4450               break;
4451            case ARM64vecb_SMIN8x16:
4452               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011011, vN, vD);
4453               break;
4454
4455            case ARM64vecb_AND:
4456               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000111, vN, vD);
4457               break;
4458            case ARM64vecb_ORR:
4459               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000111, vN, vD);
4460               break;
4461            case ARM64vecb_XOR:
4462               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000111, vN, vD);
4463               break;
4464
4465            case ARM64vecb_CMEQ64x2:
4466               *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100011, vN, vD);
4467               break;
4468            case ARM64vecb_CMEQ32x4:
4469               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100011, vN, vD);
4470               break;
4471            case ARM64vecb_CMEQ16x8:
4472               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100011, vN, vD);
4473               break;
4474            case ARM64vecb_CMEQ8x16:
4475               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100011, vN, vD);
4476               break;
4477
4478            case ARM64vecb_CMHI64x2:
4479               *p++ = X_3_8_5_6_5_5(X011, X01110111, vM,  X001101, vN, vD);
4480               break;
4481            case ARM64vecb_CMHI32x4:
4482               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM,  X001101, vN, vD);
4483               break;
4484            case ARM64vecb_CMHI16x8:
4485               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM,  X001101, vN, vD);
4486               break;
4487            case ARM64vecb_CMHI8x16:
4488               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM,  X001101, vN, vD);
4489               break;
4490
4491            case ARM64vecb_CMGT64x2:
4492               *p++ = X_3_8_5_6_5_5(X010, X01110111, vM,  X001101, vN, vD);
4493               break;
4494            case ARM64vecb_CMGT32x4:
4495               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM,  X001101, vN, vD);
4496               break;
4497            case ARM64vecb_CMGT16x8:
4498               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM,  X001101, vN, vD);
4499               break;
4500            case ARM64vecb_CMGT8x16:
4501               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM,  X001101, vN, vD);
4502               break;
4503
4504            case ARM64vecb_FCMEQ64x2:
4505               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111001, vN, vD);
4506               break;
4507            case ARM64vecb_FCMEQ32x4:
4508               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111001, vN, vD);
4509               break;
4510
4511            case ARM64vecb_FCMGE64x2:
4512               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111001, vN, vD);
4513               break;
4514            case ARM64vecb_FCMGE32x4:
4515               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111001, vN, vD);
4516               break;
4517
4518            case ARM64vecb_FCMGT64x2:
4519               *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X111001, vN, vD);
4520               break;
4521            case ARM64vecb_FCMGT32x4:
4522               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X111001, vN, vD);
4523               break;
4524
4525            case ARM64vecb_TBL1:
4526               *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000000, vN, vD);
4527               break;
4528
4529            case ARM64vecb_UZP164x2:
4530               *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X000110, vN, vD);
4531               break;
4532            case ARM64vecb_UZP132x4:
4533               *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X000110, vN, vD);
4534               break;
4535            case ARM64vecb_UZP116x8:
4536               *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X000110, vN, vD);
4537               break;
4538            case ARM64vecb_UZP18x16:
4539               *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000110, vN, vD);
4540               break;
4541
4542            case ARM64vecb_UZP264x2:
4543               *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X010110, vN, vD);
4544               break;
4545            case ARM64vecb_UZP232x4:
4546               *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X010110, vN, vD);
4547               break;
4548            case ARM64vecb_UZP216x8:
4549               *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X010110, vN, vD);
4550               break;
4551            case ARM64vecb_UZP28x16:
4552               *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X010110, vN, vD);
4553               break;
4554
4555            case ARM64vecb_ZIP132x4:
4556               *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X001110, vN, vD);
4557               break;
4558            case ARM64vecb_ZIP116x8:
4559               *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X001110, vN, vD);
4560               break;
4561            case ARM64vecb_ZIP18x16:
4562               *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X001110, vN, vD);
4563               break;
4564
4565            case ARM64vecb_ZIP232x4:
4566               *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X011110, vN, vD);
4567               break;
4568            case ARM64vecb_ZIP216x8:
4569               *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X011110, vN, vD);
4570               break;
4571            case ARM64vecb_ZIP28x16:
4572               *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X011110, vN, vD);
4573               break;
4574
4575            case ARM64vecb_PMUL8x16:
4576               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100111, vN, vD);
4577               break;
4578
4579            case ARM64vecb_PMULL8x8:
4580               *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X111000, vN, vD);
4581               break;
4582
4583            case ARM64vecb_UMULL2DSS:
4584               *p++ = X_3_8_5_6_5_5(X001, X01110101, vM, X110000, vN, vD);
4585               break;
4586            case ARM64vecb_UMULL4SHH:
4587               *p++ = X_3_8_5_6_5_5(X001, X01110011, vM, X110000, vN, vD);
4588               break;
4589            case ARM64vecb_UMULL8HBB:
4590               *p++ = X_3_8_5_6_5_5(X001, X01110001, vM, X110000, vN, vD);
4591               break;
4592
4593            case ARM64vecb_SMULL2DSS:
4594               *p++ = X_3_8_5_6_5_5(X000, X01110101, vM, X110000, vN, vD);
4595               break;
4596            case ARM64vecb_SMULL4SHH:
4597               *p++ = X_3_8_5_6_5_5(X000, X01110011, vM, X110000, vN, vD);
4598               break;
4599            case ARM64vecb_SMULL8HBB:
4600               *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X110000, vN, vD);
4601               break;
4602
4603            case ARM64vecb_SQADD64x2:
4604               *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X000011, vN, vD);
4605               break;
4606            case ARM64vecb_SQADD32x4:
4607               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000011, vN, vD);
4608               break;
4609            case ARM64vecb_SQADD16x8:
4610               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X000011, vN, vD);
4611               break;
4612            case ARM64vecb_SQADD8x16:
4613               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000011, vN, vD);
4614               break;
4615
4616            case ARM64vecb_UQADD64x2:
4617               *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X000011, vN, vD);
4618               break;
4619            case ARM64vecb_UQADD32x4:
4620               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X000011, vN, vD);
4621               break;
4622            case ARM64vecb_UQADD16x8:
4623               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X000011, vN, vD);
4624               break;
4625            case ARM64vecb_UQADD8x16:
4626               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000011, vN, vD);
4627               break;
4628
4629            case ARM64vecb_SQSUB64x2:
4630               *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X001011, vN, vD);
4631               break;
4632            case ARM64vecb_SQSUB32x4:
4633               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X001011, vN, vD);
4634               break;
4635            case ARM64vecb_SQSUB16x8:
4636               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X001011, vN, vD);
4637               break;
4638            case ARM64vecb_SQSUB8x16:
4639               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X001011, vN, vD);
4640               break;
4641
4642            case ARM64vecb_UQSUB64x2:
4643               *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X001011, vN, vD);
4644               break;
4645            case ARM64vecb_UQSUB32x4:
4646               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X001011, vN, vD);
4647               break;
4648            case ARM64vecb_UQSUB16x8:
4649               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X001011, vN, vD);
4650               break;
4651            case ARM64vecb_UQSUB8x16:
4652               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X001011, vN, vD);
4653               break;
4654
4655            case ARM64vecb_SQDMULL2DSS:
4656               *p++ = X_3_8_5_6_5_5(X000, X01110101, vM, X110100, vN, vD);
4657               break;
4658            case ARM64vecb_SQDMULL4SHH:
4659               *p++ = X_3_8_5_6_5_5(X000, X01110011, vM, X110100, vN, vD);
4660               break;
4661
4662            case ARM64vecb_SQDMULH32x4:
4663               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X101101, vN, vD);
4664               break;
4665            case ARM64vecb_SQDMULH16x8:
4666               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X101101, vN, vD);
4667               break;
4668            case ARM64vecb_SQRDMULH32x4:
4669               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X101101, vN, vD);
4670               break;
4671            case ARM64vecb_SQRDMULH16x8:
4672               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X101101, vN, vD);
4673               break;
4674
4675            case ARM64vecb_SQSHL64x2:
4676               *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010011, vN, vD);
4677               break;
4678            case ARM64vecb_SQSHL32x4:
4679               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010011, vN, vD);
4680               break;
4681            case ARM64vecb_SQSHL16x8:
4682               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010011, vN, vD);
4683               break;
4684            case ARM64vecb_SQSHL8x16:
4685               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010011, vN, vD);
4686               break;
4687
4688            case ARM64vecb_SQRSHL64x2:
4689               *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010111, vN, vD);
4690               break;
4691            case ARM64vecb_SQRSHL32x4:
4692               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010111, vN, vD);
4693               break;
4694            case ARM64vecb_SQRSHL16x8:
4695               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010111, vN, vD);
4696               break;
4697            case ARM64vecb_SQRSHL8x16:
4698               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010111, vN, vD);
4699               break;
4700
4701            case ARM64vecb_UQSHL64x2:
4702               *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010011, vN, vD);
4703               break;
4704            case ARM64vecb_UQSHL32x4:
4705               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010011, vN, vD);
4706               break;
4707            case ARM64vecb_UQSHL16x8:
4708               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010011, vN, vD);
4709               break;
4710            case ARM64vecb_UQSHL8x16:
4711               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010011, vN, vD);
4712               break;
4713
4714            case ARM64vecb_UQRSHL64x2:
4715               *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010111, vN, vD);
4716               break;
4717            case ARM64vecb_UQRSHL32x4:
4718               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010111, vN, vD);
4719               break;
4720            case ARM64vecb_UQRSHL16x8:
4721               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010111, vN, vD);
4722               break;
4723            case ARM64vecb_UQRSHL8x16:
4724               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010111, vN, vD);
4725               break;
4726
4727            case ARM64vecb_SSHL64x2:
4728               *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010001, vN, vD);
4729               break;
4730            case ARM64vecb_SSHL32x4:
4731               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010001, vN, vD);
4732               break;
4733            case ARM64vecb_SSHL16x8:
4734               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010001, vN, vD);
4735               break;
4736            case ARM64vecb_SSHL8x16:
4737               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010001, vN, vD);
4738               break;
4739
4740            case ARM64vecb_SRSHL64x2:
4741               *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010101, vN, vD);
4742               break;
4743            case ARM64vecb_SRSHL32x4:
4744               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010101, vN, vD);
4745               break;
4746            case ARM64vecb_SRSHL16x8:
4747               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010101, vN, vD);
4748               break;
4749            case ARM64vecb_SRSHL8x16:
4750               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010101, vN, vD);
4751               break;
4752
4753            case ARM64vecb_USHL64x2:
4754               *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010001, vN, vD);
4755               break;
4756            case ARM64vecb_USHL32x4:
4757               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010001, vN, vD);
4758               break;
4759            case ARM64vecb_USHL16x8:
4760               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010001, vN, vD);
4761               break;
4762            case ARM64vecb_USHL8x16:
4763               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010001, vN, vD);
4764               break;
4765
4766            case ARM64vecb_URSHL64x2:
4767               *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010101, vN, vD);
4768               break;
4769            case ARM64vecb_URSHL32x4:
4770               *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010101, vN, vD);
4771               break;
4772            case ARM64vecb_URSHL16x8:
4773               *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010101, vN, vD);
4774               break;
4775            case ARM64vecb_URSHL8x16:
4776               *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010101, vN, vD);
4777               break;
4778
4779            case ARM64vecb_FRECPS64x2:
4780               *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111111, vN, vD);
4781               break;
4782            case ARM64vecb_FRECPS32x4:
4783               *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111111, vN, vD);
4784               break;
4785            case ARM64vecb_FRSQRTS64x2:
4786               *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X111111, vN, vD);
4787               break;
4788            case ARM64vecb_FRSQRTS32x4:
4789               *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X111111, vN, vD);
4790               break;
4791
4792            default:
4793               goto bad;
4794         }
4795         goto done;
4796      }
4797      case ARM64in_VModifyV: {
4798         /* 31        23   20    15     9 4
4799            010 01110 sz 1 00000 001110 n d   SUQADD@sz  Vd, Vn
4800            011 01110 sz 1 00000 001110 n d   USQADD@sz  Vd, Vn
4801         */
4802         UInt vD = qregEnc(i->ARM64in.VModifyV.mod);
4803         UInt vN = qregEnc(i->ARM64in.VModifyV.arg);
4804         switch (i->ARM64in.VModifyV.op) {
4805            case ARM64vecmo_SUQADD64x2:
4806               *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X001110, vN, vD);
4807               break;
4808            case ARM64vecmo_SUQADD32x4:
4809               *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X001110, vN, vD);
4810               break;
4811            case ARM64vecmo_SUQADD16x8:
4812               *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X001110, vN, vD);
4813               break;
4814            case ARM64vecmo_SUQADD8x16:
4815               *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X001110, vN, vD);
4816               break;
4817            case ARM64vecmo_USQADD64x2:
4818               *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X001110, vN, vD);
4819               break;
4820            case ARM64vecmo_USQADD32x4:
4821               *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X001110, vN, vD);
4822               break;
4823            case ARM64vecmo_USQADD16x8:
4824               *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X001110, vN, vD);
4825               break;
4826            case ARM64vecmo_USQADD8x16:
4827               *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X001110, vN, vD);
4828               break;
4829            default:
4830               goto bad;
4831         }
4832         goto done;
4833      }
4834      case ARM64in_VUnaryV: {
4835         /* 31        23   20    15     9 4
4836            010 01110 11 1 00000 111110 n d  FABS Vd.2d,  Vn.2d
4837            010 01110 10 1 00000 111110 n d  FABS Vd.4s,  Vn.4s
4838            011 01110 11 1 00000 111110 n d  FNEG Vd.2d,  Vn.2d
4839            011 01110 10 1 00000 111110 n d  FNEG Vd.4s,  Vn.4s
4840            011 01110 00 1 00000 010110 n d  NOT  Vd.16b, Vn.16b
4841
4842            010 01110 11 1 00000 101110 n d  ABS  Vd.2d,  Vn.2d
4843            010 01110 10 1 00000 101110 n d  ABS  Vd.4s,  Vn.4s
4844            010 01110 01 1 00000 101110 n d  ABS  Vd.8h,  Vn.8h
4845            010 01110 00 1 00000 101110 n d  ABS  Vd.16b, Vn.16b
4846
4847            010 01110 10 1 00000 010010 n d  CLS  Vd.4s,  Vn.4s
4848            010 01110 01 1 00000 010010 n d  CLS  Vd.8h,  Vn.8h
4849            010 01110 00 1 00000 010010 n d  CLS  Vd.16b, Vn.16b
4850
4851            011 01110 10 1 00000 010010 n d  CLZ  Vd.4s,  Vn.4s
4852            011 01110 01 1 00000 010010 n d  CLZ  Vd.8h,  Vn.8h
4853            011 01110 00 1 00000 010010 n d  CLZ  Vd.16b, Vn.16b
4854
4855            010 01110 00 1 00000 010110 n d  CNT  Vd.16b, Vn.16b
4856
4857            011 01110 01 1 00000 010110 n d  RBIT  Vd.16b, Vn.16b
4858            010 01110 00 1 00000 000110 n d  REV16 Vd.16b, Vn.16b
4859            011 01110 00 1 00000 000010 n d  REV32 Vd.16b, Vn.16b
4860            011 01110 01 1 00000 000010 n d  REV32 Vd.8h, Vn.8h
4861
4862            010 01110 00 1 00000 000010 n d  REV64 Vd.16b, Vn.16b
4863            010 01110 01 1 00000 000010 n d  REV64 Vd.8h, Vn.8h
4864            010 01110 10 1 00000 000010 n d  REV64 Vd.4s, Vn.4s
4865
4866            010 01110 10 1 00001 110010 n d  URECPE Vd.4s, Vn.4s
4867            011 01110 10 1 00001 110010 n d  URSQRTE Vd.4s, Vn.4s
4868
4869            010 01110 11 1 00001 110110 n d  FRECPE Vd.2d, Vn.2d
4870            010 01110 10 1 00001 110110 n d  FRECPE Vd.4s, Vn.4s
4871
4872            011 01110 11 1 00001 110110 n d  FRECPE Vd.2d, Vn.2d
4873            011 01110 10 1 00001 110110 n d  FRECPE Vd.4s, Vn.4s
4874
4875            011 01110 11 1 00001 111110 n d  FSQRT Vd.2d, Vn.2d
4876            011 01110 10 1 00001 111110 n d  FSQRT Vd.4s, Vn.4s
4877         */
4878         UInt vD = qregEnc(i->ARM64in.VUnaryV.dst);
4879         UInt vN = qregEnc(i->ARM64in.VUnaryV.arg);
4880         switch (i->ARM64in.VUnaryV.op) {
4881            case ARM64vecu_FABS64x2:
4882               *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X111110, vN, vD);
4883               break;
4884            case ARM64vecu_FABS32x4:
4885               *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X111110, vN, vD);
4886               break;
4887            case ARM64vecu_FNEG64x2:
4888               *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X111110, vN, vD);
4889               break;
4890            case ARM64vecu_FNEG32x4:
4891               *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X111110, vN, vD);
4892               break;
4893            case ARM64vecu_NOT:
4894               *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010110, vN, vD);
4895               break;
4896            case ARM64vecu_ABS64x2:
4897               *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X101110, vN, vD);
4898               break;
4899            case ARM64vecu_ABS32x4:
4900               *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X101110, vN, vD);
4901               break;
4902            case ARM64vecu_ABS16x8:
4903               *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X101110, vN, vD);
4904               break;
4905            case ARM64vecu_ABS8x16:
4906               *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X101110, vN, vD);
4907               break;
4908            case ARM64vecu_CLS32x4:
4909               *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X010010, vN, vD);
4910               break;
4911            case ARM64vecu_CLS16x8:
4912               *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X010010, vN, vD);
4913               break;
4914            case ARM64vecu_CLS8x16:
4915               *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010010, vN, vD);
4916               break;
4917            case ARM64vecu_CLZ32x4:
4918               *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X010010, vN, vD);
4919               break;
4920            case ARM64vecu_CLZ16x8:
4921               *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010010, vN, vD);
4922               break;
4923            case ARM64vecu_CLZ8x16:
4924               *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010010, vN, vD);
4925               break;
4926            case ARM64vecu_CNT8x16:
4927               *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010110, vN, vD);
4928               break;
4929            case ARM64vecu_RBIT:
4930               *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010110, vN, vD);
4931               break;
4932            case ARM64vecu_REV1616B:
4933               *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X000110, vN, vD);
4934               break;
4935            case ARM64vecu_REV3216B:
4936               *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X000010, vN, vD);
4937               break;
4938            case ARM64vecu_REV328H:
4939               *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X000010, vN, vD);
4940               break;
4941            case ARM64vecu_REV6416B:
4942               *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X000010, vN, vD);
4943               break;
4944            case ARM64vecu_REV648H:
4945               *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X000010, vN, vD);
4946               break;
4947            case ARM64vecu_REV644S:
4948               *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X000010, vN, vD);
4949               break;
4950            case ARM64vecu_URECPE32x4:
4951               *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110010, vN, vD);
4952               break;
4953            case ARM64vecu_URSQRTE32x4:
4954               *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110010, vN, vD);
4955               break;
4956            case ARM64vecu_FRECPE64x2:
4957               *p++ = X_3_8_5_6_5_5(X010, X01110111, X00001, X110110, vN, vD);
4958               break;
4959            case ARM64vecu_FRECPE32x4:
4960               *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110110, vN, vD);
4961               break;
4962            case ARM64vecu_FRSQRTE64x2:
4963               *p++ = X_3_8_5_6_5_5(X011, X01110111, X00001, X110110, vN, vD);
4964               break;
4965            case ARM64vecu_FRSQRTE32x4:
4966               *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110110, vN, vD);
4967               break;
4968            case ARM64vecu_FSQRT64x2:
4969               *p++ = X_3_8_5_6_5_5(X011, X01110111, X00001, X111110, vN, vD);
4970               break;
4971            case ARM64vecu_FSQRT32x4:
4972               *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X111110, vN, vD);
4973               break;
4974            default:
4975               goto bad;
4976         }
4977         goto done;
4978      }
4979      case ARM64in_VNarrowV: {
4980         /* 31        23 21      15     9 4
4981            000 01110 00 1,00001 001010 n d  XTN Vd.8b, Vn.8h
4982            000 01110 01 1,00001 001010 n d  XTN Vd.4h, Vn.4s
4983            000 01110 10 1,00001 001010 n d  XTN Vd.2s, Vn.2d
4984
4985            001 01110 00 1,00001 001010 n d  SQXTUN Vd.8b, Vn.8h
4986            001 01110 01 1,00001 001010 n d  SQXTUN Vd.4h, Vn.4s
4987            001 01110 10 1,00001 001010 n d  SQXTUN Vd.2s, Vn.2d
4988
4989            000 01110 00 1,00001 010010 n d  SQXTN Vd.8b, Vn.8h
4990            000 01110 01 1,00001 010010 n d  SQXTN Vd.4h, Vn.4s
4991            000 01110 10 1,00001 010010 n d  SQXTN Vd.2s, Vn.2d
4992
4993            001 01110 00 1,00001 010010 n d  UQXTN Vd.8b, Vn.8h
4994            001 01110 01 1,00001 010010 n d  UQXTN Vd.4h, Vn.4s
4995            001 01110 10 1,00001 010010 n d  UQXTN Vd.2s, Vn.2d
4996         */
4997         UInt vD = qregEnc(i->ARM64in.VNarrowV.dst);
4998         UInt vN = qregEnc(i->ARM64in.VNarrowV.src);
4999         UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
5000         vassert(dszBlg2 >= 0 && dszBlg2 <= 2);
5001         switch (i->ARM64in.VNarrowV.op) {
5002            case ARM64vecna_XTN:
5003               *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
5004                                    X00001, X001010, vN, vD);
5005               goto done;
5006            case ARM64vecna_SQXTUN:
5007               *p++ = X_3_8_5_6_5_5(X001, X01110001 | (dszBlg2 << 1),
5008                                    X00001, X001010, vN, vD);
5009               goto done;
5010            case ARM64vecna_SQXTN:
5011               *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
5012                                    X00001, X010010, vN, vD);
5013               goto done;
5014            case ARM64vecna_UQXTN:
5015               *p++ = X_3_8_5_6_5_5(X001, X01110001 | (dszBlg2 << 1),
5016                                    X00001, X010010, vN, vD);
5017               goto done;
5018            default:
5019               break;
5020         }
5021         goto bad;
5022      }
5023      case ARM64in_VShiftImmV: {
5024         /*
5025            011 011110 immh immb 000001 n d  USHR     Vd.T, Vn.T, #sh
5026            010 011110 immh immb 000001 n d  SSHR     Vd.T, Vn.T, #sh
5027
5028            001 011110 immh immb 100101 n d  UQSHRN   ,,#sh
5029            000 011110 immh immb 100101 n d  SQSHRN   ,,#sh
5030            001 011110 immh immb 100001 n d  SQSHRUN  ,,#sh
5031
5032            001 011110 immh immb 100111 n d  UQRSHRN  ,,#sh
5033            000 011110 immh immb 100111 n d  SQRSHRN  ,,#sh
5034            001 011110 immh immb 100011 n d  SQRSHRUN ,,#sh
5035
5036            where immh:immb
5037               = case T of
5038                    2d  | sh in 1..64 -> let xxxxxx = 64-sh in 1xxx:xxx
5039                    4s  | sh in 1..32 -> let  xxxxx = 32-sh in 01xx:xxx
5040                    8h  | sh in 1..16 -> let   xxxx = 16-sh in 001x:xxx
5041                    16b | sh in 1..8  -> let    xxx =  8-sh in 0001:xxx
5042
5043            010 011110 immh immb 010101 n d  SHL    Vd.T, Vn.T, #sh
5044
5045            011 011110 immh immb 011101 n d  UQSHL  Vd.T, Vn.T, #sh
5046            010 011110 immh immb 011101 n d  SQSHL  Vd.T, Vn.T, #sh
5047            011 011110 immh immb 011001 n d  SQSHLU Vd.T, Vn.T, #sh
5048
5049            where immh:immb
5050               = case T of
5051                    2d  | sh in 0..63 -> let xxxxxx = sh in 1xxx:xxx
5052                    4s  | sh in 0..31 -> let  xxxxx = sh in 01xx:xxx
5053                    8h  | sh in 0..15 -> let   xxxx = sh in 001x:xxx
5054                    16b | sh in 0..7  -> let    xxx = sh in 0001:xxx
5055         */
5056         UInt vD   = qregEnc(i->ARM64in.VShiftImmV.dst);
5057         UInt vN   = qregEnc(i->ARM64in.VShiftImmV.src);
5058         UInt sh   = i->ARM64in.VShiftImmV.amt;
5059         UInt tmpl = 0; /* invalid */
5060
5061         const UInt tmpl_USHR
5062            = X_3_6_7_6_5_5(X011, X011110, 0, X000001, vN, vD);
5063         const UInt tmpl_SSHR
5064            = X_3_6_7_6_5_5(X010, X011110, 0, X000001, vN, vD);
5065
5066         const UInt tmpl_UQSHRN
5067            = X_3_6_7_6_5_5(X001, X011110, 0, X100101, vN, vD);
5068         const UInt tmpl_SQSHRN
5069            = X_3_6_7_6_5_5(X000, X011110, 0, X100101, vN, vD);
5070         const UInt tmpl_SQSHRUN
5071            = X_3_6_7_6_5_5(X001, X011110, 0, X100001, vN, vD);
5072
5073         const UInt tmpl_UQRSHRN
5074            = X_3_6_7_6_5_5(X001, X011110, 0, X100111, vN, vD);
5075         const UInt tmpl_SQRSHRN
5076            = X_3_6_7_6_5_5(X000, X011110, 0, X100111, vN, vD);
5077         const UInt tmpl_SQRSHRUN
5078            = X_3_6_7_6_5_5(X001, X011110, 0, X100011, vN, vD);
5079
5080         const UInt tmpl_SHL
5081            = X_3_6_7_6_5_5(X010, X011110, 0, X010101, vN, vD);
5082
5083         const UInt tmpl_UQSHL
5084            = X_3_6_7_6_5_5(X011, X011110, 0, X011101, vN, vD);
5085         const UInt tmpl_SQSHL
5086            = X_3_6_7_6_5_5(X010, X011110, 0, X011101, vN, vD);
5087         const UInt tmpl_SQSHLU
5088            = X_3_6_7_6_5_5(X011, X011110, 0, X011001, vN, vD);
5089
5090         switch (i->ARM64in.VShiftImmV.op) {
5091            case ARM64vecshi_SSHR64x2:    tmpl = tmpl_SSHR;     goto right64x2;
5092            case ARM64vecshi_USHR64x2:    tmpl = tmpl_USHR;     goto right64x2;
5093            case ARM64vecshi_SHL64x2:     tmpl = tmpl_SHL;      goto left64x2;
5094            case ARM64vecshi_UQSHL64x2:   tmpl = tmpl_UQSHL;    goto left64x2;
5095            case ARM64vecshi_SQSHL64x2:   tmpl = tmpl_SQSHL;    goto left64x2;
5096            case ARM64vecshi_SQSHLU64x2:  tmpl = tmpl_SQSHLU;   goto left64x2;
5097            case ARM64vecshi_SSHR32x4:    tmpl = tmpl_SSHR;     goto right32x4;
5098            case ARM64vecshi_USHR32x4:    tmpl = tmpl_USHR;     goto right32x4;
5099            case ARM64vecshi_UQSHRN2SD:   tmpl = tmpl_UQSHRN;   goto right32x4;
5100            case ARM64vecshi_SQSHRN2SD:   tmpl = tmpl_SQSHRN;   goto right32x4;
5101            case ARM64vecshi_SQSHRUN2SD:  tmpl = tmpl_SQSHRUN;  goto right32x4;
5102            case ARM64vecshi_UQRSHRN2SD:  tmpl = tmpl_UQRSHRN;  goto right32x4;
5103            case ARM64vecshi_SQRSHRN2SD:  tmpl = tmpl_SQRSHRN;  goto right32x4;
5104            case ARM64vecshi_SQRSHRUN2SD: tmpl = tmpl_SQRSHRUN; goto right32x4;
5105            case ARM64vecshi_SHL32x4:     tmpl = tmpl_SHL;      goto left32x4;
5106            case ARM64vecshi_UQSHL32x4:   tmpl = tmpl_UQSHL;    goto left32x4;
5107            case ARM64vecshi_SQSHL32x4:   tmpl = tmpl_SQSHL;    goto left32x4;
5108            case ARM64vecshi_SQSHLU32x4:  tmpl = tmpl_SQSHLU;   goto left32x4;
5109            case ARM64vecshi_SSHR16x8:    tmpl = tmpl_SSHR;     goto right16x8;
5110            case ARM64vecshi_USHR16x8:    tmpl = tmpl_USHR;     goto right16x8;
5111            case ARM64vecshi_UQSHRN4HS:   tmpl = tmpl_UQSHRN;   goto right16x8;
5112            case ARM64vecshi_SQSHRN4HS:   tmpl = tmpl_SQSHRN;   goto right16x8;
5113            case ARM64vecshi_SQSHRUN4HS:  tmpl = tmpl_SQSHRUN;  goto right16x8;
5114            case ARM64vecshi_UQRSHRN4HS:  tmpl = tmpl_UQRSHRN;  goto right16x8;
5115            case ARM64vecshi_SQRSHRN4HS:  tmpl = tmpl_SQRSHRN;  goto right16x8;
5116            case ARM64vecshi_SQRSHRUN4HS: tmpl = tmpl_SQRSHRUN; goto right16x8;
5117            case ARM64vecshi_SHL16x8:     tmpl = tmpl_SHL;      goto left16x8;
5118            case ARM64vecshi_UQSHL16x8:   tmpl = tmpl_UQSHL;    goto left16x8;
5119            case ARM64vecshi_SQSHL16x8:   tmpl = tmpl_SQSHL;    goto left16x8;
5120            case ARM64vecshi_SQSHLU16x8:  tmpl = tmpl_SQSHLU;   goto left16x8;
5121            case ARM64vecshi_SSHR8x16:    tmpl = tmpl_SSHR;     goto right8x16;
5122            case ARM64vecshi_USHR8x16:    tmpl = tmpl_USHR;     goto right8x16;
5123            case ARM64vecshi_UQSHRN8BH:   tmpl = tmpl_UQSHRN;   goto right8x16;
5124            case ARM64vecshi_SQSHRN8BH:   tmpl = tmpl_SQSHRN;   goto right8x16;
5125            case ARM64vecshi_SQSHRUN8BH:  tmpl = tmpl_SQSHRUN;  goto right8x16;
5126            case ARM64vecshi_UQRSHRN8BH:  tmpl = tmpl_UQRSHRN;  goto right8x16;
5127            case ARM64vecshi_SQRSHRN8BH:  tmpl = tmpl_SQRSHRN;  goto right8x16;
5128            case ARM64vecshi_SQRSHRUN8BH: tmpl = tmpl_SQRSHRUN; goto right8x16;
5129            case ARM64vecshi_SHL8x16:     tmpl = tmpl_SHL;      goto left8x16;
5130            case ARM64vecshi_UQSHL8x16:   tmpl = tmpl_UQSHL;    goto left8x16;
5131            case ARM64vecshi_SQSHL8x16:   tmpl = tmpl_SQSHL;    goto left8x16;
5132            case ARM64vecshi_SQSHLU8x16:  tmpl = tmpl_SQSHLU;   goto left8x16;
5133
5134            default: break;
5135
5136            right64x2:
5137               if (sh >= 1 && sh <= 63) {
5138                  *p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | (64-sh), 0,0,0);
5139                  goto done;
5140               }
5141               break;
5142            right32x4:
5143               if (sh >= 1 && sh <= 32) {
5144                  *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | (32-sh), 0,0,0);
5145                  goto done;
5146               }
5147               break;
5148            right16x8:
5149               if (sh >= 1 && sh <= 16) {
5150                  *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | (16-sh), 0,0,0);
5151                  goto done;
5152               }
5153               break;
5154            right8x16:
5155               if (sh >= 1 && sh <= 8) {
5156                  *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | (8-sh), 0,0,0);
5157                  goto done;
5158               }
5159               break;
5160
5161            left64x2:
5162               if (sh >= 0 && sh <= 63) {
5163                  *p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | sh, 0,0,0);
5164                  goto done;
5165               }
5166               break;
5167            left32x4:
5168               if (sh >= 0 && sh <= 31) {
5169                  *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | sh, 0,0,0);
5170                  goto done;
5171               }
5172               break;
5173            left16x8:
5174               if (sh >= 0 && sh <= 15) {
5175                  *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | sh, 0,0,0);
5176                  goto done;
5177               }
5178               break;
5179            left8x16:
5180               if (sh >= 0 && sh <= 7) {
5181                  *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | sh, 0,0,0);
5182                  goto done;
5183               }
5184               break;
5185         }
5186         goto bad;
5187      }
5188      case ARM64in_VExtV: {
5189         /*
5190            011 01110 000 m 0 imm4 0 n d  EXT Vd.16b, Vn.16b, Vm.16b, #imm4
5191            where imm4 = the shift amount, in bytes,
5192                  Vn is low operand, Vm is high operand
5193         */
5194         UInt vD   = qregEnc(i->ARM64in.VExtV.dst);
5195         UInt vN   = qregEnc(i->ARM64in.VExtV.srcLo);
5196         UInt vM   = qregEnc(i->ARM64in.VExtV.srcHi);
5197         UInt imm4 = i->ARM64in.VExtV.amtB;
5198         vassert(imm4 >= 1 && imm4 <= 15);
5199         *p++ = X_3_8_5_6_5_5(X011, X01110000, vM,
5200                              X000000 | (imm4 << 1), vN, vD);
5201         goto done;
5202      }
5203      case ARM64in_VImmQ: {
5204         UInt   rQ  = qregEnc(i->ARM64in.VImmQ.rQ);
5205         UShort imm = i->ARM64in.VImmQ.imm;
5206         vassert(rQ < 32);
5207         switch (imm) {
5208            case 0x0000:
5209               // movi rQ.4s, #0x0 == 0x4F 0x00 0x04 000 rQ
5210               *p++ = 0x4F000400 | rQ;
5211               goto done;
5212            case 0x0001:
5213               // movi rQ, #0xFF == 0x2F 0x00 0xE4 001 rQ
5214               *p++ = 0x2F00E420 | rQ;
5215               goto done;
5216            case 0x0003:
5217               // movi rQ, #0xFFFF == 0x2F 0x00 0xE4 011 rQ
5218               *p++ = 0x2F00E460 | rQ;
5219               goto done;
5220            case 0x000F:
5221               // movi rQ, #0xFFFFFFFF == 0x2F 0x00 0xE5 111 rQ
5222               *p++ = 0x2F00E5E0 | rQ;
5223               goto done;
5224            case 0x003F:
5225               // movi rQ, #0xFFFFFFFFFFFF == 0x2F 0x01 0xE7 111 rQ
5226               *p++ = 0x2F01E7E0 | rQ;
5227               goto done;
5228            case 0x00FF:
5229               // movi rQ, #0xFFFFFFFFFFFFFFFF == 0x2F 0x07 0xE7 111 rQ
5230               *p++ = 0x2F07E7E0 | rQ;
5231               goto done;
5232            case 0xFFFF:
5233               // mvni rQ.4s, #0x0 == 0x6F 0x00 0x04 000 rQ
5234               *p++ = 0x6F000400 | rQ;
5235               goto done;
5236            default:
5237               break;
5238         }
5239         goto bad; /* no other handled cases right now */
5240      }
5241
5242      case ARM64in_VDfromX: {
5243         /* INS Vd.D[0], rX
5244            0100 1110 0000 1000 0001 11 nn dd   INS Vd.D[0], Xn
5245            This isn't wonderful, in the sense that the upper half of
5246            the vector register stays unchanged and thus the insn is
5247            data dependent on its output register. */
5248         UInt dd = dregEnc(i->ARM64in.VDfromX.rD);
5249         UInt xx = iregEnc(i->ARM64in.VDfromX.rX);
5250         vassert(xx < 31);
5251         *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
5252         goto done;
5253      }
5254
5255      case ARM64in_VQfromX: {
5256         /* FMOV D, X
5257            1001 1110 0110 0111 0000 00 nn dd   FMOV Vd.D[0], Xn
5258            I think this zeroes out the top half of the destination, which
5259            is what we need.  TODO: can we do VDfromX and VQfromXX better? */
5260         UInt dd = qregEnc(i->ARM64in.VQfromX.rQ);
5261         UInt xx = iregEnc(i->ARM64in.VQfromX.rXlo);
5262         vassert(xx < 31);
5263         *p++ = 0x9E670000 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
5264         goto done;
5265      }
5266
5267      case ARM64in_VQfromXX: {
5268         /* What we really generate is a two insn sequence:
5269               INS Vd.D[0], Xlo; INS Vd.D[1], Xhi
5270            0100 1110 0000 1000 0001 11 nn dd   INS Vd.D[0], Xn
5271            0100 1110 0001 1000 0001 11 nn dd   INS Vd.D[1], Xn
5272         */
5273         UInt qq  = qregEnc(i->ARM64in.VQfromXX.rQ);
5274         UInt xhi = iregEnc(i->ARM64in.VQfromXX.rXhi);
5275         UInt xlo = iregEnc(i->ARM64in.VQfromXX.rXlo);
5276         vassert(xhi < 31 && xlo < 31);
5277         *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xlo,qq);
5278         *p++ = 0x4E181C00 | X_2_6_2_12_5_5(0,0,0,0,xhi,qq);
5279         goto done;
5280      }
5281
5282      case ARM64in_VXfromQ: {
5283         /* 010 0111 0000 01000 001111 nn dd  UMOV Xd, Vn.D[0]
5284            010 0111 0000 11000 001111 nn dd  UMOV Xd, Vn.D[1]
5285         */
5286         UInt dd     = iregEnc(i->ARM64in.VXfromQ.rX);
5287         UInt nn     = qregEnc(i->ARM64in.VXfromQ.rQ);
5288         UInt laneNo = i->ARM64in.VXfromQ.laneNo;
5289         vassert(dd < 31);
5290         vassert(laneNo < 2);
5291         *p++ = X_3_8_5_6_5_5(X010, X01110000,
5292                              laneNo == 1 ? X11000 : X01000, X001111, nn, dd);
5293         goto done;
5294      }
5295
5296      case ARM64in_VXfromDorS: {
5297         /* 000 11110001 00110 000000 n d     FMOV Wd, Sn
5298            100 11110011 00110 000000 n d     FMOV Xd, Dn
5299         */
5300         UInt dd    = iregEnc(i->ARM64in.VXfromDorS.rX);
5301         UInt nn    = dregEnc(i->ARM64in.VXfromDorS.rDorS);
5302         Bool fromD = i->ARM64in.VXfromDorS.fromD;
5303         vassert(dd < 31);
5304         *p++ = X_3_8_5_6_5_5(fromD ? X100 : X000,
5305                              fromD ? X11110011 : X11110001,
5306                              X00110, X000000, nn, dd);
5307         goto done;
5308      }
5309
5310      case ARM64in_VMov: {
5311         /* 000 11110 00 10000 00 10000 n d   FMOV Sd, Sn
5312            000 11110 01 10000 00 10000 n d   FMOV Dd, Dn
5313            010 01110 10 1 n    0 00111 n d   MOV Vd.16b, Vn.16b
5314         */
5315        HReg rD = i->ARM64in.VMov.dst;
5316        HReg rN = i->ARM64in.VMov.src;
5317        switch (i->ARM64in.VMov.szB) {
5318           case 16: {
5319              UInt dd = qregEnc(rD);
5320              UInt nn = qregEnc(rN);
5321              *p++ = X_3_8_5_6_5_5(X010, X01110101, nn, X000111, nn, dd);
5322              goto done;
5323           }
5324           case 8: {
5325              UInt dd = dregEnc(rD);
5326              UInt nn = dregEnc(rN);
5327              *p++ = X_3_8_5_6_5_5(X000, X11110011, X00000, X010000, nn, dd);
5328              goto done;
5329           }
5330           default:
5331              break;
5332        }
5333        goto bad;
5334      }
5335
5336      case ARM64in_EvCheck: {
5337         /* The sequence is fixed (canned) except for the two amodes
5338            supplied by the insn.  These don't change the length, though.
5339            We generate:
5340               ldr  w9, [x21 + #8]   8 == offsetof(host_EvC_COUNTER)
5341               subs w9, w9, #1
5342               str  w9, [x21 + #8]   8 == offsetof(host_EvC_COUNTER)
5343               bpl  nofail
5344               ldr  x9, [x21 + #0]   0 == offsetof(host_EvC_FAILADDR)
5345               br   x9
5346              nofail:
5347         */
5348         UInt* p0 = p;
5349         p = do_load_or_store32(p, True/*isLoad*/, /*w*/9,
5350                                i->ARM64in.EvCheck.amCounter);
5351         *p++ = 0x71000529; /* subs w9, w9, #1 */
5352         p = do_load_or_store32(p, False/*!isLoad*/, /*w*/9,
5353                                i->ARM64in.EvCheck.amCounter);
5354         *p++ = 0x54000065; /* bpl nofail */
5355         p = do_load_or_store64(p, True/*isLoad*/, /*x*/9,
5356                                i->ARM64in.EvCheck.amFailAddr);
5357         *p++ = 0xD61F0120; /* br x9 */
5358         /* nofail: */
5359
5360         /* Crosscheck */
5361         vassert(evCheckSzB_ARM64() == (UChar*)p - (UChar*)p0);
5362         goto done;
5363      }
5364
5365      case ARM64in_ProfInc: {
5366         /* We generate:
5367              (ctrP is unknown now, so use 0x6555'7555'8555'9566 in the
5368              expectation that a later call to LibVEX_patchProfCtr
5369              will be used to fill in the immediate fields once the
5370              right value is known.)
5371            imm64-exactly4 x9, 0x6555'7555'8555'9566
5372            ldr  x8, [x9]
5373            add  x8, x8, #1
5374            str  x8, [x9]
5375         */
5376         p = imm64_to_ireg_EXACTLY4(p, /*x*/9, 0x6555755585559566ULL);
5377         *p++ = 0xF9400128;
5378         *p++ = 0x91000508;
5379         *p++ = 0xF9000128;
5380         /* Tell the caller .. */
5381         vassert(!(*is_profInc));
5382         *is_profInc = True;
5383         goto done;
5384      }
5385
5386      /* ... */
5387      default:
5388         goto bad;
5389    }
5390
5391  bad:
5392   ppARM64Instr(i);
5393   vpanic("emit_ARM64Instr");
5394   /*NOTREACHED*/
5395
5396  done:
5397   vassert(((UChar*)p) - &buf[0] <= 36);
5398   return ((UChar*)p) - &buf[0];
5399}
5400
5401
5402/* How big is an event check?  See case for ARM64in_EvCheck in
5403   emit_ARM64Instr just above.  That crosschecks what this returns, so
5404   we can tell if we're inconsistent. */
5405Int evCheckSzB_ARM64 (void)
5406{
5407   return 24;
5408}
5409
5410
5411/* NB: what goes on here has to be very closely coordinated with the
5412   emitInstr case for XDirect, above. */
5413VexInvalRange chainXDirect_ARM64 ( VexEndness endness_host,
5414                                   void* place_to_chain,
5415                                   const void* disp_cp_chain_me_EXPECTED,
5416                                   const void* place_to_jump_to )
5417{
5418   vassert(endness_host == VexEndnessLE);
5419
5420   /* What we're expecting to see is:
5421        movw x9, disp_cp_chain_me_to_EXPECTED[15:0]
5422        movk x9, disp_cp_chain_me_to_EXPECTED[31:15], lsl 16
5423        movk x9, disp_cp_chain_me_to_EXPECTED[47:32], lsl 32
5424        movk x9, disp_cp_chain_me_to_EXPECTED[63:48], lsl 48
5425        blr  x9
5426      viz
5427        <16 bytes generated by imm64_to_ireg_EXACTLY4>
5428        D6 3F 01 20
5429   */
5430   UInt* p = (UInt*)place_to_chain;
5431   vassert(0 == (3 & (HWord)p));
5432   vassert(is_imm64_to_ireg_EXACTLY4(
5433              p, /*x*/9, (Addr)disp_cp_chain_me_EXPECTED));
5434   vassert(p[4] == 0xD63F0120);
5435
5436   /* And what we want to change it to is:
5437        movw x9, place_to_jump_to[15:0]
5438        movk x9, place_to_jump_to[31:15], lsl 16
5439        movk x9, place_to_jump_to[47:32], lsl 32
5440        movk x9, place_to_jump_to[63:48], lsl 48
5441        br   x9
5442      viz
5443        <16 bytes generated by imm64_to_ireg_EXACTLY4>
5444        D6 1F 01 20
5445
5446      The replacement has the same length as the original.
5447   */
5448   (void)imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)place_to_jump_to);
5449   p[4] = 0xD61F0120;
5450
5451   VexInvalRange vir = {(HWord)p, 20};
5452   return vir;
5453}
5454
5455
5456/* NB: what goes on here has to be very closely coordinated with the
5457   emitInstr case for XDirect, above. */
5458VexInvalRange unchainXDirect_ARM64 ( VexEndness endness_host,
5459                                     void* place_to_unchain,
5460                                     const void* place_to_jump_to_EXPECTED,
5461                                     const void* disp_cp_chain_me )
5462{
5463   vassert(endness_host == VexEndnessLE);
5464
5465   /* What we're expecting to see is:
5466        movw x9, place_to_jump_to_EXPECTED[15:0]
5467        movk x9, place_to_jump_to_EXPECTED[31:15], lsl 16
5468        movk x9, place_to_jump_to_EXPECTED[47:32], lsl 32
5469        movk x9, place_to_jump_to_EXPECTED[63:48], lsl 48
5470        br   x9
5471      viz
5472        <16 bytes generated by imm64_to_ireg_EXACTLY4>
5473        D6 1F 01 20
5474   */
5475   UInt* p = (UInt*)place_to_unchain;
5476   vassert(0 == (3 & (HWord)p));
5477   vassert(is_imm64_to_ireg_EXACTLY4(
5478              p, /*x*/9, (Addr)place_to_jump_to_EXPECTED));
5479   vassert(p[4] == 0xD61F0120);
5480
5481   /* And what we want to change it to is:
5482        movw x9, disp_cp_chain_me_to[15:0]
5483        movk x9, disp_cp_chain_me_to[31:15], lsl 16
5484        movk x9, disp_cp_chain_me_to[47:32], lsl 32
5485        movk x9, disp_cp_chain_me_to[63:48], lsl 48
5486        blr  x9
5487      viz
5488        <16 bytes generated by imm64_to_ireg_EXACTLY4>
5489        D6 3F 01 20
5490   */
5491   (void)imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)disp_cp_chain_me);
5492   p[4] = 0xD63F0120;
5493
5494   VexInvalRange vir = {(HWord)p, 20};
5495   return vir;
5496}
5497
5498
5499/* Patch the counter address into a profile inc point, as previously
5500   created by the ARM64in_ProfInc case for emit_ARM64Instr. */
5501VexInvalRange patchProfInc_ARM64 ( VexEndness endness_host,
5502                                   void*  place_to_patch,
5503                                   const ULong* location_of_counter )
5504{
5505   vassert(sizeof(ULong*) == 8);
5506   vassert(endness_host == VexEndnessLE);
5507   UInt* p = (UInt*)place_to_patch;
5508   vassert(0 == (3 & (HWord)p));
5509   vassert(is_imm64_to_ireg_EXACTLY4(p, /*x*/9, 0x6555755585559566ULL));
5510   vassert(p[4] == 0xF9400128);
5511   vassert(p[5] == 0x91000508);
5512   vassert(p[6] == 0xF9000128);
5513   imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)location_of_counter);
5514   VexInvalRange vir = {(HWord)p, 4*4};
5515   return vir;
5516}
5517
5518/*---------------------------------------------------------------*/
5519/*--- end                                   host_arm64_defs.c ---*/
5520/*---------------------------------------------------------------*/
5521