1
2/*---------------------------------------------------------------*/
3/*--- begin                                   host_arm_defs.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2017 OpenWorks LLP
11      info@open-works.net
12
13   NEON support is
14   Copyright (C) 2010-2017 Samsung Electronics
15   contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16              and Kirill Batuzov <batuzovk@ispras.ru>
17
18   This program is free software; you can redistribute it and/or
19   modify it under the terms of the GNU General Public License as
20   published by the Free Software Foundation; either version 2 of the
21   License, or (at your option) any later version.
22
23   This program is distributed in the hope that it will be useful, but
24   WITHOUT ANY WARRANTY; without even the implied warranty of
25   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
26   General Public License for more details.
27
28   You should have received a copy of the GNU General Public License
29   along with this program; if not, write to the Free Software
30   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31   02110-1301, USA.
32
33   The GNU General Public License is contained in the file COPYING.
34*/
35
36#include "libvex_basictypes.h"
37#include "libvex.h"
38#include "libvex_trc_values.h"
39
40#include "main_util.h"
41#include "host_generic_regs.h"
42#include "host_arm_defs.h"
43
44UInt arm_hwcaps = 0;
45
46
47/* --------- Registers. --------- */
48
49const RRegUniverse* getRRegUniverse_ARM ( void )
50{
51   /* The real-register universe is a big constant, so we just want to
52      initialise it once. */
53   static RRegUniverse rRegUniverse_ARM;
54   static Bool         rRegUniverse_ARM_initted = False;
55
56   /* Handy shorthand, nothing more */
57   RRegUniverse* ru = &rRegUniverse_ARM;
58
59   /* This isn't thread-safe.  Sigh. */
60   if (LIKELY(rRegUniverse_ARM_initted))
61      return ru;
62
63   RRegUniverse__init(ru);
64
65   /* Add the registers.  The initial segment of this array must be
66      those available for allocation by reg-alloc, and those that
67      follow are not available for allocation. */
68
69   /* Callee saves ones are listed first, since we prefer them
70      if they're available. */
71   ru->regs[ru->size++] = hregARM_R4();
72   ru->regs[ru->size++] = hregARM_R5();
73   ru->regs[ru->size++] = hregARM_R6();
74   ru->regs[ru->size++] = hregARM_R7();
75   ru->regs[ru->size++] = hregARM_R10();
76   ru->regs[ru->size++] = hregARM_R11();
77   /* Otherwise we'll have to slum it out with caller-saves ones. */
78   ru->regs[ru->size++] = hregARM_R0();
79   ru->regs[ru->size++] = hregARM_R1();
80   ru->regs[ru->size++] = hregARM_R2();
81   ru->regs[ru->size++] = hregARM_R3();
82   ru->regs[ru->size++] = hregARM_R9();
83   /* FP registers.  Note: these are all callee-save.  Yay!  Hence we
84      don't need to mention them as trashed in getHRegUsage for
85      ARMInstr_Call. */
86   ru->regs[ru->size++] = hregARM_D8();
87   ru->regs[ru->size++] = hregARM_D9();
88   ru->regs[ru->size++] = hregARM_D10();
89   ru->regs[ru->size++] = hregARM_D11();
90   ru->regs[ru->size++] = hregARM_D12();
91   ru->regs[ru->size++] = hregARM_S26();
92   ru->regs[ru->size++] = hregARM_S27();
93   ru->regs[ru->size++] = hregARM_S28();
94   ru->regs[ru->size++] = hregARM_S29();
95   ru->regs[ru->size++] = hregARM_S30();
96   ru->regs[ru->size++] = hregARM_Q8();
97   ru->regs[ru->size++] = hregARM_Q9();
98   ru->regs[ru->size++] = hregARM_Q10();
99   ru->regs[ru->size++] = hregARM_Q11();
100   ru->regs[ru->size++] = hregARM_Q12();
101   ru->allocable = ru->size;
102
103   /* And other regs, not available to the allocator. */
104
105   // unavail: r8 as GSP
106   // r12 is used as a spill/reload temporary
107   // r13 as SP
108   // r14 as LR
109   // r15 as PC
110   //
111   // All in all, we have 11 allocatable integer registers:
112   // 0 1 2 3 4 5 6 7 9 10 11, with r8 dedicated as GSP
113   // and r12 dedicated as a spill temporary.
114   // 13 14 and 15 are not under the allocator's control.
115   //
116   // Hence for the allocatable registers we have:
117   //
118   // callee-saved: 4 5 6 7 (8) 9 10 11
119   // caller-saved: 0 1 2 3
120   // Note 9 is ambiguous: the base EABI does not give an e/r-saved
121   // designation for it, but the Linux instantiation of the ABI
122   // specifies it as callee-saved.
123   //
124   // If the set of available registers changes or if the e/r status
125   // changes, be sure to re-check/sync the definition of
126   // getHRegUsage for ARMInstr_Call too.
127   ru->regs[ru->size++] = hregARM_R8();
128   ru->regs[ru->size++] = hregARM_R12();
129   ru->regs[ru->size++] = hregARM_R13();
130   ru->regs[ru->size++] = hregARM_R14();
131   ru->regs[ru->size++] = hregARM_R15();
132   ru->regs[ru->size++] = hregARM_Q13();
133   ru->regs[ru->size++] = hregARM_Q14();
134   ru->regs[ru->size++] = hregARM_Q15();
135
136   rRegUniverse_ARM_initted = True;
137
138   RRegUniverse__check_is_sane(ru);
139   return ru;
140}
141
142
143void ppHRegARM ( HReg reg )  {
144   Int r;
145   /* Be generic for all virtual regs. */
146   if (hregIsVirtual(reg)) {
147      ppHReg(reg);
148      return;
149   }
150   /* But specific for real regs. */
151   switch (hregClass(reg)) {
152      case HRcInt32:
153         r = hregEncoding(reg);
154         vassert(r >= 0 && r < 16);
155         vex_printf("r%d", r);
156         return;
157      case HRcFlt64:
158         r = hregEncoding(reg);
159         vassert(r >= 0 && r < 32);
160         vex_printf("d%d", r);
161         return;
162      case HRcFlt32:
163         r = hregEncoding(reg);
164         vassert(r >= 0 && r < 32);
165         vex_printf("s%d", r);
166         return;
167      case HRcVec128:
168         r = hregEncoding(reg);
169         vassert(r >= 0 && r < 16);
170         vex_printf("q%d", r);
171         return;
172      default:
173         vpanic("ppHRegARM");
174   }
175}
176
177
178/* --------- Condition codes, ARM encoding. --------- */
179
180const HChar* showARMCondCode ( ARMCondCode cond ) {
181   switch (cond) {
182       case ARMcc_EQ:  return "eq";
183       case ARMcc_NE:  return "ne";
184       case ARMcc_HS:  return "hs";
185       case ARMcc_LO:  return "lo";
186       case ARMcc_MI:  return "mi";
187       case ARMcc_PL:  return "pl";
188       case ARMcc_VS:  return "vs";
189       case ARMcc_VC:  return "vc";
190       case ARMcc_HI:  return "hi";
191       case ARMcc_LS:  return "ls";
192       case ARMcc_GE:  return "ge";
193       case ARMcc_LT:  return "lt";
194       case ARMcc_GT:  return "gt";
195       case ARMcc_LE:  return "le";
196       case ARMcc_AL:  return "al"; // default
197       case ARMcc_NV:  return "nv";
198       default: vpanic("showARMCondCode");
199   }
200}
201
202
203/* --------- Mem AModes: Addressing Mode 1 --------- */
204
205ARMAMode1* ARMAMode1_RI  ( HReg reg, Int simm13 ) {
206   ARMAMode1* am        = LibVEX_Alloc_inline(sizeof(ARMAMode1));
207   am->tag              = ARMam1_RI;
208   am->ARMam1.RI.reg    = reg;
209   am->ARMam1.RI.simm13 = simm13;
210   vassert(-4095 <= simm13 && simm13 <= 4095);
211   return am;
212}
213ARMAMode1* ARMAMode1_RRS ( HReg base, HReg index, UInt shift ) {
214   ARMAMode1* am        = LibVEX_Alloc_inline(sizeof(ARMAMode1));
215   am->tag              = ARMam1_RRS;
216   am->ARMam1.RRS.base  = base;
217   am->ARMam1.RRS.index = index;
218   am->ARMam1.RRS.shift = shift;
219   vassert(0 <= shift && shift <= 3);
220   return am;
221}
222
223void ppARMAMode1 ( ARMAMode1* am ) {
224   switch (am->tag) {
225      case ARMam1_RI:
226         vex_printf("%d(", am->ARMam1.RI.simm13);
227         ppHRegARM(am->ARMam1.RI.reg);
228         vex_printf(")");
229         break;
230      case ARMam1_RRS:
231         vex_printf("(");
232         ppHRegARM(am->ARMam1.RRS.base);
233         vex_printf(",");
234         ppHRegARM(am->ARMam1.RRS.index);
235         vex_printf(",%u)", am->ARMam1.RRS.shift);
236         break;
237      default:
238         vassert(0);
239   }
240}
241
242static void addRegUsage_ARMAMode1 ( HRegUsage* u, ARMAMode1* am ) {
243   switch (am->tag) {
244      case ARMam1_RI:
245         addHRegUse(u, HRmRead, am->ARMam1.RI.reg);
246         return;
247      case ARMam1_RRS:
248         //    addHRegUse(u, HRmRead, am->ARMam1.RRS.base);
249         //    addHRegUse(u, HRmRead, am->ARMam1.RRS.index);
250         //   return;
251      default:
252         vpanic("addRegUsage_ARMAmode1");
253   }
254}
255
256static void mapRegs_ARMAMode1 ( HRegRemap* m, ARMAMode1* am ) {
257   switch (am->tag) {
258      case ARMam1_RI:
259         am->ARMam1.RI.reg = lookupHRegRemap(m, am->ARMam1.RI.reg);
260         return;
261      case ARMam1_RRS:
262         //am->ARMam1.RR.base =lookupHRegRemap(m, am->ARMam1.RR.base);
263         //am->ARMam1.RR.index = lookupHRegRemap(m, am->ARMam1.RR.index);
264         //return;
265      default:
266         vpanic("mapRegs_ARMAmode1");
267   }
268}
269
270
271/* --------- Mem AModes: Addressing Mode 2 --------- */
272
273ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
274   ARMAMode2* am       = LibVEX_Alloc_inline(sizeof(ARMAMode2));
275   am->tag             = ARMam2_RI;
276   am->ARMam2.RI.reg   = reg;
277   am->ARMam2.RI.simm9 = simm9;
278   vassert(-255 <= simm9 && simm9 <= 255);
279   return am;
280}
281ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
282   ARMAMode2* am       = LibVEX_Alloc_inline(sizeof(ARMAMode2));
283   am->tag             = ARMam2_RR;
284   am->ARMam2.RR.base  = base;
285   am->ARMam2.RR.index = index;
286   return am;
287}
288
289void ppARMAMode2 ( ARMAMode2* am ) {
290   switch (am->tag) {
291      case ARMam2_RI:
292         vex_printf("%d(", am->ARMam2.RI.simm9);
293         ppHRegARM(am->ARMam2.RI.reg);
294         vex_printf(")");
295         break;
296      case ARMam2_RR:
297         vex_printf("(");
298         ppHRegARM(am->ARMam2.RR.base);
299         vex_printf(",");
300         ppHRegARM(am->ARMam2.RR.index);
301         vex_printf(")");
302         break;
303      default:
304         vassert(0);
305   }
306}
307
308static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
309   switch (am->tag) {
310      case ARMam2_RI:
311         addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
312         return;
313      case ARMam2_RR:
314         //    addHRegUse(u, HRmRead, am->ARMam2.RR.base);
315         //    addHRegUse(u, HRmRead, am->ARMam2.RR.index);
316         //   return;
317      default:
318         vpanic("addRegUsage_ARMAmode2");
319   }
320}
321
322static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
323   switch (am->tag) {
324      case ARMam2_RI:
325         am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
326         return;
327      case ARMam2_RR:
328         //am->ARMam2.RR.base =lookupHRegRemap(m, am->ARMam2.RR.base);
329         //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index);
330         //return;
331      default:
332         vpanic("mapRegs_ARMAmode2");
333   }
334}
335
336
337/* --------- Mem AModes: Addressing Mode VFP --------- */
338
339ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
340   ARMAModeV* am = LibVEX_Alloc_inline(sizeof(ARMAModeV));
341   vassert(simm11 >= -1020 && simm11 <= 1020);
342   vassert(0 == (simm11 & 3));
343   am->reg    = reg;
344   am->simm11 = simm11;
345   return am;
346}
347
348void ppARMAModeV ( ARMAModeV* am ) {
349   vex_printf("%d(", am->simm11);
350   ppHRegARM(am->reg);
351   vex_printf(")");
352}
353
354static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
355   addHRegUse(u, HRmRead, am->reg);
356}
357
358static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
359   am->reg = lookupHRegRemap(m, am->reg);
360}
361
362
363/* --------- Mem AModes: Addressing Mode Neon ------- */
364
365ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
366   ARMAModeN* am = LibVEX_Alloc_inline(sizeof(ARMAModeN));
367   am->tag = ARMamN_RR;
368   am->ARMamN.RR.rN = rN;
369   am->ARMamN.RR.rM = rM;
370   return am;
371}
372
373ARMAModeN *mkARMAModeN_R ( HReg rN ) {
374   ARMAModeN* am = LibVEX_Alloc_inline(sizeof(ARMAModeN));
375   am->tag = ARMamN_R;
376   am->ARMamN.R.rN = rN;
377   return am;
378}
379
380static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
381   if (am->tag == ARMamN_R) {
382      addHRegUse(u, HRmRead, am->ARMamN.R.rN);
383   } else {
384      addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
385      addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
386   }
387}
388
389static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
390   if (am->tag == ARMamN_R) {
391      am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
392   } else {
393      am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
394      am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
395   }
396}
397
398void ppARMAModeN ( ARMAModeN* am ) {
399   vex_printf("[");
400   if (am->tag == ARMamN_R) {
401      ppHRegARM(am->ARMamN.R.rN);
402   } else {
403      ppHRegARM(am->ARMamN.RR.rN);
404   }
405   vex_printf("]");
406   if (am->tag == ARMamN_RR) {
407      vex_printf(", ");
408      ppHRegARM(am->ARMamN.RR.rM);
409   }
410}
411
412
413/* --------- Reg or imm-8x4 operands --------- */
414
415static UInt ROR32 ( UInt x, UInt sh ) {
416   vassert(sh >= 0 && sh < 32);
417   if (sh == 0)
418      return x;
419   else
420      return (x << (32-sh)) | (x >> sh);
421}
422
423ARMRI84* ARMRI84_I84 ( UShort imm8, UShort imm4 ) {
424   ARMRI84* ri84          = LibVEX_Alloc_inline(sizeof(ARMRI84));
425   ri84->tag              = ARMri84_I84;
426   ri84->ARMri84.I84.imm8 = imm8;
427   ri84->ARMri84.I84.imm4 = imm4;
428   vassert(imm8 >= 0 && imm8 <= 255);
429   vassert(imm4 >= 0 && imm4 <= 15);
430   return ri84;
431}
432ARMRI84* ARMRI84_R ( HReg reg ) {
433   ARMRI84* ri84       = LibVEX_Alloc_inline(sizeof(ARMRI84));
434   ri84->tag           = ARMri84_R;
435   ri84->ARMri84.R.reg = reg;
436   return ri84;
437}
438
439void ppARMRI84 ( ARMRI84* ri84 ) {
440   switch (ri84->tag) {
441      case ARMri84_I84:
442         vex_printf("0x%x", ROR32(ri84->ARMri84.I84.imm8,
443                                  2 * ri84->ARMri84.I84.imm4));
444         break;
445      case ARMri84_R:
446         ppHRegARM(ri84->ARMri84.R.reg);
447         break;
448      default:
449         vassert(0);
450   }
451}
452
453static void addRegUsage_ARMRI84 ( HRegUsage* u, ARMRI84* ri84 ) {
454   switch (ri84->tag) {
455      case ARMri84_I84:
456         return;
457      case ARMri84_R:
458         addHRegUse(u, HRmRead, ri84->ARMri84.R.reg);
459         return;
460      default:
461         vpanic("addRegUsage_ARMRI84");
462   }
463}
464
465static void mapRegs_ARMRI84 ( HRegRemap* m, ARMRI84* ri84 ) {
466   switch (ri84->tag) {
467      case ARMri84_I84:
468         return;
469      case ARMri84_R:
470         ri84->ARMri84.R.reg = lookupHRegRemap(m, ri84->ARMri84.R.reg);
471         return;
472      default:
473         vpanic("mapRegs_ARMRI84");
474   }
475}
476
477
478/* --------- Reg or imm5 operands --------- */
479
480ARMRI5* ARMRI5_I5 ( UInt imm5 ) {
481   ARMRI5* ri5         = LibVEX_Alloc_inline(sizeof(ARMRI5));
482   ri5->tag            = ARMri5_I5;
483   ri5->ARMri5.I5.imm5 = imm5;
484   vassert(imm5 > 0 && imm5 <= 31); // zero is not allowed
485   return ri5;
486}
487ARMRI5* ARMRI5_R ( HReg reg ) {
488   ARMRI5* ri5       = LibVEX_Alloc_inline(sizeof(ARMRI5));
489   ri5->tag          = ARMri5_R;
490   ri5->ARMri5.R.reg = reg;
491   return ri5;
492}
493
494void ppARMRI5 ( ARMRI5* ri5 ) {
495   switch (ri5->tag) {
496      case ARMri5_I5:
497         vex_printf("%u", ri5->ARMri5.I5.imm5);
498         break;
499      case ARMri5_R:
500         ppHRegARM(ri5->ARMri5.R.reg);
501         break;
502      default:
503         vassert(0);
504   }
505}
506
507static void addRegUsage_ARMRI5 ( HRegUsage* u, ARMRI5* ri5 ) {
508   switch (ri5->tag) {
509      case ARMri5_I5:
510         return;
511      case ARMri5_R:
512         addHRegUse(u, HRmRead, ri5->ARMri5.R.reg);
513         return;
514      default:
515         vpanic("addRegUsage_ARMRI5");
516   }
517}
518
519static void mapRegs_ARMRI5 ( HRegRemap* m, ARMRI5* ri5 ) {
520   switch (ri5->tag) {
521      case ARMri5_I5:
522         return;
523      case ARMri5_R:
524         ri5->ARMri5.R.reg = lookupHRegRemap(m, ri5->ARMri5.R.reg);
525         return;
526      default:
527         vpanic("mapRegs_ARMRI5");
528   }
529}
530
531/* -------- Neon Immediate operatnd --------- */
532
533ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
534   ARMNImm* i = LibVEX_Alloc_inline(sizeof(ARMNImm));
535   i->type = type;
536   i->imm8 = imm8;
537   return i;
538}
539
540ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
541   int i, j;
542   ULong y, x = imm->imm8;
543   switch (imm->type) {
544      case 3:
545         x = x << 8; /* fallthrough */
546      case 2:
547         x = x << 8; /* fallthrough */
548      case 1:
549         x = x << 8; /* fallthrough */
550      case 0:
551         return (x << 32) | x;
552      case 5:
553      case 6:
554         if (imm->type == 5)
555            x = x << 8;
556         else
557            x = (x << 8) | x;
558         /* fallthrough */
559      case 4:
560         x = (x << 16) | x;
561         return (x << 32) | x;
562      case 8:
563         x = (x << 8) | 0xFF;
564         /* fallthrough */
565      case 7:
566         x = (x << 8) | 0xFF;
567         return (x << 32) | x;
568      case 9:
569         x = 0;
570         for (i = 7; i >= 0; i--) {
571            y = ((ULong)imm->imm8 >> i) & 1;
572            for (j = 0; j < 8; j++) {
573               x = (x << 1) | y;
574            }
575         }
576         return x;
577      case 10:
578         x |= (x & 0x80) << 5;
579         x |= (~x & 0x40) << 5;
580         x &= 0x187F; /* 0001 1000 0111 1111 */
581         x |= (x & 0x40) << 4;
582         x |= (x & 0x40) << 3;
583         x |= (x & 0x40) << 2;
584         x |= (x & 0x40) << 1;
585         x = x << 19;
586         x = (x << 32) | x;
587         return x;
588      default:
589         vpanic("ARMNImm_to_Imm64");
590   }
591}
592
593ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
594   ARMNImm tmp;
595   if ((x & 0xFFFFFFFF) == (x >> 32)) {
596      if ((x & 0xFFFFFF00) == 0)
597         return ARMNImm_TI(0, x & 0xFF);
598      if ((x & 0xFFFF00FF) == 0)
599         return ARMNImm_TI(1, (x >> 8) & 0xFF);
600      if ((x & 0xFF00FFFF) == 0)
601         return ARMNImm_TI(2, (x >> 16) & 0xFF);
602      if ((x & 0x00FFFFFF) == 0)
603         return ARMNImm_TI(3, (x >> 24) & 0xFF);
604      if ((x & 0xFFFF00FF) == 0xFF)
605         return ARMNImm_TI(7, (x >> 8) & 0xFF);
606      if ((x & 0xFF00FFFF) == 0xFFFF)
607         return ARMNImm_TI(8, (x >> 16) & 0xFF);
608      if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
609         if ((x & 0xFF00) == 0)
610            return ARMNImm_TI(4, x & 0xFF);
611         if ((x & 0x00FF) == 0)
612            return ARMNImm_TI(5, (x >> 8) & 0xFF);
613         if ((x & 0xFF) == ((x >> 8) & 0xFF))
614            return ARMNImm_TI(6, x & 0xFF);
615      }
616      if ((x & 0x7FFFF) == 0) {
617         tmp.type = 10;
618         tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
619         if (ARMNImm_to_Imm64(&tmp) == x)
620            return ARMNImm_TI(tmp.type, tmp.imm8);
621      }
622   } else {
623      /* This can only be type 9. */
624      tmp.imm8 = (((x >> 56) & 1) << 7)
625               | (((x >> 48) & 1) << 6)
626               | (((x >> 40) & 1) << 5)
627               | (((x >> 32) & 1) << 4)
628               | (((x >> 24) & 1) << 3)
629               | (((x >> 16) & 1) << 2)
630               | (((x >>  8) & 1) << 1)
631               | (((x >>  0) & 1) << 0);
632      tmp.type = 9;
633      if (ARMNImm_to_Imm64 (&tmp) == x)
634         return ARMNImm_TI(tmp.type, tmp.imm8);
635   }
636   return NULL;
637}
638
639void ppARMNImm (ARMNImm* i) {
640   ULong x = ARMNImm_to_Imm64(i);
641   vex_printf("0x%llX%llX", x, x);
642}
643
644/* -- Register or scalar operand --- */
645
646ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
647{
648   ARMNRS *p = LibVEX_Alloc_inline(sizeof(ARMNRS));
649   p->tag = tag;
650   p->reg = reg;
651   p->index = index;
652   return p;
653}
654
655void ppARMNRS(ARMNRS *p)
656{
657   ppHRegARM(p->reg);
658   if (p->tag == ARMNRS_Scalar) {
659      vex_printf("[%u]", p->index);
660   }
661}
662
663/* --------- Instructions. --------- */
664
665const HChar* showARMAluOp ( ARMAluOp op ) {
666   switch (op) {
667      case ARMalu_ADD:  return "add";
668      case ARMalu_ADDS: return "adds";
669      case ARMalu_ADC:  return "adc";
670      case ARMalu_SUB:  return "sub";
671      case ARMalu_SUBS: return "subs";
672      case ARMalu_SBC:  return "sbc";
673      case ARMalu_AND:  return "and";
674      case ARMalu_BIC:  return "bic";
675      case ARMalu_OR:   return "orr";
676      case ARMalu_XOR:  return "xor";
677      default: vpanic("showARMAluOp");
678   }
679}
680
681const HChar* showARMShiftOp ( ARMShiftOp op ) {
682   switch (op) {
683      case ARMsh_SHL: return "shl";
684      case ARMsh_SHR: return "shr";
685      case ARMsh_SAR: return "sar";
686      default: vpanic("showARMShiftOp");
687   }
688}
689
690const HChar* showARMUnaryOp ( ARMUnaryOp op ) {
691   switch (op) {
692      case ARMun_NEG: return "neg";
693      case ARMun_NOT: return "not";
694      case ARMun_CLZ: return "clz";
695      default: vpanic("showARMUnaryOp");
696   }
697}
698
699const HChar* showARMMulOp ( ARMMulOp op ) {
700   switch (op) {
701      case ARMmul_PLAIN: return "mul";
702      case ARMmul_ZX:    return "umull";
703      case ARMmul_SX:    return "smull";
704      default: vpanic("showARMMulOp");
705   }
706}
707
708const HChar* showARMVfpOp ( ARMVfpOp op ) {
709   switch (op) {
710      case ARMvfp_ADD: return "add";
711      case ARMvfp_SUB: return "sub";
712      case ARMvfp_MUL: return "mul";
713      case ARMvfp_DIV: return "div";
714      default: vpanic("showARMVfpOp");
715   }
716}
717
718const HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op ) {
719   switch (op) {
720      case ARMvfpu_COPY: return "cpy";
721      case ARMvfpu_NEG:  return "neg";
722      case ARMvfpu_ABS:  return "abs";
723      case ARMvfpu_SQRT: return "sqrt";
724      default: vpanic("showARMVfpUnaryOp");
725   }
726}
727
728const HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
729   switch (op) {
730      case ARMneon_VAND: return "vand";
731      case ARMneon_VORR: return "vorr";
732      case ARMneon_VXOR: return "veor";
733      case ARMneon_VADD: return "vadd";
734      case ARMneon_VRHADDS: return "vrhadd";
735      case ARMneon_VRHADDU: return "vrhadd";
736      case ARMneon_VADDFP: return "vadd";
737      case ARMneon_VPADDFP: return "vpadd";
738      case ARMneon_VABDFP: return "vabd";
739      case ARMneon_VSUB: return "vsub";
740      case ARMneon_VSUBFP: return "vsub";
741      case ARMneon_VMINU: return "vmin";
742      case ARMneon_VMINS: return "vmin";
743      case ARMneon_VMINF: return "vmin";
744      case ARMneon_VMAXU: return "vmax";
745      case ARMneon_VMAXS: return "vmax";
746      case ARMneon_VMAXF: return "vmax";
747      case ARMneon_VQADDU: return "vqadd";
748      case ARMneon_VQADDS: return "vqadd";
749      case ARMneon_VQSUBU: return "vqsub";
750      case ARMneon_VQSUBS: return "vqsub";
751      case ARMneon_VCGTU:  return "vcgt";
752      case ARMneon_VCGTS:  return "vcgt";
753      case ARMneon_VCGTF:  return "vcgt";
754      case ARMneon_VCGEF:  return "vcgt";
755      case ARMneon_VCGEU:  return "vcge";
756      case ARMneon_VCGES:  return "vcge";
757      case ARMneon_VCEQ:  return "vceq";
758      case ARMneon_VCEQF:  return "vceq";
759      case ARMneon_VPADD:   return "vpadd";
760      case ARMneon_VPMINU:   return "vpmin";
761      case ARMneon_VPMINS:   return "vpmin";
762      case ARMneon_VPMINF:   return "vpmin";
763      case ARMneon_VPMAXU:   return "vpmax";
764      case ARMneon_VPMAXS:   return "vpmax";
765      case ARMneon_VPMAXF:   return "vpmax";
766      case ARMneon_VEXT:   return "vext";
767      case ARMneon_VMUL:   return "vmuli";
768      case ARMneon_VMULLU:   return "vmull";
769      case ARMneon_VMULLS:   return "vmull";
770      case ARMneon_VMULP:  return "vmul";
771      case ARMneon_VMULFP:  return "vmul";
772      case ARMneon_VMULLP:  return "vmul";
773      case ARMneon_VQDMULH: return "vqdmulh";
774      case ARMneon_VQRDMULH: return "vqrdmulh";
775      case ARMneon_VQDMULL: return "vqdmull";
776      case ARMneon_VTBL: return "vtbl";
777      case ARMneon_VRECPS: return "vrecps";
778      case ARMneon_VRSQRTS: return "vrecps";
779      case ARMneon_INVALID: return "??invalid??";
780      /* ... */
781      default: vpanic("showARMNeonBinOp");
782   }
783}
784
785const HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
786   switch (op) {
787      case ARMneon_VAND:
788      case ARMneon_VORR:
789      case ARMneon_VXOR:
790         return "";
791      case ARMneon_VADD:
792      case ARMneon_VSUB:
793      case ARMneon_VEXT:
794      case ARMneon_VMUL:
795      case ARMneon_VPADD:
796      case ARMneon_VTBL:
797      case ARMneon_VCEQ:
798         return ".i";
799      case ARMneon_VRHADDU:
800      case ARMneon_VMINU:
801      case ARMneon_VMAXU:
802      case ARMneon_VQADDU:
803      case ARMneon_VQSUBU:
804      case ARMneon_VCGTU:
805      case ARMneon_VCGEU:
806      case ARMneon_VMULLU:
807      case ARMneon_VPMINU:
808      case ARMneon_VPMAXU:
809         return ".u";
810      case ARMneon_VRHADDS:
811      case ARMneon_VMINS:
812      case ARMneon_VMAXS:
813      case ARMneon_VQADDS:
814      case ARMneon_VQSUBS:
815      case ARMneon_VCGTS:
816      case ARMneon_VCGES:
817      case ARMneon_VQDMULL:
818      case ARMneon_VMULLS:
819      case ARMneon_VPMINS:
820      case ARMneon_VPMAXS:
821      case ARMneon_VQDMULH:
822      case ARMneon_VQRDMULH:
823         return ".s";
824      case ARMneon_VMULP:
825      case ARMneon_VMULLP:
826         return ".p";
827      case ARMneon_VADDFP:
828      case ARMneon_VABDFP:
829      case ARMneon_VPADDFP:
830      case ARMneon_VSUBFP:
831      case ARMneon_VMULFP:
832      case ARMneon_VMINF:
833      case ARMneon_VMAXF:
834      case ARMneon_VPMINF:
835      case ARMneon_VPMAXF:
836      case ARMneon_VCGTF:
837      case ARMneon_VCGEF:
838      case ARMneon_VCEQF:
839      case ARMneon_VRECPS:
840      case ARMneon_VRSQRTS:
841         return ".f";
842      /* ... */
843      default: vpanic("showARMNeonBinOpDataType");
844   }
845}
846
847const HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
848   switch (op) {
849      case ARMneon_COPY: return "vmov";
850      case ARMneon_COPYLS: return "vmov";
851      case ARMneon_COPYLU: return "vmov";
852      case ARMneon_COPYN: return "vmov";
853      case ARMneon_COPYQNSS: return "vqmovn";
854      case ARMneon_COPYQNUS: return "vqmovun";
855      case ARMneon_COPYQNUU: return "vqmovn";
856      case ARMneon_NOT: return "vmvn";
857      case ARMneon_EQZ: return "vceq";
858      case ARMneon_CNT: return "vcnt";
859      case ARMneon_CLS: return "vcls";
860      case ARMneon_CLZ: return "vclz";
861      case ARMneon_DUP: return "vdup";
862      case ARMneon_PADDLS: return "vpaddl";
863      case ARMneon_PADDLU: return "vpaddl";
864      case ARMneon_VQSHLNSS: return "vqshl";
865      case ARMneon_VQSHLNUU: return "vqshl";
866      case ARMneon_VQSHLNUS: return "vqshlu";
867      case ARMneon_REV16: return "vrev16";
868      case ARMneon_REV32: return "vrev32";
869      case ARMneon_REV64: return "vrev64";
870      case ARMneon_VCVTFtoU: return "vcvt";
871      case ARMneon_VCVTFtoS: return "vcvt";
872      case ARMneon_VCVTUtoF: return "vcvt";
873      case ARMneon_VCVTStoF: return "vcvt";
874      case ARMneon_VCVTFtoFixedU: return "vcvt";
875      case ARMneon_VCVTFtoFixedS: return "vcvt";
876      case ARMneon_VCVTFixedUtoF: return "vcvt";
877      case ARMneon_VCVTFixedStoF: return "vcvt";
878      case ARMneon_VCVTF32toF16: return "vcvt";
879      case ARMneon_VCVTF16toF32: return "vcvt";
880      case ARMneon_VRECIP: return "vrecip";
881      case ARMneon_VRECIPF: return "vrecipf";
882      case ARMneon_VNEGF: return "vneg";
883      case ARMneon_ABS: return "vabs";
884      case ARMneon_VABSFP: return "vabsfp";
885      case ARMneon_VRSQRTEFP: return "vrsqrtefp";
886      case ARMneon_VRSQRTE: return "vrsqrte";
887      /* ... */
888      default: vpanic("showARMNeonUnOp");
889   }
890}
891
892const HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
893   switch (op) {
894      case ARMneon_COPY:
895      case ARMneon_NOT:
896         return "";
897      case ARMneon_COPYN:
898      case ARMneon_EQZ:
899      case ARMneon_CNT:
900      case ARMneon_DUP:
901      case ARMneon_REV16:
902      case ARMneon_REV32:
903      case ARMneon_REV64:
904         return ".i";
905      case ARMneon_COPYLU:
906      case ARMneon_PADDLU:
907      case ARMneon_COPYQNUU:
908      case ARMneon_VQSHLNUU:
909      case ARMneon_VRECIP:
910      case ARMneon_VRSQRTE:
911         return ".u";
912      case ARMneon_CLS:
913      case ARMneon_CLZ:
914      case ARMneon_COPYLS:
915      case ARMneon_PADDLS:
916      case ARMneon_COPYQNSS:
917      case ARMneon_COPYQNUS:
918      case ARMneon_VQSHLNSS:
919      case ARMneon_VQSHLNUS:
920      case ARMneon_ABS:
921         return ".s";
922      case ARMneon_VRECIPF:
923      case ARMneon_VNEGF:
924      case ARMneon_VABSFP:
925      case ARMneon_VRSQRTEFP:
926         return ".f";
927      case ARMneon_VCVTFtoU: return ".u32.f32";
928      case ARMneon_VCVTFtoS: return ".s32.f32";
929      case ARMneon_VCVTUtoF: return ".f32.u32";
930      case ARMneon_VCVTStoF: return ".f32.s32";
931      case ARMneon_VCVTF16toF32: return ".f32.f16";
932      case ARMneon_VCVTF32toF16: return ".f16.f32";
933      case ARMneon_VCVTFtoFixedU: return ".u32.f32";
934      case ARMneon_VCVTFtoFixedS: return ".s32.f32";
935      case ARMneon_VCVTFixedUtoF: return ".f32.u32";
936      case ARMneon_VCVTFixedStoF: return ".f32.s32";
937      /* ... */
938      default: vpanic("showARMNeonUnOpDataType");
939   }
940}
941
942const HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
943   switch (op) {
944      case ARMneon_SETELEM: return "vmov";
945      case ARMneon_GETELEMU: return "vmov";
946      case ARMneon_GETELEMS: return "vmov";
947      case ARMneon_VDUP: return "vdup";
948      /* ... */
949      default: vpanic("showARMNeonUnarySOp");
950   }
951}
952
953const HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
954   switch (op) {
955      case ARMneon_SETELEM:
956      case ARMneon_VDUP:
957         return ".i";
958      case ARMneon_GETELEMS:
959         return ".s";
960      case ARMneon_GETELEMU:
961         return ".u";
962      /* ... */
963      default: vpanic("showARMNeonUnarySOp");
964   }
965}
966
967const HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
968   switch (op) {
969      case ARMneon_VSHL: return "vshl";
970      case ARMneon_VSAL: return "vshl";
971      case ARMneon_VQSHL: return "vqshl";
972      case ARMneon_VQSAL: return "vqshl";
973      /* ... */
974      default: vpanic("showARMNeonShiftOp");
975   }
976}
977
978const HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
979   switch (op) {
980      case ARMneon_VSHL:
981      case ARMneon_VQSHL:
982         return ".u";
983      case ARMneon_VSAL:
984      case ARMneon_VQSAL:
985         return ".s";
986      /* ... */
987      default: vpanic("showARMNeonShiftOpDataType");
988   }
989}
990
991const HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
992   switch (op) {
993      case ARMneon_TRN: return "vtrn";
994      case ARMneon_ZIP: return "vzip";
995      case ARMneon_UZP: return "vuzp";
996      /* ... */
997      default: vpanic("showARMNeonDualOp");
998   }
999}
1000
1001const HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
1002   switch (op) {
1003      case ARMneon_TRN:
1004      case ARMneon_ZIP:
1005      case ARMneon_UZP:
1006         return "i";
1007      /* ... */
1008      default: vpanic("showARMNeonDualOp");
1009   }
1010}
1011
1012static const HChar* showARMNeonDataSize_wrk ( UInt size )
1013{
1014   switch (size) {
1015      case 0: return "8";
1016      case 1: return "16";
1017      case 2: return "32";
1018      case 3: return "64";
1019      default: vpanic("showARMNeonDataSize");
1020   }
1021}
1022
1023static const HChar* showARMNeonDataSize ( const ARMInstr* i )
1024{
1025   switch (i->tag) {
1026      case ARMin_NBinary:
1027         if (i->ARMin.NBinary.op == ARMneon_VEXT)
1028            return "8";
1029         if (i->ARMin.NBinary.op == ARMneon_VAND ||
1030             i->ARMin.NBinary.op == ARMneon_VORR ||
1031             i->ARMin.NBinary.op == ARMneon_VXOR)
1032            return "";
1033         return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
1034      case ARMin_NUnary:
1035         if (i->ARMin.NUnary.op == ARMneon_COPY ||
1036             i->ARMin.NUnary.op == ARMneon_NOT ||
1037             i->ARMin.NUnary.op == ARMneon_VCVTF32toF16||
1038             i->ARMin.NUnary.op == ARMneon_VCVTF16toF32||
1039             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
1040             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
1041             i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
1042             i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
1043             i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
1044             i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
1045             i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
1046             i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
1047            return "";
1048         if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
1049             i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
1050             i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
1051            UInt size;
1052            size = i->ARMin.NUnary.size;
1053            if (size & 0x40)
1054               return "64";
1055            if (size & 0x20)
1056               return "32";
1057            if (size & 0x10)
1058               return "16";
1059            if (size & 0x08)
1060               return "8";
1061            vpanic("showARMNeonDataSize");
1062         }
1063         return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
1064      case ARMin_NUnaryS:
1065         if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
1066            int size;
1067            size = i->ARMin.NUnaryS.size;
1068            if ((size & 1) == 1)
1069               return "8";
1070            if ((size & 3) == 2)
1071               return "16";
1072            if ((size & 7) == 4)
1073               return "32";
1074            vpanic("showARMNeonDataSize");
1075         }
1076         return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
1077      case ARMin_NShift:
1078         return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
1079      case ARMin_NDual:
1080         return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
1081      default:
1082         vpanic("showARMNeonDataSize");
1083   }
1084}
1085
1086ARMInstr* ARMInstr_Alu ( ARMAluOp op,
1087                         HReg dst, HReg argL, ARMRI84* argR ) {
1088   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1089   i->tag            = ARMin_Alu;
1090   i->ARMin.Alu.op   = op;
1091   i->ARMin.Alu.dst  = dst;
1092   i->ARMin.Alu.argL = argL;
1093   i->ARMin.Alu.argR = argR;
1094   return i;
1095}
1096ARMInstr* ARMInstr_Shift  ( ARMShiftOp op,
1097                            HReg dst, HReg argL, ARMRI5* argR ) {
1098   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1099   i->tag              = ARMin_Shift;
1100   i->ARMin.Shift.op   = op;
1101   i->ARMin.Shift.dst  = dst;
1102   i->ARMin.Shift.argL = argL;
1103   i->ARMin.Shift.argR = argR;
1104   return i;
1105}
1106ARMInstr* ARMInstr_Unary ( ARMUnaryOp op, HReg dst, HReg src ) {
1107   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1108   i->tag             = ARMin_Unary;
1109   i->ARMin.Unary.op  = op;
1110   i->ARMin.Unary.dst = dst;
1111   i->ARMin.Unary.src = src;
1112   return i;
1113}
1114ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg argL, ARMRI84* argR ) {
1115   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1116   i->tag                  = ARMin_CmpOrTst;
1117   i->ARMin.CmpOrTst.isCmp = isCmp;
1118   i->ARMin.CmpOrTst.argL  = argL;
1119   i->ARMin.CmpOrTst.argR  = argR;
1120   return i;
1121}
1122ARMInstr* ARMInstr_Mov ( HReg dst, ARMRI84* src ) {
1123   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1124   i->tag           = ARMin_Mov;
1125   i->ARMin.Mov.dst = dst;
1126   i->ARMin.Mov.src = src;
1127   return i;
1128}
1129ARMInstr* ARMInstr_Imm32  ( HReg dst, UInt imm32 ) {
1130   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1131   i->tag               = ARMin_Imm32;
1132   i->ARMin.Imm32.dst   = dst;
1133   i->ARMin.Imm32.imm32 = imm32;
1134   return i;
1135}
1136ARMInstr* ARMInstr_LdSt32 ( ARMCondCode cc,
1137                            Bool isLoad, HReg rD, ARMAMode1* amode ) {
1138   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1139   i->tag                 = ARMin_LdSt32;
1140   i->ARMin.LdSt32.cc     = cc;
1141   i->ARMin.LdSt32.isLoad = isLoad;
1142   i->ARMin.LdSt32.rD     = rD;
1143   i->ARMin.LdSt32.amode  = amode;
1144   vassert(cc != ARMcc_NV);
1145   return i;
1146}
1147ARMInstr* ARMInstr_LdSt16 ( ARMCondCode cc,
1148                            Bool isLoad, Bool signedLoad,
1149                            HReg rD, ARMAMode2* amode ) {
1150   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1151   i->tag                     = ARMin_LdSt16;
1152   i->ARMin.LdSt16.cc         = cc;
1153   i->ARMin.LdSt16.isLoad     = isLoad;
1154   i->ARMin.LdSt16.signedLoad = signedLoad;
1155   i->ARMin.LdSt16.rD         = rD;
1156   i->ARMin.LdSt16.amode      = amode;
1157   vassert(cc != ARMcc_NV);
1158   return i;
1159}
1160ARMInstr* ARMInstr_LdSt8U ( ARMCondCode cc,
1161                            Bool isLoad, HReg rD, ARMAMode1* amode ) {
1162   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1163   i->tag                 = ARMin_LdSt8U;
1164   i->ARMin.LdSt8U.cc     = cc;
1165   i->ARMin.LdSt8U.isLoad = isLoad;
1166   i->ARMin.LdSt8U.rD     = rD;
1167   i->ARMin.LdSt8U.amode  = amode;
1168   vassert(cc != ARMcc_NV);
1169   return i;
1170}
1171ARMInstr* ARMInstr_Ld8S ( ARMCondCode cc, HReg rD, ARMAMode2* amode ) {
1172   ARMInstr* i         = LibVEX_Alloc_inline(sizeof(ARMInstr));
1173   i->tag              = ARMin_Ld8S;
1174   i->ARMin.Ld8S.cc    = cc;
1175   i->ARMin.Ld8S.rD    = rD;
1176   i->ARMin.Ld8S.amode = amode;
1177   vassert(cc != ARMcc_NV);
1178   return i;
1179}
1180ARMInstr* ARMInstr_XDirect ( Addr32 dstGA, ARMAMode1* amR15T,
1181                             ARMCondCode cond, Bool toFastEP ) {
1182   ARMInstr* i               = LibVEX_Alloc_inline(sizeof(ARMInstr));
1183   i->tag                    = ARMin_XDirect;
1184   i->ARMin.XDirect.dstGA    = dstGA;
1185   i->ARMin.XDirect.amR15T   = amR15T;
1186   i->ARMin.XDirect.cond     = cond;
1187   i->ARMin.XDirect.toFastEP = toFastEP;
1188   return i;
1189}
1190ARMInstr* ARMInstr_XIndir ( HReg dstGA, ARMAMode1* amR15T,
1191                            ARMCondCode cond ) {
1192   ARMInstr* i            = LibVEX_Alloc_inline(sizeof(ARMInstr));
1193   i->tag                 = ARMin_XIndir;
1194   i->ARMin.XIndir.dstGA  = dstGA;
1195   i->ARMin.XIndir.amR15T = amR15T;
1196   i->ARMin.XIndir.cond   = cond;
1197   return i;
1198}
1199ARMInstr* ARMInstr_XAssisted ( HReg dstGA, ARMAMode1* amR15T,
1200                               ARMCondCode cond, IRJumpKind jk ) {
1201   ARMInstr* i               = LibVEX_Alloc_inline(sizeof(ARMInstr));
1202   i->tag                    = ARMin_XAssisted;
1203   i->ARMin.XAssisted.dstGA  = dstGA;
1204   i->ARMin.XAssisted.amR15T = amR15T;
1205   i->ARMin.XAssisted.cond   = cond;
1206   i->ARMin.XAssisted.jk     = jk;
1207   return i;
1208}
1209ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) {
1210   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1211   i->tag             = ARMin_CMov;
1212   i->ARMin.CMov.cond = cond;
1213   i->ARMin.CMov.dst  = dst;
1214   i->ARMin.CMov.src  = src;
1215   vassert(cond != ARMcc_AL);
1216   return i;
1217}
1218ARMInstr* ARMInstr_Call ( ARMCondCode cond, Addr32 target, Int nArgRegs,
1219                          RetLoc rloc ) {
1220   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1221   i->tag                 = ARMin_Call;
1222   i->ARMin.Call.cond     = cond;
1223   i->ARMin.Call.target   = target;
1224   i->ARMin.Call.nArgRegs = nArgRegs;
1225   i->ARMin.Call.rloc     = rloc;
1226   vassert(is_sane_RetLoc(rloc));
1227   return i;
1228}
1229ARMInstr* ARMInstr_Mul ( ARMMulOp op ) {
1230   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1231   i->tag          = ARMin_Mul;
1232   i->ARMin.Mul.op = op;
1233   return i;
1234}
1235ARMInstr* ARMInstr_LdrEX ( Int szB ) {
1236   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1237   i->tag             = ARMin_LdrEX;
1238   i->ARMin.LdrEX.szB = szB;
1239   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1240   return i;
1241}
1242ARMInstr* ARMInstr_StrEX ( Int szB ) {
1243   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1244   i->tag             = ARMin_StrEX;
1245   i->ARMin.StrEX.szB = szB;
1246   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1247   return i;
1248}
1249ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg dD, ARMAModeV* am ) {
1250   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1251   i->tag                 = ARMin_VLdStD;
1252   i->ARMin.VLdStD.isLoad = isLoad;
1253   i->ARMin.VLdStD.dD     = dD;
1254   i->ARMin.VLdStD.amode  = am;
1255   return i;
1256}
1257ARMInstr* ARMInstr_VLdStS ( Bool isLoad, HReg fD, ARMAModeV* am ) {
1258   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1259   i->tag                 = ARMin_VLdStS;
1260   i->ARMin.VLdStS.isLoad = isLoad;
1261   i->ARMin.VLdStS.fD     = fD;
1262   i->ARMin.VLdStS.amode  = am;
1263   return i;
1264}
1265ARMInstr* ARMInstr_VAluD ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1266   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1267   i->tag              = ARMin_VAluD;
1268   i->ARMin.VAluD.op   = op;
1269   i->ARMin.VAluD.dst  = dst;
1270   i->ARMin.VAluD.argL = argL;
1271   i->ARMin.VAluD.argR = argR;
1272   return i;
1273}
1274ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1275   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1276   i->tag              = ARMin_VAluS;
1277   i->ARMin.VAluS.op   = op;
1278   i->ARMin.VAluS.dst  = dst;
1279   i->ARMin.VAluS.argL = argL;
1280   i->ARMin.VAluS.argR = argR;
1281   return i;
1282}
1283ARMInstr* ARMInstr_VUnaryD ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
1284   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1285   i->tag               = ARMin_VUnaryD;
1286   i->ARMin.VUnaryD.op  = op;
1287   i->ARMin.VUnaryD.dst = dst;
1288   i->ARMin.VUnaryD.src = src;
1289   return i;
1290}
1291ARMInstr* ARMInstr_VUnaryS ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
1292   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1293   i->tag               = ARMin_VUnaryS;
1294   i->ARMin.VUnaryS.op  = op;
1295   i->ARMin.VUnaryS.dst = dst;
1296   i->ARMin.VUnaryS.src = src;
1297   return i;
1298}
1299ARMInstr* ARMInstr_VCmpD ( HReg argL, HReg argR ) {
1300   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1301   i->tag              = ARMin_VCmpD;
1302   i->ARMin.VCmpD.argL = argL;
1303   i->ARMin.VCmpD.argR = argR;
1304   return i;
1305}
1306ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
1307   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1308   i->tag               = ARMin_VCMovD;
1309   i->ARMin.VCMovD.cond = cond;
1310   i->ARMin.VCMovD.dst  = dst;
1311   i->ARMin.VCMovD.src  = src;
1312   vassert(cond != ARMcc_AL);
1313   return i;
1314}
1315ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
1316   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1317   i->tag               = ARMin_VCMovS;
1318   i->ARMin.VCMovS.cond = cond;
1319   i->ARMin.VCMovS.dst  = dst;
1320   i->ARMin.VCMovS.src  = src;
1321   vassert(cond != ARMcc_AL);
1322   return i;
1323}
1324ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
1325   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1326   i->tag               = ARMin_VCvtSD;
1327   i->ARMin.VCvtSD.sToD = sToD;
1328   i->ARMin.VCvtSD.dst  = dst;
1329   i->ARMin.VCvtSD.src  = src;
1330   return i;
1331}
1332ARMInstr* ARMInstr_VXferQ ( Bool toQ, HReg qD, HReg dHi, HReg dLo ) {
1333   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1334   i->tag              = ARMin_VXferQ;
1335   i->ARMin.VXferQ.toQ = toQ;
1336   i->ARMin.VXferQ.qD  = qD;
1337   i->ARMin.VXferQ.dHi = dHi;
1338   i->ARMin.VXferQ.dLo = dLo;
1339   return i;
1340}
1341ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
1342   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1343   i->tag              = ARMin_VXferD;
1344   i->ARMin.VXferD.toD = toD;
1345   i->ARMin.VXferD.dD  = dD;
1346   i->ARMin.VXferD.rHi = rHi;
1347   i->ARMin.VXferD.rLo = rLo;
1348   return i;
1349}
1350ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
1351   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1352   i->tag              = ARMin_VXferS;
1353   i->ARMin.VXferS.toS = toS;
1354   i->ARMin.VXferS.fD  = fD;
1355   i->ARMin.VXferS.rLo = rLo;
1356   return i;
1357}
1358ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
1359                            HReg dst, HReg src ) {
1360   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1361   i->tag                = ARMin_VCvtID;
1362   i->ARMin.VCvtID.iToD  = iToD;
1363   i->ARMin.VCvtID.syned = syned;
1364   i->ARMin.VCvtID.dst   = dst;
1365   i->ARMin.VCvtID.src   = src;
1366   return i;
1367}
1368ARMInstr* ARMInstr_VRIntR ( Bool isF64, HReg dst, HReg src )
1369{
1370   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1371   i->tag                = ARMin_VRIntR;
1372   i->ARMin.VRIntR.isF64 = isF64;
1373   i->ARMin.VRIntR.dst   = dst ;
1374   i->ARMin.VRIntR.src   = src;
1375   return i;
1376}
1377ARMInstr* ARMInstr_VMinMaxNum ( Bool isF64, Bool isMax,
1378                                HReg dst, HReg srcL, HReg srcR )
1379{
1380   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1381   i->tag = ARMin_VMinMaxNum;
1382   i->ARMin.VMinMaxNum.isF64 = isF64;
1383   i->ARMin.VMinMaxNum.isMax = isMax;
1384   i->ARMin.VMinMaxNum.dst   = dst ;
1385   i->ARMin.VMinMaxNum.srcL  = srcL;
1386   i->ARMin.VMinMaxNum.srcR  = srcR;
1387   return i;
1388}
1389ARMInstr* ARMInstr_FPSCR ( Bool toFPSCR, HReg iReg ) {
1390   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1391   i->tag                 = ARMin_FPSCR;
1392   i->ARMin.FPSCR.toFPSCR = toFPSCR;
1393   i->ARMin.FPSCR.iReg    = iReg;
1394   return i;
1395}
1396ARMInstr* ARMInstr_MFence ( void ) {
1397   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1398   i->tag      = ARMin_MFence;
1399   return i;
1400}
1401ARMInstr* ARMInstr_CLREX( void ) {
1402   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1403   i->tag      = ARMin_CLREX;
1404   return i;
1405}
1406
1407ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg dQ, ARMAModeN *amode ) {
1408   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1409   i->tag                  = ARMin_NLdStQ;
1410   i->ARMin.NLdStQ.isLoad  = isLoad;
1411   i->ARMin.NLdStQ.dQ      = dQ;
1412   i->ARMin.NLdStQ.amode   = amode;
1413   return i;
1414}
1415
1416ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
1417   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1418   i->tag                  = ARMin_NLdStD;
1419   i->ARMin.NLdStD.isLoad  = isLoad;
1420   i->ARMin.NLdStD.dD      = dD;
1421   i->ARMin.NLdStD.amode   = amode;
1422   return i;
1423}
1424
1425ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
1426                            UInt size, Bool Q ) {
1427   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1428   i->tag                = ARMin_NUnary;
1429   i->ARMin.NUnary.op   = op;
1430   i->ARMin.NUnary.src  = nQ;
1431   i->ARMin.NUnary.dst  = dQ;
1432   i->ARMin.NUnary.size = size;
1433   i->ARMin.NUnary.Q    = Q;
1434   return i;
1435}
1436
1437ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS op, ARMNRS* dst, ARMNRS* src,
1438                             UInt size, Bool Q ) {
1439   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1440   i->tag                = ARMin_NUnaryS;
1441   i->ARMin.NUnaryS.op   = op;
1442   i->ARMin.NUnaryS.src  = src;
1443   i->ARMin.NUnaryS.dst  = dst;
1444   i->ARMin.NUnaryS.size = size;
1445   i->ARMin.NUnaryS.Q    = Q;
1446   return i;
1447}
1448
1449ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
1450                           UInt size, Bool Q ) {
1451   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1452   i->tag                = ARMin_NDual;
1453   i->ARMin.NDual.op   = op;
1454   i->ARMin.NDual.arg1 = nQ;
1455   i->ARMin.NDual.arg2 = mQ;
1456   i->ARMin.NDual.size = size;
1457   i->ARMin.NDual.Q    = Q;
1458   return i;
1459}
1460
1461ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
1462                             HReg dst, HReg argL, HReg argR,
1463                             UInt size, Bool Q ) {
1464   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1465   i->tag                = ARMin_NBinary;
1466   i->ARMin.NBinary.op   = op;
1467   i->ARMin.NBinary.argL = argL;
1468   i->ARMin.NBinary.argR = argR;
1469   i->ARMin.NBinary.dst  = dst;
1470   i->ARMin.NBinary.size = size;
1471   i->ARMin.NBinary.Q    = Q;
1472   return i;
1473}
1474
1475ARMInstr* ARMInstr_NeonImm (HReg dst, ARMNImm* imm ) {
1476   ARMInstr *i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1477   i->tag         = ARMin_NeonImm;
1478   i->ARMin.NeonImm.dst = dst;
1479   i->ARMin.NeonImm.imm = imm;
1480   return i;
1481}
1482
1483ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
1484   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1485   i->tag               = ARMin_NCMovQ;
1486   i->ARMin.NCMovQ.cond = cond;
1487   i->ARMin.NCMovQ.dst  = dst;
1488   i->ARMin.NCMovQ.src  = src;
1489   vassert(cond != ARMcc_AL);
1490   return i;
1491}
1492
1493ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
1494                            HReg dst, HReg argL, HReg argR,
1495                            UInt size, Bool Q ) {
1496   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1497   i->tag                = ARMin_NShift;
1498   i->ARMin.NShift.op   = op;
1499   i->ARMin.NShift.argL = argL;
1500   i->ARMin.NShift.argR = argR;
1501   i->ARMin.NShift.dst  = dst;
1502   i->ARMin.NShift.size = size;
1503   i->ARMin.NShift.Q    = Q;
1504   return i;
1505}
1506
1507ARMInstr* ARMInstr_NShl64 ( HReg dst, HReg src, UInt amt )
1508{
1509   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1510   i->tag              = ARMin_NShl64;
1511   i->ARMin.NShl64.dst = dst;
1512   i->ARMin.NShl64.src = src;
1513   i->ARMin.NShl64.amt = amt;
1514   vassert(amt >= 1 && amt <= 63);
1515   return i;
1516}
1517
1518/* Helper copy-pasted from isel.c */
1519static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
1520{
1521   UInt i;
1522   for (i = 0; i < 16; i++) {
1523      if (0 == (u & 0xFFFFFF00)) {
1524         *u8 = u;
1525         *u4 = i;
1526         return True;
1527      }
1528      u = ROR32(u, 30);
1529   }
1530   vassert(i == 16);
1531   return False;
1532}
1533
1534ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
1535   UInt u8, u4;
1536   ARMInstr *i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1537   /* Try to generate single ADD if possible */
1538   if (fitsIn8x4(&u8, &u4, imm32)) {
1539      i->tag            = ARMin_Alu;
1540      i->ARMin.Alu.op   = ARMalu_ADD;
1541      i->ARMin.Alu.dst  = rD;
1542      i->ARMin.Alu.argL = rN;
1543      i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
1544   } else {
1545      i->tag               = ARMin_Add32;
1546      i->ARMin.Add32.rD    = rD;
1547      i->ARMin.Add32.rN    = rN;
1548      i->ARMin.Add32.imm32 = imm32;
1549   }
1550   return i;
1551}
1552
1553ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter,
1554                             ARMAMode1* amFailAddr ) {
1555   ARMInstr* i                 = LibVEX_Alloc_inline(sizeof(ARMInstr));
1556   i->tag                      = ARMin_EvCheck;
1557   i->ARMin.EvCheck.amCounter  = amCounter;
1558   i->ARMin.EvCheck.amFailAddr = amFailAddr;
1559   return i;
1560}
1561
1562ARMInstr* ARMInstr_ProfInc ( void ) {
1563   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1564   i->tag      = ARMin_ProfInc;
1565   return i;
1566}
1567
1568/* ... */
1569
1570void ppARMInstr ( const ARMInstr* i ) {
1571   switch (i->tag) {
1572      case ARMin_Alu:
1573         vex_printf("%-4s  ", showARMAluOp(i->ARMin.Alu.op));
1574         ppHRegARM(i->ARMin.Alu.dst);
1575         vex_printf(", ");
1576         ppHRegARM(i->ARMin.Alu.argL);
1577         vex_printf(", ");
1578         ppARMRI84(i->ARMin.Alu.argR);
1579         return;
1580      case ARMin_Shift:
1581         vex_printf("%s   ", showARMShiftOp(i->ARMin.Shift.op));
1582         ppHRegARM(i->ARMin.Shift.dst);
1583         vex_printf(", ");
1584         ppHRegARM(i->ARMin.Shift.argL);
1585         vex_printf(", ");
1586         ppARMRI5(i->ARMin.Shift.argR);
1587         return;
1588      case ARMin_Unary:
1589         vex_printf("%s   ", showARMUnaryOp(i->ARMin.Unary.op));
1590         ppHRegARM(i->ARMin.Unary.dst);
1591         vex_printf(", ");
1592         ppHRegARM(i->ARMin.Unary.src);
1593         return;
1594      case ARMin_CmpOrTst:
1595         vex_printf("%s   ", i->ARMin.CmpOrTst.isCmp ? "cmp" : "tst");
1596         ppHRegARM(i->ARMin.CmpOrTst.argL);
1597         vex_printf(", ");
1598         ppARMRI84(i->ARMin.CmpOrTst.argR);
1599         return;
1600      case ARMin_Mov:
1601         vex_printf("mov   ");
1602         ppHRegARM(i->ARMin.Mov.dst);
1603         vex_printf(", ");
1604         ppARMRI84(i->ARMin.Mov.src);
1605         return;
1606      case ARMin_Imm32:
1607         vex_printf("imm   ");
1608         ppHRegARM(i->ARMin.Imm32.dst);
1609         vex_printf(", 0x%x", i->ARMin.Imm32.imm32);
1610         return;
1611      case ARMin_LdSt32:
1612         if (i->ARMin.LdSt32.isLoad) {
1613            vex_printf("ldr%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? "  "
1614                                    : showARMCondCode(i->ARMin.LdSt32.cc));
1615            ppHRegARM(i->ARMin.LdSt32.rD);
1616            vex_printf(", ");
1617            ppARMAMode1(i->ARMin.LdSt32.amode);
1618         } else {
1619            vex_printf("str%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? "  "
1620                                    : showARMCondCode(i->ARMin.LdSt32.cc));
1621            ppARMAMode1(i->ARMin.LdSt32.amode);
1622            vex_printf(", ");
1623            ppHRegARM(i->ARMin.LdSt32.rD);
1624         }
1625         return;
1626      case ARMin_LdSt16:
1627         if (i->ARMin.LdSt16.isLoad) {
1628            vex_printf("%s%s%s",
1629                       i->ARMin.LdSt16.signedLoad ? "ldrsh" : "ldrh",
1630                       i->ARMin.LdSt16.cc == ARMcc_AL ? "  "
1631                          : showARMCondCode(i->ARMin.LdSt16.cc),
1632                       i->ARMin.LdSt16.signedLoad ? " " : "  ");
1633            ppHRegARM(i->ARMin.LdSt16.rD);
1634            vex_printf(", ");
1635            ppARMAMode2(i->ARMin.LdSt16.amode);
1636         } else {
1637            vex_printf("strh%s  ",
1638                       i->ARMin.LdSt16.cc == ARMcc_AL ? "  "
1639                          : showARMCondCode(i->ARMin.LdSt16.cc));
1640            ppARMAMode2(i->ARMin.LdSt16.amode);
1641            vex_printf(", ");
1642            ppHRegARM(i->ARMin.LdSt16.rD);
1643         }
1644         return;
1645      case ARMin_LdSt8U:
1646         if (i->ARMin.LdSt8U.isLoad) {
1647            vex_printf("ldrb%s  ", i->ARMin.LdSt8U.cc == ARMcc_AL ? "  "
1648                                      : showARMCondCode(i->ARMin.LdSt8U.cc));
1649            ppHRegARM(i->ARMin.LdSt8U.rD);
1650            vex_printf(", ");
1651            ppARMAMode1(i->ARMin.LdSt8U.amode);
1652         } else {
1653            vex_printf("strb%s  ", i->ARMin.LdSt8U.cc == ARMcc_AL ? "  "
1654                                      : showARMCondCode(i->ARMin.LdSt8U.cc));
1655            ppARMAMode1(i->ARMin.LdSt8U.amode);
1656            vex_printf(", ");
1657            ppHRegARM(i->ARMin.LdSt8U.rD);
1658         }
1659         return;
1660      case ARMin_Ld8S:
1661         vex_printf("ldrsb%s ", i->ARMin.Ld8S.cc == ARMcc_AL ? "  "
1662                                   : showARMCondCode(i->ARMin.Ld8S.cc));
1663         ppARMAMode2(i->ARMin.Ld8S.amode);
1664         vex_printf(", ");
1665         ppHRegARM(i->ARMin.Ld8S.rD);
1666         return;
1667      case ARMin_XDirect:
1668         vex_printf("(xDirect) ");
1669         vex_printf("if (%%cpsr.%s) { ",
1670                    showARMCondCode(i->ARMin.XDirect.cond));
1671         vex_printf("movw r12,0x%x; ",
1672                    (UInt)(i->ARMin.XDirect.dstGA & 0xFFFF));
1673         vex_printf("movt r12,0x%x; ",
1674                    (UInt)((i->ARMin.XDirect.dstGA >> 16) & 0xFFFF));
1675         vex_printf("str r12,");
1676         ppARMAMode1(i->ARMin.XDirect.amR15T);
1677         vex_printf("; movw r12,LO16($disp_cp_chain_me_to_%sEP); ",
1678                    i->ARMin.XDirect.toFastEP ? "fast" : "slow");
1679         vex_printf("movt r12,HI16($disp_cp_chain_me_to_%sEP); ",
1680                    i->ARMin.XDirect.toFastEP ? "fast" : "slow");
1681         vex_printf("blx r12 }");
1682         return;
1683      case ARMin_XIndir:
1684         vex_printf("(xIndir) ");
1685         vex_printf("if (%%cpsr.%s) { ",
1686                    showARMCondCode(i->ARMin.XIndir.cond));
1687         vex_printf("str ");
1688         ppHRegARM(i->ARMin.XIndir.dstGA);
1689         vex_printf(",");
1690         ppARMAMode1(i->ARMin.XIndir.amR15T);
1691         vex_printf("; movw r12,LO16($disp_cp_xindir); ");
1692         vex_printf("movt r12,HI16($disp_cp_xindir); ");
1693         vex_printf("blx r12 }");
1694         return;
1695      case ARMin_XAssisted:
1696         vex_printf("(xAssisted) ");
1697         vex_printf("if (%%cpsr.%s) { ",
1698                    showARMCondCode(i->ARMin.XAssisted.cond));
1699         vex_printf("str ");
1700         ppHRegARM(i->ARMin.XAssisted.dstGA);
1701         vex_printf(",");
1702         ppARMAMode1(i->ARMin.XAssisted.amR15T);
1703         vex_printf("movw r8,$IRJumpKind_to_TRCVAL(%d); ",
1704                    (Int)i->ARMin.XAssisted.jk);
1705         vex_printf("movw r12,LO16($disp_cp_xassisted); ");
1706         vex_printf("movt r12,HI16($disp_cp_xassisted); ");
1707         vex_printf("blx r12 }");
1708         return;
1709      case ARMin_CMov:
1710         vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond));
1711         ppHRegARM(i->ARMin.CMov.dst);
1712         vex_printf(", ");
1713         ppARMRI84(i->ARMin.CMov.src);
1714         return;
1715      case ARMin_Call:
1716         vex_printf("call%s  ",
1717                    i->ARMin.Call.cond==ARMcc_AL
1718                       ? "" : showARMCondCode(i->ARMin.Call.cond));
1719         vex_printf("0x%x [nArgRegs=%d, ",
1720                    i->ARMin.Call.target, i->ARMin.Call.nArgRegs);
1721         ppRetLoc(i->ARMin.Call.rloc);
1722         vex_printf("]");
1723         return;
1724      case ARMin_Mul:
1725         vex_printf("%-5s ", showARMMulOp(i->ARMin.Mul.op));
1726         if (i->ARMin.Mul.op == ARMmul_PLAIN) {
1727            vex_printf("r0, r2, r3");
1728         } else {
1729            vex_printf("r1:r0, r2, r3");
1730         }
1731         return;
1732      case ARMin_LdrEX: {
1733         const HChar* sz = "";
1734         switch (i->ARMin.LdrEX.szB) {
1735            case 1: sz = "b"; break; case 2: sz = "h"; break;
1736            case 8: sz = "d"; break; case 4: break;
1737            default: vassert(0);
1738         }
1739         vex_printf("ldrex%s %sr2, [r4]",
1740                    sz, i->ARMin.LdrEX.szB == 8 ? "r3:" : "");
1741         return;
1742      }
1743      case ARMin_StrEX: {
1744         const HChar* sz = "";
1745         switch (i->ARMin.StrEX.szB) {
1746            case 1: sz = "b"; break; case 2: sz = "h"; break;
1747            case 8: sz = "d"; break; case 4: break;
1748            default: vassert(0);
1749         }
1750         vex_printf("strex%s r0, %sr2, [r4]",
1751                    sz, i->ARMin.StrEX.szB == 8 ? "r3:" : "");
1752         return;
1753      }
1754      case ARMin_VLdStD:
1755         if (i->ARMin.VLdStD.isLoad) {
1756            vex_printf("fldd  ");
1757            ppHRegARM(i->ARMin.VLdStD.dD);
1758            vex_printf(", ");
1759            ppARMAModeV(i->ARMin.VLdStD.amode);
1760         } else {
1761            vex_printf("fstd  ");
1762            ppARMAModeV(i->ARMin.VLdStD.amode);
1763            vex_printf(", ");
1764            ppHRegARM(i->ARMin.VLdStD.dD);
1765         }
1766         return;
1767      case ARMin_VLdStS:
1768         if (i->ARMin.VLdStS.isLoad) {
1769            vex_printf("flds  ");
1770            ppHRegARM(i->ARMin.VLdStS.fD);
1771            vex_printf(", ");
1772            ppARMAModeV(i->ARMin.VLdStS.amode);
1773         } else {
1774            vex_printf("fsts  ");
1775            ppARMAModeV(i->ARMin.VLdStS.amode);
1776            vex_printf(", ");
1777            ppHRegARM(i->ARMin.VLdStS.fD);
1778         }
1779         return;
1780      case ARMin_VAluD:
1781         vex_printf("f%-3sd ", showARMVfpOp(i->ARMin.VAluD.op));
1782         ppHRegARM(i->ARMin.VAluD.dst);
1783         vex_printf(", ");
1784         ppHRegARM(i->ARMin.VAluD.argL);
1785         vex_printf(", ");
1786         ppHRegARM(i->ARMin.VAluD.argR);
1787         return;
1788      case ARMin_VAluS:
1789         vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
1790         ppHRegARM(i->ARMin.VAluS.dst);
1791         vex_printf(", ");
1792         ppHRegARM(i->ARMin.VAluS.argL);
1793         vex_printf(", ");
1794         ppHRegARM(i->ARMin.VAluS.argR);
1795         return;
1796      case ARMin_VUnaryD:
1797         vex_printf("f%-3sd ", showARMVfpUnaryOp(i->ARMin.VUnaryD.op));
1798         ppHRegARM(i->ARMin.VUnaryD.dst);
1799         vex_printf(", ");
1800         ppHRegARM(i->ARMin.VUnaryD.src);
1801         return;
1802      case ARMin_VUnaryS:
1803         vex_printf("f%-3ss ", showARMVfpUnaryOp(i->ARMin.VUnaryS.op));
1804         ppHRegARM(i->ARMin.VUnaryS.dst);
1805         vex_printf(", ");
1806         ppHRegARM(i->ARMin.VUnaryS.src);
1807         return;
1808      case ARMin_VCmpD:
1809         vex_printf("fcmpd ");
1810         ppHRegARM(i->ARMin.VCmpD.argL);
1811         vex_printf(", ");
1812         ppHRegARM(i->ARMin.VCmpD.argR);
1813         vex_printf(" ; fmstat");
1814         return;
1815      case ARMin_VCMovD:
1816         vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
1817         ppHRegARM(i->ARMin.VCMovD.dst);
1818         vex_printf(", ");
1819         ppHRegARM(i->ARMin.VCMovD.src);
1820         return;
1821      case ARMin_VCMovS:
1822         vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
1823         ppHRegARM(i->ARMin.VCMovS.dst);
1824         vex_printf(", ");
1825         ppHRegARM(i->ARMin.VCMovS.src);
1826         return;
1827      case ARMin_VCvtSD:
1828         vex_printf("fcvt%s ", i->ARMin.VCvtSD.sToD ? "ds" : "sd");
1829         ppHRegARM(i->ARMin.VCvtSD.dst);
1830         vex_printf(", ");
1831         ppHRegARM(i->ARMin.VCvtSD.src);
1832         return;
1833      case ARMin_VXferQ:
1834         if (i->ARMin.VXferQ.toQ) {
1835            vex_printf("vmov ");
1836            ppHRegARM(i->ARMin.VXferQ.qD);
1837            vex_printf("-lo64, ");
1838            ppHRegARM(i->ARMin.VXferQ.dLo);
1839            vex_printf(" ; vmov ");
1840            ppHRegARM(i->ARMin.VXferQ.qD);
1841            vex_printf("-hi64, ");
1842            ppHRegARM(i->ARMin.VXferQ.dHi);
1843         } else {
1844            vex_printf("vmov ");
1845            ppHRegARM(i->ARMin.VXferQ.dLo);
1846            vex_printf(", ");
1847            ppHRegARM(i->ARMin.VXferQ.qD);
1848            vex_printf("-lo64");
1849            vex_printf(" ; vmov ");
1850            ppHRegARM(i->ARMin.VXferQ.dHi);
1851            vex_printf(", ");
1852            ppHRegARM(i->ARMin.VXferQ.qD);
1853            vex_printf("-hi64");
1854         }
1855         return;
1856      case ARMin_VXferD:
1857         vex_printf("vmov  ");
1858         if (i->ARMin.VXferD.toD) {
1859            ppHRegARM(i->ARMin.VXferD.dD);
1860            vex_printf(", ");
1861            ppHRegARM(i->ARMin.VXferD.rLo);
1862            vex_printf(", ");
1863            ppHRegARM(i->ARMin.VXferD.rHi);
1864         } else {
1865            ppHRegARM(i->ARMin.VXferD.rLo);
1866            vex_printf(", ");
1867            ppHRegARM(i->ARMin.VXferD.rHi);
1868            vex_printf(", ");
1869            ppHRegARM(i->ARMin.VXferD.dD);
1870         }
1871         return;
1872      case ARMin_VXferS:
1873         vex_printf("vmov  ");
1874         if (i->ARMin.VXferS.toS) {
1875            ppHRegARM(i->ARMin.VXferS.fD);
1876            vex_printf(", ");
1877            ppHRegARM(i->ARMin.VXferS.rLo);
1878         } else {
1879            ppHRegARM(i->ARMin.VXferS.rLo);
1880            vex_printf(", ");
1881            ppHRegARM(i->ARMin.VXferS.fD);
1882         }
1883         return;
1884      case ARMin_VCvtID: {
1885         const HChar* nm = "?";
1886         if (i->ARMin.VCvtID.iToD) {
1887            nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
1888         } else {
1889            nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
1890         }
1891         vex_printf("%s ", nm);
1892         ppHRegARM(i->ARMin.VCvtID.dst);
1893         vex_printf(", ");
1894         ppHRegARM(i->ARMin.VCvtID.src);
1895         return;
1896      }
1897      case ARMin_VRIntR: {
1898         const HChar* sz = i->ARMin.VRIntR.isF64 ? "f64" : "f32";
1899         vex_printf("vrintr.%s.%s ", sz, sz);
1900         ppHRegARM(i->ARMin.VRIntR.dst);
1901         vex_printf(", ");
1902         ppHRegARM(i->ARMin.VRIntR.src);
1903         return;
1904      }
1905      case ARMin_VMinMaxNum: {
1906         const HChar* sz = i->ARMin.VMinMaxNum.isF64 ? "f64" : "f32";
1907         const HChar* nm = i->ARMin.VMinMaxNum.isMax ? "vmaxnm" : "vminnm";
1908         vex_printf("%s.%s ", nm, sz);
1909         ppHRegARM(i->ARMin.VMinMaxNum.dst);
1910         vex_printf(", ");
1911         ppHRegARM(i->ARMin.VMinMaxNum.srcL);
1912         vex_printf(", ");
1913         ppHRegARM(i->ARMin.VMinMaxNum.srcR);
1914         return;
1915      }
1916      case ARMin_FPSCR:
1917         if (i->ARMin.FPSCR.toFPSCR) {
1918            vex_printf("fmxr  fpscr, ");
1919            ppHRegARM(i->ARMin.FPSCR.iReg);
1920         } else {
1921            vex_printf("fmrx  ");
1922            ppHRegARM(i->ARMin.FPSCR.iReg);
1923            vex_printf(", fpscr");
1924         }
1925         return;
1926      case ARMin_MFence:
1927         vex_printf("(mfence) dsb sy; dmb sy; isb");
1928         return;
1929      case ARMin_CLREX:
1930         vex_printf("clrex");
1931         return;
1932      case ARMin_NLdStQ:
1933         if (i->ARMin.NLdStQ.isLoad)
1934            vex_printf("vld1.32 {");
1935         else
1936            vex_printf("vst1.32 {");
1937         ppHRegARM(i->ARMin.NLdStQ.dQ);
1938         vex_printf("} ");
1939         ppARMAModeN(i->ARMin.NLdStQ.amode);
1940         return;
1941      case ARMin_NLdStD:
1942         if (i->ARMin.NLdStD.isLoad)
1943            vex_printf("vld1.32 {");
1944         else
1945            vex_printf("vst1.32 {");
1946         ppHRegARM(i->ARMin.NLdStD.dD);
1947         vex_printf("} ");
1948         ppARMAModeN(i->ARMin.NLdStD.amode);
1949         return;
1950      case ARMin_NUnary:
1951         vex_printf("%s%s%s  ",
1952                    showARMNeonUnOp(i->ARMin.NUnary.op),
1953                    showARMNeonUnOpDataType(i->ARMin.NUnary.op),
1954                    showARMNeonDataSize(i));
1955         ppHRegARM(i->ARMin.NUnary.dst);
1956         vex_printf(", ");
1957         ppHRegARM(i->ARMin.NUnary.src);
1958         if (i->ARMin.NUnary.op == ARMneon_EQZ)
1959            vex_printf(", #0");
1960         if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
1961             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
1962             i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
1963             i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
1964            vex_printf(", #%u", i->ARMin.NUnary.size);
1965         }
1966         if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
1967             i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
1968             i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
1969            UInt size;
1970            size = i->ARMin.NUnary.size;
1971            if (size & 0x40) {
1972               vex_printf(", #%u", size - 64);
1973            } else if (size & 0x20) {
1974               vex_printf(", #%u", size - 32);
1975            } else if (size & 0x10) {
1976               vex_printf(", #%u", size - 16);
1977            } else if (size & 0x08) {
1978               vex_printf(", #%u", size - 8);
1979            }
1980         }
1981         return;
1982      case ARMin_NUnaryS:
1983         vex_printf("%s%s%s  ",
1984                    showARMNeonUnOpS(i->ARMin.NUnaryS.op),
1985                    showARMNeonUnOpSDataType(i->ARMin.NUnaryS.op),
1986                    showARMNeonDataSize(i));
1987         ppARMNRS(i->ARMin.NUnaryS.dst);
1988         vex_printf(", ");
1989         ppARMNRS(i->ARMin.NUnaryS.src);
1990         return;
1991      case ARMin_NShift:
1992         vex_printf("%s%s%s  ",
1993                    showARMNeonShiftOp(i->ARMin.NShift.op),
1994                    showARMNeonShiftOpDataType(i->ARMin.NShift.op),
1995                    showARMNeonDataSize(i));
1996         ppHRegARM(i->ARMin.NShift.dst);
1997         vex_printf(", ");
1998         ppHRegARM(i->ARMin.NShift.argL);
1999         vex_printf(", ");
2000         ppHRegARM(i->ARMin.NShift.argR);
2001         return;
2002      case ARMin_NShl64:
2003         vex_printf("vshl.i64 ");
2004         ppHRegARM(i->ARMin.NShl64.dst);
2005         vex_printf(", ");
2006         ppHRegARM(i->ARMin.NShl64.src);
2007         vex_printf(", #%u", i->ARMin.NShl64.amt);
2008         return;
2009      case ARMin_NDual:
2010         vex_printf("%s%s%s  ",
2011                    showARMNeonDualOp(i->ARMin.NDual.op),
2012                    showARMNeonDualOpDataType(i->ARMin.NDual.op),
2013                    showARMNeonDataSize(i));
2014         ppHRegARM(i->ARMin.NDual.arg1);
2015         vex_printf(", ");
2016         ppHRegARM(i->ARMin.NDual.arg2);
2017         return;
2018      case ARMin_NBinary:
2019         vex_printf("%s%s%s",
2020                    showARMNeonBinOp(i->ARMin.NBinary.op),
2021                    showARMNeonBinOpDataType(i->ARMin.NBinary.op),
2022                    showARMNeonDataSize(i));
2023         vex_printf("  ");
2024         ppHRegARM(i->ARMin.NBinary.dst);
2025         vex_printf(", ");
2026         ppHRegARM(i->ARMin.NBinary.argL);
2027         vex_printf(", ");
2028         ppHRegARM(i->ARMin.NBinary.argR);
2029         return;
2030      case ARMin_NeonImm:
2031         vex_printf("vmov  ");
2032         ppHRegARM(i->ARMin.NeonImm.dst);
2033         vex_printf(", ");
2034         ppARMNImm(i->ARMin.NeonImm.imm);
2035         return;
2036      case ARMin_NCMovQ:
2037         vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
2038         ppHRegARM(i->ARMin.NCMovQ.dst);
2039         vex_printf(", ");
2040         ppHRegARM(i->ARMin.NCMovQ.src);
2041         return;
2042      case ARMin_Add32:
2043         vex_printf("add32 ");
2044         ppHRegARM(i->ARMin.Add32.rD);
2045         vex_printf(", ");
2046         ppHRegARM(i->ARMin.Add32.rN);
2047         vex_printf(", ");
2048         vex_printf("%u", i->ARMin.Add32.imm32);
2049         return;
2050      case ARMin_EvCheck:
2051         vex_printf("(evCheck) ldr r12,");
2052         ppARMAMode1(i->ARMin.EvCheck.amCounter);
2053         vex_printf("; subs r12,r12,$1; str r12,");
2054         ppARMAMode1(i->ARMin.EvCheck.amCounter);
2055         vex_printf("; bpl nofail; ldr r12,");
2056         ppARMAMode1(i->ARMin.EvCheck.amFailAddr);
2057         vex_printf("; bx r12; nofail:");
2058         return;
2059      case ARMin_ProfInc:
2060         vex_printf("(profInc) movw r12,LO16($NotKnownYet); "
2061                    "movw r12,HI16($NotKnownYet); "
2062                    "ldr r11,[r12]; "
2063                    "adds r11,r11,$1; "
2064                    "str r11,[r12]; "
2065                    "ldr r11,[r12+4]; "
2066                    "adc r11,r11,$0; "
2067                    "str r11,[r12+4]");
2068         return;
2069      default:
2070         vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
2071         vpanic("ppARMInstr(1)");
2072         return;
2073   }
2074}
2075
2076
2077/* --------- Helpers for register allocation. --------- */
2078
2079void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 )
2080{
2081   vassert(mode64 == False);
2082   initHRegUsage(u);
2083   switch (i->tag) {
2084      case ARMin_Alu:
2085         addHRegUse(u, HRmWrite, i->ARMin.Alu.dst);
2086         addHRegUse(u, HRmRead, i->ARMin.Alu.argL);
2087         addRegUsage_ARMRI84(u, i->ARMin.Alu.argR);
2088         return;
2089      case ARMin_Shift:
2090         addHRegUse(u, HRmWrite, i->ARMin.Shift.dst);
2091         addHRegUse(u, HRmRead, i->ARMin.Shift.argL);
2092         addRegUsage_ARMRI5(u, i->ARMin.Shift.argR);
2093         return;
2094      case ARMin_Unary:
2095         addHRegUse(u, HRmWrite, i->ARMin.Unary.dst);
2096         addHRegUse(u, HRmRead, i->ARMin.Unary.src);
2097         return;
2098      case ARMin_CmpOrTst:
2099         addHRegUse(u, HRmRead, i->ARMin.CmpOrTst.argL);
2100         addRegUsage_ARMRI84(u, i->ARMin.CmpOrTst.argR);
2101         return;
2102      case ARMin_Mov:
2103         addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
2104         addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
2105         return;
2106      case ARMin_Imm32:
2107         addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
2108         return;
2109      case ARMin_LdSt32:
2110         addRegUsage_ARMAMode1(u, i->ARMin.LdSt32.amode);
2111         if (i->ARMin.LdSt32.isLoad) {
2112            addHRegUse(u, HRmWrite, i->ARMin.LdSt32.rD);
2113            if (i->ARMin.LdSt32.cc != ARMcc_AL)
2114               addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
2115         } else {
2116            addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
2117         }
2118         return;
2119      case ARMin_LdSt16:
2120         addRegUsage_ARMAMode2(u, i->ARMin.LdSt16.amode);
2121         if (i->ARMin.LdSt16.isLoad) {
2122            addHRegUse(u, HRmWrite, i->ARMin.LdSt16.rD);
2123            if (i->ARMin.LdSt16.cc != ARMcc_AL)
2124               addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
2125         } else {
2126            addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
2127         }
2128         return;
2129      case ARMin_LdSt8U:
2130         addRegUsage_ARMAMode1(u, i->ARMin.LdSt8U.amode);
2131         if (i->ARMin.LdSt8U.isLoad) {
2132            addHRegUse(u, HRmWrite, i->ARMin.LdSt8U.rD);
2133            if (i->ARMin.LdSt8U.cc != ARMcc_AL)
2134               addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
2135         } else {
2136            addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
2137         }
2138         return;
2139      case ARMin_Ld8S:
2140         addRegUsage_ARMAMode2(u, i->ARMin.Ld8S.amode);
2141         addHRegUse(u, HRmWrite, i->ARMin.Ld8S.rD);
2142         if (i->ARMin.Ld8S.cc != ARMcc_AL)
2143            addHRegUse(u, HRmRead, i->ARMin.Ld8S.rD);
2144         return;
2145      /* XDirect/XIndir/XAssisted are also a bit subtle.  They
2146         conditionally exit the block.  Hence we only need to list (1)
2147         the registers that they read, and (2) the registers that they
2148         write in the case where the block is not exited.  (2) is
2149         empty, hence only (1) is relevant here. */
2150      case ARMin_XDirect:
2151         addRegUsage_ARMAMode1(u, i->ARMin.XDirect.amR15T);
2152         return;
2153      case ARMin_XIndir:
2154         addHRegUse(u, HRmRead, i->ARMin.XIndir.dstGA);
2155         addRegUsage_ARMAMode1(u, i->ARMin.XIndir.amR15T);
2156         return;
2157      case ARMin_XAssisted:
2158         addHRegUse(u, HRmRead, i->ARMin.XAssisted.dstGA);
2159         addRegUsage_ARMAMode1(u, i->ARMin.XAssisted.amR15T);
2160         return;
2161      case ARMin_CMov:
2162         addHRegUse(u, HRmWrite, i->ARMin.CMov.dst);
2163         addHRegUse(u, HRmRead,  i->ARMin.CMov.dst);
2164         addRegUsage_ARMRI84(u, i->ARMin.CMov.src);
2165         return;
2166      case ARMin_Call:
2167         /* logic and comments copied/modified from x86 back end */
2168         /* This is a bit subtle. */
2169         /* First off, claim it trashes all the caller-saved regs
2170            which fall within the register allocator's jurisdiction.
2171            These I believe to be r0,1,2,3.  If it turns out that r9
2172            is also caller-saved, then we'll have to add that here
2173            too. */
2174         addHRegUse(u, HRmWrite, hregARM_R0());
2175         addHRegUse(u, HRmWrite, hregARM_R1());
2176         addHRegUse(u, HRmWrite, hregARM_R2());
2177         addHRegUse(u, HRmWrite, hregARM_R3());
2178         /* Now we have to state any parameter-carrying registers
2179            which might be read.  This depends on nArgRegs. */
2180         switch (i->ARMin.Call.nArgRegs) {
2181            case 4: addHRegUse(u, HRmRead, hregARM_R3()); /*fallthru*/
2182            case 3: addHRegUse(u, HRmRead, hregARM_R2()); /*fallthru*/
2183            case 2: addHRegUse(u, HRmRead, hregARM_R1()); /*fallthru*/
2184            case 1: addHRegUse(u, HRmRead, hregARM_R0()); break;
2185            case 0: break;
2186            default: vpanic("getRegUsage_ARM:Call:regparms");
2187         }
2188         /* Finally, there is the issue that the insn trashes a
2189            register because the literal target address has to be
2190            loaded into a register.  Fortunately, for the nArgRegs=
2191            0/1/2/3 case, we can use r0, r1, r2 or r3 respectively, so
2192            this does not cause any further damage.  For the
2193            nArgRegs=4 case, we'll have to choose another register
2194            arbitrarily since all the caller saved regs are used for
2195            parameters, and so we might as well choose r11.
2196            */
2197         if (i->ARMin.Call.nArgRegs == 4)
2198            addHRegUse(u, HRmWrite, hregARM_R11());
2199         /* Upshot of this is that the assembler really must observe
2200            the here-stated convention of which register to use as an
2201            address temporary, depending on nArgRegs: 0==r0,
2202            1==r1, 2==r2, 3==r3, 4==r11 */
2203         return;
2204      case ARMin_Mul:
2205         addHRegUse(u, HRmRead, hregARM_R2());
2206         addHRegUse(u, HRmRead, hregARM_R3());
2207         addHRegUse(u, HRmWrite, hregARM_R0());
2208         if (i->ARMin.Mul.op != ARMmul_PLAIN)
2209            addHRegUse(u, HRmWrite, hregARM_R1());
2210         return;
2211      case ARMin_LdrEX:
2212         addHRegUse(u, HRmRead, hregARM_R4());
2213         addHRegUse(u, HRmWrite, hregARM_R2());
2214         if (i->ARMin.LdrEX.szB == 8)
2215            addHRegUse(u, HRmWrite, hregARM_R3());
2216         return;
2217      case ARMin_StrEX:
2218         addHRegUse(u, HRmRead, hregARM_R4());
2219         addHRegUse(u, HRmWrite, hregARM_R0());
2220         addHRegUse(u, HRmRead, hregARM_R2());
2221         if (i->ARMin.StrEX.szB == 8)
2222            addHRegUse(u, HRmRead, hregARM_R3());
2223         return;
2224      case ARMin_VLdStD:
2225         addRegUsage_ARMAModeV(u, i->ARMin.VLdStD.amode);
2226         if (i->ARMin.VLdStD.isLoad) {
2227            addHRegUse(u, HRmWrite, i->ARMin.VLdStD.dD);
2228         } else {
2229            addHRegUse(u, HRmRead, i->ARMin.VLdStD.dD);
2230         }
2231         return;
2232      case ARMin_VLdStS:
2233         addRegUsage_ARMAModeV(u, i->ARMin.VLdStS.amode);
2234         if (i->ARMin.VLdStS.isLoad) {
2235            addHRegUse(u, HRmWrite, i->ARMin.VLdStS.fD);
2236         } else {
2237            addHRegUse(u, HRmRead, i->ARMin.VLdStS.fD);
2238         }
2239         return;
2240      case ARMin_VAluD:
2241         addHRegUse(u, HRmWrite, i->ARMin.VAluD.dst);
2242         addHRegUse(u, HRmRead, i->ARMin.VAluD.argL);
2243         addHRegUse(u, HRmRead, i->ARMin.VAluD.argR);
2244         return;
2245      case ARMin_VAluS:
2246         addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
2247         addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
2248         addHRegUse(u, HRmRead, i->ARMin.VAluS.argR);
2249         return;
2250      case ARMin_VUnaryD:
2251         addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
2252         addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
2253         return;
2254      case ARMin_VUnaryS:
2255         addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
2256         addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
2257         return;
2258      case ARMin_VCmpD:
2259         addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
2260         addHRegUse(u, HRmRead, i->ARMin.VCmpD.argR);
2261         return;
2262      case ARMin_VCMovD:
2263         addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
2264         addHRegUse(u, HRmRead,  i->ARMin.VCMovD.dst);
2265         addHRegUse(u, HRmRead,  i->ARMin.VCMovD.src);
2266         return;
2267      case ARMin_VCMovS:
2268         addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
2269         addHRegUse(u, HRmRead,  i->ARMin.VCMovS.dst);
2270         addHRegUse(u, HRmRead,  i->ARMin.VCMovS.src);
2271         return;
2272      case ARMin_VCvtSD:
2273         addHRegUse(u, HRmWrite, i->ARMin.VCvtSD.dst);
2274         addHRegUse(u, HRmRead,  i->ARMin.VCvtSD.src);
2275         return;
2276      case ARMin_VXferQ:
2277         if (i->ARMin.VXferQ.toQ) {
2278            addHRegUse(u, HRmWrite, i->ARMin.VXferQ.qD);
2279            addHRegUse(u, HRmRead,  i->ARMin.VXferQ.dHi);
2280            addHRegUse(u, HRmRead,  i->ARMin.VXferQ.dLo);
2281         } else {
2282            addHRegUse(u, HRmRead,  i->ARMin.VXferQ.qD);
2283            addHRegUse(u, HRmWrite, i->ARMin.VXferQ.dHi);
2284            addHRegUse(u, HRmWrite, i->ARMin.VXferQ.dLo);
2285         }
2286         return;
2287      case ARMin_VXferD:
2288         if (i->ARMin.VXferD.toD) {
2289            addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
2290            addHRegUse(u, HRmRead,  i->ARMin.VXferD.rHi);
2291            addHRegUse(u, HRmRead,  i->ARMin.VXferD.rLo);
2292         } else {
2293            addHRegUse(u, HRmRead,  i->ARMin.VXferD.dD);
2294            addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi);
2295            addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo);
2296         }
2297         return;
2298      case ARMin_VXferS:
2299         if (i->ARMin.VXferS.toS) {
2300            addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD);
2301            addHRegUse(u, HRmRead,  i->ARMin.VXferS.rLo);
2302         } else {
2303            addHRegUse(u, HRmRead,  i->ARMin.VXferS.fD);
2304            addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo);
2305         }
2306         return;
2307      case ARMin_VCvtID:
2308         addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
2309         addHRegUse(u, HRmRead,  i->ARMin.VCvtID.src);
2310         return;
2311      case ARMin_VRIntR:
2312         addHRegUse(u, HRmWrite, i->ARMin.VRIntR.dst);
2313         addHRegUse(u, HRmRead,  i->ARMin.VRIntR.src);
2314         return;
2315      case ARMin_VMinMaxNum:
2316         addHRegUse(u, HRmWrite, i->ARMin.VMinMaxNum.dst);
2317         addHRegUse(u, HRmRead,  i->ARMin.VMinMaxNum.srcL);
2318         addHRegUse(u, HRmRead,  i->ARMin.VMinMaxNum.srcR);
2319         return;
2320      case ARMin_FPSCR:
2321         if (i->ARMin.FPSCR.toFPSCR)
2322            addHRegUse(u, HRmRead, i->ARMin.FPSCR.iReg);
2323         else
2324            addHRegUse(u, HRmWrite, i->ARMin.FPSCR.iReg);
2325         return;
2326      case ARMin_MFence:
2327         return;
2328      case ARMin_CLREX:
2329         return;
2330      case ARMin_NLdStQ:
2331         if (i->ARMin.NLdStQ.isLoad)
2332            addHRegUse(u, HRmWrite, i->ARMin.NLdStQ.dQ);
2333         else
2334            addHRegUse(u, HRmRead, i->ARMin.NLdStQ.dQ);
2335         addRegUsage_ARMAModeN(u, i->ARMin.NLdStQ.amode);
2336         return;
2337      case ARMin_NLdStD:
2338         if (i->ARMin.NLdStD.isLoad)
2339            addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
2340         else
2341            addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
2342         addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
2343         return;
2344      case ARMin_NUnary:
2345         addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
2346         addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
2347         return;
2348      case ARMin_NUnaryS:
2349         addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
2350         addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
2351         return;
2352      case ARMin_NShift:
2353         addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
2354         addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
2355         addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
2356         return;
2357      case ARMin_NShl64:
2358         addHRegUse(u, HRmWrite, i->ARMin.NShl64.dst);
2359         addHRegUse(u, HRmRead, i->ARMin.NShl64.src);
2360         return;
2361      case ARMin_NDual:
2362         addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
2363         addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
2364         addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
2365         addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
2366         return;
2367      case ARMin_NBinary:
2368         addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
2369         /* TODO: sometimes dst is also being read! */
2370         // XXX fix this
2371         addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
2372         addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
2373         return;
2374      case ARMin_NeonImm:
2375         addHRegUse(u, HRmWrite, i->ARMin.NeonImm.dst);
2376         return;
2377      case ARMin_NCMovQ:
2378         addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
2379         addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.dst);
2380         addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.src);
2381         return;
2382      case ARMin_Add32:
2383         addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
2384         addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
2385         return;
2386      case ARMin_EvCheck:
2387         /* We expect both amodes only to mention r8, so this is in
2388            fact pointless, since r8 isn't allocatable, but
2389            anyway.. */
2390         addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amCounter);
2391         addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amFailAddr);
2392         addHRegUse(u, HRmWrite, hregARM_R12()); /* also unavail to RA */
2393         return;
2394      case ARMin_ProfInc:
2395         addHRegUse(u, HRmWrite, hregARM_R12());
2396         addHRegUse(u, HRmWrite, hregARM_R11());
2397         return;
2398      default:
2399         ppARMInstr(i);
2400         vpanic("getRegUsage_ARMInstr");
2401   }
2402}
2403
2404
2405void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
2406{
2407   vassert(mode64 == False);
2408   switch (i->tag) {
2409      case ARMin_Alu:
2410         i->ARMin.Alu.dst = lookupHRegRemap(m, i->ARMin.Alu.dst);
2411         i->ARMin.Alu.argL = lookupHRegRemap(m, i->ARMin.Alu.argL);
2412         mapRegs_ARMRI84(m, i->ARMin.Alu.argR);
2413         return;
2414      case ARMin_Shift:
2415         i->ARMin.Shift.dst = lookupHRegRemap(m, i->ARMin.Shift.dst);
2416         i->ARMin.Shift.argL = lookupHRegRemap(m, i->ARMin.Shift.argL);
2417         mapRegs_ARMRI5(m, i->ARMin.Shift.argR);
2418         return;
2419      case ARMin_Unary:
2420         i->ARMin.Unary.dst = lookupHRegRemap(m, i->ARMin.Unary.dst);
2421         i->ARMin.Unary.src = lookupHRegRemap(m, i->ARMin.Unary.src);
2422         return;
2423      case ARMin_CmpOrTst:
2424         i->ARMin.CmpOrTst.argL = lookupHRegRemap(m, i->ARMin.CmpOrTst.argL);
2425         mapRegs_ARMRI84(m, i->ARMin.CmpOrTst.argR);
2426         return;
2427      case ARMin_Mov:
2428         i->ARMin.Mov.dst = lookupHRegRemap(m, i->ARMin.Mov.dst);
2429         mapRegs_ARMRI84(m, i->ARMin.Mov.src);
2430         return;
2431      case ARMin_Imm32:
2432         i->ARMin.Imm32.dst = lookupHRegRemap(m, i->ARMin.Imm32.dst);
2433         return;
2434      case ARMin_LdSt32:
2435         i->ARMin.LdSt32.rD = lookupHRegRemap(m, i->ARMin.LdSt32.rD);
2436         mapRegs_ARMAMode1(m, i->ARMin.LdSt32.amode);
2437         return;
2438      case ARMin_LdSt16:
2439         i->ARMin.LdSt16.rD = lookupHRegRemap(m, i->ARMin.LdSt16.rD);
2440         mapRegs_ARMAMode2(m, i->ARMin.LdSt16.amode);
2441         return;
2442      case ARMin_LdSt8U:
2443         i->ARMin.LdSt8U.rD = lookupHRegRemap(m, i->ARMin.LdSt8U.rD);
2444         mapRegs_ARMAMode1(m, i->ARMin.LdSt8U.amode);
2445         return;
2446      case ARMin_Ld8S:
2447         i->ARMin.Ld8S.rD = lookupHRegRemap(m, i->ARMin.Ld8S.rD);
2448         mapRegs_ARMAMode2(m, i->ARMin.Ld8S.amode);
2449         return;
2450      case ARMin_XDirect:
2451         mapRegs_ARMAMode1(m, i->ARMin.XDirect.amR15T);
2452         return;
2453      case ARMin_XIndir:
2454         i->ARMin.XIndir.dstGA
2455            = lookupHRegRemap(m, i->ARMin.XIndir.dstGA);
2456         mapRegs_ARMAMode1(m, i->ARMin.XIndir.amR15T);
2457         return;
2458      case ARMin_XAssisted:
2459         i->ARMin.XAssisted.dstGA
2460            = lookupHRegRemap(m, i->ARMin.XAssisted.dstGA);
2461         mapRegs_ARMAMode1(m, i->ARMin.XAssisted.amR15T);
2462         return;
2463      case ARMin_CMov:
2464         i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst);
2465         mapRegs_ARMRI84(m, i->ARMin.CMov.src);
2466         return;
2467      case ARMin_Call:
2468         return;
2469      case ARMin_Mul:
2470         return;
2471      case ARMin_LdrEX:
2472         return;
2473      case ARMin_StrEX:
2474         return;
2475      case ARMin_VLdStD:
2476         i->ARMin.VLdStD.dD = lookupHRegRemap(m, i->ARMin.VLdStD.dD);
2477         mapRegs_ARMAModeV(m, i->ARMin.VLdStD.amode);
2478         return;
2479      case ARMin_VLdStS:
2480         i->ARMin.VLdStS.fD = lookupHRegRemap(m, i->ARMin.VLdStS.fD);
2481         mapRegs_ARMAModeV(m, i->ARMin.VLdStS.amode);
2482         return;
2483      case ARMin_VAluD:
2484         i->ARMin.VAluD.dst  = lookupHRegRemap(m, i->ARMin.VAluD.dst);
2485         i->ARMin.VAluD.argL = lookupHRegRemap(m, i->ARMin.VAluD.argL);
2486         i->ARMin.VAluD.argR = lookupHRegRemap(m, i->ARMin.VAluD.argR);
2487         return;
2488      case ARMin_VAluS:
2489         i->ARMin.VAluS.dst  = lookupHRegRemap(m, i->ARMin.VAluS.dst);
2490         i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
2491         i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
2492         return;
2493      case ARMin_VUnaryD:
2494         i->ARMin.VUnaryD.dst = lookupHRegRemap(m, i->ARMin.VUnaryD.dst);
2495         i->ARMin.VUnaryD.src = lookupHRegRemap(m, i->ARMin.VUnaryD.src);
2496         return;
2497      case ARMin_VUnaryS:
2498         i->ARMin.VUnaryS.dst = lookupHRegRemap(m, i->ARMin.VUnaryS.dst);
2499         i->ARMin.VUnaryS.src = lookupHRegRemap(m, i->ARMin.VUnaryS.src);
2500         return;
2501      case ARMin_VCmpD:
2502         i->ARMin.VCmpD.argL = lookupHRegRemap(m, i->ARMin.VCmpD.argL);
2503         i->ARMin.VCmpD.argR = lookupHRegRemap(m, i->ARMin.VCmpD.argR);
2504         return;
2505      case ARMin_VCMovD:
2506         i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
2507         i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
2508         return;
2509      case ARMin_VCMovS:
2510         i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
2511         i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
2512         return;
2513      case ARMin_VCvtSD:
2514         i->ARMin.VCvtSD.dst = lookupHRegRemap(m, i->ARMin.VCvtSD.dst);
2515         i->ARMin.VCvtSD.src = lookupHRegRemap(m, i->ARMin.VCvtSD.src);
2516         return;
2517      case ARMin_VXferQ:
2518         i->ARMin.VXferQ.qD  = lookupHRegRemap(m, i->ARMin.VXferQ.qD);
2519         i->ARMin.VXferQ.dHi = lookupHRegRemap(m, i->ARMin.VXferQ.dHi);
2520         i->ARMin.VXferQ.dLo = lookupHRegRemap(m, i->ARMin.VXferQ.dLo);
2521         return;
2522      case ARMin_VXferD:
2523         i->ARMin.VXferD.dD  = lookupHRegRemap(m, i->ARMin.VXferD.dD);
2524         i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
2525         i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
2526         return;
2527      case ARMin_VXferS:
2528         i->ARMin.VXferS.fD  = lookupHRegRemap(m, i->ARMin.VXferS.fD);
2529         i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
2530         return;
2531      case ARMin_VCvtID:
2532         i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
2533         i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
2534         return;
2535      case ARMin_VRIntR:
2536         i->ARMin.VRIntR.dst = lookupHRegRemap(m, i->ARMin.VRIntR.dst);
2537         i->ARMin.VRIntR.src = lookupHRegRemap(m, i->ARMin.VRIntR.src);
2538         return;
2539      case ARMin_VMinMaxNum:
2540         i->ARMin.VMinMaxNum.dst
2541            = lookupHRegRemap(m, i->ARMin.VMinMaxNum.dst);
2542         i->ARMin.VMinMaxNum.srcL
2543            = lookupHRegRemap(m, i->ARMin.VMinMaxNum.srcL);
2544         i->ARMin.VMinMaxNum.srcR
2545            = lookupHRegRemap(m, i->ARMin.VMinMaxNum.srcR);
2546         return;
2547      case ARMin_FPSCR:
2548         i->ARMin.FPSCR.iReg = lookupHRegRemap(m, i->ARMin.FPSCR.iReg);
2549         return;
2550      case ARMin_MFence:
2551         return;
2552      case ARMin_CLREX:
2553         return;
2554      case ARMin_NLdStQ:
2555         i->ARMin.NLdStQ.dQ = lookupHRegRemap(m, i->ARMin.NLdStQ.dQ);
2556         mapRegs_ARMAModeN(m, i->ARMin.NLdStQ.amode);
2557         return;
2558      case ARMin_NLdStD:
2559         i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
2560         mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
2561         return;
2562      case ARMin_NUnary:
2563         i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
2564         i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
2565         return;
2566      case ARMin_NUnaryS:
2567         i->ARMin.NUnaryS.src->reg
2568            = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
2569         i->ARMin.NUnaryS.dst->reg
2570            = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
2571         return;
2572      case ARMin_NShift:
2573         i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
2574         i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
2575         i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
2576         return;
2577      case ARMin_NShl64:
2578         i->ARMin.NShl64.dst = lookupHRegRemap(m, i->ARMin.NShl64.dst);
2579         i->ARMin.NShl64.src = lookupHRegRemap(m, i->ARMin.NShl64.src);
2580         return;
2581      case ARMin_NDual:
2582         i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
2583         i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
2584         return;
2585      case ARMin_NBinary:
2586         i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
2587         i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
2588         i->ARMin.NBinary.dst  = lookupHRegRemap(m, i->ARMin.NBinary.dst);
2589         return;
2590      case ARMin_NeonImm:
2591         i->ARMin.NeonImm.dst = lookupHRegRemap(m, i->ARMin.NeonImm.dst);
2592         return;
2593      case ARMin_NCMovQ:
2594         i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
2595         i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
2596         return;
2597      case ARMin_Add32:
2598         i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
2599         i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
2600         return;
2601      case ARMin_EvCheck:
2602         /* We expect both amodes only to mention r8, so this is in
2603            fact pointless, since r8 isn't allocatable, but
2604            anyway.. */
2605         mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amCounter);
2606         mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amFailAddr);
2607         return;
2608      case ARMin_ProfInc:
2609         /* hardwires r11 and r12 -- nothing to modify. */
2610         return;
2611      default:
2612         ppARMInstr(i);
2613         vpanic("mapRegs_ARMInstr");
2614   }
2615}
2616
2617/* Figure out if i represents a reg-reg move, and if so assign the
2618   source and destination to *src and *dst.  If in doubt say No.  Used
2619   by the register allocator to do move coalescing.
2620*/
2621Bool isMove_ARMInstr ( const ARMInstr* i, HReg* src, HReg* dst )
2622{
2623   /* Moves between integer regs */
2624   switch (i->tag) {
2625      case ARMin_Mov:
2626         if (i->ARMin.Mov.src->tag == ARMri84_R) {
2627            *src = i->ARMin.Mov.src->ARMri84.R.reg;
2628            *dst = i->ARMin.Mov.dst;
2629            return True;
2630         }
2631         break;
2632      case ARMin_VUnaryD:
2633         if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
2634            *src = i->ARMin.VUnaryD.src;
2635            *dst = i->ARMin.VUnaryD.dst;
2636            return True;
2637         }
2638         break;
2639      case ARMin_VUnaryS:
2640         if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
2641            *src = i->ARMin.VUnaryS.src;
2642            *dst = i->ARMin.VUnaryS.dst;
2643            return True;
2644         }
2645         break;
2646      case ARMin_NUnary:
2647         if (i->ARMin.NUnary.op == ARMneon_COPY) {
2648            *src = i->ARMin.NUnary.src;
2649            *dst = i->ARMin.NUnary.dst;
2650            return True;
2651         }
2652         break;
2653      default:
2654         break;
2655   }
2656
2657   return False;
2658}
2659
2660
2661/* Generate arm spill/reload instructions under the direction of the
2662   register allocator.  Note it's critical these don't write the
2663   condition codes. */
2664
2665void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2666                    HReg rreg, Int offsetB, Bool mode64 )
2667{
2668   HRegClass rclass;
2669   vassert(offsetB >= 0);
2670   vassert(!hregIsVirtual(rreg));
2671   vassert(mode64 == False);
2672   *i1 = *i2 = NULL;
2673   rclass = hregClass(rreg);
2674   switch (rclass) {
2675      case HRcInt32:
2676         vassert(offsetB <= 4095);
2677         *i1 = ARMInstr_LdSt32( ARMcc_AL, False/*!isLoad*/,
2678                                rreg,
2679                                ARMAMode1_RI(hregARM_R8(), offsetB) );
2680         return;
2681      case HRcFlt32:
2682      case HRcFlt64: {
2683         HReg r8   = hregARM_R8();  /* baseblock */
2684         HReg r12  = hregARM_R12(); /* spill temp */
2685         HReg base = r8;
2686         vassert(0 == (offsetB & 3));
2687         if (offsetB >= 1024) {
2688            Int offsetKB = offsetB / 1024;
2689            /* r12 = r8 + (1024 * offsetKB) */
2690            *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
2691                               ARMRI84_I84(offsetKB, 11));
2692            offsetB -= (1024 * offsetKB);
2693            base = r12;
2694         }
2695         vassert(offsetB <= 1020);
2696         if (rclass == HRcFlt32) {
2697            *i2 = ARMInstr_VLdStS( False/*!isLoad*/,
2698                                   rreg,
2699                                   mkARMAModeV(base, offsetB) );
2700         } else {
2701            *i2 = ARMInstr_VLdStD( False/*!isLoad*/,
2702                                   rreg,
2703                                   mkARMAModeV(base, offsetB) );
2704         }
2705         return;
2706      }
2707      case HRcVec128: {
2708         HReg r8  = hregARM_R8();
2709         HReg r12 = hregARM_R12();
2710         *i1 = ARMInstr_Add32(r12, r8, offsetB);
2711         *i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
2712         return;
2713      }
2714      default:
2715         ppHRegClass(rclass);
2716         vpanic("genSpill_ARM: unimplemented regclass");
2717   }
2718}
2719
2720void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2721                     HReg rreg, Int offsetB, Bool mode64 )
2722{
2723   HRegClass rclass;
2724   vassert(offsetB >= 0);
2725   vassert(!hregIsVirtual(rreg));
2726   vassert(mode64 == False);
2727   *i1 = *i2 = NULL;
2728   rclass = hregClass(rreg);
2729   switch (rclass) {
2730      case HRcInt32:
2731         vassert(offsetB <= 4095);
2732         *i1 = ARMInstr_LdSt32( ARMcc_AL, True/*isLoad*/,
2733                                rreg,
2734                                ARMAMode1_RI(hregARM_R8(), offsetB) );
2735         return;
2736      case HRcFlt32:
2737      case HRcFlt64: {
2738         HReg r8   = hregARM_R8();  /* baseblock */
2739         HReg r12  = hregARM_R12(); /* spill temp */
2740         HReg base = r8;
2741         vassert(0 == (offsetB & 3));
2742         if (offsetB >= 1024) {
2743            Int offsetKB = offsetB / 1024;
2744            /* r12 = r8 + (1024 * offsetKB) */
2745            *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
2746                               ARMRI84_I84(offsetKB, 11));
2747            offsetB -= (1024 * offsetKB);
2748            base = r12;
2749         }
2750         vassert(offsetB <= 1020);
2751         if (rclass == HRcFlt32) {
2752            *i2 = ARMInstr_VLdStS( True/*isLoad*/,
2753                                   rreg,
2754                                   mkARMAModeV(base, offsetB) );
2755         } else {
2756            *i2 = ARMInstr_VLdStD( True/*isLoad*/,
2757                                   rreg,
2758                                   mkARMAModeV(base, offsetB) );
2759         }
2760         return;
2761      }
2762      case HRcVec128: {
2763         HReg r8  = hregARM_R8();
2764         HReg r12 = hregARM_R12();
2765         *i1 = ARMInstr_Add32(r12, r8, offsetB);
2766         *i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(r12));
2767         return;
2768      }
2769      default:
2770         ppHRegClass(rclass);
2771         vpanic("genReload_ARM: unimplemented regclass");
2772   }
2773}
2774
2775
2776/* Emit an instruction into buf and return the number of bytes used.
2777   Note that buf is not the insn's final place, and therefore it is
2778   imperative to emit position-independent code. */
2779
2780static inline UInt iregEnc ( HReg r )
2781{
2782   UInt n;
2783   vassert(hregClass(r) == HRcInt32);
2784   vassert(!hregIsVirtual(r));
2785   n = hregEncoding(r);
2786   vassert(n <= 15);
2787   return n;
2788}
2789
2790static inline UInt dregEnc ( HReg r )
2791{
2792   UInt n;
2793   vassert(hregClass(r) == HRcFlt64);
2794   vassert(!hregIsVirtual(r));
2795   n = hregEncoding(r);
2796   vassert(n <= 31);
2797   return n;
2798}
2799
2800static inline UInt fregEnc ( HReg r )
2801{
2802   UInt n;
2803   vassert(hregClass(r) == HRcFlt32);
2804   vassert(!hregIsVirtual(r));
2805   n = hregEncoding(r);
2806   vassert(n <= 31);
2807   return n;
2808}
2809
2810static inline UInt qregEnc ( HReg r )
2811{
2812   UInt n;
2813   vassert(hregClass(r) == HRcVec128);
2814   vassert(!hregIsVirtual(r));
2815   n = hregEncoding(r);
2816   vassert(n <= 15);
2817   return n;
2818}
2819
2820#define BITS4(zzb3,zzb2,zzb1,zzb0) \
2821   (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
2822#define X0000  BITS4(0,0,0,0)
2823#define X0001  BITS4(0,0,0,1)
2824#define X0010  BITS4(0,0,1,0)
2825#define X0011  BITS4(0,0,1,1)
2826#define X0100  BITS4(0,1,0,0)
2827#define X0101  BITS4(0,1,0,1)
2828#define X0110  BITS4(0,1,1,0)
2829#define X0111  BITS4(0,1,1,1)
2830#define X1000  BITS4(1,0,0,0)
2831#define X1001  BITS4(1,0,0,1)
2832#define X1010  BITS4(1,0,1,0)
2833#define X1011  BITS4(1,0,1,1)
2834#define X1100  BITS4(1,1,0,0)
2835#define X1101  BITS4(1,1,0,1)
2836#define X1110  BITS4(1,1,1,0)
2837#define X1111  BITS4(1,1,1,1)
2838
2839#define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
2840   (((((UInt)(zzx7)) & 0xF) << 28) | \
2841    (((zzx6) & 0xF) << 24) |  \
2842    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
2843    (((zzx3) & 0xF) << 12))
2844
2845#define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2)        \
2846   (((((UInt)(zzx7)) & 0xF) << 28) | \
2847    (((zzx6) & 0xF) << 24) |  \
2848    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
2849    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8))
2850
2851#define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0)        \
2852   (((((UInt)(zzx7)) & 0xF) << 28) | \
2853    (((zzx6) & 0xF) << 24) |  \
2854    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
2855    (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) <<  0))
2856
2857#define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
2858  (((((UInt)(zzx7)) & 0xF) << 28) | \
2859   (((zzx6) & 0xF) << 24) | \
2860   (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
2861   (((zzx0) & 0xF) << 0))
2862
2863#define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0)  \
2864   (((((UInt)(zzx7)) & 0xF) << 28) | \
2865    (((zzx6) & 0xF) << 24) |  \
2866    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
2867    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8) |  \
2868    (((zzx1) & 0xF) <<  4) | (((zzx0) & 0xF) <<  0))
2869
2870#define XX______(zzx7,zzx6) \
2871   (((((UInt)(zzx7)) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
2872
2873/* Generate a skeletal insn that involves an a RI84 shifter operand.
2874   Returns a word which is all zeroes apart from bits 25 and 11..0,
2875   since it is those that encode the shifter operand (at least to the
2876   extent that we care about it.) */
2877static UInt skeletal_RI84 ( ARMRI84* ri )
2878{
2879   UInt instr;
2880   if (ri->tag == ARMri84_I84) {
2881      vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
2882      vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
2883      instr = 1 << 25;
2884      instr |= (ri->ARMri84.I84.imm4 << 8);
2885      instr |= ri->ARMri84.I84.imm8;
2886   } else {
2887      instr = 0 << 25;
2888      instr |= iregEnc(ri->ARMri84.R.reg);
2889   }
2890   return instr;
2891}
2892
2893/* Ditto for RI5.  Resulting word is zeroes apart from bit 4 and bits
2894   11..7. */
2895static UInt skeletal_RI5 ( ARMRI5* ri )
2896{
2897   UInt instr;
2898   if (ri->tag == ARMri5_I5) {
2899      UInt imm5 = ri->ARMri5.I5.imm5;
2900      vassert(imm5 >= 1 && imm5 <= 31);
2901      instr = 0 << 4;
2902      instr |= imm5 << 7;
2903   } else {
2904      instr = 1 << 4;
2905      instr |= iregEnc(ri->ARMri5.R.reg) << 8;
2906   }
2907   return instr;
2908}
2909
2910
2911/* Get an immediate into a register, using only that
2912   register.  (very lame..) */
2913static UInt* imm32_to_ireg ( UInt* p, Int rD, UInt imm32 )
2914{
2915   UInt instr;
2916   vassert(rD >= 0 && rD <= 14); // r15 not good to mess with!
2917#if 0
2918   if (0 == (imm32 & ~0xFF)) {
2919      /* mov with a immediate shifter operand of (0, imm32) (??) */
2920      instr = XXXXXX__(X1110,X0011,X1010,X0000,rD,X0000);
2921      instr |= imm32;
2922      *p++ = instr;
2923   } else {
2924      // this is very bad; causes Dcache pollution
2925      // ldr  rD, [pc]
2926      instr = XXXXX___(X1110,X0101,X1001,X1111,rD);
2927      *p++ = instr;
2928      // b .+8
2929      instr = 0xEA000000;
2930      *p++ = instr;
2931      // .word imm32
2932      *p++ = imm32;
2933   }
2934#else
2935   if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2936      /* Generate movw rD, #low16.  Then, if the high 16 are
2937         nonzero, generate movt rD, #high16. */
2938      UInt lo16 = imm32 & 0xFFFF;
2939      UInt hi16 = (imm32 >> 16) & 0xFFFF;
2940      instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
2941                       (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
2942                       lo16 & 0xF);
2943      *p++ = instr;
2944      if (hi16 != 0) {
2945         instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
2946                          (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
2947                          hi16 & 0xF);
2948         *p++ = instr;
2949      }
2950   } else {
2951      UInt imm, rot;
2952      UInt op = X1010;
2953      UInt rN = 0;
2954      if ((imm32 & 0xFF) || (imm32 == 0)) {
2955         imm = imm32 & 0xFF;
2956         rot = 0;
2957         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2958         *p++ = instr;
2959         op = X1000;
2960         rN = rD;
2961      }
2962      if (imm32 & 0xFF000000) {
2963         imm = (imm32 >> 24) & 0xFF;
2964         rot = 4;
2965         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2966         *p++ = instr;
2967         op = X1000;
2968         rN = rD;
2969      }
2970      if (imm32 & 0xFF0000) {
2971         imm = (imm32 >> 16) & 0xFF;
2972         rot = 8;
2973         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2974         *p++ = instr;
2975         op = X1000;
2976         rN = rD;
2977      }
2978      if (imm32 & 0xFF00) {
2979         imm = (imm32 >> 8) & 0xFF;
2980         rot = 12;
2981         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2982         *p++ = instr;
2983         op = X1000;
2984         rN = rD;
2985      }
2986   }
2987#endif
2988   return p;
2989}
2990
2991/* Get an immediate into a register, using only that register, and
2992   generating exactly 2 instructions, regardless of the value of the
2993   immediate. This is used when generating sections of code that need
2994   to be patched later, so as to guarantee a specific size. */
2995static UInt* imm32_to_ireg_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
2996{
2997   if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2998      /* Generate movw rD, #low16 ;  movt rD, #high16. */
2999      UInt lo16 = imm32 & 0xFFFF;
3000      UInt hi16 = (imm32 >> 16) & 0xFFFF;
3001      UInt instr;
3002      instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
3003                       (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
3004                       lo16 & 0xF);
3005      *p++ = instr;
3006      instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
3007                       (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
3008                       hi16 & 0xF);
3009      *p++ = instr;
3010   } else {
3011      vassert(0); /* lose */
3012   }
3013   return p;
3014}
3015
3016/* Check whether p points at a 2-insn sequence cooked up by
3017   imm32_to_ireg_EXACTLY2(). */
3018static Bool is_imm32_to_ireg_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
3019{
3020   if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
3021      /* Generate movw rD, #low16 ;  movt rD, #high16. */
3022      UInt lo16 = imm32 & 0xFFFF;
3023      UInt hi16 = (imm32 >> 16) & 0xFFFF;
3024      UInt i0, i1;
3025      i0 = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
3026                    (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
3027                    lo16 & 0xF);
3028      i1 = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
3029                    (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
3030                    hi16 & 0xF);
3031      return p[0] == i0 && p[1] == i1;
3032   } else {
3033      vassert(0); /* lose */
3034   }
3035}
3036
3037
3038static UInt* do_load_or_store32 ( UInt* p,
3039                                  Bool isLoad, UInt rD, ARMAMode1* am )
3040{
3041   vassert(rD <= 12);
3042   vassert(am->tag == ARMam1_RI); // RR case is not handled
3043   UInt bB = 0;
3044   UInt bL = isLoad ? 1 : 0;
3045   Int  simm12;
3046   UInt instr, bP;
3047   if (am->ARMam1.RI.simm13 < 0) {
3048      bP = 0;
3049      simm12 = -am->ARMam1.RI.simm13;
3050   } else {
3051      bP = 1;
3052      simm12 = am->ARMam1.RI.simm13;
3053   }
3054   vassert(simm12 >= 0 && simm12 <= 4095);
3055   instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
3056                    iregEnc(am->ARMam1.RI.reg),
3057                    rD);
3058   instr |= simm12;
3059   *p++ = instr;
3060   return p;
3061}
3062
3063
3064/* Emit an instruction into buf and return the number of bytes used.
3065   Note that buf is not the insn's final place, and therefore it is
3066   imperative to emit position-independent code.  If the emitted
3067   instruction was a profiler inc, set *is_profInc to True, else
3068   leave it unchanged. */
3069
3070Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc,
3071                    UChar* buf, Int nbuf, const ARMInstr* i,
3072                    Bool mode64, VexEndness endness_host,
3073                    const void* disp_cp_chain_me_to_slowEP,
3074                    const void* disp_cp_chain_me_to_fastEP,
3075                    const void* disp_cp_xindir,
3076                    const void* disp_cp_xassisted )
3077{
3078   UInt* p = (UInt*)buf;
3079   vassert(nbuf >= 32);
3080   vassert(mode64 == False);
3081   vassert(0 == (((HWord)buf) & 3));
3082
3083   switch (i->tag) {
3084      case ARMin_Alu: {
3085         UInt     instr, subopc;
3086         UInt     rD   = iregEnc(i->ARMin.Alu.dst);
3087         UInt     rN   = iregEnc(i->ARMin.Alu.argL);
3088         ARMRI84* argR = i->ARMin.Alu.argR;
3089         switch (i->ARMin.Alu.op) {
3090            case ARMalu_ADDS: /* fallthru */
3091            case ARMalu_ADD:  subopc = X0100; break;
3092            case ARMalu_ADC:  subopc = X0101; break;
3093            case ARMalu_SUBS: /* fallthru */
3094            case ARMalu_SUB:  subopc = X0010; break;
3095            case ARMalu_SBC:  subopc = X0110; break;
3096            case ARMalu_AND:  subopc = X0000; break;
3097            case ARMalu_BIC:  subopc = X1110; break;
3098            case ARMalu_OR:   subopc = X1100; break;
3099            case ARMalu_XOR:  subopc = X0001; break;
3100            default: goto bad;
3101         }
3102         instr = skeletal_RI84(argR);
3103         instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3104                           (subopc << 1) & 0xF, rN, rD);
3105         if (i->ARMin.Alu.op == ARMalu_ADDS
3106             || i->ARMin.Alu.op == ARMalu_SUBS) {
3107            instr |= 1<<20;  /* set the S bit */
3108         }
3109         *p++ = instr;
3110         goto done;
3111      }
3112      case ARMin_Shift: {
3113         UInt    instr, subopc;
3114         UInt    rD   = iregEnc(i->ARMin.Shift.dst);
3115         UInt    rM   = iregEnc(i->ARMin.Shift.argL);
3116         ARMRI5* argR = i->ARMin.Shift.argR;
3117         switch (i->ARMin.Shift.op) {
3118            case ARMsh_SHL: subopc = X0000; break;
3119            case ARMsh_SHR: subopc = X0001; break;
3120            case ARMsh_SAR: subopc = X0010; break;
3121            default: goto bad;
3122         }
3123         instr = skeletal_RI5(argR);
3124         instr |= XXXXX__X(X1110,X0001,X1010,X0000,rD, /* _ _ */ rM);
3125         instr |= (subopc & 3) << 5;
3126         *p++ = instr;
3127         goto done;
3128      }
3129      case ARMin_Unary: {
3130         UInt instr;
3131         UInt rDst = iregEnc(i->ARMin.Unary.dst);
3132         UInt rSrc = iregEnc(i->ARMin.Unary.src);
3133         switch (i->ARMin.Unary.op) {
3134            case ARMun_CLZ:
3135               instr = XXXXXXXX(X1110,X0001,X0110,X1111,
3136                                rDst,X1111,X0001,rSrc);
3137               *p++ = instr;
3138               goto done;
3139            case ARMun_NEG: /* RSB rD,rS,#0 */
3140               instr = XXXXX___(X1110,0x2,0x6,rSrc,rDst);
3141               *p++ = instr;
3142               goto done;
3143            case ARMun_NOT: {
3144               UInt subopc = X1111; /* MVN */
3145               instr = rSrc;
3146               instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3147                                 (subopc << 1) & 0xF, 0, rDst);
3148               *p++ = instr;
3149               goto done;
3150            }
3151            default:
3152               break;
3153         }
3154         goto bad;
3155      }
3156      case ARMin_CmpOrTst: {
3157         UInt instr  = skeletal_RI84(i->ARMin.CmpOrTst.argR);
3158         UInt subopc = i->ARMin.CmpOrTst.isCmp ? X1010 : X1000;
3159         UInt SBZ    = 0;
3160         instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3161                           ((subopc << 1) & 0xF) | 1,
3162                           iregEnc(i->ARMin.CmpOrTst.argL), SBZ );
3163         *p++ = instr;
3164         goto done;
3165      }
3166      case ARMin_Mov: {
3167         UInt instr  = skeletal_RI84(i->ARMin.Mov.src);
3168         UInt subopc = X1101; /* MOV */
3169         UInt SBZ    = 0;
3170         instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3171                           (subopc << 1) & 0xF, SBZ,
3172                           iregEnc(i->ARMin.Mov.dst));
3173         *p++ = instr;
3174         goto done;
3175      }
3176      case ARMin_Imm32: {
3177         p = imm32_to_ireg( (UInt*)p, iregEnc(i->ARMin.Imm32.dst),
3178                                      i->ARMin.Imm32.imm32 );
3179         goto done;
3180      }
3181      case ARMin_LdSt32:
3182      case ARMin_LdSt8U: {
3183         UInt        bL, bB;
3184         HReg        rD;
3185         ARMAMode1*  am;
3186         ARMCondCode cc;
3187         if (i->tag == ARMin_LdSt32) {
3188            bB = 0;
3189            bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
3190            am = i->ARMin.LdSt32.amode;
3191            rD = i->ARMin.LdSt32.rD;
3192            cc = i->ARMin.LdSt32.cc;
3193         } else {
3194            bB = 1;
3195            bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
3196            am = i->ARMin.LdSt8U.amode;
3197            rD = i->ARMin.LdSt8U.rD;
3198            cc = i->ARMin.LdSt8U.cc;
3199         }
3200         vassert(cc != ARMcc_NV);
3201         if (am->tag == ARMam1_RI) {
3202            Int  simm12;
3203            UInt instr, bP;
3204            if (am->ARMam1.RI.simm13 < 0) {
3205               bP = 0;
3206               simm12 = -am->ARMam1.RI.simm13;
3207            } else {
3208               bP = 1;
3209               simm12 = am->ARMam1.RI.simm13;
3210            }
3211            vassert(simm12 >= 0 && simm12 <= 4095);
3212            instr = XXXXX___(cc,X0101,BITS4(bP,bB,0,bL),
3213                             iregEnc(am->ARMam1.RI.reg),
3214                             iregEnc(rD));
3215            instr |= simm12;
3216            *p++ = instr;
3217            goto done;
3218         } else {
3219            // RR case
3220            goto bad;
3221         }
3222      }
3223      case ARMin_LdSt16: {
3224         HReg        rD = i->ARMin.LdSt16.rD;
3225         UInt        bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
3226         UInt        bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
3227         ARMAMode2*  am = i->ARMin.LdSt16.amode;
3228         ARMCondCode cc = i->ARMin.LdSt16.cc;
3229         vassert(cc != ARMcc_NV);
3230         if (am->tag == ARMam2_RI) {
3231            HReg rN = am->ARMam2.RI.reg;
3232            Int  simm8;
3233            UInt bP, imm8hi, imm8lo, instr;
3234            if (am->ARMam2.RI.simm9 < 0) {
3235               bP = 0;
3236               simm8 = -am->ARMam2.RI.simm9;
3237            } else {
3238               bP = 1;
3239               simm8 = am->ARMam2.RI.simm9;
3240            }
3241            vassert(simm8 >= 0 && simm8 <= 255);
3242            imm8hi = (simm8 >> 4) & 0xF;
3243            imm8lo = simm8 & 0xF;
3244            vassert(!(bL == 0 && bS == 1)); // "! signed store"
3245            /**/ if (bL == 0 && bS == 0) {
3246               // strh
3247               instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,0), iregEnc(rN),
3248                                iregEnc(rD), imm8hi, X1011, imm8lo);
3249               *p++ = instr;
3250               goto done;
3251            }
3252            else if (bL == 1 && bS == 0) {
3253               // ldrh
3254               instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3255                                iregEnc(rD), imm8hi, X1011, imm8lo);
3256               *p++ = instr;
3257               goto done;
3258            }
3259            else if (bL == 1 && bS == 1) {
3260               // ldrsh
3261               instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3262                                iregEnc(rD), imm8hi, X1111, imm8lo);
3263               *p++ = instr;
3264               goto done;
3265            }
3266            else vassert(0); // ill-constructed insn
3267         } else {
3268            // RR case
3269            goto bad;
3270         }
3271      }
3272      case ARMin_Ld8S: {
3273         HReg        rD = i->ARMin.Ld8S.rD;
3274         ARMAMode2*  am = i->ARMin.Ld8S.amode;
3275         ARMCondCode cc = i->ARMin.Ld8S.cc;
3276         vassert(cc != ARMcc_NV);
3277         if (am->tag == ARMam2_RI) {
3278            HReg rN = am->ARMam2.RI.reg;
3279            Int  simm8;
3280            UInt bP, imm8hi, imm8lo, instr;
3281            if (am->ARMam2.RI.simm9 < 0) {
3282               bP = 0;
3283               simm8 = -am->ARMam2.RI.simm9;
3284            } else {
3285               bP = 1;
3286               simm8 = am->ARMam2.RI.simm9;
3287            }
3288            vassert(simm8 >= 0 && simm8 <= 255);
3289            imm8hi = (simm8 >> 4) & 0xF;
3290            imm8lo = simm8 & 0xF;
3291            // ldrsb
3292            instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3293                             iregEnc(rD), imm8hi, X1101, imm8lo);
3294            *p++ = instr;
3295            goto done;
3296         } else {
3297            // RR case
3298            goto bad;
3299         }
3300      }
3301
3302      case ARMin_XDirect: {
3303         /* NB: what goes on here has to be very closely coordinated
3304            with the chainXDirect_ARM and unchainXDirect_ARM below. */
3305         /* We're generating chain-me requests here, so we need to be
3306            sure this is actually allowed -- no-redir translations
3307            can't use chain-me's.  Hence: */
3308         vassert(disp_cp_chain_me_to_slowEP != NULL);
3309         vassert(disp_cp_chain_me_to_fastEP != NULL);
3310
3311         /* Use ptmp for backpatching conditional jumps. */
3312         UInt* ptmp = NULL;
3313
3314         /* First off, if this is conditional, create a conditional
3315            jump over the rest of it.  Or at least, leave a space for
3316            it that we will shortly fill in. */
3317         if (i->ARMin.XDirect.cond != ARMcc_AL) {
3318            vassert(i->ARMin.XDirect.cond != ARMcc_NV);
3319            ptmp = p;
3320            *p++ = 0;
3321         }
3322
3323         /* Update the guest R15T. */
3324         /* movw r12, lo16(dstGA) */
3325         /* movt r12, hi16(dstGA) */
3326         /* str r12, amR15T */
3327         p = imm32_to_ireg(p, /*r*/12, i->ARMin.XDirect.dstGA);
3328         p = do_load_or_store32(p, False/*!isLoad*/,
3329                                /*r*/12, i->ARMin.XDirect.amR15T);
3330
3331         /* --- FIRST PATCHABLE BYTE follows --- */
3332         /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
3333            calling to) backs up the return address, so as to find the
3334            address of the first patchable byte.  So: don't change the
3335            number of instructions (3) below. */
3336         /* movw r12, lo16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
3337         /* movt r12, hi16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
3338         /* blx  r12  (A1) */
3339         const void* disp_cp_chain_me
3340                  = i->ARMin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
3341                                              : disp_cp_chain_me_to_slowEP;
3342         p = imm32_to_ireg_EXACTLY2(p, /*r*/12,
3343                                    (UInt)(Addr)disp_cp_chain_me);
3344         *p++ = 0xE12FFF3C;
3345         /* --- END of PATCHABLE BYTES --- */
3346
3347         /* Fix up the conditional jump, if there was one. */
3348         if (i->ARMin.XDirect.cond != ARMcc_AL) {
3349            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3350            vassert(delta > 0 && delta < 40);
3351            vassert((delta & 3) == 0);
3352            UInt notCond = 1 ^ (UInt)i->ARMin.XDirect.cond;
3353            vassert(notCond <= 13); /* Neither AL nor NV */
3354            delta = (delta >> 2) - 2;
3355            *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3356         }
3357         goto done;
3358      }
3359
3360      case ARMin_XIndir: {
3361         /* We're generating transfers that could lead indirectly to a
3362            chain-me, so we need to be sure this is actually allowed
3363            -- no-redir translations are not allowed to reach normal
3364            translations without going through the scheduler.  That
3365            means no XDirects or XIndirs out from no-redir
3366            translations.  Hence: */
3367         vassert(disp_cp_xindir != NULL);
3368
3369         /* Use ptmp for backpatching conditional jumps. */
3370         UInt* ptmp = NULL;
3371
3372         /* First off, if this is conditional, create a conditional
3373            jump over the rest of it.  Or at least, leave a space for
3374            it that we will shortly fill in. */
3375         if (i->ARMin.XIndir.cond != ARMcc_AL) {
3376            vassert(i->ARMin.XIndir.cond != ARMcc_NV);
3377            ptmp = p;
3378            *p++ = 0;
3379         }
3380
3381         /* Update the guest R15T. */
3382         /* str r-dstGA, amR15T */
3383         p = do_load_or_store32(p, False/*!isLoad*/,
3384                                iregEnc(i->ARMin.XIndir.dstGA),
3385                                i->ARMin.XIndir.amR15T);
3386
3387         /* movw r12, lo16(VG_(disp_cp_xindir)) */
3388         /* movt r12, hi16(VG_(disp_cp_xindir)) */
3389         /* bx   r12  (A1) */
3390         p = imm32_to_ireg(p, /*r*/12, (UInt)(Addr)disp_cp_xindir);
3391         *p++ = 0xE12FFF1C;
3392
3393         /* Fix up the conditional jump, if there was one. */
3394         if (i->ARMin.XIndir.cond != ARMcc_AL) {
3395            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3396            vassert(delta > 0 && delta < 40);
3397            vassert((delta & 3) == 0);
3398            UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
3399            vassert(notCond <= 13); /* Neither AL nor NV */
3400            delta = (delta >> 2) - 2;
3401            *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3402         }
3403         goto done;
3404      }
3405
3406      case ARMin_XAssisted: {
3407         /* Use ptmp for backpatching conditional jumps. */
3408         UInt* ptmp = NULL;
3409
3410         /* First off, if this is conditional, create a conditional
3411            jump over the rest of it.  Or at least, leave a space for
3412            it that we will shortly fill in. */
3413         if (i->ARMin.XAssisted.cond != ARMcc_AL) {
3414            vassert(i->ARMin.XAssisted.cond != ARMcc_NV);
3415            ptmp = p;
3416            *p++ = 0;
3417         }
3418
3419         /* Update the guest R15T. */
3420         /* str r-dstGA, amR15T */
3421         p = do_load_or_store32(p, False/*!isLoad*/,
3422                                iregEnc(i->ARMin.XAssisted.dstGA),
3423                                i->ARMin.XAssisted.amR15T);
3424
3425         /* movw r8,  $magic_number */
3426         UInt trcval = 0;
3427         switch (i->ARMin.XAssisted.jk) {
3428            case Ijk_ClientReq:   trcval = VEX_TRC_JMP_CLIENTREQ;   break;
3429            case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
3430            //case Ijk_Sys_int128:  trcval = VEX_TRC_JMP_SYS_INT128;  break;
3431            case Ijk_Yield:       trcval = VEX_TRC_JMP_YIELD;       break;
3432            //case Ijk_EmWarn:      trcval = VEX_TRC_JMP_EMWARN;      break;
3433            //case Ijk_MapFail:     trcval = VEX_TRC_JMP_MAPFAIL;     break;
3434            case Ijk_NoDecode:    trcval = VEX_TRC_JMP_NODECODE;    break;
3435            case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
3436            case Ijk_NoRedir:     trcval = VEX_TRC_JMP_NOREDIR;     break;
3437            //case Ijk_SigTRAP:     trcval = VEX_TRC_JMP_SIGTRAP;     break;
3438            //case Ijk_SigSEGV:     trcval = VEX_TRC_JMP_SIGSEGV;     break;
3439            case Ijk_Boring:      trcval = VEX_TRC_JMP_BORING;      break;
3440            /* We don't expect to see the following being assisted. */
3441            //case Ijk_Ret:
3442            //case Ijk_Call:
3443            /* fallthrough */
3444            default:
3445               ppIRJumpKind(i->ARMin.XAssisted.jk);
3446               vpanic("emit_ARMInstr.ARMin_XAssisted: unexpected jump kind");
3447         }
3448         vassert(trcval != 0);
3449         p = imm32_to_ireg(p, /*r*/8, trcval);
3450
3451         /* movw r12, lo16(VG_(disp_cp_xassisted)) */
3452         /* movt r12, hi16(VG_(disp_cp_xassisted)) */
3453         /* bx   r12  (A1) */
3454         p = imm32_to_ireg(p, /*r*/12, (UInt)(Addr)disp_cp_xassisted);
3455         *p++ = 0xE12FFF1C;
3456
3457         /* Fix up the conditional jump, if there was one. */
3458         if (i->ARMin.XAssisted.cond != ARMcc_AL) {
3459            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3460            vassert(delta > 0 && delta < 40);
3461            vassert((delta & 3) == 0);
3462            UInt notCond = 1 ^ (UInt)i->ARMin.XAssisted.cond;
3463            vassert(notCond <= 13); /* Neither AL nor NV */
3464            delta = (delta >> 2) - 2;
3465            *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3466         }
3467         goto done;
3468      }
3469
3470      case ARMin_CMov: {
3471         UInt instr  = skeletal_RI84(i->ARMin.CMov.src);
3472         UInt subopc = X1101; /* MOV */
3473         UInt SBZ    = 0;
3474         instr |= XXXXX___(i->ARMin.CMov.cond, (1 & (subopc >> 3)),
3475                           (subopc << 1) & 0xF, SBZ,
3476                           iregEnc(i->ARMin.CMov.dst));
3477         *p++ = instr;
3478         goto done;
3479      }
3480
3481      case ARMin_Call: {
3482         UInt instr;
3483         /* Decide on a scratch reg used to hold to the call address.
3484            This has to be done as per the comments in getRegUsage. */
3485         Int scratchNo;
3486         switch (i->ARMin.Call.nArgRegs) {
3487            case 0:  scratchNo = 0;  break;
3488            case 1:  scratchNo = 1;  break;
3489            case 2:  scratchNo = 2;  break;
3490            case 3:  scratchNo = 3;  break;
3491            case 4:  scratchNo = 11; break;
3492            default: vassert(0);
3493         }
3494         /* If we don't need to do any fixup actions in the case that
3495            the call doesn't happen, just do the simple thing and emit
3496            straight-line code.  We hope this is the common case. */
3497         if (i->ARMin.Call.cond == ARMcc_AL/*call always happens*/
3498             || i->ARMin.Call.rloc.pri == RLPri_None/*no fixup action*/) {
3499            // r"scratchNo" = &target
3500            p = imm32_to_ireg( (UInt*)p,
3501                               scratchNo, (UInt)i->ARMin.Call.target );
3502            // blx{cond} r"scratchNo"
3503            instr = XXX___XX(i->ARMin.Call.cond, X0001, X0010, /*___*/
3504                             X0011, scratchNo);
3505            instr |= 0xFFF << 8; // stick in the SBOnes
3506            *p++ = instr;
3507         } else {
3508            Int delta;
3509            /* Complex case.  We have to generate an if-then-else
3510               diamond. */
3511            // before:
3512            //   b{!cond} else:
3513            //   r"scratchNo" = &target
3514            //   blx{AL} r"scratchNo"
3515            // preElse:
3516            //   b after:
3517            // else:
3518            //   mov r0, #0x55555555  // possibly
3519            //   mov r1, r0           // possibly
3520            // after:
3521
3522            // before:
3523            UInt* pBefore = p;
3524
3525            //   b{!cond} else:  // ptmp1 points here
3526            *p++ = 0; // filled in later
3527
3528            //   r"scratchNo" = &target
3529            p = imm32_to_ireg( (UInt*)p,
3530                               scratchNo, (UInt)i->ARMin.Call.target );
3531
3532            //   blx{AL} r"scratchNo"
3533            instr = XXX___XX(ARMcc_AL, X0001, X0010, /*___*/
3534                             X0011, scratchNo);
3535            instr |= 0xFFF << 8; // stick in the SBOnes
3536            *p++ = instr;
3537
3538            // preElse:
3539            UInt* pPreElse = p;
3540
3541            //   b after:
3542            *p++ = 0; // filled in later
3543
3544            // else:
3545            delta = (UChar*)p - (UChar*)pBefore;
3546            delta = (delta >> 2) - 2;
3547            *pBefore
3548               = XX______(1 ^ i->ARMin.Call.cond, X1010) | (delta & 0xFFFFFF);
3549
3550            /* Do the 'else' actions */
3551            switch (i->ARMin.Call.rloc.pri) {
3552               case RLPri_Int:
3553                  p = imm32_to_ireg_EXACTLY2(p, /*r*/0, 0x55555555);
3554                  break;
3555               case RLPri_2Int:
3556                  vassert(0); //ATC
3557                  p = imm32_to_ireg_EXACTLY2(p, /*r*/0, 0x55555555);
3558                  /* mov r1, r0 */
3559                  *p++ = 0xE1A01000;
3560                  break;
3561               case RLPri_None: case RLPri_INVALID: default:
3562                  vassert(0);
3563            }
3564
3565            // after:
3566            delta = (UChar*)p - (UChar*)pPreElse;
3567            delta = (delta >> 2) - 2;
3568            *pPreElse = XX______(ARMcc_AL, X1010) | (delta & 0xFFFFFF);
3569         }
3570
3571         goto done;
3572      }
3573
3574      case ARMin_Mul: {
3575         /* E0000392   mul     r0, r2, r3
3576            E0810392   umull   r0(LO), r1(HI), r2, r3
3577            E0C10392   smull   r0(LO), r1(HI), r2, r3
3578         */
3579         switch (i->ARMin.Mul.op) {
3580            case ARMmul_PLAIN: *p++ = 0xE0000392; goto done;
3581            case ARMmul_ZX:    *p++ = 0xE0810392; goto done;
3582            case ARMmul_SX:    *p++ = 0xE0C10392; goto done;
3583            default: vassert(0);
3584         }
3585         goto bad;
3586      }
3587      case ARMin_LdrEX: {
3588         /* E1D42F9F   ldrexb r2, [r4]
3589            E1F42F9F   ldrexh r2, [r4]
3590            E1942F9F   ldrex  r2, [r4]
3591            E1B42F9F   ldrexd r2, r3, [r4]
3592         */
3593         switch (i->ARMin.LdrEX.szB) {
3594            case 1: *p++ = 0xE1D42F9F; goto done;
3595            case 2: *p++ = 0xE1F42F9F; goto done;
3596            case 4: *p++ = 0xE1942F9F; goto done;
3597            case 8: *p++ = 0xE1B42F9F; goto done;
3598            default: break;
3599         }
3600         goto bad;
3601      }
3602      case ARMin_StrEX: {
3603         /* E1C40F92   strexb r0, r2, [r4]
3604            E1E40F92   strexh r0, r2, [r4]
3605            E1840F92   strex  r0, r2, [r4]
3606            E1A40F92   strexd r0, r2, r3, [r4]
3607         */
3608         switch (i->ARMin.StrEX.szB) {
3609            case 1: *p++ = 0xE1C40F92; goto done;
3610            case 2: *p++ = 0xE1E40F92; goto done;
3611            case 4: *p++ = 0xE1840F92; goto done;
3612            case 8: *p++ = 0xE1A40F92; goto done;
3613            default: break;
3614         }
3615         goto bad;
3616      }
3617      case ARMin_VLdStD: {
3618         UInt dD     = dregEnc(i->ARMin.VLdStD.dD);
3619         UInt rN     = iregEnc(i->ARMin.VLdStD.amode->reg);
3620         Int  simm11 = i->ARMin.VLdStD.amode->simm11;
3621         UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
3622         UInt bU     = simm11 >= 0 ? 1 : 0;
3623         UInt bL     = i->ARMin.VLdStD.isLoad ? 1 : 0;
3624         UInt insn;
3625         vassert(0 == (off8 & 3));
3626         off8 >>= 2;
3627         vassert(0 == (off8 & 0xFFFFFF00));
3628         insn = XXXXXX__(0xE,X1101,BITS4(bU,0,0,bL),rN,dD,X1011);
3629         insn |= off8;
3630         *p++ = insn;
3631         goto done;
3632      }
3633      case ARMin_VLdStS: {
3634         UInt fD     = fregEnc(i->ARMin.VLdStS.fD);
3635         UInt rN     = iregEnc(i->ARMin.VLdStS.amode->reg);
3636         Int  simm11 = i->ARMin.VLdStS.amode->simm11;
3637         UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
3638         UInt bU     = simm11 >= 0 ? 1 : 0;
3639         UInt bL     = i->ARMin.VLdStS.isLoad ? 1 : 0;
3640         UInt bD     = fD & 1;
3641         UInt insn;
3642         vassert(0 == (off8 & 3));
3643         off8 >>= 2;
3644         vassert(0 == (off8 & 0xFFFFFF00));
3645         insn = XXXXXX__(0xE,X1101,BITS4(bU,bD,0,bL),rN, (fD >> 1), X1010);
3646         insn |= off8;
3647         *p++ = insn;
3648         goto done;
3649      }
3650      case ARMin_VAluD: {
3651         UInt dN = dregEnc(i->ARMin.VAluD.argL);
3652         UInt dD = dregEnc(i->ARMin.VAluD.dst);
3653         UInt dM = dregEnc(i->ARMin.VAluD.argR);
3654         UInt pqrs = X1111; /* undefined */
3655         switch (i->ARMin.VAluD.op) {
3656            case ARMvfp_ADD: pqrs = X0110; break;
3657            case ARMvfp_SUB: pqrs = X0111; break;
3658            case ARMvfp_MUL: pqrs = X0100; break;
3659            case ARMvfp_DIV: pqrs = X1000; break;
3660            default: goto bad;
3661         }
3662         vassert(pqrs != X1111);
3663         UInt bP  = (pqrs >> 3) & 1;
3664         UInt bQ  = (pqrs >> 2) & 1;
3665         UInt bR  = (pqrs >> 1) & 1;
3666         UInt bS  = (pqrs >> 0) & 1;
3667         UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,0,bQ,bR), dN, dD,
3668                              X1011, BITS4(0,bS,0,0), dM);
3669         *p++ = insn;
3670         goto done;
3671      }
3672      case ARMin_VAluS: {
3673         UInt dN = fregEnc(i->ARMin.VAluS.argL);
3674         UInt dD = fregEnc(i->ARMin.VAluS.dst);
3675         UInt dM = fregEnc(i->ARMin.VAluS.argR);
3676         UInt bN = dN & 1;
3677         UInt bD = dD & 1;
3678         UInt bM = dM & 1;
3679         UInt pqrs = X1111; /* undefined */
3680         switch (i->ARMin.VAluS.op) {
3681            case ARMvfp_ADD: pqrs = X0110; break;
3682            case ARMvfp_SUB: pqrs = X0111; break;
3683            case ARMvfp_MUL: pqrs = X0100; break;
3684            case ARMvfp_DIV: pqrs = X1000; break;
3685            default: goto bad;
3686         }
3687         vassert(pqrs != X1111);
3688         UInt bP  = (pqrs >> 3) & 1;
3689         UInt bQ  = (pqrs >> 2) & 1;
3690         UInt bR  = (pqrs >> 1) & 1;
3691         UInt bS  = (pqrs >> 0) & 1;
3692         UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
3693                              (dN >> 1), (dD >> 1),
3694                              X1010, BITS4(bN,bS,bM,0), (dM >> 1));
3695         *p++ = insn;
3696         goto done;
3697      }
3698      case ARMin_VUnaryD: {
3699         UInt dD   = dregEnc(i->ARMin.VUnaryD.dst);
3700         UInt dM   = dregEnc(i->ARMin.VUnaryD.src);
3701         UInt insn = 0;
3702         switch (i->ARMin.VUnaryD.op) {
3703            case ARMvfpu_COPY:
3704               insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X0100,dM);
3705               break;
3706            case ARMvfpu_ABS:
3707               insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X1100,dM);
3708               break;
3709            case ARMvfpu_NEG:
3710               insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X0100,dM);
3711               break;
3712            case ARMvfpu_SQRT:
3713               insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X1100,dM);
3714               break;
3715            default:
3716               goto bad;
3717         }
3718         *p++ = insn;
3719         goto done;
3720      }
3721      case ARMin_VUnaryS: {
3722         UInt fD   = fregEnc(i->ARMin.VUnaryS.dst);
3723         UInt fM   = fregEnc(i->ARMin.VUnaryS.src);
3724         UInt insn = 0;
3725         switch (i->ARMin.VUnaryS.op) {
3726            case ARMvfpu_COPY:
3727               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
3728                               (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
3729                               (fM >> 1));
3730               break;
3731            case ARMvfpu_ABS:
3732               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
3733                               (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
3734                               (fM >> 1));
3735               break;
3736            case ARMvfpu_NEG:
3737               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
3738                               (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
3739                               (fM >> 1));
3740               break;
3741            case ARMvfpu_SQRT:
3742               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
3743                               (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
3744                               (fM >> 1));
3745               break;
3746            default:
3747               goto bad;
3748         }
3749         *p++ = insn;
3750         goto done;
3751      }
3752      case ARMin_VCmpD: {
3753         UInt dD   = dregEnc(i->ARMin.VCmpD.argL);
3754         UInt dM   = dregEnc(i->ARMin.VCmpD.argR);
3755         UInt insn = XXXXXXXX(0xE, X1110, X1011, X0100, dD, X1011, X0100, dM);
3756         *p++ = insn;       /* FCMPD dD, dM */
3757         *p++ = 0xEEF1FA10; /* FMSTAT */
3758         goto done;
3759      }
3760      case ARMin_VCMovD: {
3761         UInt cc = (UInt)i->ARMin.VCMovD.cond;
3762         UInt dD = dregEnc(i->ARMin.VCMovD.dst);
3763         UInt dM = dregEnc(i->ARMin.VCMovD.src);
3764         vassert(cc < 16 && cc != ARMcc_AL);
3765         UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
3766         *p++ = insn;
3767         goto done;
3768      }
3769      case ARMin_VCMovS: {
3770         UInt cc = (UInt)i->ARMin.VCMovS.cond;
3771         UInt fD = fregEnc(i->ARMin.VCMovS.dst);
3772         UInt fM = fregEnc(i->ARMin.VCMovS.src);
3773         vassert(cc < 16 && cc != ARMcc_AL);
3774         UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
3775                              X0000,(fD >> 1),X1010,
3776                              BITS4(0,1,(fM & 1),0), (fM >> 1));
3777         *p++ = insn;
3778         goto done;
3779      }
3780      case ARMin_VCvtSD: {
3781         if (i->ARMin.VCvtSD.sToD) {
3782            UInt dD = dregEnc(i->ARMin.VCvtSD.dst);
3783            UInt fM = fregEnc(i->ARMin.VCvtSD.src);
3784            UInt insn = XXXXXXXX(0xE, X1110, X1011, X0111, dD, X1010,
3785                                 BITS4(1,1, (fM & 1), 0),
3786                                 (fM >> 1));
3787            *p++ = insn;
3788            goto done;
3789         } else {
3790            UInt fD = fregEnc(i->ARMin.VCvtSD.dst);
3791            UInt dM = dregEnc(i->ARMin.VCvtSD.src);
3792            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1),
3793                                 X0111, (fD >> 1),
3794                                 X1011, X1100, dM);
3795            *p++ = insn;
3796            goto done;
3797         }
3798      }
3799      case ARMin_VXferQ: {
3800         UInt insn;
3801         UInt qD  = qregEnc(i->ARMin.VXferQ.qD);
3802         UInt dHi = dregEnc(i->ARMin.VXferQ.dHi);
3803         UInt dLo = dregEnc(i->ARMin.VXferQ.dLo);
3804         /* This is a bit tricky.  We need to make 2 D-D moves and we rely
3805            on the fact that the Q register can be treated as two D registers.
3806            We also rely on the fact that the register allocator will allocate
3807            the two D's and the Q to disjoint parts of the register file,
3808            and so we don't have to worry about the first move's destination
3809            being the same as the second move's source, etc.  We do have
3810            assertions though. */
3811         /* The ARM ARM specifies that
3812              D<2n>   maps to the least significant half of Q<n>
3813              D<2n+1> maps to the most  significant half of Q<n>
3814            So there are no issues with endianness here.
3815         */
3816         UInt qDlo = 2 * qD + 0;
3817         UInt qDhi = 2 * qD + 1;
3818         /* Stay sane .. */
3819         vassert(qDhi != dHi && qDhi != dLo);
3820         vassert(qDlo != dHi && qDlo != dLo);
3821         /* vmov dX, dY is
3822            F 2 (0,dX[4],1,0) dY[3:0] dX[3:0] 1 (dY[4],0,dY[4],1) dY[3:0]
3823         */
3824#        define VMOV_D_D(_xx,_yy) \
3825            XXXXXXXX( 0xF, 0x2, BITS4(0, (((_xx) >> 4) & 1), 1, 0), \
3826                      ((_yy) & 0xF), ((_xx) & 0xF), 0x1, \
3827                      BITS4( (((_yy) >> 4) & 1), 0, (((_yy) >> 4) & 1), 1), \
3828                      ((_yy) & 0xF) )
3829         if (i->ARMin.VXferQ.toQ) {
3830            insn = VMOV_D_D(qDlo, dLo); *p++ = insn;
3831            insn = VMOV_D_D(qDhi, dHi); *p++ = insn;
3832         } else {
3833            insn = VMOV_D_D(dLo, qDlo); *p++ = insn;
3834            insn = VMOV_D_D(dHi, qDhi); *p++ = insn;
3835         }
3836#        undef VMOV_D_D
3837         goto done;
3838      }
3839      case ARMin_VXferD: {
3840         UInt dD  = dregEnc(i->ARMin.VXferD.dD);
3841         UInt rHi = iregEnc(i->ARMin.VXferD.rHi);
3842         UInt rLo = iregEnc(i->ARMin.VXferD.rLo);
3843         /* vmov dD, rLo, rHi is
3844            E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
3845            vmov rLo, rHi, dD is
3846            E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
3847         */
3848         UInt insn
3849            = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
3850                       rHi, rLo, 0xB,
3851                       BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
3852         *p++ = insn;
3853         goto done;
3854      }
3855      case ARMin_VXferS: {
3856         UInt fD  = fregEnc(i->ARMin.VXferS.fD);
3857         UInt rLo = iregEnc(i->ARMin.VXferS.rLo);
3858         /* vmov fD, rLo is
3859            E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
3860            vmov rLo, fD is
3861            E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
3862         */
3863         UInt insn
3864            = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
3865                       (fD >> 1) & 0xF, rLo, 0xA,
3866                       BITS4((fD & 1),0,0,1), 0);
3867         *p++ = insn;
3868         goto done;
3869      }
3870      case ARMin_VCvtID: {
3871         Bool iToD = i->ARMin.VCvtID.iToD;
3872         Bool syned = i->ARMin.VCvtID.syned;
3873         if (iToD && syned) {
3874            // FSITOD: I32S-in-freg to F64-in-dreg
3875            UInt regF = fregEnc(i->ARMin.VCvtID.src);
3876            UInt regD = dregEnc(i->ARMin.VCvtID.dst);
3877            UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
3878                                 X1011, BITS4(1,1,(regF & 1),0),
3879                                 (regF >> 1) & 0xF);
3880            *p++ = insn;
3881            goto done;
3882         }
3883         if (iToD && (!syned)) {
3884            // FUITOD: I32U-in-freg to F64-in-dreg
3885            UInt regF = fregEnc(i->ARMin.VCvtID.src);
3886            UInt regD = dregEnc(i->ARMin.VCvtID.dst);
3887            UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
3888                                 X1011, BITS4(0,1,(regF & 1),0),
3889                                 (regF >> 1) & 0xF);
3890            *p++ = insn;
3891            goto done;
3892         }
3893         if ((!iToD) && syned) {
3894            // FTOSID: F64-in-dreg to I32S-in-freg
3895            UInt regD = dregEnc(i->ARMin.VCvtID.src);
3896            UInt regF = fregEnc(i->ARMin.VCvtID.dst);
3897            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
3898                                 X1101, (regF >> 1) & 0xF,
3899                                 X1011, X0100, regD);
3900            *p++ = insn;
3901            goto done;
3902         }
3903         if ((!iToD) && (!syned)) {
3904            // FTOUID: F64-in-dreg to I32U-in-freg
3905            UInt regD = dregEnc(i->ARMin.VCvtID.src);
3906            UInt regF = fregEnc(i->ARMin.VCvtID.dst);
3907            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
3908                                 X1100, (regF >> 1) & 0xF,
3909                                 X1011, X0100, regD);
3910            *p++ = insn;
3911            goto done;
3912         }
3913         /*UNREACHED*/
3914         vassert(0);
3915      }
3916      case ARMin_VRIntR: { /* NB: ARM v8 and above only */
3917         Bool isF64 = i->ARMin.VRIntR.isF64;
3918         UInt rDst  = (isF64 ? dregEnc : fregEnc)(i->ARMin.VRIntR.dst);
3919         UInt rSrc  = (isF64 ? dregEnc : fregEnc)(i->ARMin.VRIntR.src);
3920         /* The encoding of registers here differs strangely for the
3921            F32 and F64 cases. */
3922         UInt D, Vd, M, Vm;
3923         if (isF64) {
3924            D  = (rDst >> 4) & 1;
3925            Vd = rDst & 0xF;
3926            M  = (rSrc >> 4) & 1;
3927            Vm = rSrc & 0xF;
3928         } else {
3929            Vd = (rDst >> 1) & 0xF;
3930            D  = rDst & 1;
3931            Vm = (rSrc >> 1) & 0xF;
3932            M  = rSrc & 1;
3933         }
3934         vassert(D <= 1 && Vd <= 15 && M <= 1 && Vm <= 15);
3935         *p++ = XXXXXXXX(0xE, X1110, X1011 | (D << 2), X0110, Vd,
3936                         isF64 ? X1011 : X1010, X0100 | (M << 1), Vm);
3937         goto done;
3938      }
3939      case ARMin_VMinMaxNum: {
3940         Bool isF64 = i->ARMin.VMinMaxNum.isF64;
3941         Bool isMax = i->ARMin.VMinMaxNum.isMax;
3942         UInt rDst  = (isF64 ? dregEnc : fregEnc)(i->ARMin.VMinMaxNum.dst);
3943         UInt rSrcL = (isF64 ? dregEnc : fregEnc)(i->ARMin.VMinMaxNum.srcL);
3944         UInt rSrcR = (isF64 ? dregEnc : fregEnc)(i->ARMin.VMinMaxNum.srcR);
3945         /* The encoding of registers here differs strangely for the
3946            F32 and F64 cases. */
3947         UInt D, Vd, N, Vn, M, Vm;
3948         if (isF64) {
3949            D  = (rDst >> 4) & 1;
3950            Vd = rDst & 0xF;
3951            N  = (rSrcL >> 4) & 1;
3952            Vn = rSrcL & 0xF;
3953            M  = (rSrcR >> 4) & 1;
3954            Vm = rSrcR & 0xF;
3955         } else {
3956            Vd = (rDst >> 1) & 0xF;
3957            D  = rDst & 1;
3958            Vn = (rSrcL >> 1) & 0xF;
3959            N  = rSrcL & 1;
3960            Vm = (rSrcR >> 1) & 0xF;
3961            M  = rSrcR & 1;
3962         }
3963         vassert(D <= 1 && Vd <= 15 && M <= 1 && Vm <= 15 && N <= 1
3964                 && Vn <= 15);
3965         *p++ = XXXXXXXX(X1111,X1110, X1000 | (D << 2), Vn, Vd,
3966                         X1010 | (isF64 ? 1 : 0),
3967                         (N << 3) | ((isMax ? 0 : 1) << 2) | (M << 1) | 0,
3968                         Vm);
3969         goto done;
3970      }
3971      case ARMin_FPSCR: {
3972         Bool toFPSCR = i->ARMin.FPSCR.toFPSCR;
3973         UInt iReg    = iregEnc(i->ARMin.FPSCR.iReg);
3974         if (toFPSCR) {
3975            /* fmxr fpscr, iReg is EEE1 iReg A10 */
3976            *p++ = 0xEEE10A10 | ((iReg & 0xF) << 12);
3977            goto done;
3978         }
3979         goto bad; // FPSCR -> iReg case currently ATC
3980      }
3981      case ARMin_MFence: {
3982         // It's not clear (to me) how these relate to the ARMv7
3983         // versions, so let's just use the v7 versions as they
3984         // are at least well documented.
3985         //*p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
3986         //*p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
3987         //*p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4  (ISB) */
3988         *p++ = 0xF57FF04F; /* DSB sy */
3989         *p++ = 0xF57FF05F; /* DMB sy */
3990         *p++ = 0xF57FF06F; /* ISB */
3991         goto done;
3992      }
3993      case ARMin_CLREX: {
3994         *p++ = 0xF57FF01F; /* clrex */
3995         goto done;
3996      }
3997
3998      case ARMin_NLdStQ: {
3999         UInt regD = qregEnc(i->ARMin.NLdStQ.dQ) << 1;
4000         UInt regN, regM;
4001         UInt D = regD >> 4;
4002         UInt bL = i->ARMin.NLdStQ.isLoad ? 1 : 0;
4003         UInt insn;
4004         vassert(hregClass(i->ARMin.NLdStQ.dQ) == HRcVec128);
4005         regD &= 0xF;
4006         if (i->ARMin.NLdStQ.amode->tag == ARMamN_RR) {
4007            regN = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.RR.rN);
4008            regM = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.RR.rM);
4009         } else {
4010            regN = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.R.rN);
4011            regM = 15;
4012         }
4013         insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
4014                              regN, regD, X1010, X1000, regM);
4015         *p++ = insn;
4016         goto done;
4017      }
4018      case ARMin_NLdStD: {
4019         UInt regD = dregEnc(i->ARMin.NLdStD.dD);
4020         UInt regN, regM;
4021         UInt D = regD >> 4;
4022         UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
4023         UInt insn;
4024         vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
4025         regD &= 0xF;
4026         if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
4027            regN = iregEnc(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
4028            regM = iregEnc(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
4029         } else {
4030            regN = iregEnc(i->ARMin.NLdStD.amode->ARMamN.R.rN);
4031            regM = 15;
4032         }
4033         insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
4034                              regN, regD, X0111, X1000, regM);
4035         *p++ = insn;
4036         goto done;
4037      }
4038      case ARMin_NUnaryS: {
4039         UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
4040         UInt regD, D;
4041         UInt regM, M;
4042         UInt size = i->ARMin.NUnaryS.size;
4043         UInt insn;
4044         UInt opc, opc1, opc2;
4045         switch (i->ARMin.NUnaryS.op) {
4046	    case ARMneon_VDUP:
4047               if (i->ARMin.NUnaryS.size >= 16)
4048                  goto bad;
4049               if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
4050                  goto bad;
4051               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
4052                  goto bad;
4053               regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
4054                        ? (qregEnc(i->ARMin.NUnaryS.dst->reg) << 1)
4055                        : dregEnc(i->ARMin.NUnaryS.dst->reg);
4056               regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
4057                        ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1)
4058                        : dregEnc(i->ARMin.NUnaryS.src->reg);
4059               D = regD >> 4;
4060               M = regM >> 4;
4061               regD &= 0xf;
4062               regM &= 0xf;
4063               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
4064                               (i->ARMin.NUnaryS.size & 0xf), regD,
4065                               X1100, BITS4(0,Q,M,0), regM);
4066               *p++ = insn;
4067               goto done;
4068            case ARMneon_SETELEM:
4069               regD = Q ? (qregEnc(i->ARMin.NUnaryS.dst->reg) << 1) :
4070                                dregEnc(i->ARMin.NUnaryS.dst->reg);
4071               regM = iregEnc(i->ARMin.NUnaryS.src->reg);
4072               M = regM >> 4;
4073               D = regD >> 4;
4074               regM &= 0xF;
4075               regD &= 0xF;
4076               if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
4077                  goto bad;
4078               switch (size) {
4079                  case 0:
4080                     if (i->ARMin.NUnaryS.dst->index > 7)
4081                        goto bad;
4082                     opc = X1000 | i->ARMin.NUnaryS.dst->index;
4083                     break;
4084                  case 1:
4085                     if (i->ARMin.NUnaryS.dst->index > 3)
4086                        goto bad;
4087                     opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
4088                     break;
4089                  case 2:
4090                     if (i->ARMin.NUnaryS.dst->index > 1)
4091                        goto bad;
4092                     opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
4093                     break;
4094                  default:
4095                     goto bad;
4096               }
4097               opc1 = (opc >> 2) & 3;
4098               opc2 = opc & 3;
4099               insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
4100                               regD, regM, X1011,
4101                               BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
4102               *p++ = insn;
4103               goto done;
4104            case ARMneon_GETELEMU:
4105               regM = Q ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1) :
4106                                dregEnc(i->ARMin.NUnaryS.src->reg);
4107               regD = iregEnc(i->ARMin.NUnaryS.dst->reg);
4108               M = regM >> 4;
4109               D = regD >> 4;
4110               regM &= 0xF;
4111               regD &= 0xF;
4112               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
4113                  goto bad;
4114               switch (size) {
4115                  case 0:
4116                     if (Q && i->ARMin.NUnaryS.src->index > 7) {
4117                        regM++;
4118                        i->ARMin.NUnaryS.src->index -= 8;
4119                     }
4120                     if (i->ARMin.NUnaryS.src->index > 7)
4121                        goto bad;
4122                     opc = X1000 | i->ARMin.NUnaryS.src->index;
4123                     break;
4124                  case 1:
4125                     if (Q && i->ARMin.NUnaryS.src->index > 3) {
4126                        regM++;
4127                        i->ARMin.NUnaryS.src->index -= 4;
4128                     }
4129                     if (i->ARMin.NUnaryS.src->index > 3)
4130                        goto bad;
4131                     opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
4132                     break;
4133                  case 2:
4134                     goto bad;
4135                  default:
4136                     goto bad;
4137               }
4138               opc1 = (opc >> 2) & 3;
4139               opc2 = opc & 3;
4140               insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
4141                               regM, regD, X1011,
4142                               BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
4143               *p++ = insn;
4144               goto done;
4145            case ARMneon_GETELEMS:
4146               regM = Q ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1) :
4147                                dregEnc(i->ARMin.NUnaryS.src->reg);
4148               regD = iregEnc(i->ARMin.NUnaryS.dst->reg);
4149               M = regM >> 4;
4150               D = regD >> 4;
4151               regM &= 0xF;
4152               regD &= 0xF;
4153               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
4154                  goto bad;
4155               switch (size) {
4156                  case 0:
4157                     if (Q && i->ARMin.NUnaryS.src->index > 7) {
4158                        regM++;
4159                        i->ARMin.NUnaryS.src->index -= 8;
4160                     }
4161                     if (i->ARMin.NUnaryS.src->index > 7)
4162                        goto bad;
4163                     opc = X1000 | i->ARMin.NUnaryS.src->index;
4164                     break;
4165                  case 1:
4166                     if (Q && i->ARMin.NUnaryS.src->index > 3) {
4167                        regM++;
4168                        i->ARMin.NUnaryS.src->index -= 4;
4169                     }
4170                     if (i->ARMin.NUnaryS.src->index > 3)
4171                        goto bad;
4172                     opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
4173                     break;
4174                  case 2:
4175                     if (Q && i->ARMin.NUnaryS.src->index > 1) {
4176                        regM++;
4177                        i->ARMin.NUnaryS.src->index -= 2;
4178                     }
4179                     if (i->ARMin.NUnaryS.src->index > 1)
4180                        goto bad;
4181                     opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
4182                     break;
4183                  default:
4184                     goto bad;
4185               }
4186               opc1 = (opc >> 2) & 3;
4187               opc2 = opc & 3;
4188               insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
4189                               regM, regD, X1011,
4190                               BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
4191               *p++ = insn;
4192               goto done;
4193            default:
4194               goto bad;
4195         }
4196      }
4197      case ARMin_NUnary: {
4198         UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
4199         UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
4200                       ? (qregEnc(i->ARMin.NUnary.dst) << 1)
4201                       : dregEnc(i->ARMin.NUnary.dst);
4202         UInt regM, M;
4203         UInt D = regD >> 4;
4204         UInt sz1 = i->ARMin.NUnary.size >> 1;
4205         UInt sz2 = i->ARMin.NUnary.size & 1;
4206         UInt sz = i->ARMin.NUnary.size;
4207         UInt insn;
4208         UInt F = 0; /* TODO: floating point EQZ ??? */
4209         if (i->ARMin.NUnary.op != ARMneon_DUP) {
4210            regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
4211                     ? (qregEnc(i->ARMin.NUnary.src) << 1)
4212                     : dregEnc(i->ARMin.NUnary.src);
4213            M = regM >> 4;
4214         } else {
4215            regM = iregEnc(i->ARMin.NUnary.src);
4216            M = regM >> 4;
4217         }
4218         regD &= 0xF;
4219         regM &= 0xF;
4220         switch (i->ARMin.NUnary.op) {
4221            case ARMneon_COPY: /* VMOV reg, reg */
4222               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
4223                               BITS4(M,Q,M,1), regM);
4224               break;
4225            case ARMneon_COPYN: /* VMOVN regD, regQ */
4226               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4227                               regD, X0010, BITS4(0,0,M,0), regM);
4228               break;
4229            case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
4230               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4231                               regD, X0010, BITS4(1,0,M,0), regM);
4232               break;
4233            case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
4234               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4235                               regD, X0010, BITS4(0,1,M,0), regM);
4236               break;
4237            case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
4238               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4239                               regD, X0010, BITS4(1,1,M,0), regM);
4240               break;
4241            case ARMneon_COPYLS: /* VMOVL regQ, regD */
4242               if (sz >= 3)
4243                  goto bad;
4244               insn = XXXXXXXX(0xF, X0010,
4245                               BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
4246                               BITS4((sz == 0) ? 1 : 0,0,0,0),
4247                               regD, X1010, BITS4(0,0,M,1), regM);
4248               break;
4249            case ARMneon_COPYLU: /* VMOVL regQ, regD */
4250               if (sz >= 3)
4251                  goto bad;
4252               insn = XXXXXXXX(0xF, X0011,
4253                               BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
4254                               BITS4((sz == 0) ? 1 : 0,0,0,0),
4255                               regD, X1010, BITS4(0,0,M,1), regM);
4256               break;
4257            case ARMneon_NOT: /* VMVN reg, reg*/
4258               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
4259                               BITS4(1,Q,M,0), regM);
4260               break;
4261            case ARMneon_EQZ:
4262               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
4263                               regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
4264               break;
4265            case ARMneon_CNT:
4266               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
4267                               BITS4(0,Q,M,0), regM);
4268               break;
4269            case ARMneon_CLZ:
4270               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4271                               regD, X0100, BITS4(1,Q,M,0), regM);
4272               break;
4273            case ARMneon_CLS:
4274               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4275                               regD, X0100, BITS4(0,Q,M,0), regM);
4276               break;
4277            case ARMneon_ABS:
4278               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
4279                               regD, X0011, BITS4(0,Q,M,0), regM);
4280               break;
4281            case ARMneon_DUP:
4282               sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
4283               sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
4284               vassert(sz1 + sz2 < 2);
4285               insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
4286                               X1011, BITS4(D,0,sz2,1), X0000);
4287               break;
4288            case ARMneon_REV16:
4289               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4290                               regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
4291               break;
4292            case ARMneon_REV32:
4293               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4294                               regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
4295               break;
4296            case ARMneon_REV64:
4297               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4298                               regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
4299               break;
4300            case ARMneon_PADDLU:
4301               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4302                               regD, X0010, BITS4(1,Q,M,0), regM);
4303               break;
4304            case ARMneon_PADDLS:
4305               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4306                               regD, X0010, BITS4(0,Q,M,0), regM);
4307               break;
4308            case ARMneon_VQSHLNUU:
4309               insn = XXXXXXXX(0xF, X0011,
4310                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4311                               sz & 0xf, regD, X0111,
4312                               BITS4(sz >> 6,Q,M,1), regM);
4313               break;
4314            case ARMneon_VQSHLNSS:
4315               insn = XXXXXXXX(0xF, X0010,
4316                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4317                               sz & 0xf, regD, X0111,
4318                               BITS4(sz >> 6,Q,M,1), regM);
4319               break;
4320            case ARMneon_VQSHLNUS:
4321               insn = XXXXXXXX(0xF, X0011,
4322                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4323                               sz & 0xf, regD, X0110,
4324                               BITS4(sz >> 6,Q,M,1), regM);
4325               break;
4326            case ARMneon_VCVTFtoS:
4327               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
4328                               BITS4(0,Q,M,0), regM);
4329               break;
4330            case ARMneon_VCVTFtoU:
4331               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
4332                               BITS4(1,Q,M,0), regM);
4333               break;
4334            case ARMneon_VCVTStoF:
4335               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
4336                               BITS4(0,Q,M,0), regM);
4337               break;
4338            case ARMneon_VCVTUtoF:
4339               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
4340                               BITS4(1,Q,M,0), regM);
4341               break;
4342            case ARMneon_VCVTFtoFixedU:
4343               sz1 = (sz >> 5) & 1;
4344               sz2 = (sz >> 4) & 1;
4345               sz &= 0xf;
4346               insn = XXXXXXXX(0xF, X0011,
4347                               BITS4(1,D,sz1,sz2), sz, regD, X1111,
4348                               BITS4(0,Q,M,1), regM);
4349               break;
4350            case ARMneon_VCVTFtoFixedS:
4351               sz1 = (sz >> 5) & 1;
4352               sz2 = (sz >> 4) & 1;
4353               sz &= 0xf;
4354               insn = XXXXXXXX(0xF, X0010,
4355                               BITS4(1,D,sz1,sz2), sz, regD, X1111,
4356                               BITS4(0,Q,M,1), regM);
4357               break;
4358            case ARMneon_VCVTFixedUtoF:
4359               sz1 = (sz >> 5) & 1;
4360               sz2 = (sz >> 4) & 1;
4361               sz &= 0xf;
4362               insn = XXXXXXXX(0xF, X0011,
4363                               BITS4(1,D,sz1,sz2), sz, regD, X1110,
4364                               BITS4(0,Q,M,1), regM);
4365               break;
4366            case ARMneon_VCVTFixedStoF:
4367               sz1 = (sz >> 5) & 1;
4368               sz2 = (sz >> 4) & 1;
4369               sz &= 0xf;
4370               insn = XXXXXXXX(0xF, X0010,
4371                               BITS4(1,D,sz1,sz2), sz, regD, X1110,
4372                               BITS4(0,Q,M,1), regM);
4373               break;
4374            case ARMneon_VCVTF32toF16:
4375               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
4376                               BITS4(0,0,M,0), regM);
4377               break;
4378            case ARMneon_VCVTF16toF32:
4379               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
4380                               BITS4(0,0,M,0), regM);
4381               break;
4382            case ARMneon_VRECIP:
4383               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
4384                               BITS4(0,Q,M,0), regM);
4385               break;
4386            case ARMneon_VRECIPF:
4387               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
4388                               BITS4(0,Q,M,0), regM);
4389               break;
4390            case ARMneon_VABSFP:
4391               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
4392                               BITS4(0,Q,M,0), regM);
4393               break;
4394            case ARMneon_VRSQRTEFP:
4395               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
4396                               BITS4(1,Q,M,0), regM);
4397               break;
4398            case ARMneon_VRSQRTE:
4399               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
4400                               BITS4(1,Q,M,0), regM);
4401               break;
4402            case ARMneon_VNEGF:
4403               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
4404                               BITS4(1,Q,M,0), regM);
4405               break;
4406
4407            default:
4408               goto bad;
4409         }
4410         *p++ = insn;
4411         goto done;
4412      }
4413      case ARMin_NDual: {
4414         UInt Q = i->ARMin.NDual.Q ? 1 : 0;
4415         UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
4416                       ? (qregEnc(i->ARMin.NDual.arg1) << 1)
4417                       : dregEnc(i->ARMin.NDual.arg1);
4418         UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
4419                       ? (qregEnc(i->ARMin.NDual.arg2) << 1)
4420                       : dregEnc(i->ARMin.NDual.arg2);
4421         UInt D = regD >> 4;
4422         UInt M = regM >> 4;
4423         UInt sz1 = i->ARMin.NDual.size >> 1;
4424         UInt sz2 = i->ARMin.NDual.size & 1;
4425         UInt insn;
4426         regD &= 0xF;
4427         regM &= 0xF;
4428         switch (i->ARMin.NDual.op) {
4429            case ARMneon_TRN: /* VTRN reg, reg */
4430               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4431                               regD, X0000, BITS4(1,Q,M,0), regM);
4432               break;
4433            case ARMneon_ZIP: /* VZIP reg, reg */
4434               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4435                               regD, X0001, BITS4(1,Q,M,0), regM);
4436               break;
4437            case ARMneon_UZP: /* VUZP reg, reg */
4438               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4439                               regD, X0001, BITS4(0,Q,M,0), regM);
4440               break;
4441            default:
4442               goto bad;
4443         }
4444         *p++ = insn;
4445         goto done;
4446      }
4447      case ARMin_NBinary: {
4448         UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
4449         UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
4450                       ? (qregEnc(i->ARMin.NBinary.dst) << 1)
4451                       : dregEnc(i->ARMin.NBinary.dst);
4452         UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
4453                       ? (qregEnc(i->ARMin.NBinary.argL) << 1)
4454                       : dregEnc(i->ARMin.NBinary.argL);
4455         UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
4456                       ? (qregEnc(i->ARMin.NBinary.argR) << 1)
4457                       : dregEnc(i->ARMin.NBinary.argR);
4458         UInt sz1 = i->ARMin.NBinary.size >> 1;
4459         UInt sz2 = i->ARMin.NBinary.size & 1;
4460         UInt D = regD >> 4;
4461         UInt N = regN >> 4;
4462         UInt M = regM >> 4;
4463         UInt insn;
4464         regD &= 0xF;
4465         regM &= 0xF;
4466         regN &= 0xF;
4467         switch (i->ARMin.NBinary.op) {
4468            case ARMneon_VAND: /* VAND reg, reg, reg */
4469               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
4470                               BITS4(N,Q,M,1), regM);
4471               break;
4472            case ARMneon_VORR: /* VORR reg, reg, reg*/
4473               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
4474                               BITS4(N,Q,M,1), regM);
4475               break;
4476            case ARMneon_VXOR: /* VEOR reg, reg, reg */
4477               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
4478                               BITS4(N,Q,M,1), regM);
4479               break;
4480            case ARMneon_VADD: /* VADD reg, reg, reg */
4481               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4482                               X1000, BITS4(N,Q,M,0), regM);
4483               break;
4484            case ARMneon_VSUB: /* VSUB reg, reg, reg */
4485               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4486                               X1000, BITS4(N,Q,M,0), regM);
4487               break;
4488            case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
4489               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4490                               X0110, BITS4(N,Q,M,1), regM);
4491               break;
4492            case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
4493               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4494                               X0110, BITS4(N,Q,M,1), regM);
4495               break;
4496            case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
4497               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4498                               X0110, BITS4(N,Q,M,0), regM);
4499               break;
4500            case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
4501               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4502                               X0110, BITS4(N,Q,M,0), regM);
4503               break;
4504            case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
4505               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4506                               X0001, BITS4(N,Q,M,0), regM);
4507               break;
4508            case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
4509               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4510                               X0001, BITS4(N,Q,M,0), regM);
4511               break;
4512            case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
4513               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4514                               X0000, BITS4(N,Q,M,1), regM);
4515               break;
4516            case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
4517               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4518                               X0000, BITS4(N,Q,M,1), regM);
4519               break;
4520            case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
4521               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4522                               X0010, BITS4(N,Q,M,1), regM);
4523               break;
4524            case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
4525               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4526                               X0010, BITS4(N,Q,M,1), regM);
4527               break;
4528            case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
4529               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4530                               X0011, BITS4(N,Q,M,0), regM);
4531               break;
4532            case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
4533               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4534                               X0011, BITS4(N,Q,M,0), regM);
4535               break;
4536            case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
4537               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4538                               X0011, BITS4(N,Q,M,1), regM);
4539               break;
4540            case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
4541               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4542                               X0011, BITS4(N,Q,M,1), regM);
4543               break;
4544            case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
4545               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4546                               X1000, BITS4(N,Q,M,1), regM);
4547               break;
4548            case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4*/
4549               if (i->ARMin.NBinary.size >= 16)
4550                  goto bad;
4551               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
4552                               i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
4553                               regM);
4554               break;
4555            case ARMneon_VMUL:
4556               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4557                               X1001, BITS4(N,Q,M,1), regM);
4558               break;
4559            case ARMneon_VMULLU:
4560               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
4561                               X1100, BITS4(N,0,M,0), regM);
4562               break;
4563            case ARMneon_VMULLS:
4564               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4565                               X1100, BITS4(N,0,M,0), regM);
4566               break;
4567            case ARMneon_VMULP:
4568               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4569                               X1001, BITS4(N,Q,M,1), regM);
4570               break;
4571            case ARMneon_VMULFP:
4572               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4573                               X1101, BITS4(N,Q,M,1), regM);
4574               break;
4575            case ARMneon_VMULLP:
4576               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4577                               X1110, BITS4(N,0,M,0), regM);
4578               break;
4579            case ARMneon_VQDMULH:
4580               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4581                               X1011, BITS4(N,Q,M,0), regM);
4582               break;
4583            case ARMneon_VQRDMULH:
4584               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4585                               X1011, BITS4(N,Q,M,0), regM);
4586               break;
4587            case ARMneon_VQDMULL:
4588               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4589                               X1101, BITS4(N,0,M,0), regM);
4590               break;
4591            case ARMneon_VTBL:
4592               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
4593                               X1000, BITS4(N,0,M,0), regM);
4594               break;
4595            case ARMneon_VPADD:
4596               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4597                               X1011, BITS4(N,Q,M,1), regM);
4598               break;
4599            case ARMneon_VPADDFP:
4600               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4601                               X1101, BITS4(N,Q,M,0), regM);
4602               break;
4603            case ARMneon_VPMINU:
4604               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4605                               X1010, BITS4(N,Q,M,1), regM);
4606               break;
4607            case ARMneon_VPMINS:
4608               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4609                               X1010, BITS4(N,Q,M,1), regM);
4610               break;
4611            case ARMneon_VPMAXU:
4612               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4613                               X1010, BITS4(N,Q,M,0), regM);
4614               break;
4615            case ARMneon_VPMAXS:
4616               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4617                               X1010, BITS4(N,Q,M,0), regM);
4618               break;
4619            case ARMneon_VADDFP: /* VADD reg, reg, reg */
4620               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
4621                               X1101, BITS4(N,Q,M,0), regM);
4622               break;
4623            case ARMneon_VSUBFP: /* VADD reg, reg, reg */
4624               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
4625                               X1101, BITS4(N,Q,M,0), regM);
4626               break;
4627            case ARMneon_VABDFP: /* VABD reg, reg, reg */
4628               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
4629                               X1101, BITS4(N,Q,M,0), regM);
4630               break;
4631            case ARMneon_VMINF:
4632               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
4633                               X1111, BITS4(N,Q,M,0), regM);
4634               break;
4635            case ARMneon_VMAXF:
4636               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
4637                               X1111, BITS4(N,Q,M,0), regM);
4638               break;
4639            case ARMneon_VPMINF:
4640               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
4641                               X1111, BITS4(N,Q,M,0), regM);
4642               break;
4643            case ARMneon_VPMAXF:
4644               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4645                               X1111, BITS4(N,Q,M,0), regM);
4646               break;
4647            case ARMneon_VRECPS:
4648               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
4649                               BITS4(N,Q,M,1), regM);
4650               break;
4651            case ARMneon_VCGTF:
4652               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
4653                               BITS4(N,Q,M,0), regM);
4654               break;
4655            case ARMneon_VCGEF:
4656               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
4657                               BITS4(N,Q,M,0), regM);
4658               break;
4659            case ARMneon_VCEQF:
4660               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
4661                               BITS4(N,Q,M,0), regM);
4662               break;
4663            case ARMneon_VRSQRTS:
4664               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
4665                               BITS4(N,Q,M,1), regM);
4666               break;
4667            default:
4668               goto bad;
4669         }
4670         *p++ = insn;
4671         goto done;
4672      }
4673      case ARMin_NShift: {
4674         UInt Q = i->ARMin.NShift.Q ? 1 : 0;
4675         UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
4676                       ? (qregEnc(i->ARMin.NShift.dst) << 1)
4677                       : dregEnc(i->ARMin.NShift.dst);
4678         UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
4679                       ? (qregEnc(i->ARMin.NShift.argL) << 1)
4680                       : dregEnc(i->ARMin.NShift.argL);
4681         UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
4682                       ? (qregEnc(i->ARMin.NShift.argR) << 1)
4683                       : dregEnc(i->ARMin.NShift.argR);
4684         UInt sz1 = i->ARMin.NShift.size >> 1;
4685         UInt sz2 = i->ARMin.NShift.size & 1;
4686         UInt D = regD >> 4;
4687         UInt N = regN >> 4;
4688         UInt M = regM >> 4;
4689         UInt insn;
4690         regD &= 0xF;
4691         regM &= 0xF;
4692         regN &= 0xF;
4693         switch (i->ARMin.NShift.op) {
4694            case ARMneon_VSHL:
4695               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4696                               X0100, BITS4(N,Q,M,0), regM);
4697               break;
4698            case ARMneon_VSAL:
4699               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4700                               X0100, BITS4(N,Q,M,0), regM);
4701               break;
4702            case ARMneon_VQSHL:
4703               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4704                               X0100, BITS4(N,Q,M,1), regM);
4705               break;
4706            case ARMneon_VQSAL:
4707               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4708                               X0100, BITS4(N,Q,M,1), regM);
4709               break;
4710            default:
4711               goto bad;
4712         }
4713         *p++ = insn;
4714         goto done;
4715      }
4716      case ARMin_NShl64: {
4717         HReg regDreg = i->ARMin.NShl64.dst;
4718         HReg regMreg = i->ARMin.NShl64.src;
4719         UInt amt     = i->ARMin.NShl64.amt;
4720         vassert(amt >= 1 && amt <= 63);
4721         vassert(hregClass(regDreg) == HRcFlt64);
4722         vassert(hregClass(regMreg) == HRcFlt64);
4723         UInt regD = dregEnc(regDreg);
4724         UInt regM = dregEnc(regMreg);
4725         UInt D    = (regD >> 4) & 1;
4726         UInt Vd   = regD & 0xF;
4727         UInt L    = 1;
4728         UInt Q    = 0; /* always 64-bit */
4729         UInt M    = (regM >> 4) & 1;
4730         UInt Vm   = regM & 0xF;
4731         UInt insn = XXXXXXXX(X1111,X0010, BITS4(1,D,(amt>>5)&1,(amt>>4)&1),
4732                              amt & 0xF, Vd, X0101, BITS4(L,Q,M,1), Vm);
4733         *p++ = insn;
4734         goto done;
4735      }
4736      case ARMin_NeonImm: {
4737         UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
4738         UInt regD = Q ? (qregEnc(i->ARMin.NeonImm.dst) << 1) :
4739                          dregEnc(i->ARMin.NeonImm.dst);
4740         UInt D = regD >> 4;
4741         UInt imm = i->ARMin.NeonImm.imm->imm8;
4742         UInt tp = i->ARMin.NeonImm.imm->type;
4743         UInt j = imm >> 7;
4744         UInt imm3 = (imm >> 4) & 0x7;
4745         UInt imm4 = imm & 0xF;
4746         UInt cmode, op;
4747         UInt insn;
4748         regD &= 0xF;
4749         if (tp == 9)
4750            op = 1;
4751         else
4752            op = 0;
4753         switch (tp) {
4754            case 0:
4755            case 1:
4756            case 2:
4757            case 3:
4758            case 4:
4759            case 5:
4760               cmode = tp << 1;
4761               break;
4762            case 9:
4763            case 6:
4764               cmode = 14;
4765               break;
4766            case 7:
4767               cmode = 12;
4768               break;
4769            case 8:
4770               cmode = 13;
4771               break;
4772            case 10:
4773               cmode = 15;
4774               break;
4775            default:
4776               vpanic("ARMin_NeonImm");
4777
4778         }
4779         insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
4780                         cmode, BITS4(0,Q,op,1), imm4);
4781         *p++ = insn;
4782         goto done;
4783      }
4784      case ARMin_NCMovQ: {
4785         UInt cc = (UInt)i->ARMin.NCMovQ.cond;
4786         UInt qM = qregEnc(i->ARMin.NCMovQ.src) << 1;
4787         UInt qD = qregEnc(i->ARMin.NCMovQ.dst) << 1;
4788         UInt vM = qM & 0xF;
4789         UInt vD = qD & 0xF;
4790         UInt M  = (qM >> 4) & 1;
4791         UInt D  = (qD >> 4) & 1;
4792         vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
4793         /* b!cc here+8: !cc A00 0000 */
4794         UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
4795         *p++ = insn;
4796         /* vmov qD, qM */
4797         insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
4798                         vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
4799         *p++ = insn;
4800         goto done;
4801      }
4802      case ARMin_Add32: {
4803         UInt regD = iregEnc(i->ARMin.Add32.rD);
4804         UInt regN = iregEnc(i->ARMin.Add32.rN);
4805         UInt imm32 = i->ARMin.Add32.imm32;
4806         vassert(regD != regN);
4807         /* MOV regD, imm32 */
4808         p = imm32_to_ireg((UInt *)p, regD, imm32);
4809         /* ADD regD, regN, regD */
4810         UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
4811         *p++ = insn;
4812         goto done;
4813      }
4814
4815      case ARMin_EvCheck: {
4816         /* We generate:
4817               ldr  r12, [r8 + #4]   4 == offsetof(host_EvC_COUNTER)
4818               subs r12, r12, #1  (A1)
4819               str  r12, [r8 + #4]   4 == offsetof(host_EvC_COUNTER)
4820               bpl  nofail
4821               ldr  r12, [r8 + #0]   0 == offsetof(host_EvC_FAILADDR)
4822               bx   r12
4823              nofail:
4824         */
4825         UInt* p0 = p;
4826         p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
4827                                i->ARMin.EvCheck.amCounter);
4828         *p++ = 0xE25CC001; /* subs r12, r12, #1 */
4829         p = do_load_or_store32(p, False/*!isLoad*/, /*r*/12,
4830                                i->ARMin.EvCheck.amCounter);
4831         *p++ = 0x5A000001; /* bpl nofail */
4832         p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
4833                                i->ARMin.EvCheck.amFailAddr);
4834         *p++ = 0xE12FFF1C; /* bx r12 */
4835         /* nofail: */
4836
4837         /* Crosscheck */
4838         vassert(evCheckSzB_ARM() == (UChar*)p - (UChar*)p0);
4839         goto done;
4840      }
4841
4842      case ARMin_ProfInc: {
4843         /* We generate:
4844              (ctrP is unknown now, so use 0x65556555 in the
4845              expectation that a later call to LibVEX_patchProfCtr
4846              will be used to fill in the immediate fields once the
4847              right value is known.)
4848            movw r12, lo16(0x65556555)
4849            movt r12, lo16(0x65556555)
4850            ldr  r11, [r12]
4851            adds r11, r11, #1
4852            str  r11, [r12]
4853            ldr  r11, [r12+4]
4854            adc  r11, r11, #0
4855            str  r11, [r12+4]
4856         */
4857         p = imm32_to_ireg_EXACTLY2(p, /*r*/12, 0x65556555);
4858         *p++ = 0xE59CB000;
4859         *p++ = 0xE29BB001;
4860         *p++ = 0xE58CB000;
4861         *p++ = 0xE59CB004;
4862         *p++ = 0xE2ABB000;
4863         *p++ = 0xE58CB004;
4864         /* Tell the caller .. */
4865         vassert(!(*is_profInc));
4866         *is_profInc = True;
4867         goto done;
4868      }
4869
4870      /* ... */
4871      default:
4872         goto bad;
4873    }
4874
4875  bad:
4876   ppARMInstr(i);
4877   vpanic("emit_ARMInstr");
4878   /*NOTREACHED*/
4879
4880  done:
4881   vassert(((UChar*)p) - &buf[0] <= 32);
4882   return ((UChar*)p) - &buf[0];
4883}
4884
4885
4886/* How big is an event check?  See case for ARMin_EvCheck in
4887   emit_ARMInstr just above.  That crosschecks what this returns, so
4888   we can tell if we're inconsistent. */
4889Int evCheckSzB_ARM (void)
4890{
4891   return 24;
4892}
4893
4894
4895/* NB: what goes on here has to be very closely coordinated with the
4896   emitInstr case for XDirect, above. */
4897VexInvalRange chainXDirect_ARM ( VexEndness endness_host,
4898                                 void* place_to_chain,
4899                                 const void* disp_cp_chain_me_EXPECTED,
4900                                 const void* place_to_jump_to )
4901{
4902   vassert(endness_host == VexEndnessLE);
4903
4904   /* What we're expecting to see is:
4905        movw r12, lo16(disp_cp_chain_me_to_EXPECTED)
4906        movt r12, hi16(disp_cp_chain_me_to_EXPECTED)
4907        blx  r12
4908      viz
4909        <8 bytes generated by imm32_to_ireg_EXACTLY2>
4910        E1 2F FF 3C
4911   */
4912   UInt* p = (UInt*)place_to_chain;
4913   vassert(0 == (3 & (HWord)p));
4914   vassert(is_imm32_to_ireg_EXACTLY2(
4915              p, /*r*/12, (UInt)(Addr)disp_cp_chain_me_EXPECTED));
4916   vassert(p[2] == 0xE12FFF3C);
4917   /* And what we want to change it to is either:
4918        (general case)
4919          movw r12, lo16(place_to_jump_to)
4920          movt r12, hi16(place_to_jump_to)
4921          bx   r12
4922        viz
4923          <8 bytes generated by imm32_to_ireg_EXACTLY2>
4924          E1 2F FF 1C
4925      ---OR---
4926        in the case where the displacement falls within 26 bits
4927          b disp24; undef; undef
4928        viz
4929          EA <3 bytes == disp24>
4930          FF 00 00 00
4931          FF 00 00 00
4932
4933      In both cases the replacement has the same length as the original.
4934      To remain sane & verifiable,
4935      (1) limit the displacement for the short form to
4936          (say) +/- 30 million, so as to avoid wraparound
4937          off-by-ones
4938      (2) even if the short form is applicable, once every (say)
4939          1024 times use the long form anyway, so as to maintain
4940          verifiability
4941   */
4942
4943   /* This is the delta we need to put into a B insn.  It's relative
4944      to the start of the next-but-one insn, hence the -8.  */
4945   Long delta   = (Long)((const UChar *)place_to_jump_to - (const UChar*)p) - 8;
4946   Bool shortOK = delta >= -30*1000*1000 && delta < 30*1000*1000;
4947   vassert(0 == (delta & (Long)3));
4948
4949   static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
4950   if (shortOK) {
4951      shortCTR++; // thread safety bleh
4952      if (0 == (shortCTR & 0x3FF)) {
4953         shortOK = False;
4954         if (0)
4955            vex_printf("QQQ chainXDirect_ARM: shortCTR = %u, "
4956                       "using long form\n", shortCTR);
4957      }
4958   }
4959
4960   /* And make the modifications. */
4961   if (shortOK) {
4962      UInt uimm24      = (UInt)(delta >> 2);
4963      UInt uimm24_shl8 = uimm24 << 8;
4964      Int  simm24      = (Int)uimm24_shl8;
4965      simm24 >>= 8;
4966      vassert(uimm24 == simm24);
4967      p[0] = 0xEA000000 | (simm24 & 0x00FFFFFF);
4968      p[1] = 0xFF000000;
4969      p[2] = 0xFF000000;
4970   } else {
4971      (void)imm32_to_ireg_EXACTLY2(
4972               p, /*r*/12, (UInt)(Addr)place_to_jump_to);
4973      p[2] = 0xE12FFF1C;
4974   }
4975
4976   VexInvalRange vir = {(HWord)p, 12};
4977   return vir;
4978}
4979
4980
4981/* NB: what goes on here has to be very closely coordinated with the
4982   emitInstr case for XDirect, above. */
4983VexInvalRange unchainXDirect_ARM ( VexEndness endness_host,
4984                                   void* place_to_unchain,
4985                                   const void* place_to_jump_to_EXPECTED,
4986                                   const void* disp_cp_chain_me )
4987{
4988   vassert(endness_host == VexEndnessLE);
4989
4990   /* What we're expecting to see is:
4991        (general case)
4992          movw r12, lo16(place_to_jump_to_EXPECTED)
4993          movt r12, lo16(place_to_jump_to_EXPECTED)
4994          bx   r12
4995        viz
4996          <8 bytes generated by imm32_to_ireg_EXACTLY2>
4997          E1 2F FF 1C
4998      ---OR---
4999        in the case where the displacement falls within 26 bits
5000          b disp24; undef; undef
5001        viz
5002          EA <3 bytes == disp24>
5003          FF 00 00 00
5004          FF 00 00 00
5005   */
5006   UInt* p = (UInt*)place_to_unchain;
5007   vassert(0 == (3 & (HWord)p));
5008
5009   Bool valid = False;
5010   if (is_imm32_to_ireg_EXACTLY2(
5011          p, /*r*/12, (UInt)(Addr)place_to_jump_to_EXPECTED)
5012       && p[2] == 0xE12FFF1C) {
5013      valid = True; /* it's the long form */
5014      if (0)
5015         vex_printf("QQQ unchainXDirect_ARM: found long form\n");
5016   } else
5017   if ((p[0] >> 24) == 0xEA && p[1] == 0xFF000000 && p[2] == 0xFF000000) {
5018      /* It's the short form.  Check the displacement is right. */
5019      Int simm24 = p[0] & 0x00FFFFFF;
5020      simm24 <<= 8; simm24 >>= 8;
5021      if ((UChar*)p + (simm24 << 2) + 8 == place_to_jump_to_EXPECTED) {
5022         valid = True;
5023         if (0)
5024            vex_printf("QQQ unchainXDirect_ARM: found short form\n");
5025      }
5026   }
5027   vassert(valid);
5028
5029   /* And what we want to change it to is:
5030        movw r12, lo16(disp_cp_chain_me)
5031        movt r12, hi16(disp_cp_chain_me)
5032        blx  r12
5033      viz
5034        <8 bytes generated by imm32_to_ireg_EXACTLY2>
5035        E1 2F FF 3C
5036   */
5037   (void)imm32_to_ireg_EXACTLY2(
5038            p, /*r*/12, (UInt)(Addr)disp_cp_chain_me);
5039   p[2] = 0xE12FFF3C;
5040   VexInvalRange vir = {(HWord)p, 12};
5041   return vir;
5042}
5043
5044
5045/* Patch the counter address into a profile inc point, as previously
5046   created by the ARMin_ProfInc case for emit_ARMInstr. */
5047VexInvalRange patchProfInc_ARM ( VexEndness endness_host,
5048                                 void*  place_to_patch,
5049                                 const ULong* location_of_counter )
5050{
5051   vassert(endness_host == VexEndnessLE);
5052   vassert(sizeof(ULong*) == 4);
5053   UInt* p = (UInt*)place_to_patch;
5054   vassert(0 == (3 & (HWord)p));
5055   vassert(is_imm32_to_ireg_EXACTLY2(p, /*r*/12, 0x65556555));
5056   vassert(p[2] == 0xE59CB000);
5057   vassert(p[3] == 0xE29BB001);
5058   vassert(p[4] == 0xE58CB000);
5059   vassert(p[5] == 0xE59CB004);
5060   vassert(p[6] == 0xE2ABB000);
5061   vassert(p[7] == 0xE58CB004);
5062   imm32_to_ireg_EXACTLY2(p, /*r*/12, (UInt)(Addr)location_of_counter);
5063   VexInvalRange vir = {(HWord)p, 8};
5064   return vir;
5065}
5066
5067
5068#undef BITS4
5069#undef X0000
5070#undef X0001
5071#undef X0010
5072#undef X0011
5073#undef X0100
5074#undef X0101
5075#undef X0110
5076#undef X0111
5077#undef X1000
5078#undef X1001
5079#undef X1010
5080#undef X1011
5081#undef X1100
5082#undef X1101
5083#undef X1110
5084#undef X1111
5085#undef XXXXX___
5086#undef XXXXXX__
5087#undef XXX___XX
5088#undef XXXXX__X
5089#undef XXXXXXXX
5090#undef XX______
5091
5092/*---------------------------------------------------------------*/
5093/*--- end                                     host_arm_defs.c ---*/
5094/*---------------------------------------------------------------*/
5095