1
2/*---------------------------------------------------------------*/
3/*--- begin                                   host_arm_defs.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2015 OpenWorks LLP
11      info@open-works.net
12
13   NEON support is
14   Copyright (C) 2010-2015 Samsung Electronics
15   contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16              and Kirill Batuzov <batuzovk@ispras.ru>
17
18   This program is free software; you can redistribute it and/or
19   modify it under the terms of the GNU General Public License as
20   published by the Free Software Foundation; either version 2 of the
21   License, or (at your option) any later version.
22
23   This program is distributed in the hope that it will be useful, but
24   WITHOUT ANY WARRANTY; without even the implied warranty of
25   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
26   General Public License for more details.
27
28   You should have received a copy of the GNU General Public License
29   along with this program; if not, write to the Free Software
30   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31   02110-1301, USA.
32
33   The GNU General Public License is contained in the file COPYING.
34*/
35
36#include "libvex_basictypes.h"
37#include "libvex.h"
38#include "libvex_trc_values.h"
39
40#include "main_util.h"
41#include "host_generic_regs.h"
42#include "host_arm_defs.h"
43
44UInt arm_hwcaps = 0;
45
46
47/* --------- Registers. --------- */
48
49const RRegUniverse* getRRegUniverse_ARM ( void )
50{
51   /* The real-register universe is a big constant, so we just want to
52      initialise it once. */
53   static RRegUniverse rRegUniverse_ARM;
54   static Bool         rRegUniverse_ARM_initted = False;
55
56   /* Handy shorthand, nothing more */
57   RRegUniverse* ru = &rRegUniverse_ARM;
58
59   /* This isn't thread-safe.  Sigh. */
60   if (LIKELY(rRegUniverse_ARM_initted))
61      return ru;
62
63   RRegUniverse__init(ru);
64
65   /* Add the registers.  The initial segment of this array must be
66      those available for allocation by reg-alloc, and those that
67      follow are not available for allocation. */
68
69   /* Callee saves ones are listed first, since we prefer them
70      if they're available. */
71   ru->regs[ru->size++] = hregARM_R4();
72   ru->regs[ru->size++] = hregARM_R5();
73   ru->regs[ru->size++] = hregARM_R6();
74   ru->regs[ru->size++] = hregARM_R7();
75   ru->regs[ru->size++] = hregARM_R10();
76   ru->regs[ru->size++] = hregARM_R11();
77   /* Otherwise we'll have to slum it out with caller-saves ones. */
78   ru->regs[ru->size++] = hregARM_R0();
79   ru->regs[ru->size++] = hregARM_R1();
80   ru->regs[ru->size++] = hregARM_R2();
81   ru->regs[ru->size++] = hregARM_R3();
82   ru->regs[ru->size++] = hregARM_R9();
83   /* FP registers.  Note: these are all callee-save.  Yay!  Hence we
84      don't need to mention them as trashed in getHRegUsage for
85      ARMInstr_Call. */
86   ru->regs[ru->size++] = hregARM_D8();
87   ru->regs[ru->size++] = hregARM_D9();
88   ru->regs[ru->size++] = hregARM_D10();
89   ru->regs[ru->size++] = hregARM_D11();
90   ru->regs[ru->size++] = hregARM_D12();
91   ru->regs[ru->size++] = hregARM_S26();
92   ru->regs[ru->size++] = hregARM_S27();
93   ru->regs[ru->size++] = hregARM_S28();
94   ru->regs[ru->size++] = hregARM_S29();
95   ru->regs[ru->size++] = hregARM_S30();
96   ru->regs[ru->size++] = hregARM_Q8();
97   ru->regs[ru->size++] = hregARM_Q9();
98   ru->regs[ru->size++] = hregARM_Q10();
99   ru->regs[ru->size++] = hregARM_Q11();
100   ru->regs[ru->size++] = hregARM_Q12();
101   ru->allocable = ru->size;
102
103   /* And other regs, not available to the allocator. */
104
105   // unavail: r8 as GSP
106   // r12 is used as a spill/reload temporary
107   // r13 as SP
108   // r14 as LR
109   // r15 as PC
110   //
111   // All in all, we have 11 allocatable integer registers:
112   // 0 1 2 3 4 5 6 7 9 10 11, with r8 dedicated as GSP
113   // and r12 dedicated as a spill temporary.
114   // 13 14 and 15 are not under the allocator's control.
115   //
116   // Hence for the allocatable registers we have:
117   //
118   // callee-saved: 4 5 6 7 (8) 9 10 11
119   // caller-saved: 0 1 2 3
120   // Note 9 is ambiguous: the base EABI does not give an e/r-saved
121   // designation for it, but the Linux instantiation of the ABI
122   // specifies it as callee-saved.
123   //
124   // If the set of available registers changes or if the e/r status
125   // changes, be sure to re-check/sync the definition of
126   // getHRegUsage for ARMInstr_Call too.
127   ru->regs[ru->size++] = hregARM_R8();
128   ru->regs[ru->size++] = hregARM_R12();
129   ru->regs[ru->size++] = hregARM_R13();
130   ru->regs[ru->size++] = hregARM_R14();
131   ru->regs[ru->size++] = hregARM_R15();
132   ru->regs[ru->size++] = hregARM_Q13();
133   ru->regs[ru->size++] = hregARM_Q14();
134   ru->regs[ru->size++] = hregARM_Q15();
135
136   rRegUniverse_ARM_initted = True;
137
138   RRegUniverse__check_is_sane(ru);
139   return ru;
140}
141
142
143void ppHRegARM ( HReg reg )  {
144   Int r;
145   /* Be generic for all virtual regs. */
146   if (hregIsVirtual(reg)) {
147      ppHReg(reg);
148      return;
149   }
150   /* But specific for real regs. */
151   switch (hregClass(reg)) {
152      case HRcInt32:
153         r = hregEncoding(reg);
154         vassert(r >= 0 && r < 16);
155         vex_printf("r%d", r);
156         return;
157      case HRcFlt64:
158         r = hregEncoding(reg);
159         vassert(r >= 0 && r < 32);
160         vex_printf("d%d", r);
161         return;
162      case HRcFlt32:
163         r = hregEncoding(reg);
164         vassert(r >= 0 && r < 32);
165         vex_printf("s%d", r);
166         return;
167      case HRcVec128:
168         r = hregEncoding(reg);
169         vassert(r >= 0 && r < 16);
170         vex_printf("q%d", r);
171         return;
172      default:
173         vpanic("ppHRegARM");
174   }
175}
176
177
178/* --------- Condition codes, ARM encoding. --------- */
179
180const HChar* showARMCondCode ( ARMCondCode cond ) {
181   switch (cond) {
182       case ARMcc_EQ:  return "eq";
183       case ARMcc_NE:  return "ne";
184       case ARMcc_HS:  return "hs";
185       case ARMcc_LO:  return "lo";
186       case ARMcc_MI:  return "mi";
187       case ARMcc_PL:  return "pl";
188       case ARMcc_VS:  return "vs";
189       case ARMcc_VC:  return "vc";
190       case ARMcc_HI:  return "hi";
191       case ARMcc_LS:  return "ls";
192       case ARMcc_GE:  return "ge";
193       case ARMcc_LT:  return "lt";
194       case ARMcc_GT:  return "gt";
195       case ARMcc_LE:  return "le";
196       case ARMcc_AL:  return "al"; // default
197       case ARMcc_NV:  return "nv";
198       default: vpanic("showARMCondCode");
199   }
200}
201
202
203/* --------- Mem AModes: Addressing Mode 1 --------- */
204
205ARMAMode1* ARMAMode1_RI  ( HReg reg, Int simm13 ) {
206   ARMAMode1* am        = LibVEX_Alloc_inline(sizeof(ARMAMode1));
207   am->tag              = ARMam1_RI;
208   am->ARMam1.RI.reg    = reg;
209   am->ARMam1.RI.simm13 = simm13;
210   vassert(-4095 <= simm13 && simm13 <= 4095);
211   return am;
212}
213ARMAMode1* ARMAMode1_RRS ( HReg base, HReg index, UInt shift ) {
214   ARMAMode1* am        = LibVEX_Alloc_inline(sizeof(ARMAMode1));
215   am->tag              = ARMam1_RRS;
216   am->ARMam1.RRS.base  = base;
217   am->ARMam1.RRS.index = index;
218   am->ARMam1.RRS.shift = shift;
219   vassert(0 <= shift && shift <= 3);
220   return am;
221}
222
223void ppARMAMode1 ( ARMAMode1* am ) {
224   switch (am->tag) {
225      case ARMam1_RI:
226         vex_printf("%d(", am->ARMam1.RI.simm13);
227         ppHRegARM(am->ARMam1.RI.reg);
228         vex_printf(")");
229         break;
230      case ARMam1_RRS:
231         vex_printf("(");
232         ppHRegARM(am->ARMam1.RRS.base);
233         vex_printf(",");
234         ppHRegARM(am->ARMam1.RRS.index);
235         vex_printf(",%u)", am->ARMam1.RRS.shift);
236         break;
237      default:
238         vassert(0);
239   }
240}
241
242static void addRegUsage_ARMAMode1 ( HRegUsage* u, ARMAMode1* am ) {
243   switch (am->tag) {
244      case ARMam1_RI:
245         addHRegUse(u, HRmRead, am->ARMam1.RI.reg);
246         return;
247      case ARMam1_RRS:
248         //    addHRegUse(u, HRmRead, am->ARMam1.RRS.base);
249         //    addHRegUse(u, HRmRead, am->ARMam1.RRS.index);
250         //   return;
251      default:
252         vpanic("addRegUsage_ARMAmode1");
253   }
254}
255
256static void mapRegs_ARMAMode1 ( HRegRemap* m, ARMAMode1* am ) {
257   switch (am->tag) {
258      case ARMam1_RI:
259         am->ARMam1.RI.reg = lookupHRegRemap(m, am->ARMam1.RI.reg);
260         return;
261      case ARMam1_RRS:
262         //am->ARMam1.RR.base =lookupHRegRemap(m, am->ARMam1.RR.base);
263         //am->ARMam1.RR.index = lookupHRegRemap(m, am->ARMam1.RR.index);
264         //return;
265      default:
266         vpanic("mapRegs_ARMAmode1");
267   }
268}
269
270
271/* --------- Mem AModes: Addressing Mode 2 --------- */
272
273ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
274   ARMAMode2* am       = LibVEX_Alloc_inline(sizeof(ARMAMode2));
275   am->tag             = ARMam2_RI;
276   am->ARMam2.RI.reg   = reg;
277   am->ARMam2.RI.simm9 = simm9;
278   vassert(-255 <= simm9 && simm9 <= 255);
279   return am;
280}
281ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
282   ARMAMode2* am       = LibVEX_Alloc_inline(sizeof(ARMAMode2));
283   am->tag             = ARMam2_RR;
284   am->ARMam2.RR.base  = base;
285   am->ARMam2.RR.index = index;
286   return am;
287}
288
289void ppARMAMode2 ( ARMAMode2* am ) {
290   switch (am->tag) {
291      case ARMam2_RI:
292         vex_printf("%d(", am->ARMam2.RI.simm9);
293         ppHRegARM(am->ARMam2.RI.reg);
294         vex_printf(")");
295         break;
296      case ARMam2_RR:
297         vex_printf("(");
298         ppHRegARM(am->ARMam2.RR.base);
299         vex_printf(",");
300         ppHRegARM(am->ARMam2.RR.index);
301         vex_printf(")");
302         break;
303      default:
304         vassert(0);
305   }
306}
307
308static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
309   switch (am->tag) {
310      case ARMam2_RI:
311         addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
312         return;
313      case ARMam2_RR:
314         //    addHRegUse(u, HRmRead, am->ARMam2.RR.base);
315         //    addHRegUse(u, HRmRead, am->ARMam2.RR.index);
316         //   return;
317      default:
318         vpanic("addRegUsage_ARMAmode2");
319   }
320}
321
322static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
323   switch (am->tag) {
324      case ARMam2_RI:
325         am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
326         return;
327      case ARMam2_RR:
328         //am->ARMam2.RR.base =lookupHRegRemap(m, am->ARMam2.RR.base);
329         //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index);
330         //return;
331      default:
332         vpanic("mapRegs_ARMAmode2");
333   }
334}
335
336
337/* --------- Mem AModes: Addressing Mode VFP --------- */
338
339ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
340   ARMAModeV* am = LibVEX_Alloc_inline(sizeof(ARMAModeV));
341   vassert(simm11 >= -1020 && simm11 <= 1020);
342   vassert(0 == (simm11 & 3));
343   am->reg    = reg;
344   am->simm11 = simm11;
345   return am;
346}
347
348void ppARMAModeV ( ARMAModeV* am ) {
349   vex_printf("%d(", am->simm11);
350   ppHRegARM(am->reg);
351   vex_printf(")");
352}
353
354static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
355   addHRegUse(u, HRmRead, am->reg);
356}
357
358static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
359   am->reg = lookupHRegRemap(m, am->reg);
360}
361
362
363/* --------- Mem AModes: Addressing Mode Neon ------- */
364
365ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
366   ARMAModeN* am = LibVEX_Alloc_inline(sizeof(ARMAModeN));
367   am->tag = ARMamN_RR;
368   am->ARMamN.RR.rN = rN;
369   am->ARMamN.RR.rM = rM;
370   return am;
371}
372
373ARMAModeN *mkARMAModeN_R ( HReg rN ) {
374   ARMAModeN* am = LibVEX_Alloc_inline(sizeof(ARMAModeN));
375   am->tag = ARMamN_R;
376   am->ARMamN.R.rN = rN;
377   return am;
378}
379
380static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
381   if (am->tag == ARMamN_R) {
382      addHRegUse(u, HRmRead, am->ARMamN.R.rN);
383   } else {
384      addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
385      addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
386   }
387}
388
389static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
390   if (am->tag == ARMamN_R) {
391      am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
392   } else {
393      am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
394      am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
395   }
396}
397
398void ppARMAModeN ( ARMAModeN* am ) {
399   vex_printf("[");
400   if (am->tag == ARMamN_R) {
401      ppHRegARM(am->ARMamN.R.rN);
402   } else {
403      ppHRegARM(am->ARMamN.RR.rN);
404   }
405   vex_printf("]");
406   if (am->tag == ARMamN_RR) {
407      vex_printf(", ");
408      ppHRegARM(am->ARMamN.RR.rM);
409   }
410}
411
412
413/* --------- Reg or imm-8x4 operands --------- */
414
415static UInt ROR32 ( UInt x, UInt sh ) {
416   vassert(sh >= 0 && sh < 32);
417   if (sh == 0)
418      return x;
419   else
420      return (x << (32-sh)) | (x >> sh);
421}
422
423ARMRI84* ARMRI84_I84 ( UShort imm8, UShort imm4 ) {
424   ARMRI84* ri84          = LibVEX_Alloc_inline(sizeof(ARMRI84));
425   ri84->tag              = ARMri84_I84;
426   ri84->ARMri84.I84.imm8 = imm8;
427   ri84->ARMri84.I84.imm4 = imm4;
428   vassert(imm8 >= 0 && imm8 <= 255);
429   vassert(imm4 >= 0 && imm4 <= 15);
430   return ri84;
431}
432ARMRI84* ARMRI84_R ( HReg reg ) {
433   ARMRI84* ri84       = LibVEX_Alloc_inline(sizeof(ARMRI84));
434   ri84->tag           = ARMri84_R;
435   ri84->ARMri84.R.reg = reg;
436   return ri84;
437}
438
439void ppARMRI84 ( ARMRI84* ri84 ) {
440   switch (ri84->tag) {
441      case ARMri84_I84:
442         vex_printf("0x%x", ROR32(ri84->ARMri84.I84.imm8,
443                                  2 * ri84->ARMri84.I84.imm4));
444         break;
445      case ARMri84_R:
446         ppHRegARM(ri84->ARMri84.R.reg);
447         break;
448      default:
449         vassert(0);
450   }
451}
452
453static void addRegUsage_ARMRI84 ( HRegUsage* u, ARMRI84* ri84 ) {
454   switch (ri84->tag) {
455      case ARMri84_I84:
456         return;
457      case ARMri84_R:
458         addHRegUse(u, HRmRead, ri84->ARMri84.R.reg);
459         return;
460      default:
461         vpanic("addRegUsage_ARMRI84");
462   }
463}
464
465static void mapRegs_ARMRI84 ( HRegRemap* m, ARMRI84* ri84 ) {
466   switch (ri84->tag) {
467      case ARMri84_I84:
468         return;
469      case ARMri84_R:
470         ri84->ARMri84.R.reg = lookupHRegRemap(m, ri84->ARMri84.R.reg);
471         return;
472      default:
473         vpanic("mapRegs_ARMRI84");
474   }
475}
476
477
478/* --------- Reg or imm5 operands --------- */
479
480ARMRI5* ARMRI5_I5 ( UInt imm5 ) {
481   ARMRI5* ri5         = LibVEX_Alloc_inline(sizeof(ARMRI5));
482   ri5->tag            = ARMri5_I5;
483   ri5->ARMri5.I5.imm5 = imm5;
484   vassert(imm5 > 0 && imm5 <= 31); // zero is not allowed
485   return ri5;
486}
487ARMRI5* ARMRI5_R ( HReg reg ) {
488   ARMRI5* ri5       = LibVEX_Alloc_inline(sizeof(ARMRI5));
489   ri5->tag          = ARMri5_R;
490   ri5->ARMri5.R.reg = reg;
491   return ri5;
492}
493
494void ppARMRI5 ( ARMRI5* ri5 ) {
495   switch (ri5->tag) {
496      case ARMri5_I5:
497         vex_printf("%u", ri5->ARMri5.I5.imm5);
498         break;
499      case ARMri5_R:
500         ppHRegARM(ri5->ARMri5.R.reg);
501         break;
502      default:
503         vassert(0);
504   }
505}
506
507static void addRegUsage_ARMRI5 ( HRegUsage* u, ARMRI5* ri5 ) {
508   switch (ri5->tag) {
509      case ARMri5_I5:
510         return;
511      case ARMri5_R:
512         addHRegUse(u, HRmRead, ri5->ARMri5.R.reg);
513         return;
514      default:
515         vpanic("addRegUsage_ARMRI5");
516   }
517}
518
519static void mapRegs_ARMRI5 ( HRegRemap* m, ARMRI5* ri5 ) {
520   switch (ri5->tag) {
521      case ARMri5_I5:
522         return;
523      case ARMri5_R:
524         ri5->ARMri5.R.reg = lookupHRegRemap(m, ri5->ARMri5.R.reg);
525         return;
526      default:
527         vpanic("mapRegs_ARMRI5");
528   }
529}
530
531/* -------- Neon Immediate operatnd --------- */
532
533ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
534   ARMNImm* i = LibVEX_Alloc_inline(sizeof(ARMNImm));
535   i->type = type;
536   i->imm8 = imm8;
537   return i;
538}
539
540ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
541   int i, j;
542   ULong y, x = imm->imm8;
543   switch (imm->type) {
544      case 3:
545         x = x << 8; /* fallthrough */
546      case 2:
547         x = x << 8; /* fallthrough */
548      case 1:
549         x = x << 8; /* fallthrough */
550      case 0:
551         return (x << 32) | x;
552      case 5:
553      case 6:
554         if (imm->type == 5)
555            x = x << 8;
556         else
557            x = (x << 8) | x;
558         /* fallthrough */
559      case 4:
560         x = (x << 16) | x;
561         return (x << 32) | x;
562      case 8:
563         x = (x << 8) | 0xFF;
564         /* fallthrough */
565      case 7:
566         x = (x << 8) | 0xFF;
567         return (x << 32) | x;
568      case 9:
569         x = 0;
570         for (i = 7; i >= 0; i--) {
571            y = ((ULong)imm->imm8 >> i) & 1;
572            for (j = 0; j < 8; j++) {
573               x = (x << 1) | y;
574            }
575         }
576         return x;
577      case 10:
578         x |= (x & 0x80) << 5;
579         x |= (~x & 0x40) << 5;
580         x &= 0x187F; /* 0001 1000 0111 1111 */
581         x |= (x & 0x40) << 4;
582         x |= (x & 0x40) << 3;
583         x |= (x & 0x40) << 2;
584         x |= (x & 0x40) << 1;
585         x = x << 19;
586         x = (x << 32) | x;
587         return x;
588      default:
589         vpanic("ARMNImm_to_Imm64");
590   }
591}
592
593ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
594   ARMNImm tmp;
595   if ((x & 0xFFFFFFFF) == (x >> 32)) {
596      if ((x & 0xFFFFFF00) == 0)
597         return ARMNImm_TI(0, x & 0xFF);
598      if ((x & 0xFFFF00FF) == 0)
599         return ARMNImm_TI(1, (x >> 8) & 0xFF);
600      if ((x & 0xFF00FFFF) == 0)
601         return ARMNImm_TI(2, (x >> 16) & 0xFF);
602      if ((x & 0x00FFFFFF) == 0)
603         return ARMNImm_TI(3, (x >> 24) & 0xFF);
604      if ((x & 0xFFFF00FF) == 0xFF)
605         return ARMNImm_TI(7, (x >> 8) & 0xFF);
606      if ((x & 0xFF00FFFF) == 0xFFFF)
607         return ARMNImm_TI(8, (x >> 16) & 0xFF);
608      if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
609         if ((x & 0xFF00) == 0)
610            return ARMNImm_TI(4, x & 0xFF);
611         if ((x & 0x00FF) == 0)
612            return ARMNImm_TI(5, (x >> 8) & 0xFF);
613         if ((x & 0xFF) == ((x >> 8) & 0xFF))
614            return ARMNImm_TI(6, x & 0xFF);
615      }
616      if ((x & 0x7FFFF) == 0) {
617         tmp.type = 10;
618         tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
619         if (ARMNImm_to_Imm64(&tmp) == x)
620            return ARMNImm_TI(tmp.type, tmp.imm8);
621      }
622   } else {
623      /* This can only be type 9. */
624      tmp.imm8 = (((x >> 56) & 1) << 7)
625               | (((x >> 48) & 1) << 6)
626               | (((x >> 40) & 1) << 5)
627               | (((x >> 32) & 1) << 4)
628               | (((x >> 24) & 1) << 3)
629               | (((x >> 16) & 1) << 2)
630               | (((x >>  8) & 1) << 1)
631               | (((x >>  0) & 1) << 0);
632      tmp.type = 9;
633      if (ARMNImm_to_Imm64 (&tmp) == x)
634         return ARMNImm_TI(tmp.type, tmp.imm8);
635   }
636   return NULL;
637}
638
639void ppARMNImm (ARMNImm* i) {
640   ULong x = ARMNImm_to_Imm64(i);
641   vex_printf("0x%llX%llX", x, x);
642}
643
644/* -- Register or scalar operand --- */
645
646ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
647{
648   ARMNRS *p = LibVEX_Alloc_inline(sizeof(ARMNRS));
649   p->tag = tag;
650   p->reg = reg;
651   p->index = index;
652   return p;
653}
654
655void ppARMNRS(ARMNRS *p)
656{
657   ppHRegARM(p->reg);
658   if (p->tag == ARMNRS_Scalar) {
659      vex_printf("[%u]", p->index);
660   }
661}
662
663/* --------- Instructions. --------- */
664
665const HChar* showARMAluOp ( ARMAluOp op ) {
666   switch (op) {
667      case ARMalu_ADD:  return "add";
668      case ARMalu_ADDS: return "adds";
669      case ARMalu_ADC:  return "adc";
670      case ARMalu_SUB:  return "sub";
671      case ARMalu_SUBS: return "subs";
672      case ARMalu_SBC:  return "sbc";
673      case ARMalu_AND:  return "and";
674      case ARMalu_BIC:  return "bic";
675      case ARMalu_OR:   return "orr";
676      case ARMalu_XOR:  return "xor";
677      default: vpanic("showARMAluOp");
678   }
679}
680
681const HChar* showARMShiftOp ( ARMShiftOp op ) {
682   switch (op) {
683      case ARMsh_SHL: return "shl";
684      case ARMsh_SHR: return "shr";
685      case ARMsh_SAR: return "sar";
686      default: vpanic("showARMShiftOp");
687   }
688}
689
690const HChar* showARMUnaryOp ( ARMUnaryOp op ) {
691   switch (op) {
692      case ARMun_NEG: return "neg";
693      case ARMun_NOT: return "not";
694      case ARMun_CLZ: return "clz";
695      default: vpanic("showARMUnaryOp");
696   }
697}
698
699const HChar* showARMMulOp ( ARMMulOp op ) {
700   switch (op) {
701      case ARMmul_PLAIN: return "mul";
702      case ARMmul_ZX:    return "umull";
703      case ARMmul_SX:    return "smull";
704      default: vpanic("showARMMulOp");
705   }
706}
707
708const HChar* showARMVfpOp ( ARMVfpOp op ) {
709   switch (op) {
710      case ARMvfp_ADD: return "add";
711      case ARMvfp_SUB: return "sub";
712      case ARMvfp_MUL: return "mul";
713      case ARMvfp_DIV: return "div";
714      default: vpanic("showARMVfpOp");
715   }
716}
717
718const HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op ) {
719   switch (op) {
720      case ARMvfpu_COPY: return "cpy";
721      case ARMvfpu_NEG:  return "neg";
722      case ARMvfpu_ABS:  return "abs";
723      case ARMvfpu_SQRT: return "sqrt";
724      default: vpanic("showARMVfpUnaryOp");
725   }
726}
727
728const HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
729   switch (op) {
730      case ARMneon_VAND: return "vand";
731      case ARMneon_VORR: return "vorr";
732      case ARMneon_VXOR: return "veor";
733      case ARMneon_VADD: return "vadd";
734      case ARMneon_VRHADDS: return "vrhadd";
735      case ARMneon_VRHADDU: return "vrhadd";
736      case ARMneon_VADDFP: return "vadd";
737      case ARMneon_VPADDFP: return "vpadd";
738      case ARMneon_VABDFP: return "vabd";
739      case ARMneon_VSUB: return "vsub";
740      case ARMneon_VSUBFP: return "vsub";
741      case ARMneon_VMINU: return "vmin";
742      case ARMneon_VMINS: return "vmin";
743      case ARMneon_VMINF: return "vmin";
744      case ARMneon_VMAXU: return "vmax";
745      case ARMneon_VMAXS: return "vmax";
746      case ARMneon_VMAXF: return "vmax";
747      case ARMneon_VQADDU: return "vqadd";
748      case ARMneon_VQADDS: return "vqadd";
749      case ARMneon_VQSUBU: return "vqsub";
750      case ARMneon_VQSUBS: return "vqsub";
751      case ARMneon_VCGTU:  return "vcgt";
752      case ARMneon_VCGTS:  return "vcgt";
753      case ARMneon_VCGTF:  return "vcgt";
754      case ARMneon_VCGEF:  return "vcgt";
755      case ARMneon_VCGEU:  return "vcge";
756      case ARMneon_VCGES:  return "vcge";
757      case ARMneon_VCEQ:  return "vceq";
758      case ARMneon_VCEQF:  return "vceq";
759      case ARMneon_VPADD:   return "vpadd";
760      case ARMneon_VPMINU:   return "vpmin";
761      case ARMneon_VPMINS:   return "vpmin";
762      case ARMneon_VPMINF:   return "vpmin";
763      case ARMneon_VPMAXU:   return "vpmax";
764      case ARMneon_VPMAXS:   return "vpmax";
765      case ARMneon_VPMAXF:   return "vpmax";
766      case ARMneon_VEXT:   return "vext";
767      case ARMneon_VMUL:   return "vmuli";
768      case ARMneon_VMULLU:   return "vmull";
769      case ARMneon_VMULLS:   return "vmull";
770      case ARMneon_VMULP:  return "vmul";
771      case ARMneon_VMULFP:  return "vmul";
772      case ARMneon_VMULLP:  return "vmul";
773      case ARMneon_VQDMULH: return "vqdmulh";
774      case ARMneon_VQRDMULH: return "vqrdmulh";
775      case ARMneon_VQDMULL: return "vqdmull";
776      case ARMneon_VTBL: return "vtbl";
777      case ARMneon_VRECPS: return "vrecps";
778      case ARMneon_VRSQRTS: return "vrecps";
779      case ARMneon_INVALID: return "??invalid??";
780      /* ... */
781      default: vpanic("showARMNeonBinOp");
782   }
783}
784
785const HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
786   switch (op) {
787      case ARMneon_VAND:
788      case ARMneon_VORR:
789      case ARMneon_VXOR:
790         return "";
791      case ARMneon_VADD:
792      case ARMneon_VSUB:
793      case ARMneon_VEXT:
794      case ARMneon_VMUL:
795      case ARMneon_VPADD:
796      case ARMneon_VTBL:
797      case ARMneon_VCEQ:
798         return ".i";
799      case ARMneon_VRHADDU:
800      case ARMneon_VMINU:
801      case ARMneon_VMAXU:
802      case ARMneon_VQADDU:
803      case ARMneon_VQSUBU:
804      case ARMneon_VCGTU:
805      case ARMneon_VCGEU:
806      case ARMneon_VMULLU:
807      case ARMneon_VPMINU:
808      case ARMneon_VPMAXU:
809         return ".u";
810      case ARMneon_VRHADDS:
811      case ARMneon_VMINS:
812      case ARMneon_VMAXS:
813      case ARMneon_VQADDS:
814      case ARMneon_VQSUBS:
815      case ARMneon_VCGTS:
816      case ARMneon_VCGES:
817      case ARMneon_VQDMULL:
818      case ARMneon_VMULLS:
819      case ARMneon_VPMINS:
820      case ARMneon_VPMAXS:
821      case ARMneon_VQDMULH:
822      case ARMneon_VQRDMULH:
823         return ".s";
824      case ARMneon_VMULP:
825      case ARMneon_VMULLP:
826         return ".p";
827      case ARMneon_VADDFP:
828      case ARMneon_VABDFP:
829      case ARMneon_VPADDFP:
830      case ARMneon_VSUBFP:
831      case ARMneon_VMULFP:
832      case ARMneon_VMINF:
833      case ARMneon_VMAXF:
834      case ARMneon_VPMINF:
835      case ARMneon_VPMAXF:
836      case ARMneon_VCGTF:
837      case ARMneon_VCGEF:
838      case ARMneon_VCEQF:
839      case ARMneon_VRECPS:
840      case ARMneon_VRSQRTS:
841         return ".f";
842      /* ... */
843      default: vpanic("showARMNeonBinOpDataType");
844   }
845}
846
847const HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
848   switch (op) {
849      case ARMneon_COPY: return "vmov";
850      case ARMneon_COPYLS: return "vmov";
851      case ARMneon_COPYLU: return "vmov";
852      case ARMneon_COPYN: return "vmov";
853      case ARMneon_COPYQNSS: return "vqmovn";
854      case ARMneon_COPYQNUS: return "vqmovun";
855      case ARMneon_COPYQNUU: return "vqmovn";
856      case ARMneon_NOT: return "vmvn";
857      case ARMneon_EQZ: return "vceq";
858      case ARMneon_CNT: return "vcnt";
859      case ARMneon_CLS: return "vcls";
860      case ARMneon_CLZ: return "vclz";
861      case ARMneon_DUP: return "vdup";
862      case ARMneon_PADDLS: return "vpaddl";
863      case ARMneon_PADDLU: return "vpaddl";
864      case ARMneon_VQSHLNSS: return "vqshl";
865      case ARMneon_VQSHLNUU: return "vqshl";
866      case ARMneon_VQSHLNUS: return "vqshlu";
867      case ARMneon_REV16: return "vrev16";
868      case ARMneon_REV32: return "vrev32";
869      case ARMneon_REV64: return "vrev64";
870      case ARMneon_VCVTFtoU: return "vcvt";
871      case ARMneon_VCVTFtoS: return "vcvt";
872      case ARMneon_VCVTUtoF: return "vcvt";
873      case ARMneon_VCVTStoF: return "vcvt";
874      case ARMneon_VCVTFtoFixedU: return "vcvt";
875      case ARMneon_VCVTFtoFixedS: return "vcvt";
876      case ARMneon_VCVTFixedUtoF: return "vcvt";
877      case ARMneon_VCVTFixedStoF: return "vcvt";
878      case ARMneon_VCVTF32toF16: return "vcvt";
879      case ARMneon_VCVTF16toF32: return "vcvt";
880      case ARMneon_VRECIP: return "vrecip";
881      case ARMneon_VRECIPF: return "vrecipf";
882      case ARMneon_VNEGF: return "vneg";
883      case ARMneon_ABS: return "vabs";
884      case ARMneon_VABSFP: return "vabsfp";
885      case ARMneon_VRSQRTEFP: return "vrsqrtefp";
886      case ARMneon_VRSQRTE: return "vrsqrte";
887      /* ... */
888      default: vpanic("showARMNeonUnOp");
889   }
890}
891
892const HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
893   switch (op) {
894      case ARMneon_COPY:
895      case ARMneon_NOT:
896         return "";
897      case ARMneon_COPYN:
898      case ARMneon_EQZ:
899      case ARMneon_CNT:
900      case ARMneon_DUP:
901      case ARMneon_REV16:
902      case ARMneon_REV32:
903      case ARMneon_REV64:
904         return ".i";
905      case ARMneon_COPYLU:
906      case ARMneon_PADDLU:
907      case ARMneon_COPYQNUU:
908      case ARMneon_VQSHLNUU:
909      case ARMneon_VRECIP:
910      case ARMneon_VRSQRTE:
911         return ".u";
912      case ARMneon_CLS:
913      case ARMneon_CLZ:
914      case ARMneon_COPYLS:
915      case ARMneon_PADDLS:
916      case ARMneon_COPYQNSS:
917      case ARMneon_COPYQNUS:
918      case ARMneon_VQSHLNSS:
919      case ARMneon_VQSHLNUS:
920      case ARMneon_ABS:
921         return ".s";
922      case ARMneon_VRECIPF:
923      case ARMneon_VNEGF:
924      case ARMneon_VABSFP:
925      case ARMneon_VRSQRTEFP:
926         return ".f";
927      case ARMneon_VCVTFtoU: return ".u32.f32";
928      case ARMneon_VCVTFtoS: return ".s32.f32";
929      case ARMneon_VCVTUtoF: return ".f32.u32";
930      case ARMneon_VCVTStoF: return ".f32.s32";
931      case ARMneon_VCVTF16toF32: return ".f32.f16";
932      case ARMneon_VCVTF32toF16: return ".f16.f32";
933      case ARMneon_VCVTFtoFixedU: return ".u32.f32";
934      case ARMneon_VCVTFtoFixedS: return ".s32.f32";
935      case ARMneon_VCVTFixedUtoF: return ".f32.u32";
936      case ARMneon_VCVTFixedStoF: return ".f32.s32";
937      /* ... */
938      default: vpanic("showARMNeonUnOpDataType");
939   }
940}
941
942const HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
943   switch (op) {
944      case ARMneon_SETELEM: return "vmov";
945      case ARMneon_GETELEMU: return "vmov";
946      case ARMneon_GETELEMS: return "vmov";
947      case ARMneon_VDUP: return "vdup";
948      /* ... */
949      default: vpanic("showARMNeonUnarySOp");
950   }
951}
952
953const HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
954   switch (op) {
955      case ARMneon_SETELEM:
956      case ARMneon_VDUP:
957         return ".i";
958      case ARMneon_GETELEMS:
959         return ".s";
960      case ARMneon_GETELEMU:
961         return ".u";
962      /* ... */
963      default: vpanic("showARMNeonUnarySOp");
964   }
965}
966
967const HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
968   switch (op) {
969      case ARMneon_VSHL: return "vshl";
970      case ARMneon_VSAL: return "vshl";
971      case ARMneon_VQSHL: return "vqshl";
972      case ARMneon_VQSAL: return "vqshl";
973      /* ... */
974      default: vpanic("showARMNeonShiftOp");
975   }
976}
977
978const HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
979   switch (op) {
980      case ARMneon_VSHL:
981      case ARMneon_VQSHL:
982         return ".u";
983      case ARMneon_VSAL:
984      case ARMneon_VQSAL:
985         return ".s";
986      /* ... */
987      default: vpanic("showARMNeonShiftOpDataType");
988   }
989}
990
991const HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
992   switch (op) {
993      case ARMneon_TRN: return "vtrn";
994      case ARMneon_ZIP: return "vzip";
995      case ARMneon_UZP: return "vuzp";
996      /* ... */
997      default: vpanic("showARMNeonDualOp");
998   }
999}
1000
1001const HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
1002   switch (op) {
1003      case ARMneon_TRN:
1004      case ARMneon_ZIP:
1005      case ARMneon_UZP:
1006         return "i";
1007      /* ... */
1008      default: vpanic("showARMNeonDualOp");
1009   }
1010}
1011
1012static const HChar* showARMNeonDataSize_wrk ( UInt size )
1013{
1014   switch (size) {
1015      case 0: return "8";
1016      case 1: return "16";
1017      case 2: return "32";
1018      case 3: return "64";
1019      default: vpanic("showARMNeonDataSize");
1020   }
1021}
1022
1023static const HChar* showARMNeonDataSize ( const ARMInstr* i )
1024{
1025   switch (i->tag) {
1026      case ARMin_NBinary:
1027         if (i->ARMin.NBinary.op == ARMneon_VEXT)
1028            return "8";
1029         if (i->ARMin.NBinary.op == ARMneon_VAND ||
1030             i->ARMin.NBinary.op == ARMneon_VORR ||
1031             i->ARMin.NBinary.op == ARMneon_VXOR)
1032            return "";
1033         return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
1034      case ARMin_NUnary:
1035         if (i->ARMin.NUnary.op == ARMneon_COPY ||
1036             i->ARMin.NUnary.op == ARMneon_NOT ||
1037             i->ARMin.NUnary.op == ARMneon_VCVTF32toF16||
1038             i->ARMin.NUnary.op == ARMneon_VCVTF16toF32||
1039             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
1040             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
1041             i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
1042             i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
1043             i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
1044             i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
1045             i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
1046             i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
1047            return "";
1048         if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
1049             i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
1050             i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
1051            UInt size;
1052            size = i->ARMin.NUnary.size;
1053            if (size & 0x40)
1054               return "64";
1055            if (size & 0x20)
1056               return "32";
1057            if (size & 0x10)
1058               return "16";
1059            if (size & 0x08)
1060               return "8";
1061            vpanic("showARMNeonDataSize");
1062         }
1063         return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
1064      case ARMin_NUnaryS:
1065         if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
1066            int size;
1067            size = i->ARMin.NUnaryS.size;
1068            if ((size & 1) == 1)
1069               return "8";
1070            if ((size & 3) == 2)
1071               return "16";
1072            if ((size & 7) == 4)
1073               return "32";
1074            vpanic("showARMNeonDataSize");
1075         }
1076         return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
1077      case ARMin_NShift:
1078         return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
1079      case ARMin_NDual:
1080         return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
1081      default:
1082         vpanic("showARMNeonDataSize");
1083   }
1084}
1085
1086ARMInstr* ARMInstr_Alu ( ARMAluOp op,
1087                         HReg dst, HReg argL, ARMRI84* argR ) {
1088   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1089   i->tag            = ARMin_Alu;
1090   i->ARMin.Alu.op   = op;
1091   i->ARMin.Alu.dst  = dst;
1092   i->ARMin.Alu.argL = argL;
1093   i->ARMin.Alu.argR = argR;
1094   return i;
1095}
1096ARMInstr* ARMInstr_Shift  ( ARMShiftOp op,
1097                            HReg dst, HReg argL, ARMRI5* argR ) {
1098   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1099   i->tag              = ARMin_Shift;
1100   i->ARMin.Shift.op   = op;
1101   i->ARMin.Shift.dst  = dst;
1102   i->ARMin.Shift.argL = argL;
1103   i->ARMin.Shift.argR = argR;
1104   return i;
1105}
1106ARMInstr* ARMInstr_Unary ( ARMUnaryOp op, HReg dst, HReg src ) {
1107   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1108   i->tag             = ARMin_Unary;
1109   i->ARMin.Unary.op  = op;
1110   i->ARMin.Unary.dst = dst;
1111   i->ARMin.Unary.src = src;
1112   return i;
1113}
1114ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg argL, ARMRI84* argR ) {
1115   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1116   i->tag                  = ARMin_CmpOrTst;
1117   i->ARMin.CmpOrTst.isCmp = isCmp;
1118   i->ARMin.CmpOrTst.argL  = argL;
1119   i->ARMin.CmpOrTst.argR  = argR;
1120   return i;
1121}
1122ARMInstr* ARMInstr_Mov ( HReg dst, ARMRI84* src ) {
1123   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1124   i->tag           = ARMin_Mov;
1125   i->ARMin.Mov.dst = dst;
1126   i->ARMin.Mov.src = src;
1127   return i;
1128}
1129ARMInstr* ARMInstr_Imm32  ( HReg dst, UInt imm32 ) {
1130   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1131   i->tag               = ARMin_Imm32;
1132   i->ARMin.Imm32.dst   = dst;
1133   i->ARMin.Imm32.imm32 = imm32;
1134   return i;
1135}
1136ARMInstr* ARMInstr_LdSt32 ( ARMCondCode cc,
1137                            Bool isLoad, HReg rD, ARMAMode1* amode ) {
1138   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1139   i->tag                 = ARMin_LdSt32;
1140   i->ARMin.LdSt32.cc     = cc;
1141   i->ARMin.LdSt32.isLoad = isLoad;
1142   i->ARMin.LdSt32.rD     = rD;
1143   i->ARMin.LdSt32.amode  = amode;
1144   vassert(cc != ARMcc_NV);
1145   return i;
1146}
1147ARMInstr* ARMInstr_LdSt16 ( ARMCondCode cc,
1148                            Bool isLoad, Bool signedLoad,
1149                            HReg rD, ARMAMode2* amode ) {
1150   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1151   i->tag                     = ARMin_LdSt16;
1152   i->ARMin.LdSt16.cc         = cc;
1153   i->ARMin.LdSt16.isLoad     = isLoad;
1154   i->ARMin.LdSt16.signedLoad = signedLoad;
1155   i->ARMin.LdSt16.rD         = rD;
1156   i->ARMin.LdSt16.amode      = amode;
1157   vassert(cc != ARMcc_NV);
1158   return i;
1159}
1160ARMInstr* ARMInstr_LdSt8U ( ARMCondCode cc,
1161                            Bool isLoad, HReg rD, ARMAMode1* amode ) {
1162   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1163   i->tag                 = ARMin_LdSt8U;
1164   i->ARMin.LdSt8U.cc     = cc;
1165   i->ARMin.LdSt8U.isLoad = isLoad;
1166   i->ARMin.LdSt8U.rD     = rD;
1167   i->ARMin.LdSt8U.amode  = amode;
1168   vassert(cc != ARMcc_NV);
1169   return i;
1170}
1171ARMInstr* ARMInstr_Ld8S ( ARMCondCode cc, HReg rD, ARMAMode2* amode ) {
1172   ARMInstr* i         = LibVEX_Alloc_inline(sizeof(ARMInstr));
1173   i->tag              = ARMin_Ld8S;
1174   i->ARMin.Ld8S.cc    = cc;
1175   i->ARMin.Ld8S.rD    = rD;
1176   i->ARMin.Ld8S.amode = amode;
1177   vassert(cc != ARMcc_NV);
1178   return i;
1179}
1180ARMInstr* ARMInstr_XDirect ( Addr32 dstGA, ARMAMode1* amR15T,
1181                             ARMCondCode cond, Bool toFastEP ) {
1182   ARMInstr* i               = LibVEX_Alloc_inline(sizeof(ARMInstr));
1183   i->tag                    = ARMin_XDirect;
1184   i->ARMin.XDirect.dstGA    = dstGA;
1185   i->ARMin.XDirect.amR15T   = amR15T;
1186   i->ARMin.XDirect.cond     = cond;
1187   i->ARMin.XDirect.toFastEP = toFastEP;
1188   return i;
1189}
1190ARMInstr* ARMInstr_XIndir ( HReg dstGA, ARMAMode1* amR15T,
1191                            ARMCondCode cond ) {
1192   ARMInstr* i            = LibVEX_Alloc_inline(sizeof(ARMInstr));
1193   i->tag                 = ARMin_XIndir;
1194   i->ARMin.XIndir.dstGA  = dstGA;
1195   i->ARMin.XIndir.amR15T = amR15T;
1196   i->ARMin.XIndir.cond   = cond;
1197   return i;
1198}
1199ARMInstr* ARMInstr_XAssisted ( HReg dstGA, ARMAMode1* amR15T,
1200                               ARMCondCode cond, IRJumpKind jk ) {
1201   ARMInstr* i               = LibVEX_Alloc_inline(sizeof(ARMInstr));
1202   i->tag                    = ARMin_XAssisted;
1203   i->ARMin.XAssisted.dstGA  = dstGA;
1204   i->ARMin.XAssisted.amR15T = amR15T;
1205   i->ARMin.XAssisted.cond   = cond;
1206   i->ARMin.XAssisted.jk     = jk;
1207   return i;
1208}
1209ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) {
1210   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1211   i->tag             = ARMin_CMov;
1212   i->ARMin.CMov.cond = cond;
1213   i->ARMin.CMov.dst  = dst;
1214   i->ARMin.CMov.src  = src;
1215   vassert(cond != ARMcc_AL);
1216   return i;
1217}
1218ARMInstr* ARMInstr_Call ( ARMCondCode cond, Addr32 target, Int nArgRegs,
1219                          RetLoc rloc ) {
1220   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1221   i->tag                 = ARMin_Call;
1222   i->ARMin.Call.cond     = cond;
1223   i->ARMin.Call.target   = target;
1224   i->ARMin.Call.nArgRegs = nArgRegs;
1225   i->ARMin.Call.rloc     = rloc;
1226   vassert(is_sane_RetLoc(rloc));
1227   return i;
1228}
1229ARMInstr* ARMInstr_Mul ( ARMMulOp op ) {
1230   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1231   i->tag          = ARMin_Mul;
1232   i->ARMin.Mul.op = op;
1233   return i;
1234}
1235ARMInstr* ARMInstr_LdrEX ( Int szB ) {
1236   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1237   i->tag             = ARMin_LdrEX;
1238   i->ARMin.LdrEX.szB = szB;
1239   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1240   return i;
1241}
1242ARMInstr* ARMInstr_StrEX ( Int szB ) {
1243   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1244   i->tag             = ARMin_StrEX;
1245   i->ARMin.StrEX.szB = szB;
1246   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1247   return i;
1248}
1249ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg dD, ARMAModeV* am ) {
1250   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1251   i->tag                 = ARMin_VLdStD;
1252   i->ARMin.VLdStD.isLoad = isLoad;
1253   i->ARMin.VLdStD.dD     = dD;
1254   i->ARMin.VLdStD.amode  = am;
1255   return i;
1256}
1257ARMInstr* ARMInstr_VLdStS ( Bool isLoad, HReg fD, ARMAModeV* am ) {
1258   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1259   i->tag                 = ARMin_VLdStS;
1260   i->ARMin.VLdStS.isLoad = isLoad;
1261   i->ARMin.VLdStS.fD     = fD;
1262   i->ARMin.VLdStS.amode  = am;
1263   return i;
1264}
1265ARMInstr* ARMInstr_VAluD ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1266   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1267   i->tag              = ARMin_VAluD;
1268   i->ARMin.VAluD.op   = op;
1269   i->ARMin.VAluD.dst  = dst;
1270   i->ARMin.VAluD.argL = argL;
1271   i->ARMin.VAluD.argR = argR;
1272   return i;
1273}
1274ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1275   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1276   i->tag              = ARMin_VAluS;
1277   i->ARMin.VAluS.op   = op;
1278   i->ARMin.VAluS.dst  = dst;
1279   i->ARMin.VAluS.argL = argL;
1280   i->ARMin.VAluS.argR = argR;
1281   return i;
1282}
1283ARMInstr* ARMInstr_VUnaryD ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
1284   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1285   i->tag               = ARMin_VUnaryD;
1286   i->ARMin.VUnaryD.op  = op;
1287   i->ARMin.VUnaryD.dst = dst;
1288   i->ARMin.VUnaryD.src = src;
1289   return i;
1290}
1291ARMInstr* ARMInstr_VUnaryS ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
1292   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1293   i->tag               = ARMin_VUnaryS;
1294   i->ARMin.VUnaryS.op  = op;
1295   i->ARMin.VUnaryS.dst = dst;
1296   i->ARMin.VUnaryS.src = src;
1297   return i;
1298}
1299ARMInstr* ARMInstr_VCmpD ( HReg argL, HReg argR ) {
1300   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1301   i->tag              = ARMin_VCmpD;
1302   i->ARMin.VCmpD.argL = argL;
1303   i->ARMin.VCmpD.argR = argR;
1304   return i;
1305}
1306ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
1307   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1308   i->tag               = ARMin_VCMovD;
1309   i->ARMin.VCMovD.cond = cond;
1310   i->ARMin.VCMovD.dst  = dst;
1311   i->ARMin.VCMovD.src  = src;
1312   vassert(cond != ARMcc_AL);
1313   return i;
1314}
1315ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
1316   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1317   i->tag               = ARMin_VCMovS;
1318   i->ARMin.VCMovS.cond = cond;
1319   i->ARMin.VCMovS.dst  = dst;
1320   i->ARMin.VCMovS.src  = src;
1321   vassert(cond != ARMcc_AL);
1322   return i;
1323}
1324ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
1325   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1326   i->tag               = ARMin_VCvtSD;
1327   i->ARMin.VCvtSD.sToD = sToD;
1328   i->ARMin.VCvtSD.dst  = dst;
1329   i->ARMin.VCvtSD.src  = src;
1330   return i;
1331}
1332ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
1333   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1334   i->tag              = ARMin_VXferD;
1335   i->ARMin.VXferD.toD = toD;
1336   i->ARMin.VXferD.dD  = dD;
1337   i->ARMin.VXferD.rHi = rHi;
1338   i->ARMin.VXferD.rLo = rLo;
1339   return i;
1340}
1341ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
1342   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1343   i->tag              = ARMin_VXferS;
1344   i->ARMin.VXferS.toS = toS;
1345   i->ARMin.VXferS.fD  = fD;
1346   i->ARMin.VXferS.rLo = rLo;
1347   return i;
1348}
1349ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
1350                            HReg dst, HReg src ) {
1351   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1352   i->tag                = ARMin_VCvtID;
1353   i->ARMin.VCvtID.iToD  = iToD;
1354   i->ARMin.VCvtID.syned = syned;
1355   i->ARMin.VCvtID.dst   = dst;
1356   i->ARMin.VCvtID.src   = src;
1357   return i;
1358}
1359ARMInstr* ARMInstr_FPSCR ( Bool toFPSCR, HReg iReg ) {
1360   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1361   i->tag                 = ARMin_FPSCR;
1362   i->ARMin.FPSCR.toFPSCR = toFPSCR;
1363   i->ARMin.FPSCR.iReg    = iReg;
1364   return i;
1365}
1366ARMInstr* ARMInstr_MFence ( void ) {
1367   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1368   i->tag      = ARMin_MFence;
1369   return i;
1370}
1371ARMInstr* ARMInstr_CLREX( void ) {
1372   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1373   i->tag      = ARMin_CLREX;
1374   return i;
1375}
1376
1377ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg dQ, ARMAModeN *amode ) {
1378   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1379   i->tag                  = ARMin_NLdStQ;
1380   i->ARMin.NLdStQ.isLoad  = isLoad;
1381   i->ARMin.NLdStQ.dQ      = dQ;
1382   i->ARMin.NLdStQ.amode   = amode;
1383   return i;
1384}
1385
1386ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
1387   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1388   i->tag                  = ARMin_NLdStD;
1389   i->ARMin.NLdStD.isLoad  = isLoad;
1390   i->ARMin.NLdStD.dD      = dD;
1391   i->ARMin.NLdStD.amode   = amode;
1392   return i;
1393}
1394
1395ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
1396                            UInt size, Bool Q ) {
1397   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1398   i->tag                = ARMin_NUnary;
1399   i->ARMin.NUnary.op   = op;
1400   i->ARMin.NUnary.src  = nQ;
1401   i->ARMin.NUnary.dst  = dQ;
1402   i->ARMin.NUnary.size = size;
1403   i->ARMin.NUnary.Q    = Q;
1404   return i;
1405}
1406
1407ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS op, ARMNRS* dst, ARMNRS* src,
1408                             UInt size, Bool Q ) {
1409   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1410   i->tag                = ARMin_NUnaryS;
1411   i->ARMin.NUnaryS.op   = op;
1412   i->ARMin.NUnaryS.src  = src;
1413   i->ARMin.NUnaryS.dst  = dst;
1414   i->ARMin.NUnaryS.size = size;
1415   i->ARMin.NUnaryS.Q    = Q;
1416   return i;
1417}
1418
1419ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
1420                           UInt size, Bool Q ) {
1421   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1422   i->tag                = ARMin_NDual;
1423   i->ARMin.NDual.op   = op;
1424   i->ARMin.NDual.arg1 = nQ;
1425   i->ARMin.NDual.arg2 = mQ;
1426   i->ARMin.NDual.size = size;
1427   i->ARMin.NDual.Q    = Q;
1428   return i;
1429}
1430
1431ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
1432                             HReg dst, HReg argL, HReg argR,
1433                             UInt size, Bool Q ) {
1434   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1435   i->tag                = ARMin_NBinary;
1436   i->ARMin.NBinary.op   = op;
1437   i->ARMin.NBinary.argL = argL;
1438   i->ARMin.NBinary.argR = argR;
1439   i->ARMin.NBinary.dst  = dst;
1440   i->ARMin.NBinary.size = size;
1441   i->ARMin.NBinary.Q    = Q;
1442   return i;
1443}
1444
1445ARMInstr* ARMInstr_NeonImm (HReg dst, ARMNImm* imm ) {
1446   ARMInstr *i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1447   i->tag         = ARMin_NeonImm;
1448   i->ARMin.NeonImm.dst = dst;
1449   i->ARMin.NeonImm.imm = imm;
1450   return i;
1451}
1452
1453ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
1454   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1455   i->tag               = ARMin_NCMovQ;
1456   i->ARMin.NCMovQ.cond = cond;
1457   i->ARMin.NCMovQ.dst  = dst;
1458   i->ARMin.NCMovQ.src  = src;
1459   vassert(cond != ARMcc_AL);
1460   return i;
1461}
1462
1463ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
1464                            HReg dst, HReg argL, HReg argR,
1465                            UInt size, Bool Q ) {
1466   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1467   i->tag                = ARMin_NShift;
1468   i->ARMin.NShift.op   = op;
1469   i->ARMin.NShift.argL = argL;
1470   i->ARMin.NShift.argR = argR;
1471   i->ARMin.NShift.dst  = dst;
1472   i->ARMin.NShift.size = size;
1473   i->ARMin.NShift.Q    = Q;
1474   return i;
1475}
1476
1477ARMInstr* ARMInstr_NShl64 ( HReg dst, HReg src, UInt amt )
1478{
1479   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1480   i->tag              = ARMin_NShl64;
1481   i->ARMin.NShl64.dst = dst;
1482   i->ARMin.NShl64.src = src;
1483   i->ARMin.NShl64.amt = amt;
1484   vassert(amt >= 1 && amt <= 63);
1485   return i;
1486}
1487
1488/* Helper copy-pasted from isel.c */
1489static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
1490{
1491   UInt i;
1492   for (i = 0; i < 16; i++) {
1493      if (0 == (u & 0xFFFFFF00)) {
1494         *u8 = u;
1495         *u4 = i;
1496         return True;
1497      }
1498      u = ROR32(u, 30);
1499   }
1500   vassert(i == 16);
1501   return False;
1502}
1503
1504ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
1505   UInt u8, u4;
1506   ARMInstr *i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1507   /* Try to generate single ADD if possible */
1508   if (fitsIn8x4(&u8, &u4, imm32)) {
1509      i->tag            = ARMin_Alu;
1510      i->ARMin.Alu.op   = ARMalu_ADD;
1511      i->ARMin.Alu.dst  = rD;
1512      i->ARMin.Alu.argL = rN;
1513      i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
1514   } else {
1515      i->tag               = ARMin_Add32;
1516      i->ARMin.Add32.rD    = rD;
1517      i->ARMin.Add32.rN    = rN;
1518      i->ARMin.Add32.imm32 = imm32;
1519   }
1520   return i;
1521}
1522
1523ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter,
1524                             ARMAMode1* amFailAddr ) {
1525   ARMInstr* i                 = LibVEX_Alloc_inline(sizeof(ARMInstr));
1526   i->tag                      = ARMin_EvCheck;
1527   i->ARMin.EvCheck.amCounter  = amCounter;
1528   i->ARMin.EvCheck.amFailAddr = amFailAddr;
1529   return i;
1530}
1531
1532ARMInstr* ARMInstr_ProfInc ( void ) {
1533   ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1534   i->tag      = ARMin_ProfInc;
1535   return i;
1536}
1537
1538/* ... */
1539
1540void ppARMInstr ( const ARMInstr* i ) {
1541   switch (i->tag) {
1542      case ARMin_Alu:
1543         vex_printf("%-4s  ", showARMAluOp(i->ARMin.Alu.op));
1544         ppHRegARM(i->ARMin.Alu.dst);
1545         vex_printf(", ");
1546         ppHRegARM(i->ARMin.Alu.argL);
1547         vex_printf(", ");
1548         ppARMRI84(i->ARMin.Alu.argR);
1549         return;
1550      case ARMin_Shift:
1551         vex_printf("%s   ", showARMShiftOp(i->ARMin.Shift.op));
1552         ppHRegARM(i->ARMin.Shift.dst);
1553         vex_printf(", ");
1554         ppHRegARM(i->ARMin.Shift.argL);
1555         vex_printf(", ");
1556         ppARMRI5(i->ARMin.Shift.argR);
1557         return;
1558      case ARMin_Unary:
1559         vex_printf("%s   ", showARMUnaryOp(i->ARMin.Unary.op));
1560         ppHRegARM(i->ARMin.Unary.dst);
1561         vex_printf(", ");
1562         ppHRegARM(i->ARMin.Unary.src);
1563         return;
1564      case ARMin_CmpOrTst:
1565         vex_printf("%s   ", i->ARMin.CmpOrTst.isCmp ? "cmp" : "tst");
1566         ppHRegARM(i->ARMin.CmpOrTst.argL);
1567         vex_printf(", ");
1568         ppARMRI84(i->ARMin.CmpOrTst.argR);
1569         return;
1570      case ARMin_Mov:
1571         vex_printf("mov   ");
1572         ppHRegARM(i->ARMin.Mov.dst);
1573         vex_printf(", ");
1574         ppARMRI84(i->ARMin.Mov.src);
1575         return;
1576      case ARMin_Imm32:
1577         vex_printf("imm   ");
1578         ppHRegARM(i->ARMin.Imm32.dst);
1579         vex_printf(", 0x%x", i->ARMin.Imm32.imm32);
1580         return;
1581      case ARMin_LdSt32:
1582         if (i->ARMin.LdSt32.isLoad) {
1583            vex_printf("ldr%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? "  "
1584                                    : showARMCondCode(i->ARMin.LdSt32.cc));
1585            ppHRegARM(i->ARMin.LdSt32.rD);
1586            vex_printf(", ");
1587            ppARMAMode1(i->ARMin.LdSt32.amode);
1588         } else {
1589            vex_printf("str%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? "  "
1590                                    : showARMCondCode(i->ARMin.LdSt32.cc));
1591            ppARMAMode1(i->ARMin.LdSt32.amode);
1592            vex_printf(", ");
1593            ppHRegARM(i->ARMin.LdSt32.rD);
1594         }
1595         return;
1596      case ARMin_LdSt16:
1597         if (i->ARMin.LdSt16.isLoad) {
1598            vex_printf("%s%s%s",
1599                       i->ARMin.LdSt16.signedLoad ? "ldrsh" : "ldrh",
1600                       i->ARMin.LdSt16.cc == ARMcc_AL ? "  "
1601                          : showARMCondCode(i->ARMin.LdSt16.cc),
1602                       i->ARMin.LdSt16.signedLoad ? " " : "  ");
1603            ppHRegARM(i->ARMin.LdSt16.rD);
1604            vex_printf(", ");
1605            ppARMAMode2(i->ARMin.LdSt16.amode);
1606         } else {
1607            vex_printf("strh%s  ",
1608                       i->ARMin.LdSt16.cc == ARMcc_AL ? "  "
1609                          : showARMCondCode(i->ARMin.LdSt16.cc));
1610            ppARMAMode2(i->ARMin.LdSt16.amode);
1611            vex_printf(", ");
1612            ppHRegARM(i->ARMin.LdSt16.rD);
1613         }
1614         return;
1615      case ARMin_LdSt8U:
1616         if (i->ARMin.LdSt8U.isLoad) {
1617            vex_printf("ldrb%s  ", i->ARMin.LdSt8U.cc == ARMcc_AL ? "  "
1618                                      : showARMCondCode(i->ARMin.LdSt8U.cc));
1619            ppHRegARM(i->ARMin.LdSt8U.rD);
1620            vex_printf(", ");
1621            ppARMAMode1(i->ARMin.LdSt8U.amode);
1622         } else {
1623            vex_printf("strb%s  ", i->ARMin.LdSt8U.cc == ARMcc_AL ? "  "
1624                                      : showARMCondCode(i->ARMin.LdSt8U.cc));
1625            ppARMAMode1(i->ARMin.LdSt8U.amode);
1626            vex_printf(", ");
1627            ppHRegARM(i->ARMin.LdSt8U.rD);
1628         }
1629         return;
1630      case ARMin_Ld8S:
1631         vex_printf("ldrsb%s ", i->ARMin.Ld8S.cc == ARMcc_AL ? "  "
1632                                   : showARMCondCode(i->ARMin.Ld8S.cc));
1633         ppARMAMode2(i->ARMin.Ld8S.amode);
1634         vex_printf(", ");
1635         ppHRegARM(i->ARMin.Ld8S.rD);
1636         return;
1637      case ARMin_XDirect:
1638         vex_printf("(xDirect) ");
1639         vex_printf("if (%%cpsr.%s) { ",
1640                    showARMCondCode(i->ARMin.XDirect.cond));
1641         vex_printf("movw r12,0x%x; ",
1642                    (UInt)(i->ARMin.XDirect.dstGA & 0xFFFF));
1643         vex_printf("movt r12,0x%x; ",
1644                    (UInt)((i->ARMin.XDirect.dstGA >> 16) & 0xFFFF));
1645         vex_printf("str r12,");
1646         ppARMAMode1(i->ARMin.XDirect.amR15T);
1647         vex_printf("; movw r12,LO16($disp_cp_chain_me_to_%sEP); ",
1648                    i->ARMin.XDirect.toFastEP ? "fast" : "slow");
1649         vex_printf("movt r12,HI16($disp_cp_chain_me_to_%sEP); ",
1650                    i->ARMin.XDirect.toFastEP ? "fast" : "slow");
1651         vex_printf("blx r12 }");
1652         return;
1653      case ARMin_XIndir:
1654         vex_printf("(xIndir) ");
1655         vex_printf("if (%%cpsr.%s) { ",
1656                    showARMCondCode(i->ARMin.XIndir.cond));
1657         vex_printf("str ");
1658         ppHRegARM(i->ARMin.XIndir.dstGA);
1659         vex_printf(",");
1660         ppARMAMode1(i->ARMin.XIndir.amR15T);
1661         vex_printf("; movw r12,LO16($disp_cp_xindir); ");
1662         vex_printf("movt r12,HI16($disp_cp_xindir); ");
1663         vex_printf("blx r12 }");
1664         return;
1665      case ARMin_XAssisted:
1666         vex_printf("(xAssisted) ");
1667         vex_printf("if (%%cpsr.%s) { ",
1668                    showARMCondCode(i->ARMin.XAssisted.cond));
1669         vex_printf("str ");
1670         ppHRegARM(i->ARMin.XAssisted.dstGA);
1671         vex_printf(",");
1672         ppARMAMode1(i->ARMin.XAssisted.amR15T);
1673         vex_printf("movw r8,$IRJumpKind_to_TRCVAL(%d); ",
1674                    (Int)i->ARMin.XAssisted.jk);
1675         vex_printf("movw r12,LO16($disp_cp_xassisted); ");
1676         vex_printf("movt r12,HI16($disp_cp_xassisted); ");
1677         vex_printf("blx r12 }");
1678         return;
1679      case ARMin_CMov:
1680         vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond));
1681         ppHRegARM(i->ARMin.CMov.dst);
1682         vex_printf(", ");
1683         ppARMRI84(i->ARMin.CMov.src);
1684         return;
1685      case ARMin_Call:
1686         vex_printf("call%s  ",
1687                    i->ARMin.Call.cond==ARMcc_AL
1688                       ? "" : showARMCondCode(i->ARMin.Call.cond));
1689         vex_printf("0x%x [nArgRegs=%d, ",
1690                    i->ARMin.Call.target, i->ARMin.Call.nArgRegs);
1691         ppRetLoc(i->ARMin.Call.rloc);
1692         vex_printf("]");
1693         return;
1694      case ARMin_Mul:
1695         vex_printf("%-5s ", showARMMulOp(i->ARMin.Mul.op));
1696         if (i->ARMin.Mul.op == ARMmul_PLAIN) {
1697            vex_printf("r0, r2, r3");
1698         } else {
1699            vex_printf("r1:r0, r2, r3");
1700         }
1701         return;
1702      case ARMin_LdrEX: {
1703         const HChar* sz = "";
1704         switch (i->ARMin.LdrEX.szB) {
1705            case 1: sz = "b"; break; case 2: sz = "h"; break;
1706            case 8: sz = "d"; break; case 4: break;
1707            default: vassert(0);
1708         }
1709         vex_printf("ldrex%s %sr2, [r4]",
1710                    sz, i->ARMin.LdrEX.szB == 8 ? "r3:" : "");
1711         return;
1712      }
1713      case ARMin_StrEX: {
1714         const HChar* sz = "";
1715         switch (i->ARMin.StrEX.szB) {
1716            case 1: sz = "b"; break; case 2: sz = "h"; break;
1717            case 8: sz = "d"; break; case 4: break;
1718            default: vassert(0);
1719         }
1720         vex_printf("strex%s r0, %sr2, [r4]",
1721                    sz, i->ARMin.StrEX.szB == 8 ? "r3:" : "");
1722         return;
1723      }
1724      case ARMin_VLdStD:
1725         if (i->ARMin.VLdStD.isLoad) {
1726            vex_printf("fldd  ");
1727            ppHRegARM(i->ARMin.VLdStD.dD);
1728            vex_printf(", ");
1729            ppARMAModeV(i->ARMin.VLdStD.amode);
1730         } else {
1731            vex_printf("fstd  ");
1732            ppARMAModeV(i->ARMin.VLdStD.amode);
1733            vex_printf(", ");
1734            ppHRegARM(i->ARMin.VLdStD.dD);
1735         }
1736         return;
1737      case ARMin_VLdStS:
1738         if (i->ARMin.VLdStS.isLoad) {
1739            vex_printf("flds  ");
1740            ppHRegARM(i->ARMin.VLdStS.fD);
1741            vex_printf(", ");
1742            ppARMAModeV(i->ARMin.VLdStS.amode);
1743         } else {
1744            vex_printf("fsts  ");
1745            ppARMAModeV(i->ARMin.VLdStS.amode);
1746            vex_printf(", ");
1747            ppHRegARM(i->ARMin.VLdStS.fD);
1748         }
1749         return;
1750      case ARMin_VAluD:
1751         vex_printf("f%-3sd ", showARMVfpOp(i->ARMin.VAluD.op));
1752         ppHRegARM(i->ARMin.VAluD.dst);
1753         vex_printf(", ");
1754         ppHRegARM(i->ARMin.VAluD.argL);
1755         vex_printf(", ");
1756         ppHRegARM(i->ARMin.VAluD.argR);
1757         return;
1758      case ARMin_VAluS:
1759         vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
1760         ppHRegARM(i->ARMin.VAluS.dst);
1761         vex_printf(", ");
1762         ppHRegARM(i->ARMin.VAluS.argL);
1763         vex_printf(", ");
1764         ppHRegARM(i->ARMin.VAluS.argR);
1765         return;
1766      case ARMin_VUnaryD:
1767         vex_printf("f%-3sd ", showARMVfpUnaryOp(i->ARMin.VUnaryD.op));
1768         ppHRegARM(i->ARMin.VUnaryD.dst);
1769         vex_printf(", ");
1770         ppHRegARM(i->ARMin.VUnaryD.src);
1771         return;
1772      case ARMin_VUnaryS:
1773         vex_printf("f%-3ss ", showARMVfpUnaryOp(i->ARMin.VUnaryS.op));
1774         ppHRegARM(i->ARMin.VUnaryS.dst);
1775         vex_printf(", ");
1776         ppHRegARM(i->ARMin.VUnaryS.src);
1777         return;
1778      case ARMin_VCmpD:
1779         vex_printf("fcmpd ");
1780         ppHRegARM(i->ARMin.VCmpD.argL);
1781         vex_printf(", ");
1782         ppHRegARM(i->ARMin.VCmpD.argR);
1783         vex_printf(" ; fmstat");
1784         return;
1785      case ARMin_VCMovD:
1786         vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
1787         ppHRegARM(i->ARMin.VCMovD.dst);
1788         vex_printf(", ");
1789         ppHRegARM(i->ARMin.VCMovD.src);
1790         return;
1791      case ARMin_VCMovS:
1792         vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
1793         ppHRegARM(i->ARMin.VCMovS.dst);
1794         vex_printf(", ");
1795         ppHRegARM(i->ARMin.VCMovS.src);
1796         return;
1797      case ARMin_VCvtSD:
1798         vex_printf("fcvt%s ", i->ARMin.VCvtSD.sToD ? "ds" : "sd");
1799         ppHRegARM(i->ARMin.VCvtSD.dst);
1800         vex_printf(", ");
1801         ppHRegARM(i->ARMin.VCvtSD.src);
1802         return;
1803      case ARMin_VXferD:
1804         vex_printf("vmov  ");
1805         if (i->ARMin.VXferD.toD) {
1806            ppHRegARM(i->ARMin.VXferD.dD);
1807            vex_printf(", ");
1808            ppHRegARM(i->ARMin.VXferD.rLo);
1809            vex_printf(", ");
1810            ppHRegARM(i->ARMin.VXferD.rHi);
1811         } else {
1812            ppHRegARM(i->ARMin.VXferD.rLo);
1813            vex_printf(", ");
1814            ppHRegARM(i->ARMin.VXferD.rHi);
1815            vex_printf(", ");
1816            ppHRegARM(i->ARMin.VXferD.dD);
1817         }
1818         return;
1819      case ARMin_VXferS:
1820         vex_printf("vmov  ");
1821         if (i->ARMin.VXferS.toS) {
1822            ppHRegARM(i->ARMin.VXferS.fD);
1823            vex_printf(", ");
1824            ppHRegARM(i->ARMin.VXferS.rLo);
1825         } else {
1826            ppHRegARM(i->ARMin.VXferS.rLo);
1827            vex_printf(", ");
1828            ppHRegARM(i->ARMin.VXferS.fD);
1829         }
1830         return;
1831      case ARMin_VCvtID: {
1832         const HChar* nm = "?";
1833         if (i->ARMin.VCvtID.iToD) {
1834            nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
1835         } else {
1836            nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
1837         }
1838         vex_printf("%s ", nm);
1839         ppHRegARM(i->ARMin.VCvtID.dst);
1840         vex_printf(", ");
1841         ppHRegARM(i->ARMin.VCvtID.src);
1842         return;
1843      }
1844      case ARMin_FPSCR:
1845         if (i->ARMin.FPSCR.toFPSCR) {
1846            vex_printf("fmxr  fpscr, ");
1847            ppHRegARM(i->ARMin.FPSCR.iReg);
1848         } else {
1849            vex_printf("fmrx  ");
1850            ppHRegARM(i->ARMin.FPSCR.iReg);
1851            vex_printf(", fpscr");
1852         }
1853         return;
1854      case ARMin_MFence:
1855         vex_printf("(mfence) dsb sy; dmb sy; isb");
1856         return;
1857      case ARMin_CLREX:
1858         vex_printf("clrex");
1859         return;
1860      case ARMin_NLdStQ:
1861         if (i->ARMin.NLdStQ.isLoad)
1862            vex_printf("vld1.32 {");
1863         else
1864            vex_printf("vst1.32 {");
1865         ppHRegARM(i->ARMin.NLdStQ.dQ);
1866         vex_printf("} ");
1867         ppARMAModeN(i->ARMin.NLdStQ.amode);
1868         return;
1869      case ARMin_NLdStD:
1870         if (i->ARMin.NLdStD.isLoad)
1871            vex_printf("vld1.32 {");
1872         else
1873            vex_printf("vst1.32 {");
1874         ppHRegARM(i->ARMin.NLdStD.dD);
1875         vex_printf("} ");
1876         ppARMAModeN(i->ARMin.NLdStD.amode);
1877         return;
1878      case ARMin_NUnary:
1879         vex_printf("%s%s%s  ",
1880                    showARMNeonUnOp(i->ARMin.NUnary.op),
1881                    showARMNeonUnOpDataType(i->ARMin.NUnary.op),
1882                    showARMNeonDataSize(i));
1883         ppHRegARM(i->ARMin.NUnary.dst);
1884         vex_printf(", ");
1885         ppHRegARM(i->ARMin.NUnary.src);
1886         if (i->ARMin.NUnary.op == ARMneon_EQZ)
1887            vex_printf(", #0");
1888         if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
1889             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
1890             i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
1891             i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
1892            vex_printf(", #%u", i->ARMin.NUnary.size);
1893         }
1894         if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
1895             i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
1896             i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
1897            UInt size;
1898            size = i->ARMin.NUnary.size;
1899            if (size & 0x40) {
1900               vex_printf(", #%u", size - 64);
1901            } else if (size & 0x20) {
1902               vex_printf(", #%u", size - 32);
1903            } else if (size & 0x10) {
1904               vex_printf(", #%u", size - 16);
1905            } else if (size & 0x08) {
1906               vex_printf(", #%u", size - 8);
1907            }
1908         }
1909         return;
1910      case ARMin_NUnaryS:
1911         vex_printf("%s%s%s  ",
1912                    showARMNeonUnOpS(i->ARMin.NUnaryS.op),
1913                    showARMNeonUnOpSDataType(i->ARMin.NUnaryS.op),
1914                    showARMNeonDataSize(i));
1915         ppARMNRS(i->ARMin.NUnaryS.dst);
1916         vex_printf(", ");
1917         ppARMNRS(i->ARMin.NUnaryS.src);
1918         return;
1919      case ARMin_NShift:
1920         vex_printf("%s%s%s  ",
1921                    showARMNeonShiftOp(i->ARMin.NShift.op),
1922                    showARMNeonShiftOpDataType(i->ARMin.NShift.op),
1923                    showARMNeonDataSize(i));
1924         ppHRegARM(i->ARMin.NShift.dst);
1925         vex_printf(", ");
1926         ppHRegARM(i->ARMin.NShift.argL);
1927         vex_printf(", ");
1928         ppHRegARM(i->ARMin.NShift.argR);
1929         return;
1930      case ARMin_NShl64:
1931         vex_printf("vshl.i64 ");
1932         ppHRegARM(i->ARMin.NShl64.dst);
1933         vex_printf(", ");
1934         ppHRegARM(i->ARMin.NShl64.src);
1935         vex_printf(", #%u", i->ARMin.NShl64.amt);
1936         return;
1937      case ARMin_NDual:
1938         vex_printf("%s%s%s  ",
1939                    showARMNeonDualOp(i->ARMin.NDual.op),
1940                    showARMNeonDualOpDataType(i->ARMin.NDual.op),
1941                    showARMNeonDataSize(i));
1942         ppHRegARM(i->ARMin.NDual.arg1);
1943         vex_printf(", ");
1944         ppHRegARM(i->ARMin.NDual.arg2);
1945         return;
1946      case ARMin_NBinary:
1947         vex_printf("%s%s%s",
1948                    showARMNeonBinOp(i->ARMin.NBinary.op),
1949                    showARMNeonBinOpDataType(i->ARMin.NBinary.op),
1950                    showARMNeonDataSize(i));
1951         vex_printf("  ");
1952         ppHRegARM(i->ARMin.NBinary.dst);
1953         vex_printf(", ");
1954         ppHRegARM(i->ARMin.NBinary.argL);
1955         vex_printf(", ");
1956         ppHRegARM(i->ARMin.NBinary.argR);
1957         return;
1958      case ARMin_NeonImm:
1959         vex_printf("vmov  ");
1960         ppHRegARM(i->ARMin.NeonImm.dst);
1961         vex_printf(", ");
1962         ppARMNImm(i->ARMin.NeonImm.imm);
1963         return;
1964      case ARMin_NCMovQ:
1965         vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
1966         ppHRegARM(i->ARMin.NCMovQ.dst);
1967         vex_printf(", ");
1968         ppHRegARM(i->ARMin.NCMovQ.src);
1969         return;
1970      case ARMin_Add32:
1971         vex_printf("add32 ");
1972         ppHRegARM(i->ARMin.Add32.rD);
1973         vex_printf(", ");
1974         ppHRegARM(i->ARMin.Add32.rN);
1975         vex_printf(", ");
1976         vex_printf("%u", i->ARMin.Add32.imm32);
1977         return;
1978      case ARMin_EvCheck:
1979         vex_printf("(evCheck) ldr r12,");
1980         ppARMAMode1(i->ARMin.EvCheck.amCounter);
1981         vex_printf("; subs r12,r12,$1; str r12,");
1982         ppARMAMode1(i->ARMin.EvCheck.amCounter);
1983         vex_printf("; bpl nofail; ldr r12,");
1984         ppARMAMode1(i->ARMin.EvCheck.amFailAddr);
1985         vex_printf("; bx r12; nofail:");
1986         return;
1987      case ARMin_ProfInc:
1988         vex_printf("(profInc) movw r12,LO16($NotKnownYet); "
1989                    "movw r12,HI16($NotKnownYet); "
1990                    "ldr r11,[r12]; "
1991                    "adds r11,r11,$1; "
1992                    "str r11,[r12]; "
1993                    "ldr r11,[r12+4]; "
1994                    "adc r11,r11,$0; "
1995                    "str r11,[r12+4]");
1996         return;
1997      default:
1998         vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
1999         vpanic("ppARMInstr(1)");
2000         return;
2001   }
2002}
2003
2004
2005/* --------- Helpers for register allocation. --------- */
2006
2007void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 )
2008{
2009   vassert(mode64 == False);
2010   initHRegUsage(u);
2011   switch (i->tag) {
2012      case ARMin_Alu:
2013         addHRegUse(u, HRmWrite, i->ARMin.Alu.dst);
2014         addHRegUse(u, HRmRead, i->ARMin.Alu.argL);
2015         addRegUsage_ARMRI84(u, i->ARMin.Alu.argR);
2016         return;
2017      case ARMin_Shift:
2018         addHRegUse(u, HRmWrite, i->ARMin.Shift.dst);
2019         addHRegUse(u, HRmRead, i->ARMin.Shift.argL);
2020         addRegUsage_ARMRI5(u, i->ARMin.Shift.argR);
2021         return;
2022      case ARMin_Unary:
2023         addHRegUse(u, HRmWrite, i->ARMin.Unary.dst);
2024         addHRegUse(u, HRmRead, i->ARMin.Unary.src);
2025         return;
2026      case ARMin_CmpOrTst:
2027         addHRegUse(u, HRmRead, i->ARMin.CmpOrTst.argL);
2028         addRegUsage_ARMRI84(u, i->ARMin.CmpOrTst.argR);
2029         return;
2030      case ARMin_Mov:
2031         addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
2032         addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
2033         return;
2034      case ARMin_Imm32:
2035         addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
2036         return;
2037      case ARMin_LdSt32:
2038         addRegUsage_ARMAMode1(u, i->ARMin.LdSt32.amode);
2039         if (i->ARMin.LdSt32.isLoad) {
2040            addHRegUse(u, HRmWrite, i->ARMin.LdSt32.rD);
2041            if (i->ARMin.LdSt32.cc != ARMcc_AL)
2042               addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
2043         } else {
2044            addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
2045         }
2046         return;
2047      case ARMin_LdSt16:
2048         addRegUsage_ARMAMode2(u, i->ARMin.LdSt16.amode);
2049         if (i->ARMin.LdSt16.isLoad) {
2050            addHRegUse(u, HRmWrite, i->ARMin.LdSt16.rD);
2051            if (i->ARMin.LdSt16.cc != ARMcc_AL)
2052               addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
2053         } else {
2054            addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
2055         }
2056         return;
2057      case ARMin_LdSt8U:
2058         addRegUsage_ARMAMode1(u, i->ARMin.LdSt8U.amode);
2059         if (i->ARMin.LdSt8U.isLoad) {
2060            addHRegUse(u, HRmWrite, i->ARMin.LdSt8U.rD);
2061            if (i->ARMin.LdSt8U.cc != ARMcc_AL)
2062               addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
2063         } else {
2064            addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
2065         }
2066         return;
2067      case ARMin_Ld8S:
2068         addRegUsage_ARMAMode2(u, i->ARMin.Ld8S.amode);
2069         addHRegUse(u, HRmWrite, i->ARMin.Ld8S.rD);
2070         if (i->ARMin.Ld8S.cc != ARMcc_AL)
2071            addHRegUse(u, HRmRead, i->ARMin.Ld8S.rD);
2072         return;
2073      /* XDirect/XIndir/XAssisted are also a bit subtle.  They
2074         conditionally exit the block.  Hence we only need to list (1)
2075         the registers that they read, and (2) the registers that they
2076         write in the case where the block is not exited.  (2) is
2077         empty, hence only (1) is relevant here. */
2078      case ARMin_XDirect:
2079         addRegUsage_ARMAMode1(u, i->ARMin.XDirect.amR15T);
2080         return;
2081      case ARMin_XIndir:
2082         addHRegUse(u, HRmRead, i->ARMin.XIndir.dstGA);
2083         addRegUsage_ARMAMode1(u, i->ARMin.XIndir.amR15T);
2084         return;
2085      case ARMin_XAssisted:
2086         addHRegUse(u, HRmRead, i->ARMin.XAssisted.dstGA);
2087         addRegUsage_ARMAMode1(u, i->ARMin.XAssisted.amR15T);
2088         return;
2089      case ARMin_CMov:
2090         addHRegUse(u, HRmWrite, i->ARMin.CMov.dst);
2091         addHRegUse(u, HRmRead,  i->ARMin.CMov.dst);
2092         addRegUsage_ARMRI84(u, i->ARMin.CMov.src);
2093         return;
2094      case ARMin_Call:
2095         /* logic and comments copied/modified from x86 back end */
2096         /* This is a bit subtle. */
2097         /* First off, claim it trashes all the caller-saved regs
2098            which fall within the register allocator's jurisdiction.
2099            These I believe to be r0,1,2,3.  If it turns out that r9
2100            is also caller-saved, then we'll have to add that here
2101            too. */
2102         addHRegUse(u, HRmWrite, hregARM_R0());
2103         addHRegUse(u, HRmWrite, hregARM_R1());
2104         addHRegUse(u, HRmWrite, hregARM_R2());
2105         addHRegUse(u, HRmWrite, hregARM_R3());
2106         /* Now we have to state any parameter-carrying registers
2107            which might be read.  This depends on nArgRegs. */
2108         switch (i->ARMin.Call.nArgRegs) {
2109            case 4: addHRegUse(u, HRmRead, hregARM_R3()); /*fallthru*/
2110            case 3: addHRegUse(u, HRmRead, hregARM_R2()); /*fallthru*/
2111            case 2: addHRegUse(u, HRmRead, hregARM_R1()); /*fallthru*/
2112            case 1: addHRegUse(u, HRmRead, hregARM_R0()); break;
2113            case 0: break;
2114            default: vpanic("getRegUsage_ARM:Call:regparms");
2115         }
2116         /* Finally, there is the issue that the insn trashes a
2117            register because the literal target address has to be
2118            loaded into a register.  Fortunately, for the nArgRegs=
2119            0/1/2/3 case, we can use r0, r1, r2 or r3 respectively, so
2120            this does not cause any further damage.  For the
2121            nArgRegs=4 case, we'll have to choose another register
2122            arbitrarily since all the caller saved regs are used for
2123            parameters, and so we might as well choose r11.
2124            */
2125         if (i->ARMin.Call.nArgRegs == 4)
2126            addHRegUse(u, HRmWrite, hregARM_R11());
2127         /* Upshot of this is that the assembler really must observe
2128            the here-stated convention of which register to use as an
2129            address temporary, depending on nArgRegs: 0==r0,
2130            1==r1, 2==r2, 3==r3, 4==r11 */
2131         return;
2132      case ARMin_Mul:
2133         addHRegUse(u, HRmRead, hregARM_R2());
2134         addHRegUse(u, HRmRead, hregARM_R3());
2135         addHRegUse(u, HRmWrite, hregARM_R0());
2136         if (i->ARMin.Mul.op != ARMmul_PLAIN)
2137            addHRegUse(u, HRmWrite, hregARM_R1());
2138         return;
2139      case ARMin_LdrEX:
2140         addHRegUse(u, HRmRead, hregARM_R4());
2141         addHRegUse(u, HRmWrite, hregARM_R2());
2142         if (i->ARMin.LdrEX.szB == 8)
2143            addHRegUse(u, HRmWrite, hregARM_R3());
2144         return;
2145      case ARMin_StrEX:
2146         addHRegUse(u, HRmRead, hregARM_R4());
2147         addHRegUse(u, HRmWrite, hregARM_R0());
2148         addHRegUse(u, HRmRead, hregARM_R2());
2149         if (i->ARMin.StrEX.szB == 8)
2150            addHRegUse(u, HRmRead, hregARM_R3());
2151         return;
2152      case ARMin_VLdStD:
2153         addRegUsage_ARMAModeV(u, i->ARMin.VLdStD.amode);
2154         if (i->ARMin.VLdStD.isLoad) {
2155            addHRegUse(u, HRmWrite, i->ARMin.VLdStD.dD);
2156         } else {
2157            addHRegUse(u, HRmRead, i->ARMin.VLdStD.dD);
2158         }
2159         return;
2160      case ARMin_VLdStS:
2161         addRegUsage_ARMAModeV(u, i->ARMin.VLdStS.amode);
2162         if (i->ARMin.VLdStS.isLoad) {
2163            addHRegUse(u, HRmWrite, i->ARMin.VLdStS.fD);
2164         } else {
2165            addHRegUse(u, HRmRead, i->ARMin.VLdStS.fD);
2166         }
2167         return;
2168      case ARMin_VAluD:
2169         addHRegUse(u, HRmWrite, i->ARMin.VAluD.dst);
2170         addHRegUse(u, HRmRead, i->ARMin.VAluD.argL);
2171         addHRegUse(u, HRmRead, i->ARMin.VAluD.argR);
2172         return;
2173      case ARMin_VAluS:
2174         addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
2175         addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
2176         addHRegUse(u, HRmRead, i->ARMin.VAluS.argR);
2177         return;
2178      case ARMin_VUnaryD:
2179         addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
2180         addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
2181         return;
2182      case ARMin_VUnaryS:
2183         addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
2184         addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
2185         return;
2186      case ARMin_VCmpD:
2187         addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
2188         addHRegUse(u, HRmRead, i->ARMin.VCmpD.argR);
2189         return;
2190      case ARMin_VCMovD:
2191         addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
2192         addHRegUse(u, HRmRead,  i->ARMin.VCMovD.dst);
2193         addHRegUse(u, HRmRead,  i->ARMin.VCMovD.src);
2194         return;
2195      case ARMin_VCMovS:
2196         addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
2197         addHRegUse(u, HRmRead,  i->ARMin.VCMovS.dst);
2198         addHRegUse(u, HRmRead,  i->ARMin.VCMovS.src);
2199         return;
2200      case ARMin_VCvtSD:
2201         addHRegUse(u, HRmWrite, i->ARMin.VCvtSD.dst);
2202         addHRegUse(u, HRmRead,  i->ARMin.VCvtSD.src);
2203         return;
2204      case ARMin_VXferD:
2205         if (i->ARMin.VXferD.toD) {
2206            addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
2207            addHRegUse(u, HRmRead,  i->ARMin.VXferD.rHi);
2208            addHRegUse(u, HRmRead,  i->ARMin.VXferD.rLo);
2209         } else {
2210            addHRegUse(u, HRmRead,  i->ARMin.VXferD.dD);
2211            addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi);
2212            addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo);
2213         }
2214         return;
2215      case ARMin_VXferS:
2216         if (i->ARMin.VXferS.toS) {
2217            addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD);
2218            addHRegUse(u, HRmRead,  i->ARMin.VXferS.rLo);
2219         } else {
2220            addHRegUse(u, HRmRead,  i->ARMin.VXferS.fD);
2221            addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo);
2222         }
2223         return;
2224      case ARMin_VCvtID:
2225         addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
2226         addHRegUse(u, HRmRead,  i->ARMin.VCvtID.src);
2227         return;
2228      case ARMin_FPSCR:
2229         if (i->ARMin.FPSCR.toFPSCR)
2230            addHRegUse(u, HRmRead, i->ARMin.FPSCR.iReg);
2231         else
2232            addHRegUse(u, HRmWrite, i->ARMin.FPSCR.iReg);
2233         return;
2234      case ARMin_MFence:
2235         return;
2236      case ARMin_CLREX:
2237         return;
2238      case ARMin_NLdStQ:
2239         if (i->ARMin.NLdStQ.isLoad)
2240            addHRegUse(u, HRmWrite, i->ARMin.NLdStQ.dQ);
2241         else
2242            addHRegUse(u, HRmRead, i->ARMin.NLdStQ.dQ);
2243         addRegUsage_ARMAModeN(u, i->ARMin.NLdStQ.amode);
2244         return;
2245      case ARMin_NLdStD:
2246         if (i->ARMin.NLdStD.isLoad)
2247            addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
2248         else
2249            addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
2250         addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
2251         return;
2252      case ARMin_NUnary:
2253         addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
2254         addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
2255         return;
2256      case ARMin_NUnaryS:
2257         addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
2258         addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
2259         return;
2260      case ARMin_NShift:
2261         addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
2262         addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
2263         addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
2264         return;
2265      case ARMin_NShl64:
2266         addHRegUse(u, HRmWrite, i->ARMin.NShl64.dst);
2267         addHRegUse(u, HRmRead, i->ARMin.NShl64.src);
2268         return;
2269      case ARMin_NDual:
2270         addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
2271         addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
2272         addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
2273         addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
2274         return;
2275      case ARMin_NBinary:
2276         addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
2277         /* TODO: sometimes dst is also being read! */
2278         // XXX fix this
2279         addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
2280         addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
2281         return;
2282      case ARMin_NeonImm:
2283         addHRegUse(u, HRmWrite, i->ARMin.NeonImm.dst);
2284         return;
2285      case ARMin_NCMovQ:
2286         addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
2287         addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.dst);
2288         addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.src);
2289         return;
2290      case ARMin_Add32:
2291         addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
2292         addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
2293         return;
2294      case ARMin_EvCheck:
2295         /* We expect both amodes only to mention r8, so this is in
2296            fact pointless, since r8 isn't allocatable, but
2297            anyway.. */
2298         addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amCounter);
2299         addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amFailAddr);
2300         addHRegUse(u, HRmWrite, hregARM_R12()); /* also unavail to RA */
2301         return;
2302      case ARMin_ProfInc:
2303         addHRegUse(u, HRmWrite, hregARM_R12());
2304         addHRegUse(u, HRmWrite, hregARM_R11());
2305         return;
2306      default:
2307         ppARMInstr(i);
2308         vpanic("getRegUsage_ARMInstr");
2309   }
2310}
2311
2312
2313void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
2314{
2315   vassert(mode64 == False);
2316   switch (i->tag) {
2317      case ARMin_Alu:
2318         i->ARMin.Alu.dst = lookupHRegRemap(m, i->ARMin.Alu.dst);
2319         i->ARMin.Alu.argL = lookupHRegRemap(m, i->ARMin.Alu.argL);
2320         mapRegs_ARMRI84(m, i->ARMin.Alu.argR);
2321         return;
2322      case ARMin_Shift:
2323         i->ARMin.Shift.dst = lookupHRegRemap(m, i->ARMin.Shift.dst);
2324         i->ARMin.Shift.argL = lookupHRegRemap(m, i->ARMin.Shift.argL);
2325         mapRegs_ARMRI5(m, i->ARMin.Shift.argR);
2326         return;
2327      case ARMin_Unary:
2328         i->ARMin.Unary.dst = lookupHRegRemap(m, i->ARMin.Unary.dst);
2329         i->ARMin.Unary.src = lookupHRegRemap(m, i->ARMin.Unary.src);
2330         return;
2331      case ARMin_CmpOrTst:
2332         i->ARMin.CmpOrTst.argL = lookupHRegRemap(m, i->ARMin.CmpOrTst.argL);
2333         mapRegs_ARMRI84(m, i->ARMin.CmpOrTst.argR);
2334         return;
2335      case ARMin_Mov:
2336         i->ARMin.Mov.dst = lookupHRegRemap(m, i->ARMin.Mov.dst);
2337         mapRegs_ARMRI84(m, i->ARMin.Mov.src);
2338         return;
2339      case ARMin_Imm32:
2340         i->ARMin.Imm32.dst = lookupHRegRemap(m, i->ARMin.Imm32.dst);
2341         return;
2342      case ARMin_LdSt32:
2343         i->ARMin.LdSt32.rD = lookupHRegRemap(m, i->ARMin.LdSt32.rD);
2344         mapRegs_ARMAMode1(m, i->ARMin.LdSt32.amode);
2345         return;
2346      case ARMin_LdSt16:
2347         i->ARMin.LdSt16.rD = lookupHRegRemap(m, i->ARMin.LdSt16.rD);
2348         mapRegs_ARMAMode2(m, i->ARMin.LdSt16.amode);
2349         return;
2350      case ARMin_LdSt8U:
2351         i->ARMin.LdSt8U.rD = lookupHRegRemap(m, i->ARMin.LdSt8U.rD);
2352         mapRegs_ARMAMode1(m, i->ARMin.LdSt8U.amode);
2353         return;
2354      case ARMin_Ld8S:
2355         i->ARMin.Ld8S.rD = lookupHRegRemap(m, i->ARMin.Ld8S.rD);
2356         mapRegs_ARMAMode2(m, i->ARMin.Ld8S.amode);
2357         return;
2358      case ARMin_XDirect:
2359         mapRegs_ARMAMode1(m, i->ARMin.XDirect.amR15T);
2360         return;
2361      case ARMin_XIndir:
2362         i->ARMin.XIndir.dstGA
2363            = lookupHRegRemap(m, i->ARMin.XIndir.dstGA);
2364         mapRegs_ARMAMode1(m, i->ARMin.XIndir.amR15T);
2365         return;
2366      case ARMin_XAssisted:
2367         i->ARMin.XAssisted.dstGA
2368            = lookupHRegRemap(m, i->ARMin.XAssisted.dstGA);
2369         mapRegs_ARMAMode1(m, i->ARMin.XAssisted.amR15T);
2370         return;
2371      case ARMin_CMov:
2372         i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst);
2373         mapRegs_ARMRI84(m, i->ARMin.CMov.src);
2374         return;
2375      case ARMin_Call:
2376         return;
2377      case ARMin_Mul:
2378         return;
2379      case ARMin_LdrEX:
2380         return;
2381      case ARMin_StrEX:
2382         return;
2383      case ARMin_VLdStD:
2384         i->ARMin.VLdStD.dD = lookupHRegRemap(m, i->ARMin.VLdStD.dD);
2385         mapRegs_ARMAModeV(m, i->ARMin.VLdStD.amode);
2386         return;
2387      case ARMin_VLdStS:
2388         i->ARMin.VLdStS.fD = lookupHRegRemap(m, i->ARMin.VLdStS.fD);
2389         mapRegs_ARMAModeV(m, i->ARMin.VLdStS.amode);
2390         return;
2391      case ARMin_VAluD:
2392         i->ARMin.VAluD.dst  = lookupHRegRemap(m, i->ARMin.VAluD.dst);
2393         i->ARMin.VAluD.argL = lookupHRegRemap(m, i->ARMin.VAluD.argL);
2394         i->ARMin.VAluD.argR = lookupHRegRemap(m, i->ARMin.VAluD.argR);
2395         return;
2396      case ARMin_VAluS:
2397         i->ARMin.VAluS.dst  = lookupHRegRemap(m, i->ARMin.VAluS.dst);
2398         i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
2399         i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
2400         return;
2401      case ARMin_VUnaryD:
2402         i->ARMin.VUnaryD.dst = lookupHRegRemap(m, i->ARMin.VUnaryD.dst);
2403         i->ARMin.VUnaryD.src = lookupHRegRemap(m, i->ARMin.VUnaryD.src);
2404         return;
2405      case ARMin_VUnaryS:
2406         i->ARMin.VUnaryS.dst = lookupHRegRemap(m, i->ARMin.VUnaryS.dst);
2407         i->ARMin.VUnaryS.src = lookupHRegRemap(m, i->ARMin.VUnaryS.src);
2408         return;
2409      case ARMin_VCmpD:
2410         i->ARMin.VCmpD.argL = lookupHRegRemap(m, i->ARMin.VCmpD.argL);
2411         i->ARMin.VCmpD.argR = lookupHRegRemap(m, i->ARMin.VCmpD.argR);
2412         return;
2413      case ARMin_VCMovD:
2414         i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
2415         i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
2416         return;
2417      case ARMin_VCMovS:
2418         i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
2419         i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
2420         return;
2421      case ARMin_VCvtSD:
2422         i->ARMin.VCvtSD.dst = lookupHRegRemap(m, i->ARMin.VCvtSD.dst);
2423         i->ARMin.VCvtSD.src = lookupHRegRemap(m, i->ARMin.VCvtSD.src);
2424         return;
2425      case ARMin_VXferD:
2426         i->ARMin.VXferD.dD  = lookupHRegRemap(m, i->ARMin.VXferD.dD);
2427         i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
2428         i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
2429         return;
2430      case ARMin_VXferS:
2431         i->ARMin.VXferS.fD  = lookupHRegRemap(m, i->ARMin.VXferS.fD);
2432         i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
2433         return;
2434      case ARMin_VCvtID:
2435         i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
2436         i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
2437         return;
2438      case ARMin_FPSCR:
2439         i->ARMin.FPSCR.iReg = lookupHRegRemap(m, i->ARMin.FPSCR.iReg);
2440         return;
2441      case ARMin_MFence:
2442         return;
2443      case ARMin_CLREX:
2444         return;
2445      case ARMin_NLdStQ:
2446         i->ARMin.NLdStQ.dQ = lookupHRegRemap(m, i->ARMin.NLdStQ.dQ);
2447         mapRegs_ARMAModeN(m, i->ARMin.NLdStQ.amode);
2448         return;
2449      case ARMin_NLdStD:
2450         i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
2451         mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
2452         return;
2453      case ARMin_NUnary:
2454         i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
2455         i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
2456         return;
2457      case ARMin_NUnaryS:
2458         i->ARMin.NUnaryS.src->reg
2459            = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
2460         i->ARMin.NUnaryS.dst->reg
2461            = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
2462         return;
2463      case ARMin_NShift:
2464         i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
2465         i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
2466         i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
2467         return;
2468      case ARMin_NShl64:
2469         i->ARMin.NShl64.dst = lookupHRegRemap(m, i->ARMin.NShl64.dst);
2470         i->ARMin.NShl64.src = lookupHRegRemap(m, i->ARMin.NShl64.src);
2471         return;
2472      case ARMin_NDual:
2473         i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
2474         i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
2475         return;
2476      case ARMin_NBinary:
2477         i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
2478         i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
2479         i->ARMin.NBinary.dst  = lookupHRegRemap(m, i->ARMin.NBinary.dst);
2480         return;
2481      case ARMin_NeonImm:
2482         i->ARMin.NeonImm.dst = lookupHRegRemap(m, i->ARMin.NeonImm.dst);
2483         return;
2484      case ARMin_NCMovQ:
2485         i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
2486         i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
2487         return;
2488      case ARMin_Add32:
2489         i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
2490         i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
2491         return;
2492      case ARMin_EvCheck:
2493         /* We expect both amodes only to mention r8, so this is in
2494            fact pointless, since r8 isn't allocatable, but
2495            anyway.. */
2496         mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amCounter);
2497         mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amFailAddr);
2498         return;
2499      case ARMin_ProfInc:
2500         /* hardwires r11 and r12 -- nothing to modify. */
2501         return;
2502      default:
2503         ppARMInstr(i);
2504         vpanic("mapRegs_ARMInstr");
2505   }
2506}
2507
2508/* Figure out if i represents a reg-reg move, and if so assign the
2509   source and destination to *src and *dst.  If in doubt say No.  Used
2510   by the register allocator to do move coalescing.
2511*/
2512Bool isMove_ARMInstr ( const ARMInstr* i, HReg* src, HReg* dst )
2513{
2514   /* Moves between integer regs */
2515   switch (i->tag) {
2516      case ARMin_Mov:
2517         if (i->ARMin.Mov.src->tag == ARMri84_R) {
2518            *src = i->ARMin.Mov.src->ARMri84.R.reg;
2519            *dst = i->ARMin.Mov.dst;
2520            return True;
2521         }
2522         break;
2523      case ARMin_VUnaryD:
2524         if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
2525            *src = i->ARMin.VUnaryD.src;
2526            *dst = i->ARMin.VUnaryD.dst;
2527            return True;
2528         }
2529         break;
2530      case ARMin_VUnaryS:
2531         if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
2532            *src = i->ARMin.VUnaryS.src;
2533            *dst = i->ARMin.VUnaryS.dst;
2534            return True;
2535         }
2536         break;
2537      case ARMin_NUnary:
2538         if (i->ARMin.NUnary.op == ARMneon_COPY) {
2539            *src = i->ARMin.NUnary.src;
2540            *dst = i->ARMin.NUnary.dst;
2541            return True;
2542         }
2543         break;
2544      default:
2545         break;
2546   }
2547
2548   return False;
2549}
2550
2551
2552/* Generate arm spill/reload instructions under the direction of the
2553   register allocator.  Note it's critical these don't write the
2554   condition codes. */
2555
2556void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2557                    HReg rreg, Int offsetB, Bool mode64 )
2558{
2559   HRegClass rclass;
2560   vassert(offsetB >= 0);
2561   vassert(!hregIsVirtual(rreg));
2562   vassert(mode64 == False);
2563   *i1 = *i2 = NULL;
2564   rclass = hregClass(rreg);
2565   switch (rclass) {
2566      case HRcInt32:
2567         vassert(offsetB <= 4095);
2568         *i1 = ARMInstr_LdSt32( ARMcc_AL, False/*!isLoad*/,
2569                                rreg,
2570                                ARMAMode1_RI(hregARM_R8(), offsetB) );
2571         return;
2572      case HRcFlt32:
2573      case HRcFlt64: {
2574         HReg r8   = hregARM_R8();  /* baseblock */
2575         HReg r12  = hregARM_R12(); /* spill temp */
2576         HReg base = r8;
2577         vassert(0 == (offsetB & 3));
2578         if (offsetB >= 1024) {
2579            Int offsetKB = offsetB / 1024;
2580            /* r12 = r8 + (1024 * offsetKB) */
2581            *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
2582                               ARMRI84_I84(offsetKB, 11));
2583            offsetB -= (1024 * offsetKB);
2584            base = r12;
2585         }
2586         vassert(offsetB <= 1020);
2587         if (rclass == HRcFlt32) {
2588            *i2 = ARMInstr_VLdStS( False/*!isLoad*/,
2589                                   rreg,
2590                                   mkARMAModeV(base, offsetB) );
2591         } else {
2592            *i2 = ARMInstr_VLdStD( False/*!isLoad*/,
2593                                   rreg,
2594                                   mkARMAModeV(base, offsetB) );
2595         }
2596         return;
2597      }
2598      case HRcVec128: {
2599         HReg r8  = hregARM_R8();
2600         HReg r12 = hregARM_R12();
2601         *i1 = ARMInstr_Add32(r12, r8, offsetB);
2602         *i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
2603         return;
2604      }
2605      default:
2606         ppHRegClass(rclass);
2607         vpanic("genSpill_ARM: unimplemented regclass");
2608   }
2609}
2610
2611void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2612                     HReg rreg, Int offsetB, Bool mode64 )
2613{
2614   HRegClass rclass;
2615   vassert(offsetB >= 0);
2616   vassert(!hregIsVirtual(rreg));
2617   vassert(mode64 == False);
2618   *i1 = *i2 = NULL;
2619   rclass = hregClass(rreg);
2620   switch (rclass) {
2621      case HRcInt32:
2622         vassert(offsetB <= 4095);
2623         *i1 = ARMInstr_LdSt32( ARMcc_AL, True/*isLoad*/,
2624                                rreg,
2625                                ARMAMode1_RI(hregARM_R8(), offsetB) );
2626         return;
2627      case HRcFlt32:
2628      case HRcFlt64: {
2629         HReg r8   = hregARM_R8();  /* baseblock */
2630         HReg r12  = hregARM_R12(); /* spill temp */
2631         HReg base = r8;
2632         vassert(0 == (offsetB & 3));
2633         if (offsetB >= 1024) {
2634            Int offsetKB = offsetB / 1024;
2635            /* r12 = r8 + (1024 * offsetKB) */
2636            *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
2637                               ARMRI84_I84(offsetKB, 11));
2638            offsetB -= (1024 * offsetKB);
2639            base = r12;
2640         }
2641         vassert(offsetB <= 1020);
2642         if (rclass == HRcFlt32) {
2643            *i2 = ARMInstr_VLdStS( True/*isLoad*/,
2644                                   rreg,
2645                                   mkARMAModeV(base, offsetB) );
2646         } else {
2647            *i2 = ARMInstr_VLdStD( True/*isLoad*/,
2648                                   rreg,
2649                                   mkARMAModeV(base, offsetB) );
2650         }
2651         return;
2652      }
2653      case HRcVec128: {
2654         HReg r8  = hregARM_R8();
2655         HReg r12 = hregARM_R12();
2656         *i1 = ARMInstr_Add32(r12, r8, offsetB);
2657         *i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(r12));
2658         return;
2659      }
2660      default:
2661         ppHRegClass(rclass);
2662         vpanic("genReload_ARM: unimplemented regclass");
2663   }
2664}
2665
2666
2667/* Emit an instruction into buf and return the number of bytes used.
2668   Note that buf is not the insn's final place, and therefore it is
2669   imperative to emit position-independent code. */
2670
2671static inline UInt iregEnc ( HReg r )
2672{
2673   UInt n;
2674   vassert(hregClass(r) == HRcInt32);
2675   vassert(!hregIsVirtual(r));
2676   n = hregEncoding(r);
2677   vassert(n <= 15);
2678   return n;
2679}
2680
2681static inline UInt dregEnc ( HReg r )
2682{
2683   UInt n;
2684   vassert(hregClass(r) == HRcFlt64);
2685   vassert(!hregIsVirtual(r));
2686   n = hregEncoding(r);
2687   vassert(n <= 31);
2688   return n;
2689}
2690
2691static inline UInt fregEnc ( HReg r )
2692{
2693   UInt n;
2694   vassert(hregClass(r) == HRcFlt32);
2695   vassert(!hregIsVirtual(r));
2696   n = hregEncoding(r);
2697   vassert(n <= 31);
2698   return n;
2699}
2700
2701static inline UInt qregEnc ( HReg r )
2702{
2703   UInt n;
2704   vassert(hregClass(r) == HRcVec128);
2705   vassert(!hregIsVirtual(r));
2706   n = hregEncoding(r);
2707   vassert(n <= 15);
2708   return n;
2709}
2710
2711#define BITS4(zzb3,zzb2,zzb1,zzb0) \
2712   (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
2713#define X0000  BITS4(0,0,0,0)
2714#define X0001  BITS4(0,0,0,1)
2715#define X0010  BITS4(0,0,1,0)
2716#define X0011  BITS4(0,0,1,1)
2717#define X0100  BITS4(0,1,0,0)
2718#define X0101  BITS4(0,1,0,1)
2719#define X0110  BITS4(0,1,1,0)
2720#define X0111  BITS4(0,1,1,1)
2721#define X1000  BITS4(1,0,0,0)
2722#define X1001  BITS4(1,0,0,1)
2723#define X1010  BITS4(1,0,1,0)
2724#define X1011  BITS4(1,0,1,1)
2725#define X1100  BITS4(1,1,0,0)
2726#define X1101  BITS4(1,1,0,1)
2727#define X1110  BITS4(1,1,1,0)
2728#define X1111  BITS4(1,1,1,1)
2729
2730#define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
2731   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
2732    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
2733    (((zzx3) & 0xF) << 12))
2734
2735#define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2)        \
2736   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
2737    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
2738    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8))
2739
2740#define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0)        \
2741   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
2742    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
2743    (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) <<  0))
2744
2745#define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
2746  ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2747   (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
2748   (((zzx0) & 0xF) << 0))
2749
2750#define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0)  \
2751   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
2752    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
2753    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8) |  \
2754    (((zzx1) & 0xF) <<  4) | (((zzx0) & 0xF) <<  0))
2755
2756#define XX______(zzx7,zzx6) \
2757   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
2758
2759/* Generate a skeletal insn that involves an a RI84 shifter operand.
2760   Returns a word which is all zeroes apart from bits 25 and 11..0,
2761   since it is those that encode the shifter operand (at least to the
2762   extent that we care about it.) */
2763static UInt skeletal_RI84 ( ARMRI84* ri )
2764{
2765   UInt instr;
2766   if (ri->tag == ARMri84_I84) {
2767      vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
2768      vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
2769      instr = 1 << 25;
2770      instr |= (ri->ARMri84.I84.imm4 << 8);
2771      instr |= ri->ARMri84.I84.imm8;
2772   } else {
2773      instr = 0 << 25;
2774      instr |= iregEnc(ri->ARMri84.R.reg);
2775   }
2776   return instr;
2777}
2778
2779/* Ditto for RI5.  Resulting word is zeroes apart from bit 4 and bits
2780   11..7. */
2781static UInt skeletal_RI5 ( ARMRI5* ri )
2782{
2783   UInt instr;
2784   if (ri->tag == ARMri5_I5) {
2785      UInt imm5 = ri->ARMri5.I5.imm5;
2786      vassert(imm5 >= 1 && imm5 <= 31);
2787      instr = 0 << 4;
2788      instr |= imm5 << 7;
2789   } else {
2790      instr = 1 << 4;
2791      instr |= iregEnc(ri->ARMri5.R.reg) << 8;
2792   }
2793   return instr;
2794}
2795
2796
2797/* Get an immediate into a register, using only that
2798   register.  (very lame..) */
2799static UInt* imm32_to_ireg ( UInt* p, Int rD, UInt imm32 )
2800{
2801   UInt instr;
2802   vassert(rD >= 0 && rD <= 14); // r15 not good to mess with!
2803#if 0
2804   if (0 == (imm32 & ~0xFF)) {
2805      /* mov with a immediate shifter operand of (0, imm32) (??) */
2806      instr = XXXXXX__(X1110,X0011,X1010,X0000,rD,X0000);
2807      instr |= imm32;
2808      *p++ = instr;
2809   } else {
2810      // this is very bad; causes Dcache pollution
2811      // ldr  rD, [pc]
2812      instr = XXXXX___(X1110,X0101,X1001,X1111,rD);
2813      *p++ = instr;
2814      // b .+8
2815      instr = 0xEA000000;
2816      *p++ = instr;
2817      // .word imm32
2818      *p++ = imm32;
2819   }
2820#else
2821   if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2822      /* Generate movw rD, #low16.  Then, if the high 16 are
2823         nonzero, generate movt rD, #high16. */
2824      UInt lo16 = imm32 & 0xFFFF;
2825      UInt hi16 = (imm32 >> 16) & 0xFFFF;
2826      instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
2827                       (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
2828                       lo16 & 0xF);
2829      *p++ = instr;
2830      if (hi16 != 0) {
2831         instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
2832                          (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
2833                          hi16 & 0xF);
2834         *p++ = instr;
2835      }
2836   } else {
2837      UInt imm, rot;
2838      UInt op = X1010;
2839      UInt rN = 0;
2840      if ((imm32 & 0xFF) || (imm32 == 0)) {
2841         imm = imm32 & 0xFF;
2842         rot = 0;
2843         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2844         *p++ = instr;
2845         op = X1000;
2846         rN = rD;
2847      }
2848      if (imm32 & 0xFF000000) {
2849         imm = (imm32 >> 24) & 0xFF;
2850         rot = 4;
2851         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2852         *p++ = instr;
2853         op = X1000;
2854         rN = rD;
2855      }
2856      if (imm32 & 0xFF0000) {
2857         imm = (imm32 >> 16) & 0xFF;
2858         rot = 8;
2859         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2860         *p++ = instr;
2861         op = X1000;
2862         rN = rD;
2863      }
2864      if (imm32 & 0xFF00) {
2865         imm = (imm32 >> 8) & 0xFF;
2866         rot = 12;
2867         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2868         *p++ = instr;
2869         op = X1000;
2870         rN = rD;
2871      }
2872   }
2873#endif
2874   return p;
2875}
2876
2877/* Get an immediate into a register, using only that register, and
2878   generating exactly 2 instructions, regardless of the value of the
2879   immediate. This is used when generating sections of code that need
2880   to be patched later, so as to guarantee a specific size. */
2881static UInt* imm32_to_ireg_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
2882{
2883   if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2884      /* Generate movw rD, #low16 ;  movt rD, #high16. */
2885      UInt lo16 = imm32 & 0xFFFF;
2886      UInt hi16 = (imm32 >> 16) & 0xFFFF;
2887      UInt instr;
2888      instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
2889                       (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
2890                       lo16 & 0xF);
2891      *p++ = instr;
2892      instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
2893                       (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
2894                       hi16 & 0xF);
2895      *p++ = instr;
2896   } else {
2897      vassert(0); /* lose */
2898   }
2899   return p;
2900}
2901
2902/* Check whether p points at a 2-insn sequence cooked up by
2903   imm32_to_ireg_EXACTLY2(). */
2904static Bool is_imm32_to_ireg_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
2905{
2906   if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2907      /* Generate movw rD, #low16 ;  movt rD, #high16. */
2908      UInt lo16 = imm32 & 0xFFFF;
2909      UInt hi16 = (imm32 >> 16) & 0xFFFF;
2910      UInt i0, i1;
2911      i0 = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
2912                    (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
2913                    lo16 & 0xF);
2914      i1 = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
2915                    (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
2916                    hi16 & 0xF);
2917      return p[0] == i0 && p[1] == i1;
2918   } else {
2919      vassert(0); /* lose */
2920   }
2921}
2922
2923
2924static UInt* do_load_or_store32 ( UInt* p,
2925                                  Bool isLoad, UInt rD, ARMAMode1* am )
2926{
2927   vassert(rD <= 12);
2928   vassert(am->tag == ARMam1_RI); // RR case is not handled
2929   UInt bB = 0;
2930   UInt bL = isLoad ? 1 : 0;
2931   Int  simm12;
2932   UInt instr, bP;
2933   if (am->ARMam1.RI.simm13 < 0) {
2934      bP = 0;
2935      simm12 = -am->ARMam1.RI.simm13;
2936   } else {
2937      bP = 1;
2938      simm12 = am->ARMam1.RI.simm13;
2939   }
2940   vassert(simm12 >= 0 && simm12 <= 4095);
2941   instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
2942                    iregEnc(am->ARMam1.RI.reg),
2943                    rD);
2944   instr |= simm12;
2945   *p++ = instr;
2946   return p;
2947}
2948
2949
2950/* Emit an instruction into buf and return the number of bytes used.
2951   Note that buf is not the insn's final place, and therefore it is
2952   imperative to emit position-independent code.  If the emitted
2953   instruction was a profiler inc, set *is_profInc to True, else
2954   leave it unchanged. */
2955
2956Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc,
2957                    UChar* buf, Int nbuf, const ARMInstr* i,
2958                    Bool mode64, VexEndness endness_host,
2959                    const void* disp_cp_chain_me_to_slowEP,
2960                    const void* disp_cp_chain_me_to_fastEP,
2961                    const void* disp_cp_xindir,
2962                    const void* disp_cp_xassisted )
2963{
2964   UInt* p = (UInt*)buf;
2965   vassert(nbuf >= 32);
2966   vassert(mode64 == False);
2967   vassert(0 == (((HWord)buf) & 3));
2968
2969   switch (i->tag) {
2970      case ARMin_Alu: {
2971         UInt     instr, subopc;
2972         UInt     rD   = iregEnc(i->ARMin.Alu.dst);
2973         UInt     rN   = iregEnc(i->ARMin.Alu.argL);
2974         ARMRI84* argR = i->ARMin.Alu.argR;
2975         switch (i->ARMin.Alu.op) {
2976            case ARMalu_ADDS: /* fallthru */
2977            case ARMalu_ADD:  subopc = X0100; break;
2978            case ARMalu_ADC:  subopc = X0101; break;
2979            case ARMalu_SUBS: /* fallthru */
2980            case ARMalu_SUB:  subopc = X0010; break;
2981            case ARMalu_SBC:  subopc = X0110; break;
2982            case ARMalu_AND:  subopc = X0000; break;
2983            case ARMalu_BIC:  subopc = X1110; break;
2984            case ARMalu_OR:   subopc = X1100; break;
2985            case ARMalu_XOR:  subopc = X0001; break;
2986            default: goto bad;
2987         }
2988         instr = skeletal_RI84(argR);
2989         instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
2990                           (subopc << 1) & 0xF, rN, rD);
2991         if (i->ARMin.Alu.op == ARMalu_ADDS
2992             || i->ARMin.Alu.op == ARMalu_SUBS) {
2993            instr |= 1<<20;  /* set the S bit */
2994         }
2995         *p++ = instr;
2996         goto done;
2997      }
2998      case ARMin_Shift: {
2999         UInt    instr, subopc;
3000         UInt    rD   = iregEnc(i->ARMin.Shift.dst);
3001         UInt    rM   = iregEnc(i->ARMin.Shift.argL);
3002         ARMRI5* argR = i->ARMin.Shift.argR;
3003         switch (i->ARMin.Shift.op) {
3004            case ARMsh_SHL: subopc = X0000; break;
3005            case ARMsh_SHR: subopc = X0001; break;
3006            case ARMsh_SAR: subopc = X0010; break;
3007            default: goto bad;
3008         }
3009         instr = skeletal_RI5(argR);
3010         instr |= XXXXX__X(X1110,X0001,X1010,X0000,rD, /* _ _ */ rM);
3011         instr |= (subopc & 3) << 5;
3012         *p++ = instr;
3013         goto done;
3014      }
3015      case ARMin_Unary: {
3016         UInt instr;
3017         UInt rDst = iregEnc(i->ARMin.Unary.dst);
3018         UInt rSrc = iregEnc(i->ARMin.Unary.src);
3019         switch (i->ARMin.Unary.op) {
3020            case ARMun_CLZ:
3021               instr = XXXXXXXX(X1110,X0001,X0110,X1111,
3022                                rDst,X1111,X0001,rSrc);
3023               *p++ = instr;
3024               goto done;
3025            case ARMun_NEG: /* RSB rD,rS,#0 */
3026               instr = XXXXX___(X1110,0x2,0x6,rSrc,rDst);
3027               *p++ = instr;
3028               goto done;
3029            case ARMun_NOT: {
3030               UInt subopc = X1111; /* MVN */
3031               instr = rSrc;
3032               instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3033                                 (subopc << 1) & 0xF, 0, rDst);
3034               *p++ = instr;
3035               goto done;
3036            }
3037            default:
3038               break;
3039         }
3040         goto bad;
3041      }
3042      case ARMin_CmpOrTst: {
3043         UInt instr  = skeletal_RI84(i->ARMin.CmpOrTst.argR);
3044         UInt subopc = i->ARMin.CmpOrTst.isCmp ? X1010 : X1000;
3045         UInt SBZ    = 0;
3046         instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3047                           ((subopc << 1) & 0xF) | 1,
3048                           iregEnc(i->ARMin.CmpOrTst.argL), SBZ );
3049         *p++ = instr;
3050         goto done;
3051      }
3052      case ARMin_Mov: {
3053         UInt instr  = skeletal_RI84(i->ARMin.Mov.src);
3054         UInt subopc = X1101; /* MOV */
3055         UInt SBZ    = 0;
3056         instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3057                           (subopc << 1) & 0xF, SBZ,
3058                           iregEnc(i->ARMin.Mov.dst));
3059         *p++ = instr;
3060         goto done;
3061      }
3062      case ARMin_Imm32: {
3063         p = imm32_to_ireg( (UInt*)p, iregEnc(i->ARMin.Imm32.dst),
3064                                      i->ARMin.Imm32.imm32 );
3065         goto done;
3066      }
3067      case ARMin_LdSt32:
3068      case ARMin_LdSt8U: {
3069         UInt        bL, bB;
3070         HReg        rD;
3071         ARMAMode1*  am;
3072         ARMCondCode cc;
3073         if (i->tag == ARMin_LdSt32) {
3074            bB = 0;
3075            bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
3076            am = i->ARMin.LdSt32.amode;
3077            rD = i->ARMin.LdSt32.rD;
3078            cc = i->ARMin.LdSt32.cc;
3079         } else {
3080            bB = 1;
3081            bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
3082            am = i->ARMin.LdSt8U.amode;
3083            rD = i->ARMin.LdSt8U.rD;
3084            cc = i->ARMin.LdSt8U.cc;
3085         }
3086         vassert(cc != ARMcc_NV);
3087         if (am->tag == ARMam1_RI) {
3088            Int  simm12;
3089            UInt instr, bP;
3090            if (am->ARMam1.RI.simm13 < 0) {
3091               bP = 0;
3092               simm12 = -am->ARMam1.RI.simm13;
3093            } else {
3094               bP = 1;
3095               simm12 = am->ARMam1.RI.simm13;
3096            }
3097            vassert(simm12 >= 0 && simm12 <= 4095);
3098            instr = XXXXX___(cc,X0101,BITS4(bP,bB,0,bL),
3099                             iregEnc(am->ARMam1.RI.reg),
3100                             iregEnc(rD));
3101            instr |= simm12;
3102            *p++ = instr;
3103            goto done;
3104         } else {
3105            // RR case
3106            goto bad;
3107         }
3108      }
3109      case ARMin_LdSt16: {
3110         HReg        rD = i->ARMin.LdSt16.rD;
3111         UInt        bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
3112         UInt        bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
3113         ARMAMode2*  am = i->ARMin.LdSt16.amode;
3114         ARMCondCode cc = i->ARMin.LdSt16.cc;
3115         vassert(cc != ARMcc_NV);
3116         if (am->tag == ARMam2_RI) {
3117            HReg rN = am->ARMam2.RI.reg;
3118            Int  simm8;
3119            UInt bP, imm8hi, imm8lo, instr;
3120            if (am->ARMam2.RI.simm9 < 0) {
3121               bP = 0;
3122               simm8 = -am->ARMam2.RI.simm9;
3123            } else {
3124               bP = 1;
3125               simm8 = am->ARMam2.RI.simm9;
3126            }
3127            vassert(simm8 >= 0 && simm8 <= 255);
3128            imm8hi = (simm8 >> 4) & 0xF;
3129            imm8lo = simm8 & 0xF;
3130            vassert(!(bL == 0 && bS == 1)); // "! signed store"
3131            /**/ if (bL == 0 && bS == 0) {
3132               // strh
3133               instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,0), iregEnc(rN),
3134                                iregEnc(rD), imm8hi, X1011, imm8lo);
3135               *p++ = instr;
3136               goto done;
3137            }
3138            else if (bL == 1 && bS == 0) {
3139               // ldrh
3140               instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3141                                iregEnc(rD), imm8hi, X1011, imm8lo);
3142               *p++ = instr;
3143               goto done;
3144            }
3145            else if (bL == 1 && bS == 1) {
3146               // ldrsh
3147               instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3148                                iregEnc(rD), imm8hi, X1111, imm8lo);
3149               *p++ = instr;
3150               goto done;
3151            }
3152            else vassert(0); // ill-constructed insn
3153         } else {
3154            // RR case
3155            goto bad;
3156         }
3157      }
3158      case ARMin_Ld8S: {
3159         HReg        rD = i->ARMin.Ld8S.rD;
3160         ARMAMode2*  am = i->ARMin.Ld8S.amode;
3161         ARMCondCode cc = i->ARMin.Ld8S.cc;
3162         vassert(cc != ARMcc_NV);
3163         if (am->tag == ARMam2_RI) {
3164            HReg rN = am->ARMam2.RI.reg;
3165            Int  simm8;
3166            UInt bP, imm8hi, imm8lo, instr;
3167            if (am->ARMam2.RI.simm9 < 0) {
3168               bP = 0;
3169               simm8 = -am->ARMam2.RI.simm9;
3170            } else {
3171               bP = 1;
3172               simm8 = am->ARMam2.RI.simm9;
3173            }
3174            vassert(simm8 >= 0 && simm8 <= 255);
3175            imm8hi = (simm8 >> 4) & 0xF;
3176            imm8lo = simm8 & 0xF;
3177            // ldrsb
3178            instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3179                             iregEnc(rD), imm8hi, X1101, imm8lo);
3180            *p++ = instr;
3181            goto done;
3182         } else {
3183            // RR case
3184            goto bad;
3185         }
3186      }
3187
3188      case ARMin_XDirect: {
3189         /* NB: what goes on here has to be very closely coordinated
3190            with the chainXDirect_ARM and unchainXDirect_ARM below. */
3191         /* We're generating chain-me requests here, so we need to be
3192            sure this is actually allowed -- no-redir translations
3193            can't use chain-me's.  Hence: */
3194         vassert(disp_cp_chain_me_to_slowEP != NULL);
3195         vassert(disp_cp_chain_me_to_fastEP != NULL);
3196
3197         /* Use ptmp for backpatching conditional jumps. */
3198         UInt* ptmp = NULL;
3199
3200         /* First off, if this is conditional, create a conditional
3201            jump over the rest of it.  Or at least, leave a space for
3202            it that we will shortly fill in. */
3203         if (i->ARMin.XDirect.cond != ARMcc_AL) {
3204            vassert(i->ARMin.XDirect.cond != ARMcc_NV);
3205            ptmp = p;
3206            *p++ = 0;
3207         }
3208
3209         /* Update the guest R15T. */
3210         /* movw r12, lo16(dstGA) */
3211         /* movt r12, hi16(dstGA) */
3212         /* str r12, amR15T */
3213         p = imm32_to_ireg(p, /*r*/12, i->ARMin.XDirect.dstGA);
3214         p = do_load_or_store32(p, False/*!isLoad*/,
3215                                /*r*/12, i->ARMin.XDirect.amR15T);
3216
3217         /* --- FIRST PATCHABLE BYTE follows --- */
3218         /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
3219            calling to) backs up the return address, so as to find the
3220            address of the first patchable byte.  So: don't change the
3221            number of instructions (3) below. */
3222         /* movw r12, lo16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
3223         /* movt r12, hi16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
3224         /* blx  r12  (A1) */
3225         const void* disp_cp_chain_me
3226                  = i->ARMin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
3227                                              : disp_cp_chain_me_to_slowEP;
3228         p = imm32_to_ireg_EXACTLY2(p, /*r*/12,
3229                                    (UInt)(Addr)disp_cp_chain_me);
3230         *p++ = 0xE12FFF3C;
3231         /* --- END of PATCHABLE BYTES --- */
3232
3233         /* Fix up the conditional jump, if there was one. */
3234         if (i->ARMin.XDirect.cond != ARMcc_AL) {
3235            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3236            vassert(delta > 0 && delta < 40);
3237            vassert((delta & 3) == 0);
3238            UInt notCond = 1 ^ (UInt)i->ARMin.XDirect.cond;
3239            vassert(notCond <= 13); /* Neither AL nor NV */
3240            delta = (delta >> 2) - 2;
3241            *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3242         }
3243         goto done;
3244      }
3245
3246      case ARMin_XIndir: {
3247         /* We're generating transfers that could lead indirectly to a
3248            chain-me, so we need to be sure this is actually allowed
3249            -- no-redir translations are not allowed to reach normal
3250            translations without going through the scheduler.  That
3251            means no XDirects or XIndirs out from no-redir
3252            translations.  Hence: */
3253         vassert(disp_cp_xindir != NULL);
3254
3255         /* Use ptmp for backpatching conditional jumps. */
3256         UInt* ptmp = NULL;
3257
3258         /* First off, if this is conditional, create a conditional
3259            jump over the rest of it.  Or at least, leave a space for
3260            it that we will shortly fill in. */
3261         if (i->ARMin.XIndir.cond != ARMcc_AL) {
3262            vassert(i->ARMin.XIndir.cond != ARMcc_NV);
3263            ptmp = p;
3264            *p++ = 0;
3265         }
3266
3267         /* Update the guest R15T. */
3268         /* str r-dstGA, amR15T */
3269         p = do_load_or_store32(p, False/*!isLoad*/,
3270                                iregEnc(i->ARMin.XIndir.dstGA),
3271                                i->ARMin.XIndir.amR15T);
3272
3273         /* movw r12, lo16(VG_(disp_cp_xindir)) */
3274         /* movt r12, hi16(VG_(disp_cp_xindir)) */
3275         /* bx   r12  (A1) */
3276         p = imm32_to_ireg(p, /*r*/12, (UInt)(Addr)disp_cp_xindir);
3277         *p++ = 0xE12FFF1C;
3278
3279         /* Fix up the conditional jump, if there was one. */
3280         if (i->ARMin.XIndir.cond != ARMcc_AL) {
3281            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3282            vassert(delta > 0 && delta < 40);
3283            vassert((delta & 3) == 0);
3284            UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
3285            vassert(notCond <= 13); /* Neither AL nor NV */
3286            delta = (delta >> 2) - 2;
3287            *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3288         }
3289         goto done;
3290      }
3291
3292      case ARMin_XAssisted: {
3293         /* Use ptmp for backpatching conditional jumps. */
3294         UInt* ptmp = NULL;
3295
3296         /* First off, if this is conditional, create a conditional
3297            jump over the rest of it.  Or at least, leave a space for
3298            it that we will shortly fill in. */
3299         if (i->ARMin.XAssisted.cond != ARMcc_AL) {
3300            vassert(i->ARMin.XAssisted.cond != ARMcc_NV);
3301            ptmp = p;
3302            *p++ = 0;
3303         }
3304
3305         /* Update the guest R15T. */
3306         /* str r-dstGA, amR15T */
3307         p = do_load_or_store32(p, False/*!isLoad*/,
3308                                iregEnc(i->ARMin.XAssisted.dstGA),
3309                                i->ARMin.XAssisted.amR15T);
3310
3311         /* movw r8,  $magic_number */
3312         UInt trcval = 0;
3313         switch (i->ARMin.XAssisted.jk) {
3314            case Ijk_ClientReq:   trcval = VEX_TRC_JMP_CLIENTREQ;   break;
3315            case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
3316            //case Ijk_Sys_int128:  trcval = VEX_TRC_JMP_SYS_INT128;  break;
3317            case Ijk_Yield:       trcval = VEX_TRC_JMP_YIELD;       break;
3318            //case Ijk_EmWarn:      trcval = VEX_TRC_JMP_EMWARN;      break;
3319            //case Ijk_MapFail:     trcval = VEX_TRC_JMP_MAPFAIL;     break;
3320            case Ijk_NoDecode:    trcval = VEX_TRC_JMP_NODECODE;    break;
3321            case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
3322            case Ijk_NoRedir:     trcval = VEX_TRC_JMP_NOREDIR;     break;
3323            //case Ijk_SigTRAP:     trcval = VEX_TRC_JMP_SIGTRAP;     break;
3324            //case Ijk_SigSEGV:     trcval = VEX_TRC_JMP_SIGSEGV;     break;
3325            case Ijk_Boring:      trcval = VEX_TRC_JMP_BORING;      break;
3326            /* We don't expect to see the following being assisted. */
3327            //case Ijk_Ret:
3328            //case Ijk_Call:
3329            /* fallthrough */
3330            default:
3331               ppIRJumpKind(i->ARMin.XAssisted.jk);
3332               vpanic("emit_ARMInstr.ARMin_XAssisted: unexpected jump kind");
3333         }
3334         vassert(trcval != 0);
3335         p = imm32_to_ireg(p, /*r*/8, trcval);
3336
3337         /* movw r12, lo16(VG_(disp_cp_xassisted)) */
3338         /* movt r12, hi16(VG_(disp_cp_xassisted)) */
3339         /* bx   r12  (A1) */
3340         p = imm32_to_ireg(p, /*r*/12, (UInt)(Addr)disp_cp_xassisted);
3341         *p++ = 0xE12FFF1C;
3342
3343         /* Fix up the conditional jump, if there was one. */
3344         if (i->ARMin.XAssisted.cond != ARMcc_AL) {
3345            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3346            vassert(delta > 0 && delta < 40);
3347            vassert((delta & 3) == 0);
3348            UInt notCond = 1 ^ (UInt)i->ARMin.XAssisted.cond;
3349            vassert(notCond <= 13); /* Neither AL nor NV */
3350            delta = (delta >> 2) - 2;
3351            *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3352         }
3353         goto done;
3354      }
3355
3356      case ARMin_CMov: {
3357         UInt instr  = skeletal_RI84(i->ARMin.CMov.src);
3358         UInt subopc = X1101; /* MOV */
3359         UInt SBZ    = 0;
3360         instr |= XXXXX___(i->ARMin.CMov.cond, (1 & (subopc >> 3)),
3361                           (subopc << 1) & 0xF, SBZ,
3362                           iregEnc(i->ARMin.CMov.dst));
3363         *p++ = instr;
3364         goto done;
3365      }
3366
3367      case ARMin_Call: {
3368         UInt instr;
3369         /* Decide on a scratch reg used to hold to the call address.
3370            This has to be done as per the comments in getRegUsage. */
3371         Int scratchNo;
3372         switch (i->ARMin.Call.nArgRegs) {
3373            case 0:  scratchNo = 0;  break;
3374            case 1:  scratchNo = 1;  break;
3375            case 2:  scratchNo = 2;  break;
3376            case 3:  scratchNo = 3;  break;
3377            case 4:  scratchNo = 11; break;
3378            default: vassert(0);
3379         }
3380         /* If we don't need to do any fixup actions in the case that
3381            the call doesn't happen, just do the simple thing and emit
3382            straight-line code.  We hope this is the common case. */
3383         if (i->ARMin.Call.cond == ARMcc_AL/*call always happens*/
3384             || i->ARMin.Call.rloc.pri == RLPri_None/*no fixup action*/) {
3385            // r"scratchNo" = &target
3386            p = imm32_to_ireg( (UInt*)p,
3387                               scratchNo, (UInt)i->ARMin.Call.target );
3388            // blx{cond} r"scratchNo"
3389            instr = XXX___XX(i->ARMin.Call.cond, X0001, X0010, /*___*/
3390                             X0011, scratchNo);
3391            instr |= 0xFFF << 8; // stick in the SBOnes
3392            *p++ = instr;
3393         } else {
3394            Int delta;
3395            /* Complex case.  We have to generate an if-then-else
3396               diamond. */
3397            // before:
3398            //   b{!cond} else:
3399            //   r"scratchNo" = &target
3400            //   blx{AL} r"scratchNo"
3401            // preElse:
3402            //   b after:
3403            // else:
3404            //   mov r0, #0x55555555  // possibly
3405            //   mov r1, r0           // possibly
3406            // after:
3407
3408            // before:
3409            UInt* pBefore = p;
3410
3411            //   b{!cond} else:  // ptmp1 points here
3412            *p++ = 0; // filled in later
3413
3414            //   r"scratchNo" = &target
3415            p = imm32_to_ireg( (UInt*)p,
3416                               scratchNo, (UInt)i->ARMin.Call.target );
3417
3418            //   blx{AL} r"scratchNo"
3419            instr = XXX___XX(ARMcc_AL, X0001, X0010, /*___*/
3420                             X0011, scratchNo);
3421            instr |= 0xFFF << 8; // stick in the SBOnes
3422            *p++ = instr;
3423
3424            // preElse:
3425            UInt* pPreElse = p;
3426
3427            //   b after:
3428            *p++ = 0; // filled in later
3429
3430            // else:
3431            delta = (UChar*)p - (UChar*)pBefore;
3432            delta = (delta >> 2) - 2;
3433            *pBefore
3434               = XX______(1 ^ i->ARMin.Call.cond, X1010) | (delta & 0xFFFFFF);
3435
3436            /* Do the 'else' actions */
3437            switch (i->ARMin.Call.rloc.pri) {
3438               case RLPri_Int:
3439                  p = imm32_to_ireg_EXACTLY2(p, /*r*/0, 0x55555555);
3440                  break;
3441               case RLPri_2Int:
3442                  vassert(0); //ATC
3443                  p = imm32_to_ireg_EXACTLY2(p, /*r*/0, 0x55555555);
3444                  /* mov r1, r0 */
3445                  *p++ = 0xE1A01000;
3446                  break;
3447               case RLPri_None: case RLPri_INVALID: default:
3448                  vassert(0);
3449            }
3450
3451            // after:
3452            delta = (UChar*)p - (UChar*)pPreElse;
3453            delta = (delta >> 2) - 2;
3454            *pPreElse = XX______(ARMcc_AL, X1010) | (delta & 0xFFFFFF);
3455         }
3456
3457         goto done;
3458      }
3459
3460      case ARMin_Mul: {
3461         /* E0000392   mul     r0, r2, r3
3462            E0810392   umull   r0(LO), r1(HI), r2, r3
3463            E0C10392   smull   r0(LO), r1(HI), r2, r3
3464         */
3465         switch (i->ARMin.Mul.op) {
3466            case ARMmul_PLAIN: *p++ = 0xE0000392; goto done;
3467            case ARMmul_ZX:    *p++ = 0xE0810392; goto done;
3468            case ARMmul_SX:    *p++ = 0xE0C10392; goto done;
3469            default: vassert(0);
3470         }
3471         goto bad;
3472      }
3473      case ARMin_LdrEX: {
3474         /* E1D42F9F   ldrexb r2, [r4]
3475            E1F42F9F   ldrexh r2, [r4]
3476            E1942F9F   ldrex  r2, [r4]
3477            E1B42F9F   ldrexd r2, r3, [r4]
3478         */
3479         switch (i->ARMin.LdrEX.szB) {
3480            case 1: *p++ = 0xE1D42F9F; goto done;
3481            case 2: *p++ = 0xE1F42F9F; goto done;
3482            case 4: *p++ = 0xE1942F9F; goto done;
3483            case 8: *p++ = 0xE1B42F9F; goto done;
3484            default: break;
3485         }
3486         goto bad;
3487      }
3488      case ARMin_StrEX: {
3489         /* E1C40F92   strexb r0, r2, [r4]
3490            E1E40F92   strexh r0, r2, [r4]
3491            E1840F92   strex  r0, r2, [r4]
3492            E1A40F92   strexd r0, r2, r3, [r4]
3493         */
3494         switch (i->ARMin.StrEX.szB) {
3495            case 1: *p++ = 0xE1C40F92; goto done;
3496            case 2: *p++ = 0xE1E40F92; goto done;
3497            case 4: *p++ = 0xE1840F92; goto done;
3498            case 8: *p++ = 0xE1A40F92; goto done;
3499            default: break;
3500         }
3501         goto bad;
3502      }
3503      case ARMin_VLdStD: {
3504         UInt dD     = dregEnc(i->ARMin.VLdStD.dD);
3505         UInt rN     = iregEnc(i->ARMin.VLdStD.amode->reg);
3506         Int  simm11 = i->ARMin.VLdStD.amode->simm11;
3507         UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
3508         UInt bU     = simm11 >= 0 ? 1 : 0;
3509         UInt bL     = i->ARMin.VLdStD.isLoad ? 1 : 0;
3510         UInt insn;
3511         vassert(0 == (off8 & 3));
3512         off8 >>= 2;
3513         vassert(0 == (off8 & 0xFFFFFF00));
3514         insn = XXXXXX__(0xE,X1101,BITS4(bU,0,0,bL),rN,dD,X1011);
3515         insn |= off8;
3516         *p++ = insn;
3517         goto done;
3518      }
3519      case ARMin_VLdStS: {
3520         UInt fD     = fregEnc(i->ARMin.VLdStS.fD);
3521         UInt rN     = iregEnc(i->ARMin.VLdStS.amode->reg);
3522         Int  simm11 = i->ARMin.VLdStS.amode->simm11;
3523         UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
3524         UInt bU     = simm11 >= 0 ? 1 : 0;
3525         UInt bL     = i->ARMin.VLdStS.isLoad ? 1 : 0;
3526         UInt bD     = fD & 1;
3527         UInt insn;
3528         vassert(0 == (off8 & 3));
3529         off8 >>= 2;
3530         vassert(0 == (off8 & 0xFFFFFF00));
3531         insn = XXXXXX__(0xE,X1101,BITS4(bU,bD,0,bL),rN, (fD >> 1), X1010);
3532         insn |= off8;
3533         *p++ = insn;
3534         goto done;
3535      }
3536      case ARMin_VAluD: {
3537         UInt dN = dregEnc(i->ARMin.VAluD.argL);
3538         UInt dD = dregEnc(i->ARMin.VAluD.dst);
3539         UInt dM = dregEnc(i->ARMin.VAluD.argR);
3540         UInt pqrs = X1111; /* undefined */
3541         switch (i->ARMin.VAluD.op) {
3542            case ARMvfp_ADD: pqrs = X0110; break;
3543            case ARMvfp_SUB: pqrs = X0111; break;
3544            case ARMvfp_MUL: pqrs = X0100; break;
3545            case ARMvfp_DIV: pqrs = X1000; break;
3546            default: goto bad;
3547         }
3548         vassert(pqrs != X1111);
3549         UInt bP  = (pqrs >> 3) & 1;
3550         UInt bQ  = (pqrs >> 2) & 1;
3551         UInt bR  = (pqrs >> 1) & 1;
3552         UInt bS  = (pqrs >> 0) & 1;
3553         UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,0,bQ,bR), dN, dD,
3554                              X1011, BITS4(0,bS,0,0), dM);
3555         *p++ = insn;
3556         goto done;
3557      }
3558      case ARMin_VAluS: {
3559         UInt dN = fregEnc(i->ARMin.VAluS.argL);
3560         UInt dD = fregEnc(i->ARMin.VAluS.dst);
3561         UInt dM = fregEnc(i->ARMin.VAluS.argR);
3562         UInt bN = dN & 1;
3563         UInt bD = dD & 1;
3564         UInt bM = dM & 1;
3565         UInt pqrs = X1111; /* undefined */
3566         switch (i->ARMin.VAluS.op) {
3567            case ARMvfp_ADD: pqrs = X0110; break;
3568            case ARMvfp_SUB: pqrs = X0111; break;
3569            case ARMvfp_MUL: pqrs = X0100; break;
3570            case ARMvfp_DIV: pqrs = X1000; break;
3571            default: goto bad;
3572         }
3573         vassert(pqrs != X1111);
3574         UInt bP  = (pqrs >> 3) & 1;
3575         UInt bQ  = (pqrs >> 2) & 1;
3576         UInt bR  = (pqrs >> 1) & 1;
3577         UInt bS  = (pqrs >> 0) & 1;
3578         UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
3579                              (dN >> 1), (dD >> 1),
3580                              X1010, BITS4(bN,bS,bM,0), (dM >> 1));
3581         *p++ = insn;
3582         goto done;
3583      }
3584      case ARMin_VUnaryD: {
3585         UInt dD   = dregEnc(i->ARMin.VUnaryD.dst);
3586         UInt dM   = dregEnc(i->ARMin.VUnaryD.src);
3587         UInt insn = 0;
3588         switch (i->ARMin.VUnaryD.op) {
3589            case ARMvfpu_COPY:
3590               insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X0100,dM);
3591               break;
3592            case ARMvfpu_ABS:
3593               insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X1100,dM);
3594               break;
3595            case ARMvfpu_NEG:
3596               insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X0100,dM);
3597               break;
3598            case ARMvfpu_SQRT:
3599               insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X1100,dM);
3600               break;
3601            default:
3602               goto bad;
3603         }
3604         *p++ = insn;
3605         goto done;
3606      }
3607      case ARMin_VUnaryS: {
3608         UInt fD   = fregEnc(i->ARMin.VUnaryS.dst);
3609         UInt fM   = fregEnc(i->ARMin.VUnaryS.src);
3610         UInt insn = 0;
3611         switch (i->ARMin.VUnaryS.op) {
3612            case ARMvfpu_COPY:
3613               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
3614                               (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
3615                               (fM >> 1));
3616               break;
3617            case ARMvfpu_ABS:
3618               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
3619                               (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
3620                               (fM >> 1));
3621               break;
3622            case ARMvfpu_NEG:
3623               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
3624                               (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
3625                               (fM >> 1));
3626               break;
3627            case ARMvfpu_SQRT:
3628               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
3629                               (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
3630                               (fM >> 1));
3631               break;
3632            default:
3633               goto bad;
3634         }
3635         *p++ = insn;
3636         goto done;
3637      }
3638      case ARMin_VCmpD: {
3639         UInt dD   = dregEnc(i->ARMin.VCmpD.argL);
3640         UInt dM   = dregEnc(i->ARMin.VCmpD.argR);
3641         UInt insn = XXXXXXXX(0xE, X1110, X1011, X0100, dD, X1011, X0100, dM);
3642         *p++ = insn;       /* FCMPD dD, dM */
3643         *p++ = 0xEEF1FA10; /* FMSTAT */
3644         goto done;
3645      }
3646      case ARMin_VCMovD: {
3647         UInt cc = (UInt)i->ARMin.VCMovD.cond;
3648         UInt dD = dregEnc(i->ARMin.VCMovD.dst);
3649         UInt dM = dregEnc(i->ARMin.VCMovD.src);
3650         vassert(cc < 16 && cc != ARMcc_AL);
3651         UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
3652         *p++ = insn;
3653         goto done;
3654      }
3655      case ARMin_VCMovS: {
3656         UInt cc = (UInt)i->ARMin.VCMovS.cond;
3657         UInt fD = fregEnc(i->ARMin.VCMovS.dst);
3658         UInt fM = fregEnc(i->ARMin.VCMovS.src);
3659         vassert(cc < 16 && cc != ARMcc_AL);
3660         UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
3661                              X0000,(fD >> 1),X1010,
3662                              BITS4(0,1,(fM & 1),0), (fM >> 1));
3663         *p++ = insn;
3664         goto done;
3665      }
3666      case ARMin_VCvtSD: {
3667         if (i->ARMin.VCvtSD.sToD) {
3668            UInt dD = dregEnc(i->ARMin.VCvtSD.dst);
3669            UInt fM = fregEnc(i->ARMin.VCvtSD.src);
3670            UInt insn = XXXXXXXX(0xE, X1110, X1011, X0111, dD, X1010,
3671                                 BITS4(1,1, (fM & 1), 0),
3672                                 (fM >> 1));
3673            *p++ = insn;
3674            goto done;
3675         } else {
3676            UInt fD = fregEnc(i->ARMin.VCvtSD.dst);
3677            UInt dM = dregEnc(i->ARMin.VCvtSD.src);
3678            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1),
3679                                 X0111, (fD >> 1),
3680                                 X1011, X1100, dM);
3681            *p++ = insn;
3682            goto done;
3683         }
3684      }
3685      case ARMin_VXferD: {
3686         UInt dD  = dregEnc(i->ARMin.VXferD.dD);
3687         UInt rHi = iregEnc(i->ARMin.VXferD.rHi);
3688         UInt rLo = iregEnc(i->ARMin.VXferD.rLo);
3689         /* vmov dD, rLo, rHi is
3690            E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
3691            vmov rLo, rHi, dD is
3692            E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
3693         */
3694         UInt insn
3695            = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
3696                       rHi, rLo, 0xB,
3697                       BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
3698         *p++ = insn;
3699         goto done;
3700      }
3701      case ARMin_VXferS: {
3702         UInt fD  = fregEnc(i->ARMin.VXferS.fD);
3703         UInt rLo = iregEnc(i->ARMin.VXferS.rLo);
3704         /* vmov fD, rLo is
3705            E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
3706            vmov rLo, fD is
3707            E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
3708         */
3709         UInt insn
3710            = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
3711                       (fD >> 1) & 0xF, rLo, 0xA,
3712                       BITS4((fD & 1),0,0,1), 0);
3713         *p++ = insn;
3714         goto done;
3715      }
3716      case ARMin_VCvtID: {
3717         Bool iToD = i->ARMin.VCvtID.iToD;
3718         Bool syned = i->ARMin.VCvtID.syned;
3719         if (iToD && syned) {
3720            // FSITOD: I32S-in-freg to F64-in-dreg
3721            UInt regF = fregEnc(i->ARMin.VCvtID.src);
3722            UInt regD = dregEnc(i->ARMin.VCvtID.dst);
3723            UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
3724                                 X1011, BITS4(1,1,(regF & 1),0),
3725                                 (regF >> 1) & 0xF);
3726            *p++ = insn;
3727            goto done;
3728         }
3729         if (iToD && (!syned)) {
3730            // FUITOD: I32U-in-freg to F64-in-dreg
3731            UInt regF = fregEnc(i->ARMin.VCvtID.src);
3732            UInt regD = dregEnc(i->ARMin.VCvtID.dst);
3733            UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
3734                                 X1011, BITS4(0,1,(regF & 1),0),
3735                                 (regF >> 1) & 0xF);
3736            *p++ = insn;
3737            goto done;
3738         }
3739         if ((!iToD) && syned) {
3740            // FTOSID: F64-in-dreg to I32S-in-freg
3741            UInt regD = dregEnc(i->ARMin.VCvtID.src);
3742            UInt regF = fregEnc(i->ARMin.VCvtID.dst);
3743            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
3744                                 X1101, (regF >> 1) & 0xF,
3745                                 X1011, X0100, regD);
3746            *p++ = insn;
3747            goto done;
3748         }
3749         if ((!iToD) && (!syned)) {
3750            // FTOUID: F64-in-dreg to I32U-in-freg
3751            UInt regD = dregEnc(i->ARMin.VCvtID.src);
3752            UInt regF = fregEnc(i->ARMin.VCvtID.dst);
3753            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
3754                                 X1100, (regF >> 1) & 0xF,
3755                                 X1011, X0100, regD);
3756            *p++ = insn;
3757            goto done;
3758         }
3759         /*UNREACHED*/
3760         vassert(0);
3761      }
3762      case ARMin_FPSCR: {
3763         Bool toFPSCR = i->ARMin.FPSCR.toFPSCR;
3764         UInt iReg    = iregEnc(i->ARMin.FPSCR.iReg);
3765         if (toFPSCR) {
3766            /* fmxr fpscr, iReg is EEE1 iReg A10 */
3767            *p++ = 0xEEE10A10 | ((iReg & 0xF) << 12);
3768            goto done;
3769         }
3770         goto bad; // FPSCR -> iReg case currently ATC
3771      }
3772      case ARMin_MFence: {
3773         // It's not clear (to me) how these relate to the ARMv7
3774         // versions, so let's just use the v7 versions as they
3775         // are at least well documented.
3776         //*p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
3777         //*p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
3778         //*p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4  (ISB) */
3779         *p++ = 0xF57FF04F; /* DSB sy */
3780         *p++ = 0xF57FF05F; /* DMB sy */
3781         *p++ = 0xF57FF06F; /* ISB */
3782         goto done;
3783      }
3784      case ARMin_CLREX: {
3785         *p++ = 0xF57FF01F; /* clrex */
3786         goto done;
3787      }
3788
3789      case ARMin_NLdStQ: {
3790         UInt regD = qregEnc(i->ARMin.NLdStQ.dQ) << 1;
3791         UInt regN, regM;
3792         UInt D = regD >> 4;
3793         UInt bL = i->ARMin.NLdStQ.isLoad ? 1 : 0;
3794         UInt insn;
3795         vassert(hregClass(i->ARMin.NLdStQ.dQ) == HRcVec128);
3796         regD &= 0xF;
3797         if (i->ARMin.NLdStQ.amode->tag == ARMamN_RR) {
3798            regN = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.RR.rN);
3799            regM = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.RR.rM);
3800         } else {
3801            regN = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.R.rN);
3802            regM = 15;
3803         }
3804         insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
3805                              regN, regD, X1010, X1000, regM);
3806         *p++ = insn;
3807         goto done;
3808      }
3809      case ARMin_NLdStD: {
3810         UInt regD = dregEnc(i->ARMin.NLdStD.dD);
3811         UInt regN, regM;
3812         UInt D = regD >> 4;
3813         UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
3814         UInt insn;
3815         vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
3816         regD &= 0xF;
3817         if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
3818            regN = iregEnc(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
3819            regM = iregEnc(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
3820         } else {
3821            regN = iregEnc(i->ARMin.NLdStD.amode->ARMamN.R.rN);
3822            regM = 15;
3823         }
3824         insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
3825                              regN, regD, X0111, X1000, regM);
3826         *p++ = insn;
3827         goto done;
3828      }
3829      case ARMin_NUnaryS: {
3830         UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
3831         UInt regD, D;
3832         UInt regM, M;
3833         UInt size = i->ARMin.NUnaryS.size;
3834         UInt insn;
3835         UInt opc, opc1, opc2;
3836         switch (i->ARMin.NUnaryS.op) {
3837	    case ARMneon_VDUP:
3838               if (i->ARMin.NUnaryS.size >= 16)
3839                  goto bad;
3840               if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
3841                  goto bad;
3842               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
3843                  goto bad;
3844               regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
3845                        ? (qregEnc(i->ARMin.NUnaryS.dst->reg) << 1)
3846                        : dregEnc(i->ARMin.NUnaryS.dst->reg);
3847               regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
3848                        ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1)
3849                        : dregEnc(i->ARMin.NUnaryS.src->reg);
3850               D = regD >> 4;
3851               M = regM >> 4;
3852               regD &= 0xf;
3853               regM &= 0xf;
3854               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
3855                               (i->ARMin.NUnaryS.size & 0xf), regD,
3856                               X1100, BITS4(0,Q,M,0), regM);
3857               *p++ = insn;
3858               goto done;
3859            case ARMneon_SETELEM:
3860               regD = Q ? (qregEnc(i->ARMin.NUnaryS.dst->reg) << 1) :
3861                                dregEnc(i->ARMin.NUnaryS.dst->reg);
3862               regM = iregEnc(i->ARMin.NUnaryS.src->reg);
3863               M = regM >> 4;
3864               D = regD >> 4;
3865               regM &= 0xF;
3866               regD &= 0xF;
3867               if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
3868                  goto bad;
3869               switch (size) {
3870                  case 0:
3871                     if (i->ARMin.NUnaryS.dst->index > 7)
3872                        goto bad;
3873                     opc = X1000 | i->ARMin.NUnaryS.dst->index;
3874                     break;
3875                  case 1:
3876                     if (i->ARMin.NUnaryS.dst->index > 3)
3877                        goto bad;
3878                     opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
3879                     break;
3880                  case 2:
3881                     if (i->ARMin.NUnaryS.dst->index > 1)
3882                        goto bad;
3883                     opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
3884                     break;
3885                  default:
3886                     goto bad;
3887               }
3888               opc1 = (opc >> 2) & 3;
3889               opc2 = opc & 3;
3890               insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
3891                               regD, regM, X1011,
3892                               BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
3893               *p++ = insn;
3894               goto done;
3895            case ARMneon_GETELEMU:
3896               regM = Q ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1) :
3897                                dregEnc(i->ARMin.NUnaryS.src->reg);
3898               regD = iregEnc(i->ARMin.NUnaryS.dst->reg);
3899               M = regM >> 4;
3900               D = regD >> 4;
3901               regM &= 0xF;
3902               regD &= 0xF;
3903               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
3904                  goto bad;
3905               switch (size) {
3906                  case 0:
3907                     if (Q && i->ARMin.NUnaryS.src->index > 7) {
3908                        regM++;
3909                        i->ARMin.NUnaryS.src->index -= 8;
3910                     }
3911                     if (i->ARMin.NUnaryS.src->index > 7)
3912                        goto bad;
3913                     opc = X1000 | i->ARMin.NUnaryS.src->index;
3914                     break;
3915                  case 1:
3916                     if (Q && i->ARMin.NUnaryS.src->index > 3) {
3917                        regM++;
3918                        i->ARMin.NUnaryS.src->index -= 4;
3919                     }
3920                     if (i->ARMin.NUnaryS.src->index > 3)
3921                        goto bad;
3922                     opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
3923                     break;
3924                  case 2:
3925                     goto bad;
3926                  default:
3927                     goto bad;
3928               }
3929               opc1 = (opc >> 2) & 3;
3930               opc2 = opc & 3;
3931               insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
3932                               regM, regD, X1011,
3933                               BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
3934               *p++ = insn;
3935               goto done;
3936            case ARMneon_GETELEMS:
3937               regM = Q ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1) :
3938                                dregEnc(i->ARMin.NUnaryS.src->reg);
3939               regD = iregEnc(i->ARMin.NUnaryS.dst->reg);
3940               M = regM >> 4;
3941               D = regD >> 4;
3942               regM &= 0xF;
3943               regD &= 0xF;
3944               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
3945                  goto bad;
3946               switch (size) {
3947                  case 0:
3948                     if (Q && i->ARMin.NUnaryS.src->index > 7) {
3949                        regM++;
3950                        i->ARMin.NUnaryS.src->index -= 8;
3951                     }
3952                     if (i->ARMin.NUnaryS.src->index > 7)
3953                        goto bad;
3954                     opc = X1000 | i->ARMin.NUnaryS.src->index;
3955                     break;
3956                  case 1:
3957                     if (Q && i->ARMin.NUnaryS.src->index > 3) {
3958                        regM++;
3959                        i->ARMin.NUnaryS.src->index -= 4;
3960                     }
3961                     if (i->ARMin.NUnaryS.src->index > 3)
3962                        goto bad;
3963                     opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
3964                     break;
3965                  case 2:
3966                     if (Q && i->ARMin.NUnaryS.src->index > 1) {
3967                        regM++;
3968                        i->ARMin.NUnaryS.src->index -= 2;
3969                     }
3970                     if (i->ARMin.NUnaryS.src->index > 1)
3971                        goto bad;
3972                     opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
3973                     break;
3974                  default:
3975                     goto bad;
3976               }
3977               opc1 = (opc >> 2) & 3;
3978               opc2 = opc & 3;
3979               insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
3980                               regM, regD, X1011,
3981                               BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
3982               *p++ = insn;
3983               goto done;
3984            default:
3985               goto bad;
3986         }
3987      }
3988      case ARMin_NUnary: {
3989         UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
3990         UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
3991                       ? (qregEnc(i->ARMin.NUnary.dst) << 1)
3992                       : dregEnc(i->ARMin.NUnary.dst);
3993         UInt regM, M;
3994         UInt D = regD >> 4;
3995         UInt sz1 = i->ARMin.NUnary.size >> 1;
3996         UInt sz2 = i->ARMin.NUnary.size & 1;
3997         UInt sz = i->ARMin.NUnary.size;
3998         UInt insn;
3999         UInt F = 0; /* TODO: floating point EQZ ??? */
4000         if (i->ARMin.NUnary.op != ARMneon_DUP) {
4001            regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
4002                     ? (qregEnc(i->ARMin.NUnary.src) << 1)
4003                     : dregEnc(i->ARMin.NUnary.src);
4004            M = regM >> 4;
4005         } else {
4006            regM = iregEnc(i->ARMin.NUnary.src);
4007            M = regM >> 4;
4008         }
4009         regD &= 0xF;
4010         regM &= 0xF;
4011         switch (i->ARMin.NUnary.op) {
4012            case ARMneon_COPY: /* VMOV reg, reg */
4013               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
4014                               BITS4(M,Q,M,1), regM);
4015               break;
4016            case ARMneon_COPYN: /* VMOVN regD, regQ */
4017               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4018                               regD, X0010, BITS4(0,0,M,0), regM);
4019               break;
4020            case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
4021               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4022                               regD, X0010, BITS4(1,0,M,0), regM);
4023               break;
4024            case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
4025               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4026                               regD, X0010, BITS4(0,1,M,0), regM);
4027               break;
4028            case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
4029               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4030                               regD, X0010, BITS4(1,1,M,0), regM);
4031               break;
4032            case ARMneon_COPYLS: /* VMOVL regQ, regD */
4033               if (sz >= 3)
4034                  goto bad;
4035               insn = XXXXXXXX(0xF, X0010,
4036                               BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
4037                               BITS4((sz == 0) ? 1 : 0,0,0,0),
4038                               regD, X1010, BITS4(0,0,M,1), regM);
4039               break;
4040            case ARMneon_COPYLU: /* VMOVL regQ, regD */
4041               if (sz >= 3)
4042                  goto bad;
4043               insn = XXXXXXXX(0xF, X0011,
4044                               BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
4045                               BITS4((sz == 0) ? 1 : 0,0,0,0),
4046                               regD, X1010, BITS4(0,0,M,1), regM);
4047               break;
4048            case ARMneon_NOT: /* VMVN reg, reg*/
4049               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
4050                               BITS4(1,Q,M,0), regM);
4051               break;
4052            case ARMneon_EQZ:
4053               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
4054                               regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
4055               break;
4056            case ARMneon_CNT:
4057               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
4058                               BITS4(0,Q,M,0), regM);
4059               break;
4060            case ARMneon_CLZ:
4061               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4062                               regD, X0100, BITS4(1,Q,M,0), regM);
4063               break;
4064            case ARMneon_CLS:
4065               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4066                               regD, X0100, BITS4(0,Q,M,0), regM);
4067               break;
4068            case ARMneon_ABS:
4069               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
4070                               regD, X0011, BITS4(0,Q,M,0), regM);
4071               break;
4072            case ARMneon_DUP:
4073               sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
4074               sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
4075               vassert(sz1 + sz2 < 2);
4076               insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
4077                               X1011, BITS4(D,0,sz2,1), X0000);
4078               break;
4079            case ARMneon_REV16:
4080               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4081                               regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
4082               break;
4083            case ARMneon_REV32:
4084               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4085                               regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
4086               break;
4087            case ARMneon_REV64:
4088               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4089                               regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
4090               break;
4091            case ARMneon_PADDLU:
4092               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4093                               regD, X0010, BITS4(1,Q,M,0), regM);
4094               break;
4095            case ARMneon_PADDLS:
4096               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4097                               regD, X0010, BITS4(0,Q,M,0), regM);
4098               break;
4099            case ARMneon_VQSHLNUU:
4100               insn = XXXXXXXX(0xF, X0011,
4101                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4102                               sz & 0xf, regD, X0111,
4103                               BITS4(sz >> 6,Q,M,1), regM);
4104               break;
4105            case ARMneon_VQSHLNSS:
4106               insn = XXXXXXXX(0xF, X0010,
4107                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4108                               sz & 0xf, regD, X0111,
4109                               BITS4(sz >> 6,Q,M,1), regM);
4110               break;
4111            case ARMneon_VQSHLNUS:
4112               insn = XXXXXXXX(0xF, X0011,
4113                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4114                               sz & 0xf, regD, X0110,
4115                               BITS4(sz >> 6,Q,M,1), regM);
4116               break;
4117            case ARMneon_VCVTFtoS:
4118               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
4119                               BITS4(0,Q,M,0), regM);
4120               break;
4121            case ARMneon_VCVTFtoU:
4122               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
4123                               BITS4(1,Q,M,0), regM);
4124               break;
4125            case ARMneon_VCVTStoF:
4126               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
4127                               BITS4(0,Q,M,0), regM);
4128               break;
4129            case ARMneon_VCVTUtoF:
4130               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
4131                               BITS4(1,Q,M,0), regM);
4132               break;
4133            case ARMneon_VCVTFtoFixedU:
4134               sz1 = (sz >> 5) & 1;
4135               sz2 = (sz >> 4) & 1;
4136               sz &= 0xf;
4137               insn = XXXXXXXX(0xF, X0011,
4138                               BITS4(1,D,sz1,sz2), sz, regD, X1111,
4139                               BITS4(0,Q,M,1), regM);
4140               break;
4141            case ARMneon_VCVTFtoFixedS:
4142               sz1 = (sz >> 5) & 1;
4143               sz2 = (sz >> 4) & 1;
4144               sz &= 0xf;
4145               insn = XXXXXXXX(0xF, X0010,
4146                               BITS4(1,D,sz1,sz2), sz, regD, X1111,
4147                               BITS4(0,Q,M,1), regM);
4148               break;
4149            case ARMneon_VCVTFixedUtoF:
4150               sz1 = (sz >> 5) & 1;
4151               sz2 = (sz >> 4) & 1;
4152               sz &= 0xf;
4153               insn = XXXXXXXX(0xF, X0011,
4154                               BITS4(1,D,sz1,sz2), sz, regD, X1110,
4155                               BITS4(0,Q,M,1), regM);
4156               break;
4157            case ARMneon_VCVTFixedStoF:
4158               sz1 = (sz >> 5) & 1;
4159               sz2 = (sz >> 4) & 1;
4160               sz &= 0xf;
4161               insn = XXXXXXXX(0xF, X0010,
4162                               BITS4(1,D,sz1,sz2), sz, regD, X1110,
4163                               BITS4(0,Q,M,1), regM);
4164               break;
4165            case ARMneon_VCVTF32toF16:
4166               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
4167                               BITS4(0,0,M,0), regM);
4168               break;
4169            case ARMneon_VCVTF16toF32:
4170               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
4171                               BITS4(0,0,M,0), regM);
4172               break;
4173            case ARMneon_VRECIP:
4174               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
4175                               BITS4(0,Q,M,0), regM);
4176               break;
4177            case ARMneon_VRECIPF:
4178               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
4179                               BITS4(0,Q,M,0), regM);
4180               break;
4181            case ARMneon_VABSFP:
4182               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
4183                               BITS4(0,Q,M,0), regM);
4184               break;
4185            case ARMneon_VRSQRTEFP:
4186               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
4187                               BITS4(1,Q,M,0), regM);
4188               break;
4189            case ARMneon_VRSQRTE:
4190               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
4191                               BITS4(1,Q,M,0), regM);
4192               break;
4193            case ARMneon_VNEGF:
4194               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
4195                               BITS4(1,Q,M,0), regM);
4196               break;
4197
4198            default:
4199               goto bad;
4200         }
4201         *p++ = insn;
4202         goto done;
4203      }
4204      case ARMin_NDual: {
4205         UInt Q = i->ARMin.NDual.Q ? 1 : 0;
4206         UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
4207                       ? (qregEnc(i->ARMin.NDual.arg1) << 1)
4208                       : dregEnc(i->ARMin.NDual.arg1);
4209         UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
4210                       ? (qregEnc(i->ARMin.NDual.arg2) << 1)
4211                       : dregEnc(i->ARMin.NDual.arg2);
4212         UInt D = regD >> 4;
4213         UInt M = regM >> 4;
4214         UInt sz1 = i->ARMin.NDual.size >> 1;
4215         UInt sz2 = i->ARMin.NDual.size & 1;
4216         UInt insn;
4217         regD &= 0xF;
4218         regM &= 0xF;
4219         switch (i->ARMin.NDual.op) {
4220            case ARMneon_TRN: /* VTRN reg, reg */
4221               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4222                               regD, X0000, BITS4(1,Q,M,0), regM);
4223               break;
4224            case ARMneon_ZIP: /* VZIP reg, reg */
4225               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4226                               regD, X0001, BITS4(1,Q,M,0), regM);
4227               break;
4228            case ARMneon_UZP: /* VUZP reg, reg */
4229               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4230                               regD, X0001, BITS4(0,Q,M,0), regM);
4231               break;
4232            default:
4233               goto bad;
4234         }
4235         *p++ = insn;
4236         goto done;
4237      }
4238      case ARMin_NBinary: {
4239         UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
4240         UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
4241                       ? (qregEnc(i->ARMin.NBinary.dst) << 1)
4242                       : dregEnc(i->ARMin.NBinary.dst);
4243         UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
4244                       ? (qregEnc(i->ARMin.NBinary.argL) << 1)
4245                       : dregEnc(i->ARMin.NBinary.argL);
4246         UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
4247                       ? (qregEnc(i->ARMin.NBinary.argR) << 1)
4248                       : dregEnc(i->ARMin.NBinary.argR);
4249         UInt sz1 = i->ARMin.NBinary.size >> 1;
4250         UInt sz2 = i->ARMin.NBinary.size & 1;
4251         UInt D = regD >> 4;
4252         UInt N = regN >> 4;
4253         UInt M = regM >> 4;
4254         UInt insn;
4255         regD &= 0xF;
4256         regM &= 0xF;
4257         regN &= 0xF;
4258         switch (i->ARMin.NBinary.op) {
4259            case ARMneon_VAND: /* VAND reg, reg, reg */
4260               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
4261                               BITS4(N,Q,M,1), regM);
4262               break;
4263            case ARMneon_VORR: /* VORR reg, reg, reg*/
4264               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
4265                               BITS4(N,Q,M,1), regM);
4266               break;
4267            case ARMneon_VXOR: /* VEOR reg, reg, reg */
4268               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
4269                               BITS4(N,Q,M,1), regM);
4270               break;
4271            case ARMneon_VADD: /* VADD reg, reg, reg */
4272               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4273                               X1000, BITS4(N,Q,M,0), regM);
4274               break;
4275            case ARMneon_VSUB: /* VSUB reg, reg, reg */
4276               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4277                               X1000, BITS4(N,Q,M,0), regM);
4278               break;
4279            case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
4280               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4281                               X0110, BITS4(N,Q,M,1), regM);
4282               break;
4283            case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
4284               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4285                               X0110, BITS4(N,Q,M,1), regM);
4286               break;
4287            case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
4288               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4289                               X0110, BITS4(N,Q,M,0), regM);
4290               break;
4291            case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
4292               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4293                               X0110, BITS4(N,Q,M,0), regM);
4294               break;
4295            case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
4296               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4297                               X0001, BITS4(N,Q,M,0), regM);
4298               break;
4299            case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
4300               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4301                               X0001, BITS4(N,Q,M,0), regM);
4302               break;
4303            case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
4304               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4305                               X0000, BITS4(N,Q,M,1), regM);
4306               break;
4307            case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
4308               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4309                               X0000, BITS4(N,Q,M,1), regM);
4310               break;
4311            case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
4312               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4313                               X0010, BITS4(N,Q,M,1), regM);
4314               break;
4315            case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
4316               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4317                               X0010, BITS4(N,Q,M,1), regM);
4318               break;
4319            case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
4320               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4321                               X0011, BITS4(N,Q,M,0), regM);
4322               break;
4323            case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
4324               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4325                               X0011, BITS4(N,Q,M,0), regM);
4326               break;
4327            case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
4328               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4329                               X0011, BITS4(N,Q,M,1), regM);
4330               break;
4331            case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
4332               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4333                               X0011, BITS4(N,Q,M,1), regM);
4334               break;
4335            case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
4336               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4337                               X1000, BITS4(N,Q,M,1), regM);
4338               break;
4339            case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4*/
4340               if (i->ARMin.NBinary.size >= 16)
4341                  goto bad;
4342               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
4343                               i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
4344                               regM);
4345               break;
4346            case ARMneon_VMUL:
4347               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4348                               X1001, BITS4(N,Q,M,1), regM);
4349               break;
4350            case ARMneon_VMULLU:
4351               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
4352                               X1100, BITS4(N,0,M,0), regM);
4353               break;
4354            case ARMneon_VMULLS:
4355               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4356                               X1100, BITS4(N,0,M,0), regM);
4357               break;
4358            case ARMneon_VMULP:
4359               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4360                               X1001, BITS4(N,Q,M,1), regM);
4361               break;
4362            case ARMneon_VMULFP:
4363               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4364                               X1101, BITS4(N,Q,M,1), regM);
4365               break;
4366            case ARMneon_VMULLP:
4367               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4368                               X1110, BITS4(N,0,M,0), regM);
4369               break;
4370            case ARMneon_VQDMULH:
4371               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4372                               X1011, BITS4(N,Q,M,0), regM);
4373               break;
4374            case ARMneon_VQRDMULH:
4375               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4376                               X1011, BITS4(N,Q,M,0), regM);
4377               break;
4378            case ARMneon_VQDMULL:
4379               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4380                               X1101, BITS4(N,0,M,0), regM);
4381               break;
4382            case ARMneon_VTBL:
4383               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
4384                               X1000, BITS4(N,0,M,0), regM);
4385               break;
4386            case ARMneon_VPADD:
4387               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4388                               X1011, BITS4(N,Q,M,1), regM);
4389               break;
4390            case ARMneon_VPADDFP:
4391               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4392                               X1101, BITS4(N,Q,M,0), regM);
4393               break;
4394            case ARMneon_VPMINU:
4395               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4396                               X1010, BITS4(N,Q,M,1), regM);
4397               break;
4398            case ARMneon_VPMINS:
4399               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4400                               X1010, BITS4(N,Q,M,1), regM);
4401               break;
4402            case ARMneon_VPMAXU:
4403               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4404                               X1010, BITS4(N,Q,M,0), regM);
4405               break;
4406            case ARMneon_VPMAXS:
4407               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4408                               X1010, BITS4(N,Q,M,0), regM);
4409               break;
4410            case ARMneon_VADDFP: /* VADD reg, reg, reg */
4411               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
4412                               X1101, BITS4(N,Q,M,0), regM);
4413               break;
4414            case ARMneon_VSUBFP: /* VADD reg, reg, reg */
4415               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
4416                               X1101, BITS4(N,Q,M,0), regM);
4417               break;
4418            case ARMneon_VABDFP: /* VABD reg, reg, reg */
4419               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
4420                               X1101, BITS4(N,Q,M,0), regM);
4421               break;
4422            case ARMneon_VMINF:
4423               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
4424                               X1111, BITS4(N,Q,M,0), regM);
4425               break;
4426            case ARMneon_VMAXF:
4427               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
4428                               X1111, BITS4(N,Q,M,0), regM);
4429               break;
4430            case ARMneon_VPMINF:
4431               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
4432                               X1111, BITS4(N,Q,M,0), regM);
4433               break;
4434            case ARMneon_VPMAXF:
4435               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4436                               X1111, BITS4(N,Q,M,0), regM);
4437               break;
4438            case ARMneon_VRECPS:
4439               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
4440                               BITS4(N,Q,M,1), regM);
4441               break;
4442            case ARMneon_VCGTF:
4443               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
4444                               BITS4(N,Q,M,0), regM);
4445               break;
4446            case ARMneon_VCGEF:
4447               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
4448                               BITS4(N,Q,M,0), regM);
4449               break;
4450            case ARMneon_VCEQF:
4451               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
4452                               BITS4(N,Q,M,0), regM);
4453               break;
4454            case ARMneon_VRSQRTS:
4455               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
4456                               BITS4(N,Q,M,1), regM);
4457               break;
4458            default:
4459               goto bad;
4460         }
4461         *p++ = insn;
4462         goto done;
4463      }
4464      case ARMin_NShift: {
4465         UInt Q = i->ARMin.NShift.Q ? 1 : 0;
4466         UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
4467                       ? (qregEnc(i->ARMin.NShift.dst) << 1)
4468                       : dregEnc(i->ARMin.NShift.dst);
4469         UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
4470                       ? (qregEnc(i->ARMin.NShift.argL) << 1)
4471                       : dregEnc(i->ARMin.NShift.argL);
4472         UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
4473                       ? (qregEnc(i->ARMin.NShift.argR) << 1)
4474                       : dregEnc(i->ARMin.NShift.argR);
4475         UInt sz1 = i->ARMin.NShift.size >> 1;
4476         UInt sz2 = i->ARMin.NShift.size & 1;
4477         UInt D = regD >> 4;
4478         UInt N = regN >> 4;
4479         UInt M = regM >> 4;
4480         UInt insn;
4481         regD &= 0xF;
4482         regM &= 0xF;
4483         regN &= 0xF;
4484         switch (i->ARMin.NShift.op) {
4485            case ARMneon_VSHL:
4486               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4487                               X0100, BITS4(N,Q,M,0), regM);
4488               break;
4489            case ARMneon_VSAL:
4490               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4491                               X0100, BITS4(N,Q,M,0), regM);
4492               break;
4493            case ARMneon_VQSHL:
4494               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4495                               X0100, BITS4(N,Q,M,1), regM);
4496               break;
4497            case ARMneon_VQSAL:
4498               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4499                               X0100, BITS4(N,Q,M,1), regM);
4500               break;
4501            default:
4502               goto bad;
4503         }
4504         *p++ = insn;
4505         goto done;
4506      }
4507      case ARMin_NShl64: {
4508         HReg regDreg = i->ARMin.NShl64.dst;
4509         HReg regMreg = i->ARMin.NShl64.src;
4510         UInt amt     = i->ARMin.NShl64.amt;
4511         vassert(amt >= 1 && amt <= 63);
4512         vassert(hregClass(regDreg) == HRcFlt64);
4513         vassert(hregClass(regMreg) == HRcFlt64);
4514         UInt regD = dregEnc(regDreg);
4515         UInt regM = dregEnc(regMreg);
4516         UInt D    = (regD >> 4) & 1;
4517         UInt Vd   = regD & 0xF;
4518         UInt L    = 1;
4519         UInt Q    = 0; /* always 64-bit */
4520         UInt M    = (regM >> 4) & 1;
4521         UInt Vm   = regM & 0xF;
4522         UInt insn = XXXXXXXX(X1111,X0010, BITS4(1,D,(amt>>5)&1,(amt>>4)&1),
4523                              amt & 0xF, Vd, X0101, BITS4(L,Q,M,1), Vm);
4524         *p++ = insn;
4525         goto done;
4526      }
4527      case ARMin_NeonImm: {
4528         UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
4529         UInt regD = Q ? (qregEnc(i->ARMin.NeonImm.dst) << 1) :
4530                          dregEnc(i->ARMin.NeonImm.dst);
4531         UInt D = regD >> 4;
4532         UInt imm = i->ARMin.NeonImm.imm->imm8;
4533         UInt tp = i->ARMin.NeonImm.imm->type;
4534         UInt j = imm >> 7;
4535         UInt imm3 = (imm >> 4) & 0x7;
4536         UInt imm4 = imm & 0xF;
4537         UInt cmode, op;
4538         UInt insn;
4539         regD &= 0xF;
4540         if (tp == 9)
4541            op = 1;
4542         else
4543            op = 0;
4544         switch (tp) {
4545            case 0:
4546            case 1:
4547            case 2:
4548            case 3:
4549            case 4:
4550            case 5:
4551               cmode = tp << 1;
4552               break;
4553            case 9:
4554            case 6:
4555               cmode = 14;
4556               break;
4557            case 7:
4558               cmode = 12;
4559               break;
4560            case 8:
4561               cmode = 13;
4562               break;
4563            case 10:
4564               cmode = 15;
4565               break;
4566            default:
4567               vpanic("ARMin_NeonImm");
4568
4569         }
4570         insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
4571                         cmode, BITS4(0,Q,op,1), imm4);
4572         *p++ = insn;
4573         goto done;
4574      }
4575      case ARMin_NCMovQ: {
4576         UInt cc = (UInt)i->ARMin.NCMovQ.cond;
4577         UInt qM = qregEnc(i->ARMin.NCMovQ.src) << 1;
4578         UInt qD = qregEnc(i->ARMin.NCMovQ.dst) << 1;
4579         UInt vM = qM & 0xF;
4580         UInt vD = qD & 0xF;
4581         UInt M  = (qM >> 4) & 1;
4582         UInt D  = (qD >> 4) & 1;
4583         vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
4584         /* b!cc here+8: !cc A00 0000 */
4585         UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
4586         *p++ = insn;
4587         /* vmov qD, qM */
4588         insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
4589                         vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
4590         *p++ = insn;
4591         goto done;
4592      }
4593      case ARMin_Add32: {
4594         UInt regD = iregEnc(i->ARMin.Add32.rD);
4595         UInt regN = iregEnc(i->ARMin.Add32.rN);
4596         UInt imm32 = i->ARMin.Add32.imm32;
4597         vassert(regD != regN);
4598         /* MOV regD, imm32 */
4599         p = imm32_to_ireg((UInt *)p, regD, imm32);
4600         /* ADD regD, regN, regD */
4601         UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
4602         *p++ = insn;
4603         goto done;
4604      }
4605
4606      case ARMin_EvCheck: {
4607         /* We generate:
4608               ldr  r12, [r8 + #4]   4 == offsetof(host_EvC_COUNTER)
4609               subs r12, r12, #1  (A1)
4610               str  r12, [r8 + #4]   4 == offsetof(host_EvC_COUNTER)
4611               bpl  nofail
4612               ldr  r12, [r8 + #0]   0 == offsetof(host_EvC_FAILADDR)
4613               bx   r12
4614              nofail:
4615         */
4616         UInt* p0 = p;
4617         p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
4618                                i->ARMin.EvCheck.amCounter);
4619         *p++ = 0xE25CC001; /* subs r12, r12, #1 */
4620         p = do_load_or_store32(p, False/*!isLoad*/, /*r*/12,
4621                                i->ARMin.EvCheck.amCounter);
4622         *p++ = 0x5A000001; /* bpl nofail */
4623         p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
4624                                i->ARMin.EvCheck.amFailAddr);
4625         *p++ = 0xE12FFF1C; /* bx r12 */
4626         /* nofail: */
4627
4628         /* Crosscheck */
4629         vassert(evCheckSzB_ARM() == (UChar*)p - (UChar*)p0);
4630         goto done;
4631      }
4632
4633      case ARMin_ProfInc: {
4634         /* We generate:
4635              (ctrP is unknown now, so use 0x65556555 in the
4636              expectation that a later call to LibVEX_patchProfCtr
4637              will be used to fill in the immediate fields once the
4638              right value is known.)
4639            movw r12, lo16(0x65556555)
4640            movt r12, lo16(0x65556555)
4641            ldr  r11, [r12]
4642            adds r11, r11, #1
4643            str  r11, [r12]
4644            ldr  r11, [r12+4]
4645            adc  r11, r11, #0
4646            str  r11, [r12+4]
4647         */
4648         p = imm32_to_ireg_EXACTLY2(p, /*r*/12, 0x65556555);
4649         *p++ = 0xE59CB000;
4650         *p++ = 0xE29BB001;
4651         *p++ = 0xE58CB000;
4652         *p++ = 0xE59CB004;
4653         *p++ = 0xE2ABB000;
4654         *p++ = 0xE58CB004;
4655         /* Tell the caller .. */
4656         vassert(!(*is_profInc));
4657         *is_profInc = True;
4658         goto done;
4659      }
4660
4661      /* ... */
4662      default:
4663         goto bad;
4664    }
4665
4666  bad:
4667   ppARMInstr(i);
4668   vpanic("emit_ARMInstr");
4669   /*NOTREACHED*/
4670
4671  done:
4672   vassert(((UChar*)p) - &buf[0] <= 32);
4673   return ((UChar*)p) - &buf[0];
4674}
4675
4676
4677/* How big is an event check?  See case for ARMin_EvCheck in
4678   emit_ARMInstr just above.  That crosschecks what this returns, so
4679   we can tell if we're inconsistent. */
4680Int evCheckSzB_ARM (void)
4681{
4682   return 24;
4683}
4684
4685
4686/* NB: what goes on here has to be very closely coordinated with the
4687   emitInstr case for XDirect, above. */
4688VexInvalRange chainXDirect_ARM ( VexEndness endness_host,
4689                                 void* place_to_chain,
4690                                 const void* disp_cp_chain_me_EXPECTED,
4691                                 const void* place_to_jump_to )
4692{
4693   vassert(endness_host == VexEndnessLE);
4694
4695   /* What we're expecting to see is:
4696        movw r12, lo16(disp_cp_chain_me_to_EXPECTED)
4697        movt r12, hi16(disp_cp_chain_me_to_EXPECTED)
4698        blx  r12
4699      viz
4700        <8 bytes generated by imm32_to_ireg_EXACTLY2>
4701        E1 2F FF 3C
4702   */
4703   UInt* p = (UInt*)place_to_chain;
4704   vassert(0 == (3 & (HWord)p));
4705   vassert(is_imm32_to_ireg_EXACTLY2(
4706              p, /*r*/12, (UInt)(Addr)disp_cp_chain_me_EXPECTED));
4707   vassert(p[2] == 0xE12FFF3C);
4708   /* And what we want to change it to is either:
4709        (general case)
4710          movw r12, lo16(place_to_jump_to)
4711          movt r12, hi16(place_to_jump_to)
4712          bx   r12
4713        viz
4714          <8 bytes generated by imm32_to_ireg_EXACTLY2>
4715          E1 2F FF 1C
4716      ---OR---
4717        in the case where the displacement falls within 26 bits
4718          b disp24; undef; undef
4719        viz
4720          EA <3 bytes == disp24>
4721          FF 00 00 00
4722          FF 00 00 00
4723
4724      In both cases the replacement has the same length as the original.
4725      To remain sane & verifiable,
4726      (1) limit the displacement for the short form to
4727          (say) +/- 30 million, so as to avoid wraparound
4728          off-by-ones
4729      (2) even if the short form is applicable, once every (say)
4730          1024 times use the long form anyway, so as to maintain
4731          verifiability
4732   */
4733
4734   /* This is the delta we need to put into a B insn.  It's relative
4735      to the start of the next-but-one insn, hence the -8.  */
4736   Long delta   = (Long)((const UChar *)place_to_jump_to - (const UChar*)p) - 8;
4737   Bool shortOK = delta >= -30*1000*1000 && delta < 30*1000*1000;
4738   vassert(0 == (delta & (Long)3));
4739
4740   static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
4741   if (shortOK) {
4742      shortCTR++; // thread safety bleh
4743      if (0 == (shortCTR & 0x3FF)) {
4744         shortOK = False;
4745         if (0)
4746            vex_printf("QQQ chainXDirect_ARM: shortCTR = %u, "
4747                       "using long form\n", shortCTR);
4748      }
4749   }
4750
4751   /* And make the modifications. */
4752   if (shortOK) {
4753      Int simm24 = (Int)(delta >> 2);
4754      vassert(simm24 == ((simm24 << 8) >> 8));
4755      p[0] = 0xEA000000 | (simm24 & 0x00FFFFFF);
4756      p[1] = 0xFF000000;
4757      p[2] = 0xFF000000;
4758   } else {
4759      (void)imm32_to_ireg_EXACTLY2(
4760               p, /*r*/12, (UInt)(Addr)place_to_jump_to);
4761      p[2] = 0xE12FFF1C;
4762   }
4763
4764   VexInvalRange vir = {(HWord)p, 12};
4765   return vir;
4766}
4767
4768
4769/* NB: what goes on here has to be very closely coordinated with the
4770   emitInstr case for XDirect, above. */
4771VexInvalRange unchainXDirect_ARM ( VexEndness endness_host,
4772                                   void* place_to_unchain,
4773                                   const void* place_to_jump_to_EXPECTED,
4774                                   const void* disp_cp_chain_me )
4775{
4776   vassert(endness_host == VexEndnessLE);
4777
4778   /* What we're expecting to see is:
4779        (general case)
4780          movw r12, lo16(place_to_jump_to_EXPECTED)
4781          movt r12, lo16(place_to_jump_to_EXPECTED)
4782          bx   r12
4783        viz
4784          <8 bytes generated by imm32_to_ireg_EXACTLY2>
4785          E1 2F FF 1C
4786      ---OR---
4787        in the case where the displacement falls within 26 bits
4788          b disp24; undef; undef
4789        viz
4790          EA <3 bytes == disp24>
4791          FF 00 00 00
4792          FF 00 00 00
4793   */
4794   UInt* p = (UInt*)place_to_unchain;
4795   vassert(0 == (3 & (HWord)p));
4796
4797   Bool valid = False;
4798   if (is_imm32_to_ireg_EXACTLY2(
4799          p, /*r*/12, (UInt)(Addr)place_to_jump_to_EXPECTED)
4800       && p[2] == 0xE12FFF1C) {
4801      valid = True; /* it's the long form */
4802      if (0)
4803         vex_printf("QQQ unchainXDirect_ARM: found long form\n");
4804   } else
4805   if ((p[0] >> 24) == 0xEA && p[1] == 0xFF000000 && p[2] == 0xFF000000) {
4806      /* It's the short form.  Check the displacement is right. */
4807      Int simm24 = p[0] & 0x00FFFFFF;
4808      simm24 <<= 8; simm24 >>= 8;
4809      if ((UChar*)p + (simm24 << 2) + 8 == place_to_jump_to_EXPECTED) {
4810         valid = True;
4811         if (0)
4812            vex_printf("QQQ unchainXDirect_ARM: found short form\n");
4813      }
4814   }
4815   vassert(valid);
4816
4817   /* And what we want to change it to is:
4818        movw r12, lo16(disp_cp_chain_me)
4819        movt r12, hi16(disp_cp_chain_me)
4820        blx  r12
4821      viz
4822        <8 bytes generated by imm32_to_ireg_EXACTLY2>
4823        E1 2F FF 3C
4824   */
4825   (void)imm32_to_ireg_EXACTLY2(
4826            p, /*r*/12, (UInt)(Addr)disp_cp_chain_me);
4827   p[2] = 0xE12FFF3C;
4828   VexInvalRange vir = {(HWord)p, 12};
4829   return vir;
4830}
4831
4832
4833/* Patch the counter address into a profile inc point, as previously
4834   created by the ARMin_ProfInc case for emit_ARMInstr. */
4835VexInvalRange patchProfInc_ARM ( VexEndness endness_host,
4836                                 void*  place_to_patch,
4837                                 const ULong* location_of_counter )
4838{
4839   vassert(endness_host == VexEndnessLE);
4840   vassert(sizeof(ULong*) == 4);
4841   UInt* p = (UInt*)place_to_patch;
4842   vassert(0 == (3 & (HWord)p));
4843   vassert(is_imm32_to_ireg_EXACTLY2(p, /*r*/12, 0x65556555));
4844   vassert(p[2] == 0xE59CB000);
4845   vassert(p[3] == 0xE29BB001);
4846   vassert(p[4] == 0xE58CB000);
4847   vassert(p[5] == 0xE59CB004);
4848   vassert(p[6] == 0xE2ABB000);
4849   vassert(p[7] == 0xE58CB004);
4850   imm32_to_ireg_EXACTLY2(p, /*r*/12, (UInt)(Addr)location_of_counter);
4851   VexInvalRange vir = {(HWord)p, 8};
4852   return vir;
4853}
4854
4855
4856#undef BITS4
4857#undef X0000
4858#undef X0001
4859#undef X0010
4860#undef X0011
4861#undef X0100
4862#undef X0101
4863#undef X0110
4864#undef X0111
4865#undef X1000
4866#undef X1001
4867#undef X1010
4868#undef X1011
4869#undef X1100
4870#undef X1101
4871#undef X1110
4872#undef X1111
4873#undef XXXXX___
4874#undef XXXXXX__
4875#undef XXX___XX
4876#undef XXXXX__X
4877#undef XXXXXXXX
4878#undef XX______
4879
4880/*---------------------------------------------------------------*/
4881/*--- end                                     host_arm_defs.c ---*/
4882/*---------------------------------------------------------------*/
4883