1
2/*---------------------------------------------------------------*/
3/*--- begin                                   host_arm_defs.c ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2010 OpenWorks LLP
11      info@open-works.net
12
13   NEON support is
14   Copyright (C) 2010-2010 Samsung Electronics
15   contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16              and Kirill Batuzov <batuzovk@ispras.ru>
17
18   This program is free software; you can redistribute it and/or
19   modify it under the terms of the GNU General Public License as
20   published by the Free Software Foundation; either version 2 of the
21   License, or (at your option) any later version.
22
23   This program is distributed in the hope that it will be useful, but
24   WITHOUT ANY WARRANTY; without even the implied warranty of
25   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
26   General Public License for more details.
27
28   You should have received a copy of the GNU General Public License
29   along with this program; if not, write to the Free Software
30   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31   02110-1301, USA.
32
33   The GNU General Public License is contained in the file COPYING.
34*/
35
36#include "libvex_basictypes.h"
37#include "libvex.h"
38#include "libvex_trc_values.h"
39
40#include "main_util.h"
41#include "host_generic_regs.h"
42#include "host_arm_defs.h"
43
/* Bitmask of ARM host hardware capabilities; starts out as zero.
   NOTE(review): presumably assigned elsewhere before instruction
   selection/emission -- confirm against the users of this global. */
UInt arm_hwcaps = 0;
45
46
47/* --------- Registers. --------- */
48
49/* The usual HReg abstraction.
50   There are 16 general purpose regs.
51*/
52
53void ppHRegARM ( HReg reg )  {
54   Int r;
55   /* Be generic for all virtual regs. */
56   if (hregIsVirtual(reg)) {
57      ppHReg(reg);
58      return;
59   }
60   /* But specific for real regs. */
61   switch (hregClass(reg)) {
62      case HRcInt32:
63         r = hregNumber(reg);
64         vassert(r >= 0 && r < 16);
65         vex_printf("r%d", r);
66         return;
67      case HRcFlt64:
68         r = hregNumber(reg);
69         vassert(r >= 0 && r < 32);
70         vex_printf("d%d", r);
71         return;
72      case HRcFlt32:
73         r = hregNumber(reg);
74         vassert(r >= 0 && r < 32);
75         vex_printf("s%d", r);
76         return;
77      case HRcVec128:
78         r = hregNumber(reg);
79         vassert(r >= 0 && r < 16);
80         vex_printf("q%d", r);
81         return;
82      default:
83         vpanic("ppHRegARM");
84   }
85}
86
/* Constructors for the real (non-virtual) host registers.  Each one
   wraps mkHReg with the register number, its class, and False as the
   last argument (presumably the "is virtual" flag -- confirm against
   mkHReg's declaration). */

/* Integer registers r0 .. r15 (class HRcInt32). */
HReg hregARM_R0  ( void ) { return mkHReg(0,  HRcInt32, False); }
HReg hregARM_R1  ( void ) { return mkHReg(1,  HRcInt32, False); }
HReg hregARM_R2  ( void ) { return mkHReg(2,  HRcInt32, False); }
HReg hregARM_R3  ( void ) { return mkHReg(3,  HRcInt32, False); }
HReg hregARM_R4  ( void ) { return mkHReg(4,  HRcInt32, False); }
HReg hregARM_R5  ( void ) { return mkHReg(5,  HRcInt32, False); }
HReg hregARM_R6  ( void ) { return mkHReg(6,  HRcInt32, False); }
HReg hregARM_R7  ( void ) { return mkHReg(7,  HRcInt32, False); }
HReg hregARM_R8  ( void ) { return mkHReg(8,  HRcInt32, False); }
HReg hregARM_R9  ( void ) { return mkHReg(9,  HRcInt32, False); }
HReg hregARM_R10 ( void ) { return mkHReg(10, HRcInt32, False); }
HReg hregARM_R11 ( void ) { return mkHReg(11, HRcInt32, False); }
HReg hregARM_R12 ( void ) { return mkHReg(12, HRcInt32, False); }
HReg hregARM_R13 ( void ) { return mkHReg(13, HRcInt32, False); }
HReg hregARM_R14 ( void ) { return mkHReg(14, HRcInt32, False); }
HReg hregARM_R15 ( void ) { return mkHReg(15, HRcInt32, False); }
/* Double-precision registers d8 .. d12 (class HRcFlt64). */
HReg hregARM_D8  ( void ) { return mkHReg(8,  HRcFlt64, False); }
HReg hregARM_D9  ( void ) { return mkHReg(9,  HRcFlt64, False); }
HReg hregARM_D10 ( void ) { return mkHReg(10, HRcFlt64, False); }
HReg hregARM_D11 ( void ) { return mkHReg(11, HRcFlt64, False); }
HReg hregARM_D12 ( void ) { return mkHReg(12, HRcFlt64, False); }
/* Single-precision registers s26 .. s30 (class HRcFlt32). */
HReg hregARM_S26 ( void ) { return mkHReg(26, HRcFlt32, False); }
HReg hregARM_S27 ( void ) { return mkHReg(27, HRcFlt32, False); }
HReg hregARM_S28 ( void ) { return mkHReg(28, HRcFlt32, False); }
HReg hregARM_S29 ( void ) { return mkHReg(29, HRcFlt32, False); }
HReg hregARM_S30 ( void ) { return mkHReg(30, HRcFlt32, False); }
/* 128-bit vector registers q8 .. q15 (class HRcVec128). */
HReg hregARM_Q8  ( void ) { return mkHReg(8,  HRcVec128, False); }
HReg hregARM_Q9  ( void ) { return mkHReg(9,  HRcVec128, False); }
HReg hregARM_Q10 ( void ) { return mkHReg(10, HRcVec128, False); }
HReg hregARM_Q11 ( void ) { return mkHReg(11, HRcVec128, False); }
HReg hregARM_Q12 ( void ) { return mkHReg(12, HRcVec128, False); }
HReg hregARM_Q13 ( void ) { return mkHReg(13, HRcVec128, False); }
HReg hregARM_Q14 ( void ) { return mkHReg(14, HRcVec128, False); }
HReg hregARM_Q15 ( void ) { return mkHReg(15, HRcVec128, False); }
121
122void getAllocableRegs_ARM ( Int* nregs, HReg** arr )
123{
124   Int i = 0;
125   *nregs = 26;
126   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
127   // callee saves ones are listed first, since we prefer them
128   // if they're available
129   (*arr)[i++] = hregARM_R4();
130   (*arr)[i++] = hregARM_R5();
131   (*arr)[i++] = hregARM_R6();
132   (*arr)[i++] = hregARM_R7();
133   (*arr)[i++] = hregARM_R10();
134   (*arr)[i++] = hregARM_R11();
135   // otherwise we'll have to slum it out with caller-saves ones
136   (*arr)[i++] = hregARM_R0();
137   (*arr)[i++] = hregARM_R1();
138   (*arr)[i++] = hregARM_R2();
139   (*arr)[i++] = hregARM_R3();
140   (*arr)[i++] = hregARM_R9();
141   // FP hreegisters.  Note: these are all callee-save.  Yay!
142   // Hence we don't need to mention them as trashed in
143   // getHRegUsage for ARMInstr_Call.
144   (*arr)[i++] = hregARM_D8();
145   (*arr)[i++] = hregARM_D9();
146   (*arr)[i++] = hregARM_D10();
147   (*arr)[i++] = hregARM_D11();
148   (*arr)[i++] = hregARM_D12();
149   (*arr)[i++] = hregARM_S26();
150   (*arr)[i++] = hregARM_S27();
151   (*arr)[i++] = hregARM_S28();
152   (*arr)[i++] = hregARM_S29();
153   (*arr)[i++] = hregARM_S30();
154
155   (*arr)[i++] = hregARM_Q8();
156   (*arr)[i++] = hregARM_Q9();
157   (*arr)[i++] = hregARM_Q10();
158   (*arr)[i++] = hregARM_Q11();
159   (*arr)[i++] = hregARM_Q12();
160
161   //(*arr)[i++] = hregARM_Q13();
162   //(*arr)[i++] = hregARM_Q14();
163   //(*arr)[i++] = hregARM_Q15();
164
165   // unavail: r8 as GSP
166   // r12 is used as a spill/reload temporary
167   // r13 as SP
168   // r14 as LR
169   // r15 as PC
170   //
171   // All in all, we have 11 allocatable integer registers:
172   // 0 1 2 3 4 5 6 7 9 10 11, with r8 dedicated as GSP
173   // and r12 dedicated as a spill temporary.
174   // 13 14 and 15 are not under the allocator's control.
175   //
176   // Hence for the allocatable registers we have:
177   //
178   // callee-saved: 4 5 6 7 (8) 9 10 11
179   // caller-saved: 0 1 2 3
180   // Note 9 is ambiguous: the base EABI does not give an e/r-saved
181   // designation for it, but the Linux instantiation of the ABI
182   // specifies it as callee-saved.
183   //
184   // If the set of available registers changes or if the e/r status
185   // changes, be sure to re-check/sync the definition of
186   // getHRegUsage for ARMInstr_Call too.
187   vassert(i == *nregs);
188}
189
190
191
192/* --------- Condition codes, ARM encoding. --------- */
193
194HChar* showARMCondCode ( ARMCondCode cond ) {
195   switch (cond) {
196       case ARMcc_EQ:  return "eq";
197       case ARMcc_NE:  return "ne";
198       case ARMcc_HS:  return "hs";
199       case ARMcc_LO:  return "lo";
200       case ARMcc_MI:  return "mi";
201       case ARMcc_PL:  return "pl";
202       case ARMcc_VS:  return "vs";
203       case ARMcc_VC:  return "vc";
204       case ARMcc_HI:  return "hi";
205       case ARMcc_LS:  return "ls";
206       case ARMcc_GE:  return "ge";
207       case ARMcc_LT:  return "lt";
208       case ARMcc_GT:  return "gt";
209       case ARMcc_LE:  return "le";
210       case ARMcc_AL:  return "al"; // default
211       case ARMcc_NV:  return "nv";
212       default: vpanic("showARMCondCode");
213   }
214}
215
216
217/* --------- Mem AModes: Addressing Mode 1 --------- */
218
219ARMAMode1* ARMAMode1_RI  ( HReg reg, Int simm13 ) {
220   ARMAMode1* am        = LibVEX_Alloc(sizeof(ARMAMode1));
221   am->tag              = ARMam1_RI;
222   am->ARMam1.RI.reg    = reg;
223   am->ARMam1.RI.simm13 = simm13;
224   vassert(-4095 <= simm13 && simm13 <= 4095);
225   return am;
226}
227ARMAMode1* ARMAMode1_RRS ( HReg base, HReg index, UInt shift ) {
228   ARMAMode1* am        = LibVEX_Alloc(sizeof(ARMAMode1));
229   am->tag              = ARMam1_RRS;
230   am->ARMam1.RRS.base  = base;
231   am->ARMam1.RRS.index = index;
232   am->ARMam1.RRS.shift = shift;
233   vassert(0 <= shift && shift <= 3);
234   return am;
235}
236
237void ppARMAMode1 ( ARMAMode1* am ) {
238   switch (am->tag) {
239      case ARMam1_RI:
240         vex_printf("%d(", am->ARMam1.RI.simm13);
241         ppHRegARM(am->ARMam1.RI.reg);
242         vex_printf(")");
243         break;
244      case ARMam1_RRS:
245         vex_printf("(");
246         ppHRegARM(am->ARMam1.RRS.base);
247         vex_printf(",");
248         ppHRegARM(am->ARMam1.RRS.index);
249         vex_printf(",%u)", am->ARMam1.RRS.shift);
250         break;
251      default:
252         vassert(0);
253   }
254}
255
256static void addRegUsage_ARMAMode1 ( HRegUsage* u, ARMAMode1* am ) {
257   switch (am->tag) {
258      case ARMam1_RI:
259         addHRegUse(u, HRmRead, am->ARMam1.RI.reg);
260         return;
261      case ARMam1_RRS:
262         //    addHRegUse(u, HRmRead, am->ARMam1.RRS.base);
263         //    addHRegUse(u, HRmRead, am->ARMam1.RRS.index);
264         //   return;
265      default:
266         vpanic("addRegUsage_ARMAmode1");
267   }
268}
269
270static void mapRegs_ARMAMode1 ( HRegRemap* m, ARMAMode1* am ) {
271   switch (am->tag) {
272      case ARMam1_RI:
273         am->ARMam1.RI.reg = lookupHRegRemap(m, am->ARMam1.RI.reg);
274         return;
275      case ARMam1_RRS:
276         //am->ARMam1.RR.base =lookupHRegRemap(m, am->ARMam1.RR.base);
277         //am->ARMam1.RR.index = lookupHRegRemap(m, am->ARMam1.RR.index);
278         //return;
279      default:
280         vpanic("mapRegs_ARMAmode1");
281   }
282}
283
284
285/* --------- Mem AModes: Addressing Mode 2 --------- */
286
287ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
288   ARMAMode2* am       = LibVEX_Alloc(sizeof(ARMAMode2));
289   am->tag             = ARMam2_RI;
290   am->ARMam2.RI.reg   = reg;
291   am->ARMam2.RI.simm9 = simm9;
292   vassert(-255 <= simm9 && simm9 <= 255);
293   return am;
294}
295ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
296   ARMAMode2* am       = LibVEX_Alloc(sizeof(ARMAMode2));
297   am->tag             = ARMam2_RR;
298   am->ARMam2.RR.base  = base;
299   am->ARMam2.RR.index = index;
300   return am;
301}
302
303void ppARMAMode2 ( ARMAMode2* am ) {
304   switch (am->tag) {
305      case ARMam2_RI:
306         vex_printf("%d(", am->ARMam2.RI.simm9);
307         ppHRegARM(am->ARMam2.RI.reg);
308         vex_printf(")");
309         break;
310      case ARMam2_RR:
311         vex_printf("(");
312         ppHRegARM(am->ARMam2.RR.base);
313         vex_printf(",");
314         ppHRegARM(am->ARMam2.RR.index);
315         vex_printf(")");
316         break;
317      default:
318         vassert(0);
319   }
320}
321
322static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
323   switch (am->tag) {
324      case ARMam2_RI:
325         addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
326         return;
327      case ARMam2_RR:
328         //    addHRegUse(u, HRmRead, am->ARMam2.RR.base);
329         //    addHRegUse(u, HRmRead, am->ARMam2.RR.index);
330         //   return;
331      default:
332         vpanic("addRegUsage_ARMAmode2");
333   }
334}
335
336static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
337   switch (am->tag) {
338      case ARMam2_RI:
339         am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
340         return;
341      case ARMam2_RR:
342         //am->ARMam2.RR.base =lookupHRegRemap(m, am->ARMam2.RR.base);
343         //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index);
344         //return;
345      default:
346         vpanic("mapRegs_ARMAmode2");
347   }
348}
349
350
351/* --------- Mem AModes: Addressing Mode VFP --------- */
352
353ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
354   ARMAModeV* am = LibVEX_Alloc(sizeof(ARMAModeV));
355   vassert(simm11 >= -1020 && simm11 <= 1020);
356   vassert(0 == (simm11 & 3));
357   am->reg    = reg;
358   am->simm11 = simm11;
359   return am;
360}
361
362void ppARMAModeV ( ARMAModeV* am ) {
363   vex_printf("%d(", am->simm11);
364   ppHRegARM(am->reg);
365   vex_printf(")");
366}
367
368static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
369   addHRegUse(u, HRmRead, am->reg);
370}
371
372static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
373   am->reg = lookupHRegRemap(m, am->reg);
374}
375
376
377/* --------- Mem AModes: Addressing Mode Neon ------- */
378
379ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
380   ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
381   am->tag = ARMamN_RR;
382   am->ARMamN.RR.rN = rN;
383   am->ARMamN.RR.rM = rM;
384   return am;
385}
386
387ARMAModeN *mkARMAModeN_R ( HReg rN ) {
388   ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
389   am->tag = ARMamN_R;
390   am->ARMamN.R.rN = rN;
391   return am;
392}
393
394static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
395   if (am->tag == ARMamN_R) {
396      addHRegUse(u, HRmRead, am->ARMamN.R.rN);
397   } else {
398      addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
399      addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
400   }
401}
402
403static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
404   if (am->tag == ARMamN_R) {
405      am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
406   } else {
407      am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
408      am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
409   }
410}
411
412void ppARMAModeN ( ARMAModeN* am ) {
413   vex_printf("[");
414   if (am->tag == ARMamN_R) {
415      ppHRegARM(am->ARMamN.R.rN);
416   } else {
417      ppHRegARM(am->ARMamN.RR.rN);
418   }
419   vex_printf("]");
420   if (am->tag == ARMamN_RR) {
421      vex_printf(", ");
422      ppHRegARM(am->ARMamN.RR.rM);
423   }
424}
425
426
427/* --------- Reg or imm-8x4 operands --------- */
428
429static UInt ROR32 ( UInt x, UInt sh ) {
430   vassert(sh >= 0 && sh < 32);
431   if (sh == 0)
432      return x;
433   else
434      return (x << (32-sh)) | (x >> sh);
435}
436
437ARMRI84* ARMRI84_I84 ( UShort imm8, UShort imm4 ) {
438   ARMRI84* ri84          = LibVEX_Alloc(sizeof(ARMRI84));
439   ri84->tag              = ARMri84_I84;
440   ri84->ARMri84.I84.imm8 = imm8;
441   ri84->ARMri84.I84.imm4 = imm4;
442   vassert(imm8 >= 0 && imm8 <= 255);
443   vassert(imm4 >= 0 && imm4 <= 15);
444   return ri84;
445}
446ARMRI84* ARMRI84_R ( HReg reg ) {
447   ARMRI84* ri84       = LibVEX_Alloc(sizeof(ARMRI84));
448   ri84->tag           = ARMri84_R;
449   ri84->ARMri84.R.reg = reg;
450   return ri84;
451}
452
453void ppARMRI84 ( ARMRI84* ri84 ) {
454   switch (ri84->tag) {
455      case ARMri84_I84:
456         vex_printf("0x%x", ROR32(ri84->ARMri84.I84.imm8,
457                                  2 * ri84->ARMri84.I84.imm4));
458         break;
459      case ARMri84_R:
460         ppHRegARM(ri84->ARMri84.R.reg);
461         break;
462      default:
463         vassert(0);
464   }
465}
466
467static void addRegUsage_ARMRI84 ( HRegUsage* u, ARMRI84* ri84 ) {
468   switch (ri84->tag) {
469      case ARMri84_I84:
470         return;
471      case ARMri84_R:
472         addHRegUse(u, HRmRead, ri84->ARMri84.R.reg);
473         return;
474      default:
475         vpanic("addRegUsage_ARMRI84");
476   }
477}
478
479static void mapRegs_ARMRI84 ( HRegRemap* m, ARMRI84* ri84 ) {
480   switch (ri84->tag) {
481      case ARMri84_I84:
482         return;
483      case ARMri84_R:
484         ri84->ARMri84.R.reg = lookupHRegRemap(m, ri84->ARMri84.R.reg);
485         return;
486      default:
487         vpanic("mapRegs_ARMRI84");
488   }
489}
490
491
492/* --------- Reg or imm5 operands --------- */
493
494ARMRI5* ARMRI5_I5 ( UInt imm5 ) {
495   ARMRI5* ri5         = LibVEX_Alloc(sizeof(ARMRI5));
496   ri5->tag            = ARMri5_I5;
497   ri5->ARMri5.I5.imm5 = imm5;
498   vassert(imm5 > 0 && imm5 <= 31); // zero is not allowed
499   return ri5;
500}
501ARMRI5* ARMRI5_R ( HReg reg ) {
502   ARMRI5* ri5       = LibVEX_Alloc(sizeof(ARMRI5));
503   ri5->tag          = ARMri5_R;
504   ri5->ARMri5.R.reg = reg;
505   return ri5;
506}
507
508void ppARMRI5 ( ARMRI5* ri5 ) {
509   switch (ri5->tag) {
510      case ARMri5_I5:
511         vex_printf("%u", ri5->ARMri5.I5.imm5);
512         break;
513      case ARMri5_R:
514         ppHRegARM(ri5->ARMri5.R.reg);
515         break;
516      default:
517         vassert(0);
518   }
519}
520
521static void addRegUsage_ARMRI5 ( HRegUsage* u, ARMRI5* ri5 ) {
522   switch (ri5->tag) {
523      case ARMri5_I5:
524         return;
525      case ARMri5_R:
526         addHRegUse(u, HRmRead, ri5->ARMri5.R.reg);
527         return;
528      default:
529         vpanic("addRegUsage_ARMRI5");
530   }
531}
532
533static void mapRegs_ARMRI5 ( HRegRemap* m, ARMRI5* ri5 ) {
534   switch (ri5->tag) {
535      case ARMri5_I5:
536         return;
537      case ARMri5_R:
538         ri5->ARMri5.R.reg = lookupHRegRemap(m, ri5->ARMri5.R.reg);
539         return;
540      default:
541         vpanic("mapRegs_ARMRI5");
542   }
543}
544
/* -------- Neon Immediate operand --------- */
546
547ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
548   ARMNImm* i = LibVEX_Alloc(sizeof(ARMNImm));
549   i->type = type;
550   i->imm8 = imm8;
551   return i;
552}
553
554ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
555   int i, j;
556   ULong y, x = imm->imm8;
557   switch (imm->type) {
558      case 3:
559         x = x << 8;
560      case 2:
561         x = x << 8;
562      case 1:
563         x = x << 8;
564      case 0:
565         return (x << 32) | x;
566      case 5:
567      case 6:
568         if (imm->type == 5)
569            x = x << 8;
570         else
571            x = (x << 8) | x;
572      case 4:
573         x = (x << 16) | x;
574         return (x << 32) | x;
575      case 8:
576         x = (x << 8) | 0xFF;
577      case 7:
578         x = (x << 8) | 0xFF;
579         return (x << 32) | x;
580      case 9:
581         x = 0;
582         for (i = 7; i >= 0; i--) {
583            y = ((ULong)imm->imm8 >> i) & 1;
584            for (j = 0; j < 8; j++) {
585               x = (x << 1) | y;
586            }
587         }
588         return x;
589      case 10:
590         x |= (x & 0x80) << 5;
591         x |= ~(x & 0x40) << 5;
592         x &= 0x187F; /* 0001 1000 0111 1111 */
593         x |= (x & 0x40) << 4;
594         x |= (x & 0x40) << 3;
595         x |= (x & 0x40) << 2;
596         x |= (x & 0x40) << 1;
597         x = x << 19;
598         x = (x << 32) | x;
599         return x;
600      default:
601         vpanic("ARMNImm_to_Imm64");
602   }
603}
604
605ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
606   ARMNImm tmp;
607   if ((x & 0xFFFFFFFF) == (x >> 32)) {
608      if ((x & 0xFFFFFF00) == 0)
609         return ARMNImm_TI(0, x & 0xFF);
610      if ((x & 0xFFFF00FF) == 0)
611         return ARMNImm_TI(1, (x >> 8) & 0xFF);
612      if ((x & 0xFF00FFFF) == 0)
613         return ARMNImm_TI(2, (x >> 16) & 0xFF);
614      if ((x & 0x00FFFFFF) == 0)
615         return ARMNImm_TI(3, (x >> 24) & 0xFF);
616      if ((x & 0xFFFF00FF) == 0xFF)
617         return ARMNImm_TI(7, (x >> 8) & 0xFF);
618      if ((x & 0xFF00FFFF) == 0xFFFF)
619         return ARMNImm_TI(8, (x >> 16) & 0xFF);
620      if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
621         if ((x & 0xFF00) == 0)
622            return ARMNImm_TI(4, x & 0xFF);
623         if ((x & 0x00FF) == 0)
624            return ARMNImm_TI(5, (x >> 8) & 0xFF);
625         if ((x & 0xFF) == ((x >> 8) & 0xFF))
626            return ARMNImm_TI(6, x & 0xFF);
627      }
628      if ((x & 0x7FFFF) == 0) {
629         tmp.type = 10;
630         tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
631         if (ARMNImm_to_Imm64(&tmp) == x)
632            return ARMNImm_TI(tmp.type, tmp.imm8);
633      }
634   } else {
635      /* This can only be type 9. */
636      tmp.imm8 = (((x >> 56) & 1) << 7)
637               | (((x >> 48) & 1) << 6)
638               | (((x >> 40) & 1) << 5)
639               | (((x >> 32) & 1) << 4)
640               | (((x >> 24) & 1) << 3)
641               | (((x >> 16) & 1) << 2)
642               | (((x >>  8) & 1) << 1)
643               | (((x >>  0) & 1) << 0);
644      tmp.type = 9;
645      if (ARMNImm_to_Imm64 (&tmp) == x)
646         return ARMNImm_TI(tmp.type, tmp.imm8);
647   }
648   return NULL;
649}
650
651void ppARMNImm (ARMNImm* i) {
652   ULong x = ARMNImm_to_Imm64(i);
653   vex_printf("0x%llX%llX", x, x);
654}
655
656/* -- Register or scalar operand --- */
657
658ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
659{
660   ARMNRS *p = LibVEX_Alloc(sizeof(ARMNRS));
661   p->tag = tag;
662   p->reg = reg;
663   p->index = index;
664   return p;
665}
666
667void ppARMNRS(ARMNRS *p)
668{
669   ppHRegARM(p->reg);
670   if (p->tag == ARMNRS_Scalar) {
671      vex_printf("[%d]", p->index);
672   }
673}
674
675/* --------- Instructions. --------- */
676
677HChar* showARMAluOp ( ARMAluOp op ) {
678   switch (op) {
679      case ARMalu_ADD:  return "add";
680      case ARMalu_ADDS: return "adds";
681      case ARMalu_ADC:  return "adc";
682      case ARMalu_SUB:  return "sub";
683      case ARMalu_SUBS: return "subs";
684      case ARMalu_SBC:  return "sbc";
685      case ARMalu_AND:  return "and";
686      case ARMalu_BIC:  return "bic";
687      case ARMalu_OR:   return "orr";
688      case ARMalu_XOR:  return "xor";
689      default: vpanic("showARMAluOp");
690   }
691}
692
693HChar* showARMShiftOp ( ARMShiftOp op ) {
694   switch (op) {
695      case ARMsh_SHL: return "shl";
696      case ARMsh_SHR: return "shr";
697      case ARMsh_SAR: return "sar";
698      default: vpanic("showARMShiftOp");
699   }
700}
701
702HChar* showARMUnaryOp ( ARMUnaryOp op ) {
703   switch (op) {
704      case ARMun_NEG: return "neg";
705      case ARMun_NOT: return "not";
706      case ARMun_CLZ: return "clz";
707      default: vpanic("showARMUnaryOp");
708   }
709}
710
711HChar* showARMMulOp ( ARMMulOp op ) {
712   switch (op) {
713      case ARMmul_PLAIN: return "mul";
714      case ARMmul_ZX:    return "umull";
715      case ARMmul_SX:    return "smull";
716      default: vpanic("showARMMulOp");
717   }
718}
719
720HChar* showARMVfpOp ( ARMVfpOp op ) {
721   switch (op) {
722      case ARMvfp_ADD: return "add";
723      case ARMvfp_SUB: return "sub";
724      case ARMvfp_MUL: return "mul";
725      case ARMvfp_DIV: return "div";
726      default: vpanic("showARMVfpOp");
727   }
728}
729
730HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op ) {
731   switch (op) {
732      case ARMvfpu_COPY: return "cpy";
733      case ARMvfpu_NEG:  return "neg";
734      case ARMvfpu_ABS:  return "abs";
735      case ARMvfpu_SQRT: return "sqrt";
736      default: vpanic("showARMVfpUnaryOp");
737   }
738}
739
740HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
741   switch (op) {
742      case ARMneon_VAND: return "vand";
743      case ARMneon_VORR: return "vorr";
744      case ARMneon_VXOR: return "veor";
745      case ARMneon_VADD: return "vadd";
746      case ARMneon_VRHADDS: return "vrhadd";
747      case ARMneon_VRHADDU: return "vrhadd";
748      case ARMneon_VADDFP: return "vadd";
749      case ARMneon_VPADDFP: return "vpadd";
750      case ARMneon_VABDFP: return "vabd";
751      case ARMneon_VSUB: return "vsub";
752      case ARMneon_VSUBFP: return "vsub";
753      case ARMneon_VMINU: return "vmin";
754      case ARMneon_VMINS: return "vmin";
755      case ARMneon_VMINF: return "vmin";
756      case ARMneon_VMAXU: return "vmax";
757      case ARMneon_VMAXS: return "vmax";
758      case ARMneon_VMAXF: return "vmax";
759      case ARMneon_VQADDU: return "vqadd";
760      case ARMneon_VQADDS: return "vqadd";
761      case ARMneon_VQSUBU: return "vqsub";
762      case ARMneon_VQSUBS: return "vqsub";
763      case ARMneon_VCGTU:  return "vcgt";
764      case ARMneon_VCGTS:  return "vcgt";
765      case ARMneon_VCGTF:  return "vcgt";
766      case ARMneon_VCGEF:  return "vcgt";
767      case ARMneon_VCGEU:  return "vcge";
768      case ARMneon_VCGES:  return "vcge";
769      case ARMneon_VCEQ:  return "vceq";
770      case ARMneon_VCEQF:  return "vceq";
771      case ARMneon_VPADD:   return "vpadd";
772      case ARMneon_VPMINU:   return "vpmin";
773      case ARMneon_VPMINS:   return "vpmin";
774      case ARMneon_VPMINF:   return "vpmin";
775      case ARMneon_VPMAXU:   return "vpmax";
776      case ARMneon_VPMAXS:   return "vpmax";
777      case ARMneon_VPMAXF:   return "vpmax";
778      case ARMneon_VEXT:   return "vext";
779      case ARMneon_VMUL:   return "vmuli";
780      case ARMneon_VMULLU:   return "vmull";
781      case ARMneon_VMULLS:   return "vmull";
782      case ARMneon_VMULP:  return "vmul";
783      case ARMneon_VMULFP:  return "vmul";
784      case ARMneon_VMULLP:  return "vmul";
785      case ARMneon_VQDMULH: return "vqdmulh";
786      case ARMneon_VQRDMULH: return "vqrdmulh";
787      case ARMneon_VQDMULL: return "vqdmull";
788      case ARMneon_VTBL: return "vtbl";
789      case ARMneon_VRECPS: return "vrecps";
790      case ARMneon_VRSQRTS: return "vrecps";
791      /* ... */
792      default: vpanic("showARMNeonBinOp");
793   }
794}
795
796HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
797   switch (op) {
798      case ARMneon_VAND:
799      case ARMneon_VORR:
800      case ARMneon_VXOR:
801         return "";
802      case ARMneon_VADD:
803      case ARMneon_VSUB:
804      case ARMneon_VEXT:
805      case ARMneon_VMUL:
806      case ARMneon_VPADD:
807      case ARMneon_VTBL:
808      case ARMneon_VCEQ:
809         return ".i";
810      case ARMneon_VRHADDU:
811      case ARMneon_VMINU:
812      case ARMneon_VMAXU:
813      case ARMneon_VQADDU:
814      case ARMneon_VQSUBU:
815      case ARMneon_VCGTU:
816      case ARMneon_VCGEU:
817      case ARMneon_VMULLU:
818      case ARMneon_VPMINU:
819      case ARMneon_VPMAXU:
820         return ".u";
821      case ARMneon_VRHADDS:
822      case ARMneon_VMINS:
823      case ARMneon_VMAXS:
824      case ARMneon_VQADDS:
825      case ARMneon_VQSUBS:
826      case ARMneon_VCGTS:
827      case ARMneon_VCGES:
828      case ARMneon_VQDMULL:
829      case ARMneon_VMULLS:
830      case ARMneon_VPMINS:
831      case ARMneon_VPMAXS:
832      case ARMneon_VQDMULH:
833      case ARMneon_VQRDMULH:
834         return ".s";
835      case ARMneon_VMULP:
836      case ARMneon_VMULLP:
837         return ".p";
838      case ARMneon_VADDFP:
839      case ARMneon_VABDFP:
840      case ARMneon_VPADDFP:
841      case ARMneon_VSUBFP:
842      case ARMneon_VMULFP:
843      case ARMneon_VMINF:
844      case ARMneon_VMAXF:
845      case ARMneon_VPMINF:
846      case ARMneon_VPMAXF:
847      case ARMneon_VCGTF:
848      case ARMneon_VCGEF:
849      case ARMneon_VCEQF:
850      case ARMneon_VRECPS:
851      case ARMneon_VRSQRTS:
852         return ".f";
853      /* ... */
854      default: vpanic("showARMNeonBinOpDataType");
855   }
856}
857
858HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
859   switch (op) {
860      case ARMneon_COPY: return "vmov";
861      case ARMneon_COPYLS: return "vmov";
862      case ARMneon_COPYLU: return "vmov";
863      case ARMneon_COPYN: return "vmov";
864      case ARMneon_COPYQNSS: return "vqmovn";
865      case ARMneon_COPYQNUS: return "vqmovun";
866      case ARMneon_COPYQNUU: return "vqmovn";
867      case ARMneon_NOT: return "vmvn";
868      case ARMneon_EQZ: return "vceq";
869      case ARMneon_CNT: return "vcnt";
870      case ARMneon_CLS: return "vcls";
871      case ARMneon_CLZ: return "vclz";
872      case ARMneon_DUP: return "vdup";
873      case ARMneon_PADDLS: return "vpaddl";
874      case ARMneon_PADDLU: return "vpaddl";
875      case ARMneon_VQSHLNSS: return "vqshl";
876      case ARMneon_VQSHLNUU: return "vqshl";
877      case ARMneon_VQSHLNUS: return "vqshlu";
878      case ARMneon_REV16: return "vrev16";
879      case ARMneon_REV32: return "vrev32";
880      case ARMneon_REV64: return "vrev64";
881      case ARMneon_VCVTFtoU: return "vcvt";
882      case ARMneon_VCVTFtoS: return "vcvt";
883      case ARMneon_VCVTUtoF: return "vcvt";
884      case ARMneon_VCVTStoF: return "vcvt";
885      case ARMneon_VCVTFtoFixedU: return "vcvt";
886      case ARMneon_VCVTFtoFixedS: return "vcvt";
887      case ARMneon_VCVTFixedUtoF: return "vcvt";
888      case ARMneon_VCVTFixedStoF: return "vcvt";
889      case ARMneon_VCVTF32toF16: return "vcvt";
890      case ARMneon_VCVTF16toF32: return "vcvt";
891      case ARMneon_VRECIP: return "vrecip";
892      case ARMneon_VRECIPF: return "vrecipf";
893      case ARMneon_VNEGF: return "vneg";
894      case ARMneon_ABS: return "vabs";
895      case ARMneon_VABSFP: return "vabsfp";
896      case ARMneon_VRSQRTEFP: return "vrsqrtefp";
897      case ARMneon_VRSQRTE: return "vrsqrte";
898      /* ... */
899      default: vpanic("showARMNeonUnOp");
900   }
901}
902
903HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
904   switch (op) {
905      case ARMneon_COPY:
906      case ARMneon_NOT:
907         return "";
908      case ARMneon_COPYN:
909      case ARMneon_EQZ:
910      case ARMneon_CNT:
911      case ARMneon_DUP:
912      case ARMneon_REV16:
913      case ARMneon_REV32:
914      case ARMneon_REV64:
915         return ".i";
916      case ARMneon_COPYLU:
917      case ARMneon_PADDLU:
918      case ARMneon_COPYQNUU:
919      case ARMneon_VQSHLNUU:
920      case ARMneon_VRECIP:
921      case ARMneon_VRSQRTE:
922         return ".u";
923      case ARMneon_CLS:
924      case ARMneon_CLZ:
925      case ARMneon_COPYLS:
926      case ARMneon_PADDLS:
927      case ARMneon_COPYQNSS:
928      case ARMneon_COPYQNUS:
929      case ARMneon_VQSHLNSS:
930      case ARMneon_VQSHLNUS:
931      case ARMneon_ABS:
932         return ".s";
933      case ARMneon_VRECIPF:
934      case ARMneon_VNEGF:
935      case ARMneon_VABSFP:
936      case ARMneon_VRSQRTEFP:
937         return ".f";
938      case ARMneon_VCVTFtoU: return ".u32.f32";
939      case ARMneon_VCVTFtoS: return ".s32.f32";
940      case ARMneon_VCVTUtoF: return ".f32.u32";
941      case ARMneon_VCVTStoF: return ".f32.s32";
942      case ARMneon_VCVTF16toF32: return ".f32.f16";
943      case ARMneon_VCVTF32toF16: return ".f16.f32";
944      case ARMneon_VCVTFtoFixedU: return ".u32.f32";
945      case ARMneon_VCVTFtoFixedS: return ".s32.f32";
946      case ARMneon_VCVTFixedUtoF: return ".f32.u32";
947      case ARMneon_VCVTFixedStoF: return ".f32.s32";
948      /* ... */
949      default: vpanic("showARMNeonUnOpDataType");
950   }
951}
952
953HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
954   switch (op) {
955      case ARMneon_SETELEM: return "vmov";
956      case ARMneon_GETELEMU: return "vmov";
957      case ARMneon_GETELEMS: return "vmov";
958      case ARMneon_VDUP: return "vdup";
959      /* ... */
960      default: vpanic("showARMNeonUnarySOp");
961   }
962}
963
964HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
965   switch (op) {
966      case ARMneon_SETELEM:
967      case ARMneon_VDUP:
968         return ".i";
969      case ARMneon_GETELEMS:
970         return ".s";
971      case ARMneon_GETELEMU:
972         return ".u";
973      /* ... */
974      default: vpanic("showARMNeonUnarySOp");
975   }
976}
977
978HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
979   switch (op) {
980      case ARMneon_VSHL: return "vshl";
981      case ARMneon_VSAL: return "vshl";
982      case ARMneon_VQSHL: return "vqshl";
983      case ARMneon_VQSAL: return "vqshl";
984      /* ... */
985      default: vpanic("showARMNeonShiftOp");
986   }
987}
988
989HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
990   switch (op) {
991      case ARMneon_VSHL:
992      case ARMneon_VQSHL:
993         return ".u";
994      case ARMneon_VSAL:
995      case ARMneon_VQSAL:
996         return ".s";
997      /* ... */
998      default: vpanic("showARMNeonShiftOpDataType");
999   }
1000}
1001
1002HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
1003   switch (op) {
1004      case ARMneon_TRN: return "vtrn";
1005      case ARMneon_ZIP: return "vzip";
1006      case ARMneon_UZP: return "vuzp";
1007      /* ... */
1008      default: vpanic("showARMNeonDualOp");
1009   }
1010}
1011
1012HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
1013   switch (op) {
1014      case ARMneon_TRN:
1015      case ARMneon_ZIP:
1016      case ARMneon_UZP:
1017         return "i";
1018      /* ... */
1019      default: vpanic("showARMNeonDualOp");
1020   }
1021}
1022
1023static HChar* showARMNeonDataSize_wrk ( UInt size )
1024{
1025   switch (size) {
1026      case 0: return "8";
1027      case 1: return "16";
1028      case 2: return "32";
1029      case 3: return "64";
1030      default: vpanic("showARMNeonDataSize");
1031   }
1032}
1033
1034static HChar* showARMNeonDataSize ( ARMInstr* i )
1035{
1036   switch (i->tag) {
1037      case ARMin_NBinary:
1038         if (i->ARMin.NBinary.op == ARMneon_VEXT)
1039            return "8";
1040         if (i->ARMin.NBinary.op == ARMneon_VAND ||
1041             i->ARMin.NBinary.op == ARMneon_VORR ||
1042             i->ARMin.NBinary.op == ARMneon_VXOR)
1043            return "";
1044         return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
1045      case ARMin_NUnary:
1046         if (i->ARMin.NUnary.op == ARMneon_COPY ||
1047             i->ARMin.NUnary.op == ARMneon_NOT ||
1048             i->ARMin.NUnary.op == ARMneon_VCVTF32toF16||
1049             i->ARMin.NUnary.op == ARMneon_VCVTF16toF32||
1050             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
1051             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
1052             i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
1053             i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
1054             i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
1055             i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
1056             i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
1057             i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
1058            return "";
1059         if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
1060             i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
1061             i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
1062            UInt size;
1063            size = i->ARMin.NUnary.size;
1064            if (size & 0x40)
1065               return "64";
1066            if (size & 0x20)
1067               return "32";
1068            if (size & 0x10)
1069               return "16";
1070            if (size & 0x08)
1071               return "8";
1072            vpanic("showARMNeonDataSize");
1073         }
1074         return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
1075      case ARMin_NUnaryS:
1076         if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
1077            int size;
1078            size = i->ARMin.NUnaryS.size;
1079            if ((size & 1) == 1)
1080               return "8";
1081            if ((size & 3) == 2)
1082               return "16";
1083            if ((size & 7) == 4)
1084               return "32";
1085            vpanic("showARMNeonDataSize");
1086         }
1087         return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
1088      case ARMin_NShift:
1089         return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
1090      case ARMin_NDual:
1091         return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
1092      default:
1093         vpanic("showARMNeonDataSize");
1094   }
1095}
1096
1097ARMInstr* ARMInstr_Alu ( ARMAluOp op,
1098                         HReg dst, HReg argL, ARMRI84* argR ) {
1099   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1100   i->tag            = ARMin_Alu;
1101   i->ARMin.Alu.op   = op;
1102   i->ARMin.Alu.dst  = dst;
1103   i->ARMin.Alu.argL = argL;
1104   i->ARMin.Alu.argR = argR;
1105   return i;
1106}
1107ARMInstr* ARMInstr_Shift  ( ARMShiftOp op,
1108                            HReg dst, HReg argL, ARMRI5* argR ) {
1109   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1110   i->tag              = ARMin_Shift;
1111   i->ARMin.Shift.op   = op;
1112   i->ARMin.Shift.dst  = dst;
1113   i->ARMin.Shift.argL = argL;
1114   i->ARMin.Shift.argR = argR;
1115   return i;
1116}
1117ARMInstr* ARMInstr_Unary ( ARMUnaryOp op, HReg dst, HReg src ) {
1118   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1119   i->tag             = ARMin_Unary;
1120   i->ARMin.Unary.op  = op;
1121   i->ARMin.Unary.dst = dst;
1122   i->ARMin.Unary.src = src;
1123   return i;
1124}
1125ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg argL, ARMRI84* argR ) {
1126   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1127   i->tag                  = ARMin_CmpOrTst;
1128   i->ARMin.CmpOrTst.isCmp = isCmp;
1129   i->ARMin.CmpOrTst.argL  = argL;
1130   i->ARMin.CmpOrTst.argR  = argR;
1131   return i;
1132}
1133ARMInstr* ARMInstr_Mov ( HReg dst, ARMRI84* src ) {
1134   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1135   i->tag           = ARMin_Mov;
1136   i->ARMin.Mov.dst = dst;
1137   i->ARMin.Mov.src = src;
1138   return i;
1139}
1140ARMInstr* ARMInstr_Imm32  ( HReg dst, UInt imm32 ) {
1141   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1142   i->tag               = ARMin_Imm32;
1143   i->ARMin.Imm32.dst   = dst;
1144   i->ARMin.Imm32.imm32 = imm32;
1145   return i;
1146}
1147ARMInstr* ARMInstr_LdSt32 ( Bool isLoad, HReg rD, ARMAMode1* amode ) {
1148   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1149   i->tag                 = ARMin_LdSt32;
1150   i->ARMin.LdSt32.isLoad = isLoad;
1151   i->ARMin.LdSt32.rD     = rD;
1152   i->ARMin.LdSt32.amode  = amode;
1153   return i;
1154}
1155ARMInstr* ARMInstr_LdSt16 ( Bool isLoad, Bool signedLoad,
1156                            HReg rD, ARMAMode2* amode ) {
1157   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1158   i->tag                     = ARMin_LdSt16;
1159   i->ARMin.LdSt16.isLoad     = isLoad;
1160   i->ARMin.LdSt16.signedLoad = signedLoad;
1161   i->ARMin.LdSt16.rD         = rD;
1162   i->ARMin.LdSt16.amode      = amode;
1163   return i;
1164}
1165ARMInstr* ARMInstr_LdSt8U ( Bool isLoad, HReg rD, ARMAMode1* amode ) {
1166   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1167   i->tag                 = ARMin_LdSt8U;
1168   i->ARMin.LdSt8U.isLoad = isLoad;
1169   i->ARMin.LdSt8U.rD     = rD;
1170   i->ARMin.LdSt8U.amode  = amode;
1171   return i;
1172}
1173//extern ARMInstr* ARMInstr_Ld8S   ( HReg, ARMAMode2* );
1174ARMInstr* ARMInstr_Goto ( IRJumpKind jk, ARMCondCode cond, HReg gnext ) {
1175   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1176   i->tag              = ARMin_Goto;
1177   i->ARMin.Goto.jk    = jk;
1178   i->ARMin.Goto.cond  = cond;
1179   i->ARMin.Goto.gnext = gnext;
1180   return i;
1181}
1182ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) {
1183   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1184   i->tag             = ARMin_CMov;
1185   i->ARMin.CMov.cond = cond;
1186   i->ARMin.CMov.dst  = dst;
1187   i->ARMin.CMov.src  = src;
1188   vassert(cond != ARMcc_AL);
1189   return i;
1190}
1191ARMInstr* ARMInstr_Call ( ARMCondCode cond, HWord target, Int nArgRegs ) {
1192   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1193   i->tag                 = ARMin_Call;
1194   i->ARMin.Call.cond     = cond;
1195   i->ARMin.Call.target   = target;
1196   i->ARMin.Call.nArgRegs = nArgRegs;
1197   return i;
1198}
1199ARMInstr* ARMInstr_Mul ( ARMMulOp op ) {
1200   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1201   i->tag          = ARMin_Mul;
1202   i->ARMin.Mul.op = op;
1203   return i;
1204}
1205ARMInstr* ARMInstr_LdrEX ( Int szB ) {
1206   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1207   i->tag             = ARMin_LdrEX;
1208   i->ARMin.LdrEX.szB = szB;
1209   vassert(szB == 8 || szB == 4 || szB == 1);
1210   return i;
1211}
1212ARMInstr* ARMInstr_StrEX ( Int szB ) {
1213   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1214   i->tag             = ARMin_StrEX;
1215   i->ARMin.StrEX.szB = szB;
1216   vassert(szB == 8 || szB == 4 || szB == 1);
1217   return i;
1218}
1219ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg dD, ARMAModeV* am ) {
1220   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1221   i->tag                 = ARMin_VLdStD;
1222   i->ARMin.VLdStD.isLoad = isLoad;
1223   i->ARMin.VLdStD.dD     = dD;
1224   i->ARMin.VLdStD.amode  = am;
1225   return i;
1226}
1227ARMInstr* ARMInstr_VLdStS ( Bool isLoad, HReg fD, ARMAModeV* am ) {
1228   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1229   i->tag                 = ARMin_VLdStS;
1230   i->ARMin.VLdStS.isLoad = isLoad;
1231   i->ARMin.VLdStS.fD     = fD;
1232   i->ARMin.VLdStS.amode  = am;
1233   return i;
1234}
1235ARMInstr* ARMInstr_VAluD ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1236   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1237   i->tag              = ARMin_VAluD;
1238   i->ARMin.VAluD.op   = op;
1239   i->ARMin.VAluD.dst  = dst;
1240   i->ARMin.VAluD.argL = argL;
1241   i->ARMin.VAluD.argR = argR;
1242   return i;
1243}
1244ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1245   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1246   i->tag              = ARMin_VAluS;
1247   i->ARMin.VAluS.op   = op;
1248   i->ARMin.VAluS.dst  = dst;
1249   i->ARMin.VAluS.argL = argL;
1250   i->ARMin.VAluS.argR = argR;
1251   return i;
1252}
1253ARMInstr* ARMInstr_VUnaryD ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
1254   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1255   i->tag               = ARMin_VUnaryD;
1256   i->ARMin.VUnaryD.op  = op;
1257   i->ARMin.VUnaryD.dst = dst;
1258   i->ARMin.VUnaryD.src = src;
1259   return i;
1260}
1261ARMInstr* ARMInstr_VUnaryS ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
1262   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1263   i->tag               = ARMin_VUnaryS;
1264   i->ARMin.VUnaryS.op  = op;
1265   i->ARMin.VUnaryS.dst = dst;
1266   i->ARMin.VUnaryS.src = src;
1267   return i;
1268}
1269ARMInstr* ARMInstr_VCmpD ( HReg argL, HReg argR ) {
1270   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1271   i->tag              = ARMin_VCmpD;
1272   i->ARMin.VCmpD.argL = argL;
1273   i->ARMin.VCmpD.argR = argR;
1274   return i;
1275}
1276ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
1277   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1278   i->tag               = ARMin_VCMovD;
1279   i->ARMin.VCMovD.cond = cond;
1280   i->ARMin.VCMovD.dst  = dst;
1281   i->ARMin.VCMovD.src  = src;
1282   vassert(cond != ARMcc_AL);
1283   return i;
1284}
1285ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
1286   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1287   i->tag               = ARMin_VCMovS;
1288   i->ARMin.VCMovS.cond = cond;
1289   i->ARMin.VCMovS.dst  = dst;
1290   i->ARMin.VCMovS.src  = src;
1291   vassert(cond != ARMcc_AL);
1292   return i;
1293}
1294ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
1295   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1296   i->tag               = ARMin_VCvtSD;
1297   i->ARMin.VCvtSD.sToD = sToD;
1298   i->ARMin.VCvtSD.dst  = dst;
1299   i->ARMin.VCvtSD.src  = src;
1300   return i;
1301}
1302ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
1303   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1304   i->tag              = ARMin_VXferD;
1305   i->ARMin.VXferD.toD = toD;
1306   i->ARMin.VXferD.dD  = dD;
1307   i->ARMin.VXferD.rHi = rHi;
1308   i->ARMin.VXferD.rLo = rLo;
1309   return i;
1310}
1311ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
1312   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1313   i->tag              = ARMin_VXferS;
1314   i->ARMin.VXferS.toS = toS;
1315   i->ARMin.VXferS.fD  = fD;
1316   i->ARMin.VXferS.rLo = rLo;
1317   return i;
1318}
1319ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
1320                            HReg dst, HReg src ) {
1321   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1322   i->tag                = ARMin_VCvtID;
1323   i->ARMin.VCvtID.iToD  = iToD;
1324   i->ARMin.VCvtID.syned = syned;
1325   i->ARMin.VCvtID.dst   = dst;
1326   i->ARMin.VCvtID.src   = src;
1327   return i;
1328}
1329ARMInstr* ARMInstr_FPSCR ( Bool toFPSCR, HReg iReg ) {
1330   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1331   i->tag                 = ARMin_FPSCR;
1332   i->ARMin.FPSCR.toFPSCR = toFPSCR;
1333   i->ARMin.FPSCR.iReg    = iReg;
1334   return i;
1335}
1336ARMInstr* ARMInstr_MFence ( void ) {
1337   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1338   i->tag      = ARMin_MFence;
1339   return i;
1340}
1341
1342ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg dQ, ARMAModeN *amode ) {
1343   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1344   i->tag                  = ARMin_NLdStQ;
1345   i->ARMin.NLdStQ.isLoad  = isLoad;
1346   i->ARMin.NLdStQ.dQ      = dQ;
1347   i->ARMin.NLdStQ.amode   = amode;
1348   return i;
1349}
1350
1351ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
1352   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1353   i->tag                  = ARMin_NLdStD;
1354   i->ARMin.NLdStD.isLoad  = isLoad;
1355   i->ARMin.NLdStD.dD      = dD;
1356   i->ARMin.NLdStD.amode   = amode;
1357   return i;
1358}
1359
1360ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
1361                            UInt size, Bool Q ) {
1362   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1363   i->tag                = ARMin_NUnary;
1364   i->ARMin.NUnary.op   = op;
1365   i->ARMin.NUnary.src  = nQ;
1366   i->ARMin.NUnary.dst  = dQ;
1367   i->ARMin.NUnary.size = size;
1368   i->ARMin.NUnary.Q    = Q;
1369   return i;
1370}
1371
1372ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOp op, ARMNRS* dst, ARMNRS* src,
1373                             UInt size, Bool Q ) {
1374   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1375   i->tag                = ARMin_NUnaryS;
1376   i->ARMin.NUnaryS.op   = op;
1377   i->ARMin.NUnaryS.src  = src;
1378   i->ARMin.NUnaryS.dst  = dst;
1379   i->ARMin.NUnaryS.size = size;
1380   i->ARMin.NUnaryS.Q    = Q;
1381   return i;
1382}
1383
1384ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
1385                           UInt size, Bool Q ) {
1386   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1387   i->tag                = ARMin_NDual;
1388   i->ARMin.NDual.op   = op;
1389   i->ARMin.NDual.arg1 = nQ;
1390   i->ARMin.NDual.arg2 = mQ;
1391   i->ARMin.NDual.size = size;
1392   i->ARMin.NDual.Q    = Q;
1393   return i;
1394}
1395
1396ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
1397                             HReg dst, HReg argL, HReg argR,
1398                             UInt size, Bool Q ) {
1399   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1400   i->tag                = ARMin_NBinary;
1401   i->ARMin.NBinary.op   = op;
1402   i->ARMin.NBinary.argL = argL;
1403   i->ARMin.NBinary.argR = argR;
1404   i->ARMin.NBinary.dst  = dst;
1405   i->ARMin.NBinary.size = size;
1406   i->ARMin.NBinary.Q    = Q;
1407   return i;
1408}
1409
1410ARMInstr* ARMInstr_NeonImm (HReg dst, ARMNImm* imm ) {
1411   ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
1412   i->tag         = ARMin_NeonImm;
1413   i->ARMin.NeonImm.dst = dst;
1414   i->ARMin.NeonImm.imm = imm;
1415   return i;
1416}
1417
1418ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
1419   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1420   i->tag               = ARMin_NCMovQ;
1421   i->ARMin.NCMovQ.cond = cond;
1422   i->ARMin.NCMovQ.dst  = dst;
1423   i->ARMin.NCMovQ.src  = src;
1424   vassert(cond != ARMcc_AL);
1425   return i;
1426}
1427
1428ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
1429                            HReg dst, HReg argL, HReg argR,
1430                            UInt size, Bool Q ) {
1431   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
1432   i->tag                = ARMin_NShift;
1433   i->ARMin.NShift.op   = op;
1434   i->ARMin.NShift.argL = argL;
1435   i->ARMin.NShift.argR = argR;
1436   i->ARMin.NShift.dst  = dst;
1437   i->ARMin.NShift.size = size;
1438   i->ARMin.NShift.Q    = Q;
1439   return i;
1440}
1441
1442/* Helper copy-pasted from isel.c */
1443static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
1444{
1445   UInt i;
1446   for (i = 0; i < 16; i++) {
1447      if (0 == (u & 0xFFFFFF00)) {
1448         *u8 = u;
1449         *u4 = i;
1450         return True;
1451      }
1452      u = ROR32(u, 30);
1453   }
1454   vassert(i == 16);
1455   return False;
1456}
1457
1458ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
1459   UInt u8, u4;
1460   ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
1461   /* Try to generate single ADD if possible */
1462   if (fitsIn8x4(&u8, &u4, imm32)) {
1463      i->tag            = ARMin_Alu;
1464      i->ARMin.Alu.op   = ARMalu_ADD;
1465      i->ARMin.Alu.dst  = rD;
1466      i->ARMin.Alu.argL = rN;
1467      i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
1468   } else {
1469      i->tag               = ARMin_Add32;
1470      i->ARMin.Add32.rD    = rD;
1471      i->ARMin.Add32.rN    = rN;
1472      i->ARMin.Add32.imm32 = imm32;
1473   }
1474   return i;
1475}
1476
1477/* ... */
1478
1479void ppARMInstr ( ARMInstr* i ) {
1480   switch (i->tag) {
1481      case ARMin_Alu:
1482         vex_printf("%-4s  ", showARMAluOp(i->ARMin.Alu.op));
1483         ppHRegARM(i->ARMin.Alu.dst);
1484         vex_printf(", ");
1485         ppHRegARM(i->ARMin.Alu.argL);
1486         vex_printf(", ");
1487         ppARMRI84(i->ARMin.Alu.argR);
1488         return;
1489      case ARMin_Shift:
1490         vex_printf("%s   ", showARMShiftOp(i->ARMin.Shift.op));
1491         ppHRegARM(i->ARMin.Shift.dst);
1492         vex_printf(", ");
1493         ppHRegARM(i->ARMin.Shift.argL);
1494         vex_printf(", ");
1495         ppARMRI5(i->ARMin.Shift.argR);
1496         return;
1497      case ARMin_Unary:
1498         vex_printf("%s   ", showARMUnaryOp(i->ARMin.Unary.op));
1499         ppHRegARM(i->ARMin.Unary.dst);
1500         vex_printf(", ");
1501         ppHRegARM(i->ARMin.Unary.src);
1502         return;
1503      case ARMin_CmpOrTst:
1504         vex_printf("%s   ", i->ARMin.CmpOrTst.isCmp ? "cmp" : "tst");
1505         ppHRegARM(i->ARMin.CmpOrTst.argL);
1506         vex_printf(", ");
1507         ppARMRI84(i->ARMin.CmpOrTst.argR);
1508         return;
1509      case ARMin_Mov:
1510         vex_printf("mov   ");
1511         ppHRegARM(i->ARMin.Mov.dst);
1512         vex_printf(", ");
1513         ppARMRI84(i->ARMin.Mov.src);
1514         return;
1515      case ARMin_Imm32:
1516         vex_printf("imm   ");
1517         ppHRegARM(i->ARMin.Imm32.dst);
1518         vex_printf(", 0x%x", i->ARMin.Imm32.imm32);
1519         return;
1520      case ARMin_LdSt32:
1521         if (i->ARMin.LdSt32.isLoad) {
1522            vex_printf("ldr   ");
1523            ppHRegARM(i->ARMin.LdSt32.rD);
1524            vex_printf(", ");
1525            ppARMAMode1(i->ARMin.LdSt32.amode);
1526         } else {
1527            vex_printf("str   ");
1528            ppARMAMode1(i->ARMin.LdSt32.amode);
1529            vex_printf(", ");
1530            ppHRegARM(i->ARMin.LdSt32.rD);
1531         }
1532         return;
1533      case ARMin_LdSt16:
1534         if (i->ARMin.LdSt16.isLoad) {
1535            vex_printf("%s", i->ARMin.LdSt16.signedLoad
1536                                ? "ldrsh " : "ldrh  " );
1537            ppHRegARM(i->ARMin.LdSt16.rD);
1538            vex_printf(", ");
1539            ppARMAMode2(i->ARMin.LdSt16.amode);
1540         } else {
1541            vex_printf("strh  ");
1542            ppARMAMode2(i->ARMin.LdSt16.amode);
1543            vex_printf(", ");
1544            ppHRegARM(i->ARMin.LdSt16.rD);
1545         }
1546         return;
1547      case ARMin_LdSt8U:
1548         if (i->ARMin.LdSt8U.isLoad) {
1549            vex_printf("ldrb  ");
1550            ppHRegARM(i->ARMin.LdSt8U.rD);
1551            vex_printf(", ");
1552            ppARMAMode1(i->ARMin.LdSt8U.amode);
1553         } else {
1554            vex_printf("strb  ");
1555            ppARMAMode1(i->ARMin.LdSt8U.amode);
1556            vex_printf(", ");
1557            ppHRegARM(i->ARMin.LdSt8U.rD);
1558         }
1559         return;
1560      case ARMin_Ld8S:
1561         goto unhandled;
1562      case ARMin_Goto:
1563         if (i->ARMin.Goto.cond != ARMcc_AL) {
1564            vex_printf("if (%%cpsr.%s) { ",
1565                       showARMCondCode(i->ARMin.Goto.cond));
1566         } else {
1567            vex_printf("if (1) { ");
1568         }
1569         if (i->ARMin.Goto.jk != Ijk_Boring
1570             && i->ARMin.Goto.jk != Ijk_Call
1571             && i->ARMin.Goto.jk != Ijk_Ret) {
1572            vex_printf("mov r8, $");
1573            ppIRJumpKind(i->ARMin.Goto.jk);
1574            vex_printf(" ; ");
1575         }
1576         vex_printf("mov r0, ");
1577         ppHRegARM(i->ARMin.Goto.gnext);
1578         vex_printf(" ; bx r14");
1579         if (i->ARMin.Goto.cond != ARMcc_AL) {
1580            vex_printf(" }");
1581         } else {
1582            vex_printf(" }");
1583         }
1584         return;
1585      case ARMin_CMov:
1586         vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond));
1587         ppHRegARM(i->ARMin.CMov.dst);
1588         vex_printf(", ");
1589         ppARMRI84(i->ARMin.CMov.src);
1590         return;
1591      case ARMin_Call:
1592         vex_printf("call%s  ",
1593                    i->ARMin.Call.cond==ARMcc_AL
1594                       ? "" : showARMCondCode(i->ARMin.Call.cond));
1595         vex_printf("0x%lx [nArgRegs=%d]",
1596                    i->ARMin.Call.target, i->ARMin.Call.nArgRegs);
1597         return;
1598      case ARMin_Mul:
1599         vex_printf("%-5s ", showARMMulOp(i->ARMin.Mul.op));
1600         if (i->ARMin.Mul.op == ARMmul_PLAIN) {
1601            vex_printf("r0, r2, r3");
1602         } else {
1603            vex_printf("r1:r0, r2, r3");
1604         }
1605         return;
1606      case ARMin_LdrEX:
1607         if (i->ARMin.LdrEX.szB == 8) {
1608            vex_printf("ldrexd r2, r3, [r0]");
1609         } else {
1610            vex_printf("ldrex%s ", i->ARMin.LdrEX.szB == 1 ? "b"
1611                                 : i->ARMin.LdrEX.szB == 2 ? "h" : "");
1612            vex_printf("r2, [r0]");
1613         }
1614         return;
1615      case ARMin_StrEX:
1616         if (i->ARMin.StrEX.szB == 8) {
1617            vex_printf("strexd r1, r2, r3, [r0]");
1618         } else {
1619            vex_printf("strex%s ", i->ARMin.StrEX.szB == 1 ? "b"
1620                                 : i->ARMin.StrEX.szB == 2 ? "h" : "");
1621            vex_printf("r1, r2, [r0]");
1622         }
1623         return;
1624      case ARMin_VLdStD:
1625         if (i->ARMin.VLdStD.isLoad) {
1626            vex_printf("fldd  ");
1627            ppHRegARM(i->ARMin.VLdStD.dD);
1628            vex_printf(", ");
1629            ppARMAModeV(i->ARMin.VLdStD.amode);
1630         } else {
1631            vex_printf("fstd  ");
1632            ppARMAModeV(i->ARMin.VLdStD.amode);
1633            vex_printf(", ");
1634            ppHRegARM(i->ARMin.VLdStD.dD);
1635         }
1636         return;
1637      case ARMin_VLdStS:
1638         if (i->ARMin.VLdStS.isLoad) {
1639            vex_printf("flds  ");
1640            ppHRegARM(i->ARMin.VLdStS.fD);
1641            vex_printf(", ");
1642            ppARMAModeV(i->ARMin.VLdStS.amode);
1643         } else {
1644            vex_printf("fsts  ");
1645            ppARMAModeV(i->ARMin.VLdStS.amode);
1646            vex_printf(", ");
1647            ppHRegARM(i->ARMin.VLdStS.fD);
1648         }
1649         return;
1650      case ARMin_VAluD:
1651         vex_printf("f%-3sd ", showARMVfpOp(i->ARMin.VAluD.op));
1652         ppHRegARM(i->ARMin.VAluD.dst);
1653         vex_printf(", ");
1654         ppHRegARM(i->ARMin.VAluD.argL);
1655         vex_printf(", ");
1656         ppHRegARM(i->ARMin.VAluD.argR);
1657         return;
1658      case ARMin_VAluS:
1659         vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
1660         ppHRegARM(i->ARMin.VAluS.dst);
1661         vex_printf(", ");
1662         ppHRegARM(i->ARMin.VAluS.argL);
1663         vex_printf(", ");
1664         ppHRegARM(i->ARMin.VAluS.argR);
1665         return;
1666      case ARMin_VUnaryD:
1667         vex_printf("f%-3sd ", showARMVfpUnaryOp(i->ARMin.VUnaryD.op));
1668         ppHRegARM(i->ARMin.VUnaryD.dst);
1669         vex_printf(", ");
1670         ppHRegARM(i->ARMin.VUnaryD.src);
1671         return;
1672      case ARMin_VUnaryS:
1673         vex_printf("f%-3ss ", showARMVfpUnaryOp(i->ARMin.VUnaryS.op));
1674         ppHRegARM(i->ARMin.VUnaryS.dst);
1675         vex_printf(", ");
1676         ppHRegARM(i->ARMin.VUnaryS.src);
1677         return;
1678      case ARMin_VCmpD:
1679         vex_printf("fcmpd ");
1680         ppHRegARM(i->ARMin.VCmpD.argL);
1681         vex_printf(", ");
1682         ppHRegARM(i->ARMin.VCmpD.argR);
1683         vex_printf(" ; fmstat");
1684         return;
1685      case ARMin_VCMovD:
1686         vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
1687         ppHRegARM(i->ARMin.VCMovD.dst);
1688         vex_printf(", ");
1689         ppHRegARM(i->ARMin.VCMovD.src);
1690         return;
1691      case ARMin_VCMovS:
1692         vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
1693         ppHRegARM(i->ARMin.VCMovS.dst);
1694         vex_printf(", ");
1695         ppHRegARM(i->ARMin.VCMovS.src);
1696         return;
1697      case ARMin_VCvtSD:
1698         vex_printf("fcvt%s ", i->ARMin.VCvtSD.sToD ? "ds" : "sd");
1699         ppHRegARM(i->ARMin.VCvtSD.dst);
1700         vex_printf(", ");
1701         ppHRegARM(i->ARMin.VCvtSD.src);
1702         return;
1703      case ARMin_VXferD:
1704         vex_printf("vmov  ");
1705         if (i->ARMin.VXferD.toD) {
1706            ppHRegARM(i->ARMin.VXferD.dD);
1707            vex_printf(", ");
1708            ppHRegARM(i->ARMin.VXferD.rLo);
1709            vex_printf(", ");
1710            ppHRegARM(i->ARMin.VXferD.rHi);
1711         } else {
1712            ppHRegARM(i->ARMin.VXferD.rLo);
1713            vex_printf(", ");
1714            ppHRegARM(i->ARMin.VXferD.rHi);
1715            vex_printf(", ");
1716            ppHRegARM(i->ARMin.VXferD.dD);
1717         }
1718         return;
1719      case ARMin_VXferS:
1720         vex_printf("vmov  ");
1721         if (i->ARMin.VXferS.toS) {
1722            ppHRegARM(i->ARMin.VXferS.fD);
1723            vex_printf(", ");
1724            ppHRegARM(i->ARMin.VXferS.rLo);
1725         } else {
1726            ppHRegARM(i->ARMin.VXferS.rLo);
1727            vex_printf(", ");
1728            ppHRegARM(i->ARMin.VXferS.fD);
1729         }
1730         return;
1731      case ARMin_VCvtID: {
1732         HChar* nm = "?";
1733         if (i->ARMin.VCvtID.iToD) {
1734            nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
1735         } else {
1736            nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
1737         }
1738         vex_printf("%s ", nm);
1739         ppHRegARM(i->ARMin.VCvtID.dst);
1740         vex_printf(", ");
1741         ppHRegARM(i->ARMin.VCvtID.src);
1742         return;
1743      }
1744      case ARMin_FPSCR:
1745         if (i->ARMin.FPSCR.toFPSCR) {
1746            vex_printf("fmxr  fpscr, ");
1747            ppHRegARM(i->ARMin.FPSCR.iReg);
1748         } else {
1749            vex_printf("fmrx  ");
1750            ppHRegARM(i->ARMin.FPSCR.iReg);
1751            vex_printf(", fpscr");
1752         }
1753         return;
1754      case ARMin_MFence:
1755         vex_printf("mfence (mcr 15,0,r0,c7,c10,4; 15,0,r0,c7,c10,5; "
1756                    "15,0,r0,c7,c5,4)");
1757         return;
1758      case ARMin_NLdStQ:
1759         if (i->ARMin.NLdStQ.isLoad)
1760            vex_printf("vld1.32 {");
1761         else
1762            vex_printf("vst1.32 {");
1763         ppHRegARM(i->ARMin.NLdStQ.dQ);
1764         vex_printf("} ");
1765         ppARMAModeN(i->ARMin.NLdStQ.amode);
1766         return;
1767      case ARMin_NLdStD:
1768         if (i->ARMin.NLdStD.isLoad)
1769            vex_printf("vld1.32 {");
1770         else
1771            vex_printf("vst1.32 {");
1772         ppHRegARM(i->ARMin.NLdStD.dD);
1773         vex_printf("} ");
1774         ppARMAModeN(i->ARMin.NLdStD.amode);
1775         return;
1776      case ARMin_NUnary:
1777         vex_printf("%s%s%s  ",
1778                    showARMNeonUnOp(i->ARMin.NUnary.op),
1779                    showARMNeonUnOpDataType(i->ARMin.NUnary.op),
1780                    showARMNeonDataSize(i));
1781         ppHRegARM(i->ARMin.NUnary.dst);
1782         vex_printf(", ");
1783         ppHRegARM(i->ARMin.NUnary.src);
1784         if (i->ARMin.NUnary.op == ARMneon_EQZ)
1785            vex_printf(", #0");
1786         if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
1787             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
1788             i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
1789             i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
1790            vex_printf(", #%d", i->ARMin.NUnary.size);
1791         }
1792         if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
1793             i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
1794             i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
1795            UInt size;
1796            size = i->ARMin.NUnary.size;
1797            if (size & 0x40) {
1798               vex_printf(", #%d", size - 64);
1799            } else if (size & 0x20) {
1800               vex_printf(", #%d", size - 32);
1801            } else if (size & 0x10) {
1802               vex_printf(", #%d", size - 16);
1803            } else if (size & 0x08) {
1804               vex_printf(", #%d", size - 8);
1805            }
1806         }
1807         return;
1808      case ARMin_NUnaryS:
1809         vex_printf("%s%s%s  ",
1810                    showARMNeonUnOpS(i->ARMin.NUnary.op),
1811                    showARMNeonUnOpSDataType(i->ARMin.NUnary.op),
1812                    showARMNeonDataSize(i));
1813         ppARMNRS(i->ARMin.NUnaryS.dst);
1814         vex_printf(", ");
1815         ppARMNRS(i->ARMin.NUnaryS.src);
1816         return;
1817      case ARMin_NShift:
1818         vex_printf("%s%s%s  ",
1819                    showARMNeonShiftOp(i->ARMin.NShift.op),
1820                    showARMNeonShiftOpDataType(i->ARMin.NShift.op),
1821                    showARMNeonDataSize(i));
1822         ppHRegARM(i->ARMin.NShift.dst);
1823         vex_printf(", ");
1824         ppHRegARM(i->ARMin.NShift.argL);
1825         vex_printf(", ");
1826         ppHRegARM(i->ARMin.NShift.argR);
1827         return;
1828      case ARMin_NDual:
1829         vex_printf("%s%s%s  ",
1830                    showARMNeonDualOp(i->ARMin.NDual.op),
1831                    showARMNeonDualOpDataType(i->ARMin.NDual.op),
1832                    showARMNeonDataSize(i));
1833         ppHRegARM(i->ARMin.NDual.arg1);
1834         vex_printf(", ");
1835         ppHRegARM(i->ARMin.NDual.arg2);
1836         return;
1837      case ARMin_NBinary:
1838         vex_printf("%s%s%s",
1839                    showARMNeonBinOp(i->ARMin.NBinary.op),
1840                    showARMNeonBinOpDataType(i->ARMin.NBinary.op),
1841                    showARMNeonDataSize(i));
1842         vex_printf("  ");
1843         ppHRegARM(i->ARMin.NBinary.dst);
1844         vex_printf(", ");
1845         ppHRegARM(i->ARMin.NBinary.argL);
1846         vex_printf(", ");
1847         ppHRegARM(i->ARMin.NBinary.argR);
1848         return;
1849      case ARMin_NeonImm:
1850         vex_printf("vmov  ");
1851         ppHRegARM(i->ARMin.NeonImm.dst);
1852         vex_printf(", ");
1853         ppARMNImm(i->ARMin.NeonImm.imm);
1854         return;
1855      case ARMin_NCMovQ:
1856         vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
1857         ppHRegARM(i->ARMin.NCMovQ.dst);
1858         vex_printf(", ");
1859         ppHRegARM(i->ARMin.NCMovQ.src);
1860         return;
1861      case ARMin_Add32:
1862         vex_printf("add32 ");
1863         ppHRegARM(i->ARMin.Add32.rD);
1864         vex_printf(", ");
1865         ppHRegARM(i->ARMin.Add32.rN);
1866         vex_printf(", ");
1867         vex_printf("%d", i->ARMin.Add32.imm32);
1868         return;
1869      default:
1870      unhandled:
1871         vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
1872         vpanic("ppARMInstr(1)");
1873         return;
1874   }
1875}
1876
1877
1878/* --------- Helpers for register allocation. --------- */
1879
1880void getRegUsage_ARMInstr ( HRegUsage* u, ARMInstr* i, Bool mode64 )
1881{
1882   vassert(mode64 == False);
1883   initHRegUsage(u);
1884   switch (i->tag) {
1885      case ARMin_Alu:
1886         addHRegUse(u, HRmWrite, i->ARMin.Alu.dst);
1887         addHRegUse(u, HRmRead, i->ARMin.Alu.argL);
1888         addRegUsage_ARMRI84(u, i->ARMin.Alu.argR);
1889         return;
1890      case ARMin_Shift:
1891         addHRegUse(u, HRmWrite, i->ARMin.Shift.dst);
1892         addHRegUse(u, HRmRead, i->ARMin.Shift.argL);
1893         addRegUsage_ARMRI5(u, i->ARMin.Shift.argR);
1894         return;
1895      case ARMin_Unary:
1896         addHRegUse(u, HRmWrite, i->ARMin.Unary.dst);
1897         addHRegUse(u, HRmRead, i->ARMin.Unary.src);
1898         return;
1899      case ARMin_CmpOrTst:
1900         addHRegUse(u, HRmRead, i->ARMin.CmpOrTst.argL);
1901         addRegUsage_ARMRI84(u, i->ARMin.CmpOrTst.argR);
1902         return;
1903      case ARMin_Mov:
1904         addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
1905         addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
1906         return;
1907      case ARMin_Imm32:
1908         addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
1909         return;
1910      case ARMin_LdSt32:
1911         addRegUsage_ARMAMode1(u, i->ARMin.LdSt32.amode);
1912         if (i->ARMin.LdSt32.isLoad) {
1913            addHRegUse(u, HRmWrite, i->ARMin.LdSt32.rD);
1914         } else {
1915            addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
1916         }
1917         return;
1918      case ARMin_LdSt16:
1919         addRegUsage_ARMAMode2(u, i->ARMin.LdSt16.amode);
1920         if (i->ARMin.LdSt16.isLoad) {
1921            addHRegUse(u, HRmWrite, i->ARMin.LdSt16.rD);
1922         } else {
1923            addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
1924         }
1925         return;
1926      case ARMin_LdSt8U:
1927         addRegUsage_ARMAMode1(u, i->ARMin.LdSt8U.amode);
1928         if (i->ARMin.LdSt8U.isLoad) {
1929            addHRegUse(u, HRmWrite, i->ARMin.LdSt8U.rD);
1930         } else {
1931            addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
1932         }
1933         return;
1934      case ARMin_Ld8S:
1935         goto unhandled;
1936      case ARMin_Goto:
1937         /* reads the reg holding the next guest addr */
1938         addHRegUse(u, HRmRead, i->ARMin.Goto.gnext);
1939         /* writes it to the standard integer return register */
1940         addHRegUse(u, HRmWrite, hregARM_R0());
1941         /* possibly messes with the baseblock pointer */
1942         if (i->ARMin.Goto.jk != Ijk_Boring
1943             && i->ARMin.Goto.jk != Ijk_Call
1944             && i->ARMin.Goto.jk != Ijk_Ret)
1945            /* note, this is irrelevant since r8 is not actually
1946               available to the allocator.  But still .. */
1947            addHRegUse(u, HRmWrite, hregARM_R8());
1948         return;
1949      case ARMin_CMov:
1950         addHRegUse(u, HRmWrite, i->ARMin.CMov.dst);
1951         addHRegUse(u, HRmRead,  i->ARMin.CMov.dst);
1952         addRegUsage_ARMRI84(u, i->ARMin.CMov.src);
1953         return;
1954      case ARMin_Call:
1955         /* logic and comments copied/modified from x86 back end */
1956         /* This is a bit subtle. */
1957         /* First off, claim it trashes all the caller-saved regs
1958            which fall within the register allocator's jurisdiction.
1959            These I believe to be r0,1,2,3.  If it turns out that r9
1960            is also caller-saved, then we'll have to add that here
1961            too. */
1962         addHRegUse(u, HRmWrite, hregARM_R0());
1963         addHRegUse(u, HRmWrite, hregARM_R1());
1964         addHRegUse(u, HRmWrite, hregARM_R2());
1965         addHRegUse(u, HRmWrite, hregARM_R3());
1966         /* Now we have to state any parameter-carrying registers
1967            which might be read.  This depends on nArgRegs. */
1968         switch (i->ARMin.Call.nArgRegs) {
1969            case 4: addHRegUse(u, HRmRead, hregARM_R3()); /*fallthru*/
1970            case 3: addHRegUse(u, HRmRead, hregARM_R2()); /*fallthru*/
1971            case 2: addHRegUse(u, HRmRead, hregARM_R1()); /*fallthru*/
1972            case 1: addHRegUse(u, HRmRead, hregARM_R0()); break;
1973            case 0: break;
1974            default: vpanic("getRegUsage_ARM:Call:regparms");
1975         }
1976         /* Finally, there is the issue that the insn trashes a
1977            register because the literal target address has to be
1978            loaded into a register.  Fortunately, for the nArgRegs=
1979            0/1/2/3 case, we can use r0, r1, r2 or r3 respectively, so
1980            this does not cause any further damage.  For the
1981            nArgRegs=4 case, we'll have to choose another register
1982            arbitrarily since all the caller saved regs are used for
1983            parameters, and so we might as well choose r11.
1984            */
1985         if (i->ARMin.Call.nArgRegs == 4)
1986            addHRegUse(u, HRmWrite, hregARM_R11());
1987         /* Upshot of this is that the assembler really must observe
1988            the here-stated convention of which register to use as an
1989            address temporary, depending on nArgRegs: 0==r0,
1990            1==r1, 2==r2, 3==r3, 4==r11 */
1991         return;
1992      case ARMin_Mul:
1993         addHRegUse(u, HRmRead, hregARM_R2());
1994         addHRegUse(u, HRmRead, hregARM_R3());
1995         addHRegUse(u, HRmWrite, hregARM_R0());
1996         if (i->ARMin.Mul.op != ARMmul_PLAIN)
1997            addHRegUse(u, HRmWrite, hregARM_R1());
1998         return;
1999      case ARMin_LdrEX:
2000         addHRegUse(u, HRmRead, hregARM_R0());
2001         addHRegUse(u, HRmWrite, hregARM_R2());
2002         if (i->ARMin.LdrEX.szB == 8)
2003            addHRegUse(u, HRmWrite, hregARM_R3());
2004         return;
2005      case ARMin_StrEX:
2006         addHRegUse(u, HRmRead, hregARM_R0());
2007         addHRegUse(u, HRmWrite, hregARM_R1());
2008         addHRegUse(u, HRmRead, hregARM_R2());
2009         if (i->ARMin.StrEX.szB == 8)
2010            addHRegUse(u, HRmRead, hregARM_R3());
2011         return;
2012      case ARMin_VLdStD:
2013         addRegUsage_ARMAModeV(u, i->ARMin.VLdStD.amode);
2014         if (i->ARMin.VLdStD.isLoad) {
2015            addHRegUse(u, HRmWrite, i->ARMin.VLdStD.dD);
2016         } else {
2017            addHRegUse(u, HRmRead, i->ARMin.VLdStD.dD);
2018         }
2019         return;
2020      case ARMin_VLdStS:
2021         addRegUsage_ARMAModeV(u, i->ARMin.VLdStS.amode);
2022         if (i->ARMin.VLdStS.isLoad) {
2023            addHRegUse(u, HRmWrite, i->ARMin.VLdStS.fD);
2024         } else {
2025            addHRegUse(u, HRmRead, i->ARMin.VLdStS.fD);
2026         }
2027         return;
2028      case ARMin_VAluD:
2029         addHRegUse(u, HRmWrite, i->ARMin.VAluD.dst);
2030         addHRegUse(u, HRmRead, i->ARMin.VAluD.argL);
2031         addHRegUse(u, HRmRead, i->ARMin.VAluD.argR);
2032         return;
2033      case ARMin_VAluS:
2034         addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
2035         addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
2036         addHRegUse(u, HRmRead, i->ARMin.VAluS.argR);
2037         return;
2038      case ARMin_VUnaryD:
2039         addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
2040         addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
2041         return;
2042      case ARMin_VUnaryS:
2043         addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
2044         addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
2045         return;
2046      case ARMin_VCmpD:
2047         addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
2048         addHRegUse(u, HRmRead, i->ARMin.VCmpD.argR);
2049         return;
2050      case ARMin_VCMovD:
2051         addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
2052         addHRegUse(u, HRmRead,  i->ARMin.VCMovD.dst);
2053         addHRegUse(u, HRmRead,  i->ARMin.VCMovD.src);
2054         return;
2055      case ARMin_VCMovS:
2056         addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
2057         addHRegUse(u, HRmRead,  i->ARMin.VCMovS.dst);
2058         addHRegUse(u, HRmRead,  i->ARMin.VCMovS.src);
2059         return;
2060      case ARMin_VCvtSD:
2061         addHRegUse(u, HRmWrite, i->ARMin.VCvtSD.dst);
2062         addHRegUse(u, HRmRead,  i->ARMin.VCvtSD.src);
2063         return;
2064      case ARMin_VXferD:
2065         if (i->ARMin.VXferD.toD) {
2066            addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
2067            addHRegUse(u, HRmRead,  i->ARMin.VXferD.rHi);
2068            addHRegUse(u, HRmRead,  i->ARMin.VXferD.rLo);
2069         } else {
2070            addHRegUse(u, HRmRead,  i->ARMin.VXferD.dD);
2071            addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi);
2072            addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo);
2073         }
2074         return;
2075      case ARMin_VXferS:
2076         if (i->ARMin.VXferS.toS) {
2077            addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD);
2078            addHRegUse(u, HRmRead,  i->ARMin.VXferS.rLo);
2079         } else {
2080            addHRegUse(u, HRmRead,  i->ARMin.VXferS.fD);
2081            addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo);
2082         }
2083         return;
2084      case ARMin_VCvtID:
2085         addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
2086         addHRegUse(u, HRmRead,  i->ARMin.VCvtID.src);
2087         return;
2088      case ARMin_FPSCR:
2089         if (i->ARMin.FPSCR.toFPSCR)
2090            addHRegUse(u, HRmRead, i->ARMin.FPSCR.iReg);
2091         else
2092            addHRegUse(u, HRmWrite, i->ARMin.FPSCR.iReg);
2093         return;
2094      case ARMin_MFence:
2095         return;
2096      case ARMin_NLdStQ:
2097         if (i->ARMin.NLdStQ.isLoad)
2098            addHRegUse(u, HRmWrite, i->ARMin.NLdStQ.dQ);
2099         else
2100            addHRegUse(u, HRmRead, i->ARMin.NLdStQ.dQ);
2101         addRegUsage_ARMAModeN(u, i->ARMin.NLdStQ.amode);
2102         return;
2103      case ARMin_NLdStD:
2104         if (i->ARMin.NLdStD.isLoad)
2105            addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
2106         else
2107            addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
2108         addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
2109         return;
2110      case ARMin_NUnary:
2111         addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
2112         addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
2113         return;
2114      case ARMin_NUnaryS:
2115         addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
2116         addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
2117         return;
2118      case ARMin_NShift:
2119         addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
2120         addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
2121         addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
2122         return;
2123      case ARMin_NDual:
2124         addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
2125         addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
2126         addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
2127         addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
2128         return;
2129      case ARMin_NBinary:
2130         addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
2131         /* TODO: sometimes dst is also being read! */
2132         // XXX fix this
2133         addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
2134         addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
2135         return;
2136      case ARMin_NeonImm:
2137         addHRegUse(u, HRmWrite, i->ARMin.NeonImm.dst);
2138         return;
2139      case ARMin_NCMovQ:
2140         addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
2141         addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.dst);
2142         addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.src);
2143         return;
2144      case ARMin_Add32:
2145         addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
2146         addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
2147         return;
2148      unhandled:
2149      default:
2150         ppARMInstr(i);
2151         vpanic("getRegUsage_ARMInstr");
2152   }
2153}
2154
2155
2156void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
2157{
2158   vassert(mode64 == False);
2159   switch (i->tag) {
2160      case ARMin_Alu:
2161         i->ARMin.Alu.dst = lookupHRegRemap(m, i->ARMin.Alu.dst);
2162         i->ARMin.Alu.argL = lookupHRegRemap(m, i->ARMin.Alu.argL);
2163         mapRegs_ARMRI84(m, i->ARMin.Alu.argR);
2164         return;
2165      case ARMin_Shift:
2166         i->ARMin.Shift.dst = lookupHRegRemap(m, i->ARMin.Shift.dst);
2167         i->ARMin.Shift.argL = lookupHRegRemap(m, i->ARMin.Shift.argL);
2168         mapRegs_ARMRI5(m, i->ARMin.Shift.argR);
2169         return;
2170      case ARMin_Unary:
2171         i->ARMin.Unary.dst = lookupHRegRemap(m, i->ARMin.Unary.dst);
2172         i->ARMin.Unary.src = lookupHRegRemap(m, i->ARMin.Unary.src);
2173         return;
2174      case ARMin_CmpOrTst:
2175         i->ARMin.CmpOrTst.argL = lookupHRegRemap(m, i->ARMin.CmpOrTst.argL);
2176         mapRegs_ARMRI84(m, i->ARMin.CmpOrTst.argR);
2177         return;
2178      case ARMin_Mov:
2179         i->ARMin.Mov.dst = lookupHRegRemap(m, i->ARMin.Mov.dst);
2180         mapRegs_ARMRI84(m, i->ARMin.Mov.src);
2181         return;
2182      case ARMin_Imm32:
2183         i->ARMin.Imm32.dst = lookupHRegRemap(m, i->ARMin.Imm32.dst);
2184         return;
2185      case ARMin_LdSt32:
2186         i->ARMin.LdSt32.rD = lookupHRegRemap(m, i->ARMin.LdSt32.rD);
2187         mapRegs_ARMAMode1(m, i->ARMin.LdSt32.amode);
2188         return;
2189      case ARMin_LdSt16:
2190         i->ARMin.LdSt16.rD = lookupHRegRemap(m, i->ARMin.LdSt16.rD);
2191         mapRegs_ARMAMode2(m, i->ARMin.LdSt16.amode);
2192         return;
2193      case ARMin_LdSt8U:
2194         i->ARMin.LdSt8U.rD = lookupHRegRemap(m, i->ARMin.LdSt8U.rD);
2195         mapRegs_ARMAMode1(m, i->ARMin.LdSt8U.amode);
2196         return;
2197      case ARMin_Ld8S:
2198         goto unhandled;
2199      case ARMin_Goto:
2200         i->ARMin.Goto.gnext = lookupHRegRemap(m, i->ARMin.Goto.gnext);
2201         return;
2202      case ARMin_CMov:
2203         i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst);
2204         mapRegs_ARMRI84(m, i->ARMin.CMov.src);
2205         return;
2206      case ARMin_Call:
2207         return;
2208      case ARMin_Mul:
2209         return;
2210      case ARMin_LdrEX:
2211         return;
2212      case ARMin_StrEX:
2213         return;
2214      case ARMin_VLdStD:
2215         i->ARMin.VLdStD.dD = lookupHRegRemap(m, i->ARMin.VLdStD.dD);
2216         mapRegs_ARMAModeV(m, i->ARMin.VLdStD.amode);
2217         return;
2218      case ARMin_VLdStS:
2219         i->ARMin.VLdStS.fD = lookupHRegRemap(m, i->ARMin.VLdStS.fD);
2220         mapRegs_ARMAModeV(m, i->ARMin.VLdStS.amode);
2221         return;
2222      case ARMin_VAluD:
2223         i->ARMin.VAluD.dst  = lookupHRegRemap(m, i->ARMin.VAluD.dst);
2224         i->ARMin.VAluD.argL = lookupHRegRemap(m, i->ARMin.VAluD.argL);
2225         i->ARMin.VAluD.argR = lookupHRegRemap(m, i->ARMin.VAluD.argR);
2226         return;
2227      case ARMin_VAluS:
2228         i->ARMin.VAluS.dst  = lookupHRegRemap(m, i->ARMin.VAluS.dst);
2229         i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
2230         i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
2231         return;
2232      case ARMin_VUnaryD:
2233         i->ARMin.VUnaryD.dst = lookupHRegRemap(m, i->ARMin.VUnaryD.dst);
2234         i->ARMin.VUnaryD.src = lookupHRegRemap(m, i->ARMin.VUnaryD.src);
2235         return;
2236      case ARMin_VUnaryS:
2237         i->ARMin.VUnaryS.dst = lookupHRegRemap(m, i->ARMin.VUnaryS.dst);
2238         i->ARMin.VUnaryS.src = lookupHRegRemap(m, i->ARMin.VUnaryS.src);
2239         return;
2240      case ARMin_VCmpD:
2241         i->ARMin.VCmpD.argL = lookupHRegRemap(m, i->ARMin.VCmpD.argL);
2242         i->ARMin.VCmpD.argR = lookupHRegRemap(m, i->ARMin.VCmpD.argR);
2243         return;
2244      case ARMin_VCMovD:
2245         i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
2246         i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
2247         return;
2248      case ARMin_VCMovS:
2249         i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
2250         i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
2251         return;
2252      case ARMin_VCvtSD:
2253         i->ARMin.VCvtSD.dst = lookupHRegRemap(m, i->ARMin.VCvtSD.dst);
2254         i->ARMin.VCvtSD.src = lookupHRegRemap(m, i->ARMin.VCvtSD.src);
2255         return;
2256      case ARMin_VXferD:
2257         i->ARMin.VXferD.dD  = lookupHRegRemap(m, i->ARMin.VXferD.dD);
2258         i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
2259         i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
2260         return;
2261      case ARMin_VXferS:
2262         i->ARMin.VXferS.fD  = lookupHRegRemap(m, i->ARMin.VXferS.fD);
2263         i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
2264         return;
2265      case ARMin_VCvtID:
2266         i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
2267         i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
2268         return;
2269      case ARMin_FPSCR:
2270         i->ARMin.FPSCR.iReg = lookupHRegRemap(m, i->ARMin.FPSCR.iReg);
2271         return;
2272      case ARMin_MFence:
2273         return;
2274      case ARMin_NLdStQ:
2275         i->ARMin.NLdStQ.dQ = lookupHRegRemap(m, i->ARMin.NLdStQ.dQ);
2276         mapRegs_ARMAModeN(m, i->ARMin.NLdStQ.amode);
2277         return;
2278      case ARMin_NLdStD:
2279         i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
2280         mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
2281         return;
2282      case ARMin_NUnary:
2283         i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
2284         i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
2285         return;
2286      case ARMin_NUnaryS:
2287         i->ARMin.NUnaryS.src->reg
2288            = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
2289         i->ARMin.NUnaryS.dst->reg
2290            = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
2291         return;
2292      case ARMin_NShift:
2293         i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
2294         i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
2295         i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
2296         return;
2297      case ARMin_NDual:
2298         i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
2299         i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
2300         return;
2301      case ARMin_NBinary:
2302         i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
2303         i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
2304         i->ARMin.NBinary.dst  = lookupHRegRemap(m, i->ARMin.NBinary.dst);
2305         return;
2306      case ARMin_NeonImm:
2307         i->ARMin.NeonImm.dst = lookupHRegRemap(m, i->ARMin.NeonImm.dst);
2308         return;
2309      case ARMin_NCMovQ:
2310         i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
2311         i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
2312         return;
2313      case ARMin_Add32:
2314         i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
2315         i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
2316      unhandled:
2317      default:
2318         ppARMInstr(i);
2319         vpanic("mapRegs_ARMInstr");
2320   }
2321}
2322
2323/* Figure out if i represents a reg-reg move, and if so assign the
2324   source and destination to *src and *dst.  If in doubt say No.  Used
2325   by the register allocator to do move coalescing.
2326*/
2327Bool isMove_ARMInstr ( ARMInstr* i, HReg* src, HReg* dst )
2328{
2329   /* Moves between integer regs */
2330   switch (i->tag) {
2331      case ARMin_Mov:
2332         if (i->ARMin.Mov.src->tag == ARMri84_R) {
2333            *src = i->ARMin.Mov.src->ARMri84.R.reg;
2334            *dst = i->ARMin.Mov.dst;
2335            return True;
2336         }
2337         break;
2338      case ARMin_VUnaryD:
2339         if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
2340            *src = i->ARMin.VUnaryD.src;
2341            *dst = i->ARMin.VUnaryD.dst;
2342            return True;
2343         }
2344         break;
2345      case ARMin_VUnaryS:
2346         if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
2347            *src = i->ARMin.VUnaryS.src;
2348            *dst = i->ARMin.VUnaryS.dst;
2349            return True;
2350         }
2351         break;
2352      default:
2353         break;
2354   }
2355
2356   // todo: float, vector moves
2357   return False;
2358}
2359
2360
2361/* Generate arm spill/reload instructions under the direction of the
2362   register allocator.  Note it's critical these don't write the
2363   condition codes. */
2364
2365void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2366                    HReg rreg, Int offsetB, Bool mode64 )
2367{
2368   HRegClass rclass;
2369   vassert(offsetB >= 0);
2370   vassert(!hregIsVirtual(rreg));
2371   vassert(mode64 == False);
2372   *i1 = *i2 = NULL;
2373   rclass = hregClass(rreg);
2374   switch (rclass) {
2375      case HRcInt32:
2376         vassert(offsetB <= 4095);
2377         *i1 = ARMInstr_LdSt32( False/*!isLoad*/,
2378                                rreg,
2379                                ARMAMode1_RI(hregARM_R8(), offsetB) );
2380         return;
2381      case HRcFlt32:
2382      case HRcFlt64: {
2383         HReg r8   = hregARM_R8();  /* baseblock */
2384         HReg r12  = hregARM_R12(); /* spill temp */
2385         HReg base = r8;
2386         vassert(0 == (offsetB & 3));
2387         if (offsetB >= 1024) {
2388            Int offsetKB = offsetB / 1024;
2389            /* r12 = r8 + (1024 * offsetKB) */
2390            *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
2391                               ARMRI84_I84(offsetKB, 11));
2392            offsetB -= (1024 * offsetKB);
2393            base = r12;
2394         }
2395         vassert(offsetB <= 1020);
2396         if (rclass == HRcFlt32) {
2397            *i2 = ARMInstr_VLdStS( False/*!isLoad*/,
2398                                   rreg,
2399                                   mkARMAModeV(base, offsetB) );
2400         } else {
2401            *i2 = ARMInstr_VLdStD( False/*!isLoad*/,
2402                                   rreg,
2403                                   mkARMAModeV(base, offsetB) );
2404         }
2405         return;
2406      }
2407      case HRcVec128: {
2408         HReg r8  = hregARM_R8();
2409         HReg r12 = hregARM_R12();
2410         *i1 = ARMInstr_Add32(r12, r8, offsetB);
2411         *i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
2412         return;
2413      }
2414      default:
2415         ppHRegClass(rclass);
2416         vpanic("genSpill_ARM: unimplemented regclass");
2417   }
2418}
2419
2420void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2421                     HReg rreg, Int offsetB, Bool mode64 )
2422{
2423   HRegClass rclass;
2424   vassert(offsetB >= 0);
2425   vassert(!hregIsVirtual(rreg));
2426   vassert(mode64 == False);
2427   *i1 = *i2 = NULL;
2428   rclass = hregClass(rreg);
2429   switch (rclass) {
2430      case HRcInt32:
2431         vassert(offsetB <= 4095);
2432         *i1 = ARMInstr_LdSt32( True/*isLoad*/,
2433                                rreg,
2434                                ARMAMode1_RI(hregARM_R8(), offsetB) );
2435         return;
2436      case HRcFlt32:
2437      case HRcFlt64: {
2438         HReg r8   = hregARM_R8();  /* baseblock */
2439         HReg r12  = hregARM_R12(); /* spill temp */
2440         HReg base = r8;
2441         vassert(0 == (offsetB & 3));
2442         if (offsetB >= 1024) {
2443            Int offsetKB = offsetB / 1024;
2444            /* r12 = r8 + (1024 * offsetKB) */
2445            *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
2446                               ARMRI84_I84(offsetKB, 11));
2447            offsetB -= (1024 * offsetKB);
2448            base = r12;
2449         }
2450         vassert(offsetB <= 1020);
2451         if (rclass == HRcFlt32) {
2452            *i2 = ARMInstr_VLdStS( True/*isLoad*/,
2453                                   rreg,
2454                                   mkARMAModeV(base, offsetB) );
2455         } else {
2456            *i2 = ARMInstr_VLdStD( True/*isLoad*/,
2457                                   rreg,
2458                                   mkARMAModeV(base, offsetB) );
2459         }
2460         return;
2461      }
2462      case HRcVec128: {
2463         HReg r8  = hregARM_R8();
2464         HReg r12 = hregARM_R12();
2465         *i1 = ARMInstr_Add32(r12, r8, offsetB);
2466         *i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(r12));
2467         return;
2468      }
2469      default:
2470         ppHRegClass(rclass);
2471         vpanic("genReload_ARM: unimplemented regclass");
2472   }
2473}
2474
2475
2476/* Emit an instruction into buf and return the number of bytes used.
2477   Note that buf is not the insn's final place, and therefore it is
2478   imperative to emit position-independent code. */
2479
/* Return the register number of r as a UChar.  Asserts that r is a
   real (non-virtual) register of class HRcInt32 and that its number
   is in 0..15. */
static inline UChar iregNo ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcInt32);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 15);
   return toUChar(n);
}
2489
/* Return the register number of r as a UChar.  Asserts that r is a
   real (non-virtual) register of class HRcFlt64 and that its number
   is in 0..31. */
static inline UChar dregNo ( HReg r )
{
   UInt n;
   /* Print the offending class first, so that it is visible before
      the assertion below fires. */
   if (hregClass(r) != HRcFlt64)
      ppHRegClass(hregClass(r));
   vassert(hregClass(r) == HRcFlt64);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 31);
   return toUChar(n);
}
2501
/* Return the register number of r as a UChar.  Asserts that r is a
   real (non-virtual) register of class HRcFlt32 and that its number
   is in 0..31. */
static inline UChar fregNo ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcFlt32);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 31);
   return toUChar(n);
}
2511
/* Return the register number of r as a UChar.  Asserts that r is a
   real (non-virtual) register of class HRcVec128 and that its number
   is in 0..15. */
static inline UChar qregNo ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcVec128);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 15);
   return toUChar(n);
}
2521
/* Build a 4-bit value from four individual bits, most significant
   first.  Xnnnn below are the sixteen possible nibbles. */
#define BITS4(zzb3,zzb2,zzb1,zzb0) \
   (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
#define X0000  BITS4(0,0,0,0)
#define X0001  BITS4(0,0,0,1)
#define X0010  BITS4(0,0,1,0)
#define X0011  BITS4(0,0,1,1)
#define X0100  BITS4(0,1,0,0)
#define X0101  BITS4(0,1,0,1)
#define X0110  BITS4(0,1,1,0)
#define X0111  BITS4(0,1,1,1)
#define X1000  BITS4(1,0,0,0)
#define X1001  BITS4(1,0,0,1)
#define X1010  BITS4(1,0,1,0)
#define X1011  BITS4(1,0,1,1)
#define X1100  BITS4(1,1,0,0)
#define X1101  BITS4(1,1,0,1)
#define X1110  BITS4(1,1,1,0)
#define X1111  BITS4(1,1,1,1)

/* Nibble packers.  Each macro assembles a 32-bit word from 4-bit
   fields; in the macro name an 'X' marks a nibble position that is
   supplied (argument zzxN goes to bits 4N+3 .. 4N) and '_' marks a
   position left as zero.  Every argument is truncated to its low 4
   bits.

   The mask is the unsigned constant 0xFU so that the field is
   unsigned before it is shifted: with a plain int argument such as
   X1110, (arg & 0xF) << 28 would shift a 1 into the sign bit of a
   signed int, which C leaves undefined.  The resulting bit pattern
   is unchanged. */

#define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
   ((((zzx7) & 0xFU) << 28) | (((zzx6) & 0xFU) << 24) |  \
    (((zzx5) & 0xFU) << 20) | (((zzx4) & 0xFU) << 16) |  \
    (((zzx3) & 0xFU) << 12))

#define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2)        \
   ((((zzx7) & 0xFU) << 28) | (((zzx6) & 0xFU) << 24) |  \
    (((zzx5) & 0xFU) << 20) | (((zzx4) & 0xFU) << 16) |  \
    (((zzx3) & 0xFU) << 12) | (((zzx2) & 0xFU) <<  8))

#define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0)        \
   ((((zzx7) & 0xFU) << 28) | (((zzx6) & 0xFU) << 24) |  \
    (((zzx5) & 0xFU) << 20) | (((zzx4) & 0xFU) << 16) |  \
    (((zzx3) & 0xFU) << 12) | (((zzx0) & 0xFU) <<  0))

#define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
  ((((zzx7) & 0xFU) << 28) | (((zzx6) & 0xFU) << 24) | \
   (((zzx5) & 0xFU) << 20) | (((zzx1) & 0xFU) << 4) | \
   (((zzx0) & 0xFU) << 0))

#define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0)  \
   ((((zzx7) & 0xFU) << 28) | (((zzx6) & 0xFU) << 24) |  \
    (((zzx5) & 0xFU) << 20) | (((zzx4) & 0xFU) << 16) |  \
    (((zzx3) & 0xFU) << 12) | (((zzx2) & 0xFU) <<  8) |  \
    (((zzx1) & 0xFU) <<  4) | (((zzx0) & 0xFU) <<  0))
2566
2567/* Generate a skeletal insn that involves an a RI84 shifter operand.
2568   Returns a word which is all zeroes apart from bits 25 and 11..0,
2569   since it is those that encode the shifter operand (at least to the
2570   extent that we care about it.) */
2571static UInt skeletal_RI84 ( ARMRI84* ri )
2572{
2573   UInt instr;
2574   if (ri->tag == ARMri84_I84) {
2575      vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
2576      vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
2577      instr = 1 << 25;
2578      instr |= (ri->ARMri84.I84.imm4 << 8);
2579      instr |= ri->ARMri84.I84.imm8;
2580   } else {
2581      instr = 0 << 25;
2582      instr |= iregNo(ri->ARMri84.R.reg);
2583   }
2584   return instr;
2585}
2586
2587/* Ditto for RI5.  Resulting word is zeroes apart from bit 4 and bits
2588   11..7. */
2589static UInt skeletal_RI5 ( ARMRI5* ri )
2590{
2591   UInt instr;
2592   if (ri->tag == ARMri5_I5) {
2593      UInt imm5 = ri->ARMri5.I5.imm5;
2594      vassert(imm5 >= 1 && imm5 <= 31);
2595      instr = 0 << 4;
2596      instr |= imm5 << 7;
2597   } else {
2598      instr = 1 << 4;
2599      instr |= iregNo(ri->ARMri5.R.reg) << 8;
2600   }
2601   return instr;
2602}
2603
2604
2605/* Get an immediate into a register, using only that
2606   register.  (very lame..) */
2607static UInt* imm32_to_iregNo ( UInt* p, Int rD, UInt imm32 )
2608{
2609   UInt instr;
2610   vassert(rD >= 0 && rD <= 14); // r15 not good to mess with!
2611#if 0
2612   if (0 == (imm32 & ~0xFF)) {
2613      /* mov with a immediate shifter operand of (0, imm32) (??) */
2614      instr = XXXXXX__(X1110,X0011,X1010,X0000,rD,X0000);
2615      instr |= imm32;
2616      *p++ = instr;
2617   } else {
2618      // this is very bad; causes Dcache pollution
2619      // ldr  rD, [pc]
2620      instr = XXXXX___(X1110,X0101,X1001,X1111,rD);
2621      *p++ = instr;
2622      // b .+8
2623      instr = 0xEA000000;
2624      *p++ = instr;
2625      // .word imm32
2626      *p++ = imm32;
2627   }
2628#else
2629   if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2630      /* Generate movw rD, #low16.  Then, if the high 16 are
2631         nonzero, generate movt rD, #high16. */
2632      UInt lo16 = imm32 & 0xFFFF;
2633      UInt hi16 = (imm32 >> 16) & 0xFFFF;
2634      instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
2635                       (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
2636                       lo16 & 0xF);
2637      *p++ = instr;
2638      if (hi16 != 0) {
2639         instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
2640                          (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
2641                          hi16 & 0xF);
2642         *p++ = instr;
2643      }
2644   } else {
2645      UInt imm, rot;
2646      UInt op = X1010;
2647      UInt rN = 0;
2648      if ((imm32 & 0xFF) || (imm32 == 0)) {
2649         imm = imm32 & 0xFF;
2650         rot = 0;
2651         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2652         *p++ = instr;
2653         op = X1000;
2654         rN = rD;
2655      }
2656      if (imm32 & 0xFF000000) {
2657         imm = (imm32 >> 24) & 0xFF;
2658         rot = 4;
2659         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2660         *p++ = instr;
2661         op = X1000;
2662         rN = rD;
2663      }
2664      if (imm32 & 0xFF0000) {
2665         imm = (imm32 >> 16) & 0xFF;
2666         rot = 8;
2667         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2668         *p++ = instr;
2669         op = X1000;
2670         rN = rD;
2671      }
2672      if (imm32 & 0xFF00) {
2673         imm = (imm32 >> 8) & 0xFF;
2674         rot = 12;
2675         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2676         *p++ = instr;
2677         op = X1000;
2678         rN = rD;
2679      }
2680   }
2681#endif
2682   return p;
2683}
2684
2685
2686Int emit_ARMInstr ( UChar* buf, Int nbuf, ARMInstr* i,
2687                    Bool mode64, void* dispatch )
2688{
2689   UInt* p = (UInt*)buf;
2690   vassert(nbuf >= 32);
2691   vassert(mode64 == False);
2692   vassert(0 == (((HWord)buf) & 3));
2693   /* since we branch to lr(r13) to get back to dispatch: */
2694   vassert(dispatch == NULL);
2695
2696   switch (i->tag) {
2697      case ARMin_Alu: {
2698         UInt     instr, subopc;
2699         UInt     rD   = iregNo(i->ARMin.Alu.dst);
2700         UInt     rN   = iregNo(i->ARMin.Alu.argL);
2701         ARMRI84* argR = i->ARMin.Alu.argR;
2702         switch (i->ARMin.Alu.op) {
2703            case ARMalu_ADDS: /* fallthru */
2704            case ARMalu_ADD:  subopc = X0100; break;
2705            case ARMalu_ADC:  subopc = X0101; break;
2706            case ARMalu_SUBS: /* fallthru */
2707            case ARMalu_SUB:  subopc = X0010; break;
2708            case ARMalu_SBC:  subopc = X0110; break;
2709            case ARMalu_AND:  subopc = X0000; break;
2710            case ARMalu_BIC:  subopc = X1110; break;
2711            case ARMalu_OR:   subopc = X1100; break;
2712            case ARMalu_XOR:  subopc = X0001; break;
2713            default: goto bad;
2714         }
2715         instr = skeletal_RI84(argR);
2716         instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
2717                           (subopc << 1) & 0xF, rN, rD);
2718         if (i->ARMin.Alu.op == ARMalu_ADDS
2719             || i->ARMin.Alu.op == ARMalu_SUBS) {
2720            instr |= 1<<20;  /* set the S bit */
2721         }
2722         *p++ = instr;
2723         goto done;
2724      }
2725      case ARMin_Shift: {
2726         UInt    instr, subopc;
2727         HReg    rD   = iregNo(i->ARMin.Shift.dst);
2728         HReg    rM   = iregNo(i->ARMin.Shift.argL);
2729         ARMRI5* argR = i->ARMin.Shift.argR;
2730         switch (i->ARMin.Shift.op) {
2731            case ARMsh_SHL: subopc = X0000; break;
2732            case ARMsh_SHR: subopc = X0001; break;
2733            case ARMsh_SAR: subopc = X0010; break;
2734            default: goto bad;
2735         }
2736         instr = skeletal_RI5(argR);
2737         instr |= XXXXX__X(X1110,X0001,X1010,X0000,rD, /* _ _ */ rM);
2738         instr |= (subopc & 3) << 5;
2739         *p++ = instr;
2740         goto done;
2741      }
2742      case ARMin_Unary: {
2743         UInt instr;
2744         HReg rDst = iregNo(i->ARMin.Unary.dst);
2745         HReg rSrc = iregNo(i->ARMin.Unary.src);
2746         switch (i->ARMin.Unary.op) {
2747            case ARMun_CLZ:
2748               instr = XXXXXXXX(X1110,X0001,X0110,X1111,
2749                                rDst,X1111,X0001,rSrc);
2750               *p++ = instr;
2751               goto done;
2752            case ARMun_NEG: /* RSB rD,rS,#0 */
2753               instr = XXXXX___(X1110,0x2,0x6,rSrc,rDst);
2754               *p++ = instr;
2755               goto done;
2756            case ARMun_NOT: {
2757               UInt subopc = X1111; /* MVN */
2758               instr = rSrc;
2759               instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
2760                                 (subopc << 1) & 0xF, 0, rDst);
2761               *p++ = instr;
2762               goto done;
2763            }
2764            default:
2765               break;
2766         }
2767         goto bad;
2768      }
2769      case ARMin_CmpOrTst: {
2770         UInt instr  = skeletal_RI84(i->ARMin.CmpOrTst.argR);
2771         UInt subopc = i->ARMin.CmpOrTst.isCmp ? X1010 : X1000;
2772         UInt SBZ    = 0;
2773         instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
2774                           ((subopc << 1) & 0xF) | 1,
2775                           i->ARMin.CmpOrTst.argL, SBZ );
2776         *p++ = instr;
2777         goto done;
2778      }
2779      case ARMin_Mov: {
2780         UInt instr  = skeletal_RI84(i->ARMin.Mov.src);
2781         UInt subopc = X1101; /* MOV */
2782         UInt SBZ    = 0;
2783         instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
2784                           (subopc << 1) & 0xF, SBZ, i->ARMin.Mov.dst);
2785         *p++ = instr;
2786         goto done;
2787      }
2788      case ARMin_Imm32: {
2789         p = imm32_to_iregNo( (UInt*)p, iregNo(i->ARMin.Imm32.dst),
2790                                        i->ARMin.Imm32.imm32 );
2791         goto done;
2792      }
2793      case ARMin_LdSt32:
2794      case ARMin_LdSt8U: {
2795         UInt       bL, bB;
2796         HReg       rD;
2797         ARMAMode1* am;
2798         if (i->tag == ARMin_LdSt32) {
2799            bB = 0;
2800            bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
2801            am = i->ARMin.LdSt32.amode;
2802            rD = i->ARMin.LdSt32.rD;
2803         } else {
2804            bB = 1;
2805            bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
2806            am = i->ARMin.LdSt8U.amode;
2807            rD = i->ARMin.LdSt8U.rD;
2808         }
2809         if (am->tag == ARMam1_RI) {
2810            Int  simm12;
2811            UInt instr, bP;
2812            if (am->ARMam1.RI.simm13 < 0) {
2813               bP = 0;
2814               simm12 = -am->ARMam1.RI.simm13;
2815            } else {
2816               bP = 1;
2817               simm12 = am->ARMam1.RI.simm13;
2818            }
2819            vassert(simm12 >= 0 && simm12 <= 4095);
2820            instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
2821                             iregNo(am->ARMam1.RI.reg),
2822                             iregNo(rD));
2823            instr |= simm12;
2824            *p++ = instr;
2825            goto done;
2826         } else {
2827            // RR case
2828            goto bad;
2829         }
2830      }
2831      case ARMin_LdSt16: {
2832         HReg       rD = i->ARMin.LdSt16.rD;
2833         UInt       bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
2834         UInt       bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
2835         ARMAMode2* am = i->ARMin.LdSt16.amode;
2836         if (am->tag == ARMam2_RI) {
2837            HReg rN = am->ARMam2.RI.reg;
2838            Int  simm8;
2839            UInt bP, imm8hi, imm8lo, instr;
2840            if (am->ARMam2.RI.simm9 < 0) {
2841               bP = 0;
2842               simm8 = -am->ARMam2.RI.simm9;
2843            } else {
2844               bP = 1;
2845               simm8 = am->ARMam2.RI.simm9;
2846            }
2847            vassert(simm8 >= 0 && simm8 <= 255);
2848            imm8hi = (simm8 >> 4) & 0xF;
2849            imm8lo = simm8 & 0xF;
2850            vassert(!(bL == 0 && bS == 1)); // "! signed store"
2851            /**/ if (bL == 0 && bS == 0) {
2852               // strh
2853               instr = XXXXXXXX(X1110,X0001, BITS4(bP,1,0,0), iregNo(rN),
2854                                iregNo(rD), imm8hi, X1011, imm8lo);
2855               *p++ = instr;
2856               goto done;
2857            }
2858            else if (bL == 1 && bS == 0) {
2859               // ldrh
2860               instr = XXXXXXXX(X1110,X0001, BITS4(bP,1,0,1), iregNo(rN),
2861                                iregNo(rD), imm8hi, X1011, imm8lo);
2862               *p++ = instr;
2863               goto done;
2864            }
2865            else if (bL == 1 && bS == 1) {
2866               goto bad;
2867            }
2868            else vassert(0); // ill-constructed insn
2869         } else {
2870            // RR case
2871            goto bad;
2872         }
2873      }
2874      case ARMin_Ld8S:
2875         goto bad;
2876      case ARMin_Goto: {
2877         UInt        instr;
2878         IRJumpKind  jk    = i->ARMin.Goto.jk;
2879         ARMCondCode cond  = i->ARMin.Goto.cond;
2880         UInt        rnext = iregNo(i->ARMin.Goto.gnext);
2881         Int         trc   = -1;
2882         switch (jk) {
2883            case Ijk_Ret: case Ijk_Call: case Ijk_Boring:
2884               break; /* no need to set GST in these common cases */
2885            case Ijk_ClientReq:
2886               trc = VEX_TRC_JMP_CLIENTREQ; break;
2887            case Ijk_Sys_int128:
2888            case Ijk_Sys_int129:
2889            case Ijk_Sys_int130:
2890            case Ijk_Yield:
2891            case Ijk_EmWarn:
2892            case Ijk_MapFail:
2893               goto unhandled_jk;
2894            case Ijk_YieldNoRedir:
2895               trc = VEX_TRC_JMP_YIELD_NOREDIR; break;
2896            case Ijk_NoDecode:
2897               trc = VEX_TRC_JMP_NODECODE; break;
2898            case Ijk_TInval:
2899               trc = VEX_TRC_JMP_TINVAL; break;
2900            case Ijk_NoRedir:
2901               trc = VEX_TRC_JMP_NOREDIR; break;
2902            case Ijk_Sys_sysenter:
2903            case Ijk_SigTRAP:
2904            case Ijk_SigSEGV:
2905               goto unhandled_jk;
2906            case Ijk_Sys_syscall:
2907               trc = VEX_TRC_JMP_SYS_SYSCALL; break;
2908            unhandled_jk:
2909            default:
2910               goto bad;
2911         }
2912         if (trc != -1) {
2913            // mov{cond} r8, #trc
2914            vassert(trc >= 0 && trc <= 255);
2915            instr = (cond << 28) | 0x03A08000 | (0xFF & (UInt)trc);
2916            *p++ = instr;
2917         }
2918         // mov{cond} r0, rnext
2919         if (rnext != 0) {
2920            instr = (cond << 28) | 0x01A00000 | rnext;
2921            *p++ = instr;
2922         }
2923         // bx{cond} r14
2924         instr =(cond << 28) | 0x012FFF1E;
2925         *p++ = instr;
2926         goto done;
2927      }
2928      case ARMin_CMov: {
2929         UInt instr  = skeletal_RI84(i->ARMin.CMov.src);
2930         UInt subopc = X1101; /* MOV */
2931         UInt SBZ    = 0;
2932         instr |= XXXXX___(i->ARMin.CMov.cond, (1 & (subopc >> 3)),
2933                           (subopc << 1) & 0xF, SBZ, i->ARMin.CMov.dst);
2934         *p++ = instr;
2935         goto done;
2936      }
2937      case ARMin_Call: {
2938         UInt instr;
2939         /* Decide on a scratch reg used to hold to the call address.
2940            This has to be done as per the comments in getRegUsage. */
2941         Int scratchNo;
2942         switch (i->ARMin.Call.nArgRegs) {
2943            case 0:  scratchNo = 0;  break;
2944            case 1:  scratchNo = 1;  break;
2945            case 2:  scratchNo = 2;  break;
2946            case 3:  scratchNo = 3;  break;
2947            case 4:  scratchNo = 11; break;
2948            default: vassert(0);
2949         }
2950         // r"scratchNo" = &target
2951         p = imm32_to_iregNo( (UInt*)p,
2952                              scratchNo, (UInt)i->ARMin.Call.target );
2953         // blx{cond} r"scratchNo"
2954         instr = XXX___XX(i->ARMin.Call.cond, X0001, X0010, /*___*/
2955                          X0011, scratchNo);
2956         instr |= 0xFFF << 8; // stick in the SBOnes
2957         *p++ = instr;
2958         goto done;
2959      }
2960      case ARMin_Mul: {
2961         /* E0000392   mul     r0, r2, r3
2962            E0810392   umull   r0(LO), r1(HI), r2, r3
2963            E0C10392   smull   r0(LO), r1(HI), r2, r3
2964         */
2965         switch (i->ARMin.Mul.op) {
2966            case ARMmul_PLAIN: *p++ = 0xE0000392; goto done;
2967            case ARMmul_ZX:    *p++ = 0xE0810392; goto done;
2968            case ARMmul_SX:    *p++ = 0xE0C10392; goto done;
2969            default: vassert(0);
2970         }
2971         goto bad;
2972      }
2973      case ARMin_LdrEX: {
2974         /* E1B01F9F   ldrexd   r2, r3, [r0]
2975            E1901F9F   ldrex    r2, [r0]
2976            E1F01F9F   ldrexh   r2, [r0]
2977            E1D01F9F   ldrexb   r2, [r0]
2978         */
2979         switch (i->ARMin.LdrEX.szB) {
2980            case 8: *p++ = 0xE1B02F9F; goto done;
2981            case 4: *p++ = 0xE1902F9F; goto done;
2982            //case 2: *p++ = 0xE1F02F9F; goto done;
2983            case 1: *p++ = 0xE1D02F9F; goto done;
2984            default: break;
2985         }
2986         goto bad;
2987      }
2988      case ARMin_StrEX: {
2989         /* E1A01F92   strexd  r1, r2, r3, [r0]
2990            E1801F92   strex   r1, r2, [r0]
2991            E1E01F92   strexh  r1, r2, [r0]
2992            E1C01F92   strexb  r1, r2, [r0]
2993         */
2994         switch (i->ARMin.StrEX.szB) {
2995            case 8: *p++ = 0xE1A01F92; goto done;
2996            case 4: *p++ = 0xE1801F92; goto done;
2997            //case 2: *p++ = 0xE1E01F92; goto done;
2998            case 1: *p++ = 0xE1C01F92; goto done;
2999            default: break;
3000         }
3001         goto bad;
3002      }
3003      case ARMin_VLdStD: {
3004         UInt dD     = dregNo(i->ARMin.VLdStD.dD);
3005         UInt rN     = iregNo(i->ARMin.VLdStD.amode->reg);
3006         Int  simm11 = i->ARMin.VLdStD.amode->simm11;
3007         UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
3008         UInt bU     = simm11 >= 0 ? 1 : 0;
3009         UInt bL     = i->ARMin.VLdStD.isLoad ? 1 : 0;
3010         UInt insn;
3011         vassert(0 == (off8 & 3));
3012         off8 >>= 2;
3013         vassert(0 == (off8 & 0xFFFFFF00));
3014         insn = XXXXXX__(0xE,X1101,BITS4(bU,0,0,bL),rN,dD,X1011);
3015         insn |= off8;
3016         *p++ = insn;
3017         goto done;
3018      }
3019      case ARMin_VLdStS: {
3020         UInt fD     = fregNo(i->ARMin.VLdStS.fD);
3021         UInt rN     = iregNo(i->ARMin.VLdStS.amode->reg);
3022         Int  simm11 = i->ARMin.VLdStS.amode->simm11;
3023         UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
3024         UInt bU     = simm11 >= 0 ? 1 : 0;
3025         UInt bL     = i->ARMin.VLdStS.isLoad ? 1 : 0;
3026         UInt bD     = fD & 1;
3027         UInt insn;
3028         vassert(0 == (off8 & 3));
3029         off8 >>= 2;
3030         vassert(0 == (off8 & 0xFFFFFF00));
3031         insn = XXXXXX__(0xE,X1101,BITS4(bU,bD,0,bL),rN, (fD >> 1), X1010);
3032         insn |= off8;
3033         *p++ = insn;
3034         goto done;
3035      }
3036      case ARMin_VAluD: {
3037         UInt dN = dregNo(i->ARMin.VAluD.argL);
3038         UInt dD = dregNo(i->ARMin.VAluD.dst);
3039         UInt dM = dregNo(i->ARMin.VAluD.argR);
3040         UInt pqrs = X1111; /* undefined */
3041         switch (i->ARMin.VAluD.op) {
3042            case ARMvfp_ADD: pqrs = X0110; break;
3043            case ARMvfp_SUB: pqrs = X0111; break;
3044            case ARMvfp_MUL: pqrs = X0100; break;
3045            case ARMvfp_DIV: pqrs = X1000; break;
3046            default: goto bad;
3047         }
3048         vassert(pqrs != X1111);
3049         UInt bP  = (pqrs >> 3) & 1;
3050         UInt bQ  = (pqrs >> 2) & 1;
3051         UInt bR  = (pqrs >> 1) & 1;
3052         UInt bS  = (pqrs >> 0) & 1;
3053         UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,0,bQ,bR), dN, dD,
3054                              X1011, BITS4(0,bS,0,0), dM);
3055         *p++ = insn;
3056         goto done;
3057      }
3058      case ARMin_VAluS: {
3059         UInt dN = fregNo(i->ARMin.VAluS.argL);
3060         UInt dD = fregNo(i->ARMin.VAluS.dst);
3061         UInt dM = fregNo(i->ARMin.VAluS.argR);
3062         UInt bN = dN & 1;
3063         UInt bD = dD & 1;
3064         UInt bM = dM & 1;
3065         UInt pqrs = X1111; /* undefined */
3066         switch (i->ARMin.VAluS.op) {
3067            case ARMvfp_ADD: pqrs = X0110; break;
3068            case ARMvfp_SUB: pqrs = X0111; break;
3069            case ARMvfp_MUL: pqrs = X0100; break;
3070            case ARMvfp_DIV: pqrs = X1000; break;
3071            default: goto bad;
3072         }
3073         vassert(pqrs != X1111);
3074         UInt bP  = (pqrs >> 3) & 1;
3075         UInt bQ  = (pqrs >> 2) & 1;
3076         UInt bR  = (pqrs >> 1) & 1;
3077         UInt bS  = (pqrs >> 0) & 1;
3078         UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
3079                              (dN >> 1), (dD >> 1),
3080                              X1010, BITS4(bN,bS,bM,0), (dM >> 1));
3081         *p++ = insn;
3082         goto done;
3083      }
3084      case ARMin_VUnaryD: {
3085         UInt dD   = dregNo(i->ARMin.VUnaryD.dst);
3086         UInt dM   = dregNo(i->ARMin.VUnaryD.src);
3087         UInt insn = 0;
3088         switch (i->ARMin.VUnaryD.op) {
3089            case ARMvfpu_COPY:
3090               insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X0100,dM);
3091               break;
3092            case ARMvfpu_ABS:
3093               insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X1100,dM);
3094               break;
3095            case ARMvfpu_NEG:
3096               insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X0100,dM);
3097               break;
3098            case ARMvfpu_SQRT:
3099               insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X1100,dM);
3100               break;
3101            default:
3102               goto bad;
3103         }
3104         *p++ = insn;
3105         goto done;
3106      }
3107      case ARMin_VUnaryS: {
3108         UInt fD   = fregNo(i->ARMin.VUnaryS.dst);
3109         UInt fM   = fregNo(i->ARMin.VUnaryS.src);
3110         UInt insn = 0;
3111         switch (i->ARMin.VUnaryS.op) {
3112            case ARMvfpu_COPY:
3113               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
3114                               (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
3115                               (fM >> 1));
3116               break;
3117            case ARMvfpu_ABS:
3118               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
3119                               (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
3120                               (fM >> 1));
3121               break;
3122            case ARMvfpu_NEG:
3123               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
3124                               (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
3125                               (fM >> 1));
3126               break;
3127            case ARMvfpu_SQRT:
3128               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
3129                               (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
3130                               (fM >> 1));
3131               break;
3132            default:
3133               goto bad;
3134         }
3135         *p++ = insn;
3136         goto done;
3137      }
3138      case ARMin_VCmpD: {
3139         UInt dD   = dregNo(i->ARMin.VCmpD.argL);
3140         UInt dM   = dregNo(i->ARMin.VCmpD.argR);
3141         UInt insn = XXXXXXXX(0xE, X1110, X1011, X0100, dD, X1011, X0100, dM);
3142         *p++ = insn;       /* FCMPD dD, dM */
3143         *p++ = 0xEEF1FA10; /* FMSTAT */
3144         goto done;
3145      }
3146      case ARMin_VCMovD: {
3147         UInt cc = (UInt)i->ARMin.VCMovD.cond;
3148         UInt dD = dregNo(i->ARMin.VCMovD.dst);
3149         UInt dM = dregNo(i->ARMin.VCMovD.src);
3150         vassert(cc < 16 && cc != ARMcc_AL);
3151         UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
3152         *p++ = insn;
3153         goto done;
3154      }
3155      case ARMin_VCMovS: {
3156         UInt cc = (UInt)i->ARMin.VCMovS.cond;
3157         UInt fD = fregNo(i->ARMin.VCMovS.dst);
3158         UInt fM = fregNo(i->ARMin.VCMovS.src);
3159         vassert(cc < 16 && cc != ARMcc_AL);
3160         UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
3161                              X0000,(fD >> 1),X1010,
3162                              BITS4(0,1,(fM & 1),0), (fM >> 1));
3163         *p++ = insn;
3164         goto done;
3165      }
3166      case ARMin_VCvtSD: {
3167         if (i->ARMin.VCvtSD.sToD) {
3168            UInt dD = dregNo(i->ARMin.VCvtSD.dst);
3169            UInt fM = fregNo(i->ARMin.VCvtSD.src);
3170            UInt insn = XXXXXXXX(0xE, X1110, X1011, X0111, dD, X1010,
3171                                 BITS4(1,1, (fM & 1), 0),
3172                                 (fM >> 1));
3173            *p++ = insn;
3174            goto done;
3175         } else {
3176            UInt fD = fregNo(i->ARMin.VCvtSD.dst);
3177            UInt dM = dregNo(i->ARMin.VCvtSD.src);
3178            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1),
3179                                 X0111, (fD >> 1),
3180                                 X1011, X1100, dM);
3181            *p++ = insn;
3182            goto done;
3183         }
3184         goto bad;
3185      }
3186      case ARMin_VXferD: {
3187         UInt dD  = dregNo(i->ARMin.VXferD.dD);
3188         UInt rHi = iregNo(i->ARMin.VXferD.rHi);
3189         UInt rLo = iregNo(i->ARMin.VXferD.rLo);
3190         /* vmov dD, rLo, rHi is
3191            E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
3192            vmov rLo, rHi, dD is
3193            E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
3194         */
3195         UInt insn
3196            = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
3197                       rHi, rLo, 0xB,
3198                       BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
3199         *p++ = insn;
3200         goto done;
3201      }
3202      case ARMin_VXferS: {
3203         UInt fD  = fregNo(i->ARMin.VXferS.fD);
3204         UInt rLo = iregNo(i->ARMin.VXferS.rLo);
3205         /* vmov fD, rLo is
3206            E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
3207            vmov rLo, fD is
3208            E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
3209         */
3210         UInt insn
3211            = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
3212                       (fD >> 1) & 0xF, rLo, 0xA,
3213                       BITS4((fD & 1),0,0,1), 0);
3214         *p++ = insn;
3215         goto done;
3216      }
3217      case ARMin_VCvtID: {
3218         Bool iToD = i->ARMin.VCvtID.iToD;
3219         Bool syned = i->ARMin.VCvtID.syned;
3220         if (iToD && syned) {
3221            // FSITOD: I32S-in-freg to F64-in-dreg
3222            UInt regF = fregNo(i->ARMin.VCvtID.src);
3223            UInt regD = dregNo(i->ARMin.VCvtID.dst);
3224            UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
3225                                 X1011, BITS4(1,1,(regF & 1),0),
3226                                 (regF >> 1) & 0xF);
3227            *p++ = insn;
3228            goto done;
3229         }
3230         if (iToD && (!syned)) {
3231            // FUITOD: I32U-in-freg to F64-in-dreg
3232            UInt regF = fregNo(i->ARMin.VCvtID.src);
3233            UInt regD = dregNo(i->ARMin.VCvtID.dst);
3234            UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
3235                                 X1011, BITS4(0,1,(regF & 1),0),
3236                                 (regF >> 1) & 0xF);
3237            *p++ = insn;
3238            goto done;
3239         }
3240         if ((!iToD) && syned) {
3241            // FTOSID: F64-in-dreg to I32S-in-freg
3242            UInt regD = dregNo(i->ARMin.VCvtID.src);
3243            UInt regF = fregNo(i->ARMin.VCvtID.dst);
3244            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
3245                                 X1101, (regF >> 1) & 0xF,
3246                                 X1011, X0100, regD);
3247            *p++ = insn;
3248            goto done;
3249         }
3250         if ((!iToD) && (!syned)) {
3251            // FTOUID: F64-in-dreg to I32U-in-freg
3252            UInt regD = dregNo(i->ARMin.VCvtID.src);
3253            UInt regF = fregNo(i->ARMin.VCvtID.dst);
3254            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
3255                                 X1100, (regF >> 1) & 0xF,
3256                                 X1011, X0100, regD);
3257            *p++ = insn;
3258            goto done;
3259         }
3260         /*UNREACHED*/
3261         vassert(0);
3262      }
3263      case ARMin_FPSCR: {
3264         Bool toFPSCR = i->ARMin.FPSCR.toFPSCR;
3265         HReg iReg    = iregNo(i->ARMin.FPSCR.iReg);
3266         if (toFPSCR) {
3267            /* fmxr fpscr, iReg is EEE1 iReg A10 */
3268            *p++ = 0xEEE10A10 | ((iReg & 0xF) << 12);
3269            goto done;
3270         }
3271         goto bad; // FPSCR -> iReg case currently ATC
3272      }
3273      case ARMin_MFence: {
3274         *p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
3275         *p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
3276         *p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4  (ISB) */
3277         goto done;
3278      }
3279      case ARMin_NLdStQ: {
3280         UInt regD = qregNo(i->ARMin.NLdStQ.dQ) << 1;
3281         UInt regN, regM;
3282         UInt D = regD >> 4;
3283         UInt bL = i->ARMin.NLdStQ.isLoad ? 1 : 0;
3284         UInt insn;
3285         vassert(hregClass(i->ARMin.NLdStQ.dQ) == HRcVec128);
3286         regD &= 0xF;
3287         if (i->ARMin.NLdStQ.amode->tag == ARMamN_RR) {
3288            regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rN);
3289            regM = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rM);
3290         } else {
3291            regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.R.rN);
3292            regM = 15;
3293         }
3294         insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
3295                              regN, regD, X1010, X1000, regM);
3296         *p++ = insn;
3297         goto done;
3298      }
3299      case ARMin_NLdStD: {
3300         UInt regD = dregNo(i->ARMin.NLdStD.dD);
3301         UInt regN, regM;
3302         UInt D = regD >> 4;
3303         UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
3304         UInt insn;
3305         vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
3306         regD &= 0xF;
3307         if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
3308            regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
3309            regM = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
3310         } else {
3311            regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.R.rN);
3312            regM = 15;
3313         }
3314         insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
3315                              regN, regD, X0111, X1000, regM);
3316         *p++ = insn;
3317         goto done;
3318      }
3319      case ARMin_NUnaryS: {
3320         UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
3321         UInt regD, D;
3322         UInt regM, M;
3323         UInt size = i->ARMin.NUnaryS.size;
3324         UInt insn;
3325         UInt opc, opc1, opc2;
3326         switch (i->ARMin.NUnaryS.op) {
3327	    case ARMneon_VDUP:
3328               if (i->ARMin.NUnaryS.size >= 16)
3329                  goto bad;
3330               if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
3331                  goto bad;
3332               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
3333                  goto bad;
3334               regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
3335                        ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1)
3336                        : dregNo(i->ARMin.NUnaryS.dst->reg);
3337               regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
3338                        ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1)
3339                        : dregNo(i->ARMin.NUnaryS.src->reg);
3340               D = regD >> 4;
3341               M = regM >> 4;
3342               regD &= 0xf;
3343               regM &= 0xf;
3344               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
3345                               (i->ARMin.NUnaryS.size & 0xf), regD,
3346                               X1100, BITS4(0,Q,M,0), regM);
3347               *p++ = insn;
3348               goto done;
3349            case ARMneon_SETELEM:
3350               regD = Q ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1) :
3351                                dregNo(i->ARMin.NUnaryS.dst->reg);
3352               regM = iregNo(i->ARMin.NUnaryS.src->reg);
3353               M = regM >> 4;
3354               D = regD >> 4;
3355               regM &= 0xF;
3356               regD &= 0xF;
3357               if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
3358                  goto bad;
3359               switch (size) {
3360                  case 0:
3361                     if (i->ARMin.NUnaryS.dst->index > 7)
3362                        goto bad;
3363                     opc = X1000 | i->ARMin.NUnaryS.dst->index;
3364                     break;
3365                  case 1:
3366                     if (i->ARMin.NUnaryS.dst->index > 3)
3367                        goto bad;
3368                     opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
3369                     break;
3370                  case 2:
3371                     if (i->ARMin.NUnaryS.dst->index > 1)
3372                        goto bad;
3373                     opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
3374                     break;
3375                  default:
3376                     goto bad;
3377               }
3378               opc1 = (opc >> 2) & 3;
3379               opc2 = opc & 3;
3380               insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
3381                               regD, regM, X1011,
3382                               BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
3383               *p++ = insn;
3384               goto done;
3385            case ARMneon_GETELEMU:
3386               regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
3387                                dregNo(i->ARMin.NUnaryS.src->reg);
3388               regD = iregNo(i->ARMin.NUnaryS.dst->reg);
3389               M = regM >> 4;
3390               D = regD >> 4;
3391               regM &= 0xF;
3392               regD &= 0xF;
3393               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
3394                  goto bad;
3395               switch (size) {
3396                  case 0:
3397                     if (Q && i->ARMin.NUnaryS.src->index > 7) {
3398                        regM++;
3399                        i->ARMin.NUnaryS.src->index -= 8;
3400                     }
3401                     if (i->ARMin.NUnaryS.src->index > 7)
3402                        goto bad;
3403                     opc = X1000 | i->ARMin.NUnaryS.src->index;
3404                     break;
3405                  case 1:
3406                     if (Q && i->ARMin.NUnaryS.src->index > 3) {
3407                        regM++;
3408                        i->ARMin.NUnaryS.src->index -= 4;
3409                     }
3410                     if (i->ARMin.NUnaryS.src->index > 3)
3411                        goto bad;
3412                     opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
3413                     break;
3414                  case 2:
3415                     goto bad;
3416                  default:
3417                     goto bad;
3418               }
3419               opc1 = (opc >> 2) & 3;
3420               opc2 = opc & 3;
3421               insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
3422                               regM, regD, X1011,
3423                               BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
3424               *p++ = insn;
3425               goto done;
3426            case ARMneon_GETELEMS:
3427               regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
3428                                dregNo(i->ARMin.NUnaryS.src->reg);
3429               regD = iregNo(i->ARMin.NUnaryS.dst->reg);
3430               M = regM >> 4;
3431               D = regD >> 4;
3432               regM &= 0xF;
3433               regD &= 0xF;
3434               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
3435                  goto bad;
3436               switch (size) {
3437                  case 0:
3438                     if (Q && i->ARMin.NUnaryS.src->index > 7) {
3439                        regM++;
3440                        i->ARMin.NUnaryS.src->index -= 8;
3441                     }
3442                     if (i->ARMin.NUnaryS.src->index > 7)
3443                        goto bad;
3444                     opc = X1000 | i->ARMin.NUnaryS.src->index;
3445                     break;
3446                  case 1:
3447                     if (Q && i->ARMin.NUnaryS.src->index > 3) {
3448                        regM++;
3449                        i->ARMin.NUnaryS.src->index -= 4;
3450                     }
3451                     if (i->ARMin.NUnaryS.src->index > 3)
3452                        goto bad;
3453                     opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
3454                     break;
3455                  case 2:
3456                     if (Q && i->ARMin.NUnaryS.src->index > 1) {
3457                        regM++;
3458                        i->ARMin.NUnaryS.src->index -= 2;
3459                     }
3460                     if (i->ARMin.NUnaryS.src->index > 1)
3461                        goto bad;
3462                     opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
3463                     break;
3464                  default:
3465                     goto bad;
3466               }
3467               opc1 = (opc >> 2) & 3;
3468               opc2 = opc & 3;
3469               insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
3470                               regM, regD, X1011,
3471                               BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
3472               *p++ = insn;
3473               goto done;
3474            default:
3475               goto bad;
3476         }
3477      }
3478      case ARMin_NUnary: {
3479         UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
3480         UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
3481                       ? (qregNo(i->ARMin.NUnary.dst) << 1)
3482                       : dregNo(i->ARMin.NUnary.dst);
3483         UInt regM, M;
3484         UInt D = regD >> 4;
3485         UInt sz1 = i->ARMin.NUnary.size >> 1;
3486         UInt sz2 = i->ARMin.NUnary.size & 1;
3487         UInt sz = i->ARMin.NUnary.size;
3488         UInt insn;
3489         UInt F = 0; /* TODO: floating point EQZ ??? */
3490         if (i->ARMin.NUnary.op != ARMneon_DUP) {
3491            regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
3492                     ? (qregNo(i->ARMin.NUnary.src) << 1)
3493                     : dregNo(i->ARMin.NUnary.src);
3494            M = regM >> 4;
3495         } else {
3496            regM = iregNo(i->ARMin.NUnary.src);
3497            M = regM >> 4;
3498         }
3499         regD &= 0xF;
3500         regM &= 0xF;
3501         switch (i->ARMin.NUnary.op) {
3502            case ARMneon_COPY: /* VMOV reg, reg */
3503               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
3504                               BITS4(M,Q,M,1), regM);
3505               break;
3506            case ARMneon_COPYN: /* VMOVN regD, regQ */
3507               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
3508                               regD, X0010, BITS4(0,0,M,0), regM);
3509               break;
3510            case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
3511               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
3512                               regD, X0010, BITS4(1,0,M,0), regM);
3513               break;
3514            case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
3515               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
3516                               regD, X0010, BITS4(0,1,M,0), regM);
3517               break;
3518            case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
3519               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
3520                               regD, X0010, BITS4(1,1,M,0), regM);
3521               break;
3522            case ARMneon_COPYLS: /* VMOVL regQ, regD */
3523               if (sz >= 3)
3524                  goto bad;
3525               insn = XXXXXXXX(0xF, X0010,
3526                               BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
3527                               BITS4((sz == 0) ? 1 : 0,0,0,0),
3528                               regD, X1010, BITS4(0,0,M,1), regM);
3529               break;
3530            case ARMneon_COPYLU: /* VMOVL regQ, regD */
3531               if (sz >= 3)
3532                  goto bad;
3533               insn = XXXXXXXX(0xF, X0011,
3534                               BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
3535                               BITS4((sz == 0) ? 1 : 0,0,0,0),
3536                               regD, X1010, BITS4(0,0,M,1), regM);
3537               break;
3538            case ARMneon_NOT: /* VMVN reg, reg*/
3539               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
3540                               BITS4(1,Q,M,0), regM);
3541               break;
3542            case ARMneon_EQZ:
3543               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
3544                               regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
3545               break;
3546            case ARMneon_CNT:
3547               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
3548                               BITS4(0,Q,M,0), regM);
3549               break;
3550            case ARMneon_CLZ:
3551               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
3552                               regD, X0100, BITS4(1,Q,M,0), regM);
3553               break;
3554            case ARMneon_CLS:
3555               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
3556                               regD, X0100, BITS4(0,Q,M,0), regM);
3557               break;
3558            case ARMneon_ABS:
3559               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
3560                               regD, X0011, BITS4(0,Q,M,0), regM);
3561               break;
3562            case ARMneon_DUP:
3563               sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
3564               sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
3565               vassert(sz1 + sz2 < 2);
3566               insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
3567                               X1011, BITS4(D,0,sz2,1), X0000);
3568               break;
3569            case ARMneon_REV16:
3570               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
3571                               regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
3572               break;
3573            case ARMneon_REV32:
3574               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
3575                               regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
3576               break;
3577            case ARMneon_REV64:
3578               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
3579                               regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
3580               break;
3581            case ARMneon_PADDLU:
3582               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
3583                               regD, X0010, BITS4(1,Q,M,0), regM);
3584               break;
3585            case ARMneon_PADDLS:
3586               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
3587                               regD, X0010, BITS4(0,Q,M,0), regM);
3588               break;
3589            case ARMneon_VQSHLNUU:
3590               insn = XXXXXXXX(0xF, X0011,
3591                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
3592                               sz & 0xf, regD, X0111,
3593                               BITS4(sz >> 6,Q,M,1), regM);
3594               break;
3595            case ARMneon_VQSHLNSS:
3596               insn = XXXXXXXX(0xF, X0010,
3597                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
3598                               sz & 0xf, regD, X0111,
3599                               BITS4(sz >> 6,Q,M,1), regM);
3600               break;
3601            case ARMneon_VQSHLNUS:
3602               insn = XXXXXXXX(0xF, X0011,
3603                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
3604                               sz & 0xf, regD, X0110,
3605                               BITS4(sz >> 6,Q,M,1), regM);
3606               break;
3607            case ARMneon_VCVTFtoS:
3608               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
3609                               BITS4(0,Q,M,0), regM);
3610               break;
3611            case ARMneon_VCVTFtoU:
3612               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
3613                               BITS4(1,Q,M,0), regM);
3614               break;
3615            case ARMneon_VCVTStoF:
3616               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
3617                               BITS4(0,Q,M,0), regM);
3618               break;
3619            case ARMneon_VCVTUtoF:
3620               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
3621                               BITS4(1,Q,M,0), regM);
3622               break;
3623            case ARMneon_VCVTFtoFixedU:
3624               sz1 = (sz >> 5) & 1;
3625               sz2 = (sz >> 4) & 1;
3626               sz &= 0xf;
3627               insn = XXXXXXXX(0xF, X0011,
3628                               BITS4(1,D,sz1,sz2), sz, regD, X1111,
3629                               BITS4(0,Q,M,1), regM);
3630               break;
3631            case ARMneon_VCVTFtoFixedS:
3632               sz1 = (sz >> 5) & 1;
3633               sz2 = (sz >> 4) & 1;
3634               sz &= 0xf;
3635               insn = XXXXXXXX(0xF, X0010,
3636                               BITS4(1,D,sz1,sz2), sz, regD, X1111,
3637                               BITS4(0,Q,M,1), regM);
3638               break;
3639            case ARMneon_VCVTFixedUtoF:
3640               sz1 = (sz >> 5) & 1;
3641               sz2 = (sz >> 4) & 1;
3642               sz &= 0xf;
3643               insn = XXXXXXXX(0xF, X0011,
3644                               BITS4(1,D,sz1,sz2), sz, regD, X1110,
3645                               BITS4(0,Q,M,1), regM);
3646               break;
3647            case ARMneon_VCVTFixedStoF:
3648               sz1 = (sz >> 5) & 1;
3649               sz2 = (sz >> 4) & 1;
3650               sz &= 0xf;
3651               insn = XXXXXXXX(0xF, X0010,
3652                               BITS4(1,D,sz1,sz2), sz, regD, X1110,
3653                               BITS4(0,Q,M,1), regM);
3654               break;
3655            case ARMneon_VCVTF32toF16:
3656               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
3657                               BITS4(0,0,M,0), regM);
3658               break;
3659            case ARMneon_VCVTF16toF32:
3660               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
3661                               BITS4(0,0,M,0), regM);
3662               break;
3663            case ARMneon_VRECIP:
3664               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
3665                               BITS4(0,Q,M,0), regM);
3666               break;
3667            case ARMneon_VRECIPF:
3668               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
3669                               BITS4(0,Q,M,0), regM);
3670               break;
3671            case ARMneon_VABSFP:
3672               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
3673                               BITS4(0,Q,M,0), regM);
3674               break;
3675            case ARMneon_VRSQRTEFP:
3676               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
3677                               BITS4(1,Q,M,0), regM);
3678               break;
3679            case ARMneon_VRSQRTE:
3680               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
3681                               BITS4(1,Q,M,0), regM);
3682               break;
3683            case ARMneon_VNEGF:
3684               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
3685                               BITS4(1,Q,M,0), regM);
3686               break;
3687
3688            default:
3689               goto bad;
3690         }
3691         *p++ = insn;
3692         goto done;
3693      }
3694      case ARMin_NDual: {
3695         UInt Q = i->ARMin.NDual.Q ? 1 : 0;
3696         UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
3697                       ? (qregNo(i->ARMin.NDual.arg1) << 1)
3698                       : dregNo(i->ARMin.NDual.arg1);
3699         UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
3700                       ? (qregNo(i->ARMin.NDual.arg2) << 1)
3701                       : dregNo(i->ARMin.NDual.arg2);
3702         UInt D = regD >> 4;
3703         UInt M = regM >> 4;
3704         UInt sz1 = i->ARMin.NDual.size >> 1;
3705         UInt sz2 = i->ARMin.NDual.size & 1;
3706         UInt insn;
3707         regD &= 0xF;
3708         regM &= 0xF;
3709         switch (i->ARMin.NDual.op) {
3710            case ARMneon_TRN: /* VTRN reg, reg */
3711               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
3712                               regD, X0000, BITS4(1,Q,M,0), regM);
3713               break;
3714            case ARMneon_ZIP: /* VZIP reg, reg */
3715               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
3716                               regD, X0001, BITS4(1,Q,M,0), regM);
3717               break;
3718            case ARMneon_UZP: /* VUZP reg, reg */
3719               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
3720                               regD, X0001, BITS4(0,Q,M,0), regM);
3721               break;
3722            default:
3723               goto bad;
3724         }
3725         *p++ = insn;
3726         goto done;
3727      }
3728      case ARMin_NBinary: {
3729         UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
3730         UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
3731                       ? (qregNo(i->ARMin.NBinary.dst) << 1)
3732                       : dregNo(i->ARMin.NBinary.dst);
3733         UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
3734                       ? (qregNo(i->ARMin.NBinary.argL) << 1)
3735                       : dregNo(i->ARMin.NBinary.argL);
3736         UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
3737                       ? (qregNo(i->ARMin.NBinary.argR) << 1)
3738                       : dregNo(i->ARMin.NBinary.argR);
3739         UInt sz1 = i->ARMin.NBinary.size >> 1;
3740         UInt sz2 = i->ARMin.NBinary.size & 1;
3741         UInt D = regD >> 4;
3742         UInt N = regN >> 4;
3743         UInt M = regM >> 4;
3744         UInt insn;
3745         regD &= 0xF;
3746         regM &= 0xF;
3747         regN &= 0xF;
3748         switch (i->ARMin.NBinary.op) {
3749            case ARMneon_VAND: /* VAND reg, reg, reg */
3750               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
3751                               BITS4(N,Q,M,1), regM);
3752               break;
3753            case ARMneon_VORR: /* VORR reg, reg, reg*/
3754               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
3755                               BITS4(N,Q,M,1), regM);
3756               break;
3757            case ARMneon_VXOR: /* VEOR reg, reg, reg */
3758               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
3759                               BITS4(N,Q,M,1), regM);
3760               break;
3761            case ARMneon_VADD: /* VADD reg, reg, reg */
3762               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3763                               X1000, BITS4(N,Q,M,0), regM);
3764               break;
3765            case ARMneon_VSUB: /* VSUB reg, reg, reg */
3766               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3767                               X1000, BITS4(N,Q,M,0), regM);
3768               break;
3769            case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
3770               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3771                               X0110, BITS4(N,Q,M,1), regM);
3772               break;
3773            case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
3774               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3775                               X0110, BITS4(N,Q,M,1), regM);
3776               break;
3777            case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
3778               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3779                               X0110, BITS4(N,Q,M,0), regM);
3780               break;
3781            case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
3782               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3783                               X0110, BITS4(N,Q,M,0), regM);
3784               break;
3785            case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
3786               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3787                               X0001, BITS4(N,Q,M,0), regM);
3788               break;
3789            case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
3790               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3791                               X0001, BITS4(N,Q,M,0), regM);
3792               break;
3793            case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
3794               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3795                               X0000, BITS4(N,Q,M,1), regM);
3796               break;
3797            case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
3798               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3799                               X0000, BITS4(N,Q,M,1), regM);
3800               break;
3801            case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
3802               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3803                               X0010, BITS4(N,Q,M,1), regM);
3804               break;
3805            case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
3806               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3807                               X0010, BITS4(N,Q,M,1), regM);
3808               break;
3809            case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
3810               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3811                               X0011, BITS4(N,Q,M,0), regM);
3812               break;
3813            case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
3814               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3815                               X0011, BITS4(N,Q,M,0), regM);
3816               break;
3817            case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
3818               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3819                               X0011, BITS4(N,Q,M,1), regM);
3820               break;
3821            case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
3822               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3823                               X0011, BITS4(N,Q,M,1), regM);
3824               break;
3825            case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
3826               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3827                               X1000, BITS4(N,Q,M,1), regM);
3828               break;
3829            case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4*/
3830               if (i->ARMin.NBinary.size >= 16)
3831                  goto bad;
3832               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
3833                               i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
3834                               regM);
3835               break;
3836            case ARMneon_VMUL:
3837               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3838                               X1001, BITS4(N,Q,M,1), regM);
3839               break;
3840            case ARMneon_VMULLU:
3841               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
3842                               X1100, BITS4(N,0,M,0), regM);
3843               break;
3844            case ARMneon_VMULLS:
3845               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
3846                               X1100, BITS4(N,0,M,0), regM);
3847               break;
3848            case ARMneon_VMULP:
3849               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3850                               X1001, BITS4(N,Q,M,1), regM);
3851               break;
3852            case ARMneon_VMULFP:
3853               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
3854                               X1101, BITS4(N,Q,M,1), regM);
3855               break;
3856            case ARMneon_VMULLP:
3857               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
3858                               X1110, BITS4(N,0,M,0), regM);
3859               break;
3860            case ARMneon_VQDMULH:
3861               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3862                               X1011, BITS4(N,Q,M,0), regM);
3863               break;
3864            case ARMneon_VQRDMULH:
3865               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3866                               X1011, BITS4(N,Q,M,0), regM);
3867               break;
3868            case ARMneon_VQDMULL:
3869               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
3870                               X1101, BITS4(N,0,M,0), regM);
3871               break;
3872            case ARMneon_VTBL:
3873               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
3874                               X1000, BITS4(N,0,M,0), regM);
3875               break;
3876            case ARMneon_VPADD:
3877               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3878                               X1011, BITS4(N,Q,M,1), regM);
3879               break;
3880            case ARMneon_VPADDFP:
3881               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
3882                               X1101, BITS4(N,Q,M,0), regM);
3883               break;
3884            case ARMneon_VPMINU:
3885               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3886                               X1010, BITS4(N,Q,M,1), regM);
3887               break;
3888            case ARMneon_VPMINS:
3889               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3890                               X1010, BITS4(N,Q,M,1), regM);
3891               break;
3892            case ARMneon_VPMAXU:
3893               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3894                               X1010, BITS4(N,Q,M,0), regM);
3895               break;
3896            case ARMneon_VPMAXS:
3897               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3898                               X1010, BITS4(N,Q,M,0), regM);
3899               break;
3900            case ARMneon_VADDFP: /* VADD reg, reg, reg */
3901               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
3902                               X1101, BITS4(N,Q,M,0), regM);
3903               break;
3904            case ARMneon_VSUBFP: /* VSUB reg, reg, reg */
3905               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
3906                               X1101, BITS4(N,Q,M,0), regM);
3907               break;
3908            case ARMneon_VABDFP: /* VABD reg, reg, reg */
3909               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
3910                               X1101, BITS4(N,Q,M,0), regM);
3911               break;
3912            case ARMneon_VMINF:
3913               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
3914                               X1111, BITS4(N,Q,M,0), regM);
3915               break;
3916            case ARMneon_VMAXF:
3917               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
3918                               X1111, BITS4(N,Q,M,0), regM);
3919               break;
3920            case ARMneon_VPMINF:
3921               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
3922                               X1111, BITS4(N,Q,M,0), regM);
3923               break;
3924            case ARMneon_VPMAXF:
3925               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
3926                               X1111, BITS4(N,Q,M,0), regM);
3927               break;
3928            case ARMneon_VRECPS:
3929               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
3930                               BITS4(N,Q,M,1), regM);
3931               break;
3932            case ARMneon_VCGTF:
3933               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
3934                               BITS4(N,Q,M,0), regM);
3935               break;
3936            case ARMneon_VCGEF:
3937               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
3938                               BITS4(N,Q,M,0), regM);
3939               break;
3940            case ARMneon_VCEQF:
3941               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
3942                               BITS4(N,Q,M,0), regM);
3943               break;
3944            case ARMneon_VRSQRTS:
3945               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
3946                               BITS4(N,Q,M,1), regM);
3947               break;
3948            default:
3949               goto bad;
3950         }
3951         *p++ = insn;
3952         goto done;
3953      }
3954      case ARMin_NShift: {
3955         UInt Q = i->ARMin.NShift.Q ? 1 : 0;
3956         UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
3957                       ? (qregNo(i->ARMin.NShift.dst) << 1)
3958                       : dregNo(i->ARMin.NShift.dst);
3959         UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
3960                       ? (qregNo(i->ARMin.NShift.argL) << 1)
3961                       : dregNo(i->ARMin.NShift.argL);
3962         UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
3963                       ? (qregNo(i->ARMin.NShift.argR) << 1)
3964                       : dregNo(i->ARMin.NShift.argR);
3965         UInt sz1 = i->ARMin.NShift.size >> 1;
3966         UInt sz2 = i->ARMin.NShift.size & 1;
3967         UInt D = regD >> 4;
3968         UInt N = regN >> 4;
3969         UInt M = regM >> 4;
3970         UInt insn;
3971         regD &= 0xF;
3972         regM &= 0xF;
3973         regN &= 0xF;
3974         switch (i->ARMin.NShift.op) {
3975            case ARMneon_VSHL:
3976               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3977                               X0100, BITS4(N,Q,M,0), regM);
3978               break;
3979            case ARMneon_VSAL:
3980               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3981                               X0100, BITS4(N,Q,M,0), regM);
3982               break;
3983            case ARMneon_VQSHL:
3984               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
3985                               X0100, BITS4(N,Q,M,1), regM);
3986               break;
3987            case ARMneon_VQSAL:
3988               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
3989                               X0100, BITS4(N,Q,M,1), regM);
3990               break;
3991            default:
3992               goto bad;
3993         }
3994         *p++ = insn;
3995         goto done;
3996      }
3997      case ARMin_NeonImm: {
3998         UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
3999         UInt regD = Q ? (qregNo(i->ARMin.NeonImm.dst) << 1) :
4000                          dregNo(i->ARMin.NeonImm.dst);
4001         UInt D = regD >> 4;
4002         UInt imm = i->ARMin.NeonImm.imm->imm8;
4003         UInt tp = i->ARMin.NeonImm.imm->type;
4004         UInt j = imm >> 7;
4005         UInt imm3 = (imm >> 4) & 0x7;
4006         UInt imm4 = imm & 0xF;
4007         UInt cmode, op;
4008         UInt insn;
4009         regD &= 0xF;
4010         if (tp == 9)
4011            op = 1;
4012         else
4013            op = 0;
4014         switch (tp) {
4015            case 0:
4016            case 1:
4017            case 2:
4018            case 3:
4019            case 4:
4020            case 5:
4021               cmode = tp << 1;
4022               break;
4023            case 9:
4024            case 6:
4025               cmode = 14;
4026               break;
4027            case 7:
4028               cmode = 12;
4029               break;
4030            case 8:
4031               cmode = 13;
4032               break;
4033            case 10:
4034               cmode = 15;
4035               break;
4036            default:
4037               vpanic("ARMin_NeonImm");
4038
4039         }
4040         insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
4041                         cmode, BITS4(0,Q,op,1), imm4);
4042         *p++ = insn;
4043         goto done;
4044      }
4045      case ARMin_NCMovQ: {
4046         UInt cc = (UInt)i->ARMin.NCMovQ.cond;
4047         UInt qM = qregNo(i->ARMin.NCMovQ.src) << 1;
4048         UInt qD = qregNo(i->ARMin.NCMovQ.dst) << 1;
4049         UInt vM = qM & 0xF;
4050         UInt vD = qD & 0xF;
4051         UInt M  = (qM >> 4) & 1;
4052         UInt D  = (qD >> 4) & 1;
4053         vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
4054         /* b!cc here+8: !cc A00 0000 */
4055         UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
4056         *p++ = insn;
4057         /* vmov qD, qM */
4058         insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
4059                         vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
4060         *p++ = insn;
4061         goto done;
4062      }
4063      case ARMin_Add32: {
4064         UInt regD = iregNo(i->ARMin.Add32.rD);
4065         UInt regN = iregNo(i->ARMin.Add32.rN);
4066         UInt imm32 = i->ARMin.Add32.imm32;
4067         vassert(regD != regN);
4068         /* MOV regD, imm32 */
4069         p = imm32_to_iregNo((UInt *)p, regD, imm32);
4070         /* ADD regD, regN, regD */
4071         UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
4072         *p++ = insn;
4073         goto done;
4074      }
4075      /* ... */
4076      default:
4077         goto bad;
4078    }
4079
4080  bad:
4081   ppARMInstr(i);
4082   vpanic("emit_ARMInstr");
4083   /*NOTREACHED*/
4084
4085  done:
4086   vassert(((UChar*)p) - &buf[0] <= 32);
4087   return ((UChar*)p) - &buf[0];
4088}
4089
/* Undefine the bit-pattern helper macros used by the instruction
   emitter above (BITS4, the X0000..X1111 constants and the
   XXXXXXXX-style word builders) so that they do not remain defined
   past this point in the file. */
4090#undef BITS4
4091#undef X0000
4092#undef X0001
4093#undef X0010
4094#undef X0011
4095#undef X0100
4096#undef X0101
4097#undef X0110
4098#undef X0111
4099#undef X1000
4100#undef X1001
4101#undef X1010
4102#undef X1011
4103#undef X1100
4104#undef X1101
4105#undef X1110
4106#undef X1111
4107#undef XXXXX___
4108#undef XXXXXX__
4109#undef XXX___XX
4110#undef XXXXX__X
4111#undef XXXXXXXX
4112
4113/*---------------------------------------------------------------*/
4114/*--- end                                     host_arm_defs.c ---*/
4115/*---------------------------------------------------------------*/
4116