
/*---------------------------------------------------------------*/
/*--- begin                                   host_arm_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2012 OpenWorks LLP
      info@open-works.net

   NEON support is
   Copyright (C) 2010-2012 Samsung Electronics
   contributed by Dmitry Zhurikhin <zhur@ispras.ru>
              and Kirill Batuzov <batuzovk@ispras.ru>

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_arm_defs.h"

UInt arm_hwcaps = 0;


/* --------- Registers. --------- */

/* The usual HReg abstraction.
   There are 16 general purpose regs.
*/

void ppHRegARM ( HReg reg ) {
   Int r;
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      ppHReg(reg);
      return;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt32:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("r%d", r);
         return;
      case HRcFlt64:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 32);
         vex_printf("d%d", r);
         return;
      case HRcFlt32:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 32);
         vex_printf("s%d", r);
         return;
      case HRcVec128:
         r = hregNumber(reg);
         vassert(r >= 0 && r < 16);
         vex_printf("q%d", r);
         return;
      default:
         vpanic("ppHRegARM");
   }
}

HReg hregARM_R0  ( void ) { return mkHReg(0,  HRcInt32, False); }
HReg hregARM_R1  ( void ) { return mkHReg(1,  HRcInt32, False); }
HReg hregARM_R2  ( void ) { return mkHReg(2,  HRcInt32, False); }
HReg hregARM_R3  ( void ) { return mkHReg(3,  HRcInt32, False); }
HReg hregARM_R4  ( void ) { return mkHReg(4,  HRcInt32, False); }
HReg hregARM_R5  ( void ) { return mkHReg(5,  HRcInt32, False); }
HReg hregARM_R6  ( void ) { return mkHReg(6,  HRcInt32, False); }
HReg hregARM_R7  ( void ) { return mkHReg(7,  HRcInt32, False); }
HReg hregARM_R8  ( void ) { return mkHReg(8,  HRcInt32, False); }
HReg hregARM_R9  ( void ) { return mkHReg(9,  HRcInt32, False); }
HReg hregARM_R10 ( void ) { return mkHReg(10, HRcInt32, False); }
HReg hregARM_R11 ( void ) { return mkHReg(11, HRcInt32, False); }
HReg hregARM_R12 ( void ) { return mkHReg(12, HRcInt32, False); }
HReg hregARM_R13 ( void ) { return mkHReg(13, HRcInt32, False); }
HReg hregARM_R14 ( void ) { return mkHReg(14, HRcInt32, False); }
HReg hregARM_R15 ( void ) { return mkHReg(15, HRcInt32, False); }
HReg hregARM_D8  ( void ) { return mkHReg(8,  HRcFlt64, False); }
HReg hregARM_D9  ( void ) { return mkHReg(9,  HRcFlt64, False); }
HReg hregARM_D10 ( void ) { return mkHReg(10, HRcFlt64, False); }
HReg hregARM_D11 ( void ) { return mkHReg(11, HRcFlt64, False); }
HReg hregARM_D12 ( void ) { return mkHReg(12, HRcFlt64, False); }
HReg hregARM_S26 ( void ) { return mkHReg(26, HRcFlt32, False); }
HReg hregARM_S27 ( void ) { return mkHReg(27, HRcFlt32, False); }
HReg hregARM_S28 ( void ) { return mkHReg(28, HRcFlt32, False); }
HReg hregARM_S29 ( void ) { return mkHReg(29, HRcFlt32, False); }
HReg hregARM_S30 ( void ) { return mkHReg(30, HRcFlt32, False); }
HReg hregARM_Q8  ( void ) { return mkHReg(8,  HRcVec128, False); }
HReg hregARM_Q9  ( void ) { return mkHReg(9,  HRcVec128, False); }
HReg hregARM_Q10 ( void ) { return mkHReg(10, HRcVec128, False); }
HReg hregARM_Q11 ( void ) { return mkHReg(11, HRcVec128, False); }
HReg hregARM_Q12 ( void ) { return mkHReg(12, HRcVec128, False); }
HReg hregARM_Q13 ( void ) { return mkHReg(13, HRcVec128, False); }
HReg hregARM_Q14 ( void ) { return mkHReg(14, HRcVec128, False); }
HReg hregARM_Q15 ( void ) { return mkHReg(15, HRcVec128, False); }

void getAllocableRegs_ARM ( Int* nregs, HReg** arr )
{
   Int i = 0;
   *nregs = 26;
   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
   // callee-saved ones are listed first, since we prefer them
   // if they're available
   (*arr)[i++] = hregARM_R4();
   (*arr)[i++] = hregARM_R5();
   (*arr)[i++] = hregARM_R6();
   (*arr)[i++] = hregARM_R7();
   (*arr)[i++] = hregARM_R10();
   (*arr)[i++] = hregARM_R11();
   // otherwise we'll have to slum it out with caller-saved ones
   (*arr)[i++] = hregARM_R0();
   (*arr)[i++] = hregARM_R1();
   (*arr)[i++] = hregARM_R2();
   (*arr)[i++] = hregARM_R3();
   (*arr)[i++] = hregARM_R9();
   // FP registers.  Note: these are all callee-save.  Yay!
   // Hence we don't need to mention them as trashed in
   // getHRegUsage for ARMInstr_Call.
   (*arr)[i++] = hregARM_D8();
   (*arr)[i++] = hregARM_D9();
   (*arr)[i++] = hregARM_D10();
   (*arr)[i++] = hregARM_D11();
   (*arr)[i++] = hregARM_D12();
   (*arr)[i++] = hregARM_S26();
   (*arr)[i++] = hregARM_S27();
   (*arr)[i++] = hregARM_S28();
   (*arr)[i++] = hregARM_S29();
   (*arr)[i++] = hregARM_S30();

   (*arr)[i++] = hregARM_Q8();
   (*arr)[i++] = hregARM_Q9();
   (*arr)[i++] = hregARM_Q10();
   (*arr)[i++] = hregARM_Q11();
   (*arr)[i++] = hregARM_Q12();

   //(*arr)[i++] = hregARM_Q13();
   //(*arr)[i++] = hregARM_Q14();
   //(*arr)[i++] = hregARM_Q15();

   // unavail: r8 as GSP
   // r12 is used as a spill/reload temporary
   // r13 as SP
   // r14 as LR
   // r15 as PC
   //
   // All in all, we have 11 allocatable integer registers:
   // 0 1 2 3 4 5 6 7 9 10 11, with r8 dedicated as GSP
   // and r12 dedicated as a spill temporary.
   // 13 14 and 15 are not under the allocator's control.
   //
   // Hence for the allocatable registers we have:
   //
   // callee-saved: 4 5 6 7 (8) 9 10 11
   // caller-saved: 0 1 2 3
   // Note 9 is ambiguous: the base EABI does not give an e/r-saved
   // designation for it, but the Linux instantiation of the ABI
   // specifies it as callee-saved.
   //
   // If the set of available registers changes or if the e/r status
   // changes, be sure to re-check/sync the definition of
   // getHRegUsage for ARMInstr_Call too.
   vassert(i == *nregs);
}



/* --------- Condition codes, ARM encoding. --------- */

HChar* showARMCondCode ( ARMCondCode cond ) {
   switch (cond) {
       case ARMcc_EQ:  return "eq";
       case ARMcc_NE:  return "ne";
       case ARMcc_HS:  return "hs";
       case ARMcc_LO:  return "lo";
       case ARMcc_MI:  return "mi";
       case ARMcc_PL:  return "pl";
       case ARMcc_VS:  return "vs";
       case ARMcc_VC:  return "vc";
       case ARMcc_HI:  return "hi";
       case ARMcc_LS:  return "ls";
       case ARMcc_GE:  return "ge";
       case ARMcc_LT:  return "lt";
       case ARMcc_GT:  return "gt";
       case ARMcc_LE:  return "le";
       case ARMcc_AL:  return "al"; // default
       case ARMcc_NV:  return "nv";
       default: vpanic("showARMCondCode");
   }
}


/* --------- Mem AModes: Addressing Mode 1 --------- */

ARMAMode1* ARMAMode1_RI  ( HReg reg, Int simm13 ) {
   ARMAMode1* am        = LibVEX_Alloc(sizeof(ARMAMode1));
   am->tag              = ARMam1_RI;
   am->ARMam1.RI.reg    = reg;
   am->ARMam1.RI.simm13 = simm13;
   vassert(-4095 <= simm13 && simm13 <= 4095);
   return am;
}
ARMAMode1* ARMAMode1_RRS ( HReg base, HReg index, UInt shift ) {
   ARMAMode1* am        = LibVEX_Alloc(sizeof(ARMAMode1));
   am->tag              = ARMam1_RRS;
   am->ARMam1.RRS.base  = base;
   am->ARMam1.RRS.index = index;
   am->ARMam1.RRS.shift = shift;
   vassert(0 <= shift && shift <= 3);
   return am;
}

void ppARMAMode1 ( ARMAMode1* am ) {
   switch (am->tag) {
      case ARMam1_RI:
         vex_printf("%d(", am->ARMam1.RI.simm13);
         ppHRegARM(am->ARMam1.RI.reg);
         vex_printf(")");
         break;
      case ARMam1_RRS:
         vex_printf("(");
         ppHRegARM(am->ARMam1.RRS.base);
         vex_printf(",");
         ppHRegARM(am->ARMam1.RRS.index);
         vex_printf(",%u)", am->ARMam1.RRS.shift);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARMAMode1 ( HRegUsage* u, ARMAMode1* am ) {
   switch (am->tag) {
      case ARMam1_RI:
         addHRegUse(u, HRmRead, am->ARMam1.RI.reg);
         return;
      case ARMam1_RRS:
         //    addHRegUse(u, HRmRead, am->ARMam1.RRS.base);
         //    addHRegUse(u, HRmRead, am->ARMam1.RRS.index);
         //   return;
      default:
         vpanic("addRegUsage_ARMAmode1");
   }
}

static void mapRegs_ARMAMode1 ( HRegRemap* m, ARMAMode1* am ) {
   switch (am->tag) {
      case ARMam1_RI:
         am->ARMam1.RI.reg = lookupHRegRemap(m, am->ARMam1.RI.reg);
         return;
      case ARMam1_RRS:
         //am->ARMam1.RR.base =lookupHRegRemap(m, am->ARMam1.RR.base);
         //am->ARMam1.RR.index = lookupHRegRemap(m, am->ARMam1.RR.index);
         //return;
      default:
         vpanic("mapRegs_ARMAmode1");
   }
}


/* --------- Mem AModes: Addressing Mode 2 --------- */

ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
   ARMAMode2* am       = LibVEX_Alloc(sizeof(ARMAMode2));
   am->tag             = ARMam2_RI;
   am->ARMam2.RI.reg   = reg;
   am->ARMam2.RI.simm9 = simm9;
   vassert(-255 <= simm9 && simm9 <= 255);
   return am;
}
ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
   ARMAMode2* am       = LibVEX_Alloc(sizeof(ARMAMode2));
   am->tag             = ARMam2_RR;
   am->ARMam2.RR.base  = base;
   am->ARMam2.RR.index = index;
   return am;
}

void ppARMAMode2 ( ARMAMode2* am ) {
   switch (am->tag) {
      case ARMam2_RI:
         vex_printf("%d(", am->ARMam2.RI.simm9);
         ppHRegARM(am->ARMam2.RI.reg);
         vex_printf(")");
         break;
      case ARMam2_RR:
         vex_printf("(");
         ppHRegARM(am->ARMam2.RR.base);
         vex_printf(",");
         ppHRegARM(am->ARMam2.RR.index);
         vex_printf(")");
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
   switch (am->tag) {
      case ARMam2_RI:
         addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
         return;
      case ARMam2_RR:
         //    addHRegUse(u, HRmRead, am->ARMam2.RR.base);
         //    addHRegUse(u, HRmRead, am->ARMam2.RR.index);
         //   return;
      default:
         vpanic("addRegUsage_ARMAmode2");
   }
}

static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
   switch (am->tag) {
      case ARMam2_RI:
         am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
         return;
      case ARMam2_RR:
         //am->ARMam2.RR.base =lookupHRegRemap(m, am->ARMam2.RR.base);
         //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index);
         //return;
      default:
         vpanic("mapRegs_ARMAmode2");
   }
}


/* --------- Mem AModes: Addressing Mode VFP --------- */

ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
   ARMAModeV* am = LibVEX_Alloc(sizeof(ARMAModeV));
   vassert(simm11 >= -1020 && simm11 <= 1020);
   vassert(0 == (simm11 & 3));
   am->reg    = reg;
   am->simm11 = simm11;
   return am;
}

void ppARMAModeV ( ARMAModeV* am ) {
   vex_printf("%d(", am->simm11);
   ppHRegARM(am->reg);
   vex_printf(")");
}

static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
   addHRegUse(u, HRmRead, am->reg);
}

static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
   am->reg = lookupHRegRemap(m, am->reg);
}


/* --------- Mem AModes: Addressing Mode Neon ------- */

ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
   ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
   am->tag = ARMamN_RR;
   am->ARMamN.RR.rN = rN;
   am->ARMamN.RR.rM = rM;
   return am;
}

ARMAModeN *mkARMAModeN_R ( HReg rN ) {
   ARMAModeN* am = LibVEX_Alloc(sizeof(ARMAModeN));
   am->tag = ARMamN_R;
   am->ARMamN.R.rN = rN;
   return am;
}

static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
   if (am->tag == ARMamN_R) {
      addHRegUse(u, HRmRead, am->ARMamN.R.rN);
   } else {
      addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
      addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
   }
}

static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
   if (am->tag == ARMamN_R) {
      am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
   } else {
      am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
      am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
   }
}

void ppARMAModeN ( ARMAModeN* am ) {
   vex_printf("[");
   if (am->tag == ARMamN_R) {
      ppHRegARM(am->ARMamN.R.rN);
   } else {
      ppHRegARM(am->ARMamN.RR.rN);
   }
   vex_printf("]");
   if (am->tag == ARMamN_RR) {
      vex_printf(", ");
      ppHRegARM(am->ARMamN.RR.rM);
   }
}


/* --------- Reg or imm-8x4 operands --------- */

static UInt ROR32 ( UInt x, UInt sh ) {
   vassert(sh >= 0 && sh < 32);
   if (sh == 0)
      return x;
   else
      return (x << (32-sh)) | (x >> sh);
}
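
/* For reference: an ARMRI84 immediate denotes the 32-bit value
   imm8 ROR (2*imm4).  For example, ARMRI84_I84(0xAB, 8) denotes
   ROR32(0xAB, 16) == 0x00AB0000, which is exactly the value that
   ppARMRI84 below prints for it. */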

ARMRI84* ARMRI84_I84 ( UShort imm8, UShort imm4 ) {
   ARMRI84* ri84          = LibVEX_Alloc(sizeof(ARMRI84));
   ri84->tag              = ARMri84_I84;
   ri84->ARMri84.I84.imm8 = imm8;
   ri84->ARMri84.I84.imm4 = imm4;
   vassert(imm8 >= 0 && imm8 <= 255);
   vassert(imm4 >= 0 && imm4 <= 15);
   return ri84;
}
ARMRI84* ARMRI84_R ( HReg reg ) {
   ARMRI84* ri84       = LibVEX_Alloc(sizeof(ARMRI84));
   ri84->tag           = ARMri84_R;
   ri84->ARMri84.R.reg = reg;
   return ri84;
}

void ppARMRI84 ( ARMRI84* ri84 ) {
   switch (ri84->tag) {
      case ARMri84_I84:
         vex_printf("0x%x", ROR32(ri84->ARMri84.I84.imm8,
                                  2 * ri84->ARMri84.I84.imm4));
         break;
      case ARMri84_R:
         ppHRegARM(ri84->ARMri84.R.reg);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARMRI84 ( HRegUsage* u, ARMRI84* ri84 ) {
   switch (ri84->tag) {
      case ARMri84_I84:
         return;
      case ARMri84_R:
         addHRegUse(u, HRmRead, ri84->ARMri84.R.reg);
         return;
      default:
         vpanic("addRegUsage_ARMRI84");
   }
}

static void mapRegs_ARMRI84 ( HRegRemap* m, ARMRI84* ri84 ) {
   switch (ri84->tag) {
      case ARMri84_I84:
         return;
      case ARMri84_R:
         ri84->ARMri84.R.reg = lookupHRegRemap(m, ri84->ARMri84.R.reg);
         return;
      default:
         vpanic("mapRegs_ARMRI84");
   }
}


/* --------- Reg or imm5 operands --------- */

ARMRI5* ARMRI5_I5 ( UInt imm5 ) {
   ARMRI5* ri5         = LibVEX_Alloc(sizeof(ARMRI5));
   ri5->tag            = ARMri5_I5;
   ri5->ARMri5.I5.imm5 = imm5;
   vassert(imm5 > 0 && imm5 <= 31); // zero is not allowed
   return ri5;
}
ARMRI5* ARMRI5_R ( HReg reg ) {
   ARMRI5* ri5       = LibVEX_Alloc(sizeof(ARMRI5));
   ri5->tag          = ARMri5_R;
   ri5->ARMri5.R.reg = reg;
   return ri5;
}

void ppARMRI5 ( ARMRI5* ri5 ) {
   switch (ri5->tag) {
      case ARMri5_I5:
         vex_printf("%u", ri5->ARMri5.I5.imm5);
         break;
      case ARMri5_R:
         ppHRegARM(ri5->ARMri5.R.reg);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARMRI5 ( HRegUsage* u, ARMRI5* ri5 ) {
   switch (ri5->tag) {
      case ARMri5_I5:
         return;
      case ARMri5_R:
         addHRegUse(u, HRmRead, ri5->ARMri5.R.reg);
         return;
      default:
         vpanic("addRegUsage_ARMRI5");
   }
}

static void mapRegs_ARMRI5 ( HRegRemap* m, ARMRI5* ri5 ) {
   switch (ri5->tag) {
      case ARMri5_I5:
         return;
      case ARMri5_R:
         ri5->ARMri5.R.reg = lookupHRegRemap(m, ri5->ARMri5.R.reg);
         return;
      default:
         vpanic("mapRegs_ARMRI5");
   }
}

/* --------- Neon Immediate operand --------- */

ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
   ARMNImm* i = LibVEX_Alloc(sizeof(ARMNImm));
   i->type = type;
   i->imm8 = imm8;
   return i;
}
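
/* For reference, 'type' selects one of the NEON modified-immediate
   expansions implemented by ARMNImm_to_Imm64 below: types 0..3 place
   imm8 in byte 0..3 of each 32-bit lane, types 4..5 in the low/high
   byte of each 16-bit lane, type 6 in every byte, types 7..8 are the
   ones-extended forms (0x0000ABFF / 0x00ABFFFF per lane), type 9
   widens each bit of imm8 to a whole byte, and type 10 is the 32-bit
   FP immediate expansion. */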

ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
   int i, j;
   ULong y, x = imm->imm8;
   switch (imm->type) {
      case 3:
         x = x << 8;  /* fallthrough */
      case 2:
         x = x << 8;  /* fallthrough */
      case 1:
         x = x << 8;  /* fallthrough */
      case 0:
         return (x << 32) | x;
      case 5:
      case 6:
         if (imm->type == 5)
            x = x << 8;
         else
            x = (x << 8) | x;
         /* fallthrough */
      case 4:
         x = (x << 16) | x;
         return (x << 32) | x;
      case 8:
         x = (x << 8) | 0xFF;  /* fallthrough */
      case 7:
         x = (x << 8) | 0xFF;
         return (x << 32) | x;
      case 9:
         x = 0;
         for (i = 7; i >= 0; i--) {
            y = ((ULong)imm->imm8 >> i) & 1;
            for (j = 0; j < 8; j++) {
               x = (x << 1) | y;
            }
         }
         return x;
      case 10:
         x |= (x & 0x80) << 5;
         x |= (~x & 0x40) << 5;
         x &= 0x187F; /* 0001 1000 0111 1111 */
         x |= (x & 0x40) << 4;
         x |= (x & 0x40) << 3;
         x |= (x & 0x40) << 2;
         x |= (x & 0x40) << 1;
         x = x << 19;
         x = (x << 32) | x;
         return x;
      default:
         vpanic("ARMNImm_to_Imm64");
   }
}
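
/* Worked example: for type 9 each bit of imm8 becomes a byte, MSB
   first, so
      ARMNImm_to_Imm64(ARMNImm_TI(9, 0xA5)) == 0xFF00FF0000FF00FFULL
   which is a 64-bit byte mask. */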

ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
   ARMNImm tmp;
   if ((x & 0xFFFFFFFF) == (x >> 32)) {
      if ((x & 0xFFFFFF00) == 0)
         return ARMNImm_TI(0, x & 0xFF);
      if ((x & 0xFFFF00FF) == 0)
         return ARMNImm_TI(1, (x >> 8) & 0xFF);
      if ((x & 0xFF00FFFF) == 0)
         return ARMNImm_TI(2, (x >> 16) & 0xFF);
      if ((x & 0x00FFFFFF) == 0)
         return ARMNImm_TI(3, (x >> 24) & 0xFF);
      if ((x & 0xFFFF00FF) == 0xFF)
         return ARMNImm_TI(7, (x >> 8) & 0xFF);
      if ((x & 0xFF00FFFF) == 0xFFFF)
         return ARMNImm_TI(8, (x >> 16) & 0xFF);
      if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
         if ((x & 0xFF00) == 0)
            return ARMNImm_TI(4, x & 0xFF);
         if ((x & 0x00FF) == 0)
            return ARMNImm_TI(5, (x >> 8) & 0xFF);
         if ((x & 0xFF) == ((x >> 8) & 0xFF))
            return ARMNImm_TI(6, x & 0xFF);
      }
      if ((x & 0x7FFFF) == 0) {
         tmp.type = 10;
         tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
         if (ARMNImm_to_Imm64(&tmp) == x)
            return ARMNImm_TI(tmp.type, tmp.imm8);
      }
   } else {
      /* This can only be type 9. */
      tmp.imm8 = (((x >> 56) & 1) << 7)
               | (((x >> 48) & 1) << 6)
               | (((x >> 40) & 1) << 5)
               | (((x >> 32) & 1) << 4)
               | (((x >> 24) & 1) << 3)
               | (((x >> 16) & 1) << 2)
               | (((x >>  8) & 1) << 1)
               | (((x >>  0) & 1) << 0);
      tmp.type = 9;
      if (ARMNImm_to_Imm64 (&tmp) == x)
         return ARMNImm_TI(tmp.type, tmp.imm8);
   }
   return NULL;
}
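
/* Imm64_to_ARMNImm is a partial inverse of ARMNImm_to_Imm64: it
   returns NULL when the value has no modified-immediate encoding,
   so callers must be prepared to materialise the constant some
   other way.  A round-trip sketch:

      ARMNImm* ni = Imm64_to_ARMNImm(0x00FF00FF00FF00FFULL);
      if (ni)   // here: type 4, imm8 0xFF
         vassert(ARMNImm_to_Imm64(ni) == 0x00FF00FF00FF00FFULL);
*/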

void ppARMNImm (ARMNImm* i) {
   ULong x = ARMNImm_to_Imm64(i);
   vex_printf("0x%llX%llX", x, x);
}

/* --------- Register or scalar operand --------- */

ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
{
   ARMNRS *p = LibVEX_Alloc(sizeof(ARMNRS));
   p->tag = tag;
   p->reg = reg;
   p->index = index;
   return p;
}

void ppARMNRS(ARMNRS *p)
{
   ppHRegARM(p->reg);
   if (p->tag == ARMNRS_Scalar) {
      vex_printf("[%d]", p->index);
   }
}

/* --------- Instructions. --------- */

HChar* showARMAluOp ( ARMAluOp op ) {
   switch (op) {
      case ARMalu_ADD:  return "add";
      case ARMalu_ADDS: return "adds";
      case ARMalu_ADC:  return "adc";
      case ARMalu_SUB:  return "sub";
      case ARMalu_SUBS: return "subs";
      case ARMalu_SBC:  return "sbc";
      case ARMalu_AND:  return "and";
      case ARMalu_BIC:  return "bic";
      case ARMalu_OR:   return "orr";
      case ARMalu_XOR:  return "xor";
      default: vpanic("showARMAluOp");
   }
}

HChar* showARMShiftOp ( ARMShiftOp op ) {
   switch (op) {
      case ARMsh_SHL: return "shl";
      case ARMsh_SHR: return "shr";
      case ARMsh_SAR: return "sar";
      default: vpanic("showARMShiftOp");
   }
}

HChar* showARMUnaryOp ( ARMUnaryOp op ) {
   switch (op) {
      case ARMun_NEG: return "neg";
      case ARMun_NOT: return "not";
      case ARMun_CLZ: return "clz";
      default: vpanic("showARMUnaryOp");
   }
}

HChar* showARMMulOp ( ARMMulDivOp op ) {
   switch (op) {
      case ARMmul_PLAIN: return "mul";
      case ARMmul_ZX:    return "umull";
      case ARMmul_SX:    return "smull";
      default: vpanic("showARMMulOp");
   }
}

HChar* showARMDivOp ( ARMMulDivOp op ) {
   switch (op) {
      case ARMdiv_S:     return "sdiv";
      case ARMdiv_U:     return "udiv";
      default: vpanic("showARMDivOp");
   }
}

HChar* showARMVfpOp ( ARMVfpOp op ) {
   switch (op) {
      case ARMvfp_ADD: return "add";
      case ARMvfp_SUB: return "sub";
      case ARMvfp_MUL: return "mul";
      case ARMvfp_DIV: return "div";
      default: vpanic("showARMVfpOp");
   }
}

HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op ) {
   switch (op) {
      case ARMvfpu_COPY: return "cpy";
      case ARMvfpu_NEG:  return "neg";
      case ARMvfpu_ABS:  return "abs";
      case ARMvfpu_SQRT: return "sqrt";
      default: vpanic("showARMVfpUnaryOp");
   }
}

HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
   switch (op) {
      case ARMneon_VAND: return "vand";
      case ARMneon_VORR: return "vorr";
      case ARMneon_VXOR: return "veor";
      case ARMneon_VADD: return "vadd";
      case ARMneon_VRHADDS: return "vrhadd";
      case ARMneon_VRHADDU: return "vrhadd";
      case ARMneon_VADDFP: return "vadd";
      case ARMneon_VPADDFP: return "vpadd";
      case ARMneon_VABDFP: return "vabd";
      case ARMneon_VSUB: return "vsub";
      case ARMneon_VSUBFP: return "vsub";
      case ARMneon_VMINU: return "vmin";
      case ARMneon_VMINS: return "vmin";
      case ARMneon_VMINF: return "vmin";
      case ARMneon_VMAXU: return "vmax";
      case ARMneon_VMAXS: return "vmax";
      case ARMneon_VMAXF: return "vmax";
      case ARMneon_VQADDU: return "vqadd";
      case ARMneon_VQADDS: return "vqadd";
      case ARMneon_VQSUBU: return "vqsub";
      case ARMneon_VQSUBS: return "vqsub";
      case ARMneon_VCGTU:  return "vcgt";
      case ARMneon_VCGTS:  return "vcgt";
      case ARMneon_VCGTF:  return "vcgt";
      case ARMneon_VCGEF:  return "vcge";
      case ARMneon_VCGEU:  return "vcge";
      case ARMneon_VCGES:  return "vcge";
      case ARMneon_VCEQ:  return "vceq";
      case ARMneon_VCEQF:  return "vceq";
      case ARMneon_VPADD:   return "vpadd";
      case ARMneon_VPMINU:   return "vpmin";
      case ARMneon_VPMINS:   return "vpmin";
      case ARMneon_VPMINF:   return "vpmin";
      case ARMneon_VPMAXU:   return "vpmax";
      case ARMneon_VPMAXS:   return "vpmax";
      case ARMneon_VPMAXF:   return "vpmax";
      case ARMneon_VEXT:   return "vext";
      case ARMneon_VMUL:   return "vmuli";
      case ARMneon_VMULLU:   return "vmull";
      case ARMneon_VMULLS:   return "vmull";
      case ARMneon_VMULP:  return "vmul";
      case ARMneon_VMULFP:  return "vmul";
      case ARMneon_VMULLP:  return "vmull";
      case ARMneon_VQDMULH: return "vqdmulh";
      case ARMneon_VQRDMULH: return "vqrdmulh";
      case ARMneon_VQDMULL: return "vqdmull";
      case ARMneon_VTBL: return "vtbl";
      case ARMneon_VRECPS: return "vrecps";
      case ARMneon_VRSQRTS: return "vrsqrts";
      /* ... */
      default: vpanic("showARMNeonBinOp");
   }
}

HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
   switch (op) {
      case ARMneon_VAND:
      case ARMneon_VORR:
      case ARMneon_VXOR:
         return "";
      case ARMneon_VADD:
      case ARMneon_VSUB:
      case ARMneon_VEXT:
      case ARMneon_VMUL:
      case ARMneon_VPADD:
      case ARMneon_VTBL:
      case ARMneon_VCEQ:
         return ".i";
      case ARMneon_VRHADDU:
      case ARMneon_VMINU:
      case ARMneon_VMAXU:
      case ARMneon_VQADDU:
      case ARMneon_VQSUBU:
      case ARMneon_VCGTU:
      case ARMneon_VCGEU:
      case ARMneon_VMULLU:
      case ARMneon_VPMINU:
      case ARMneon_VPMAXU:
         return ".u";
      case ARMneon_VRHADDS:
      case ARMneon_VMINS:
      case ARMneon_VMAXS:
      case ARMneon_VQADDS:
      case ARMneon_VQSUBS:
      case ARMneon_VCGTS:
      case ARMneon_VCGES:
      case ARMneon_VQDMULL:
      case ARMneon_VMULLS:
      case ARMneon_VPMINS:
      case ARMneon_VPMAXS:
      case ARMneon_VQDMULH:
      case ARMneon_VQRDMULH:
         return ".s";
      case ARMneon_VMULP:
      case ARMneon_VMULLP:
         return ".p";
      case ARMneon_VADDFP:
      case ARMneon_VABDFP:
      case ARMneon_VPADDFP:
      case ARMneon_VSUBFP:
      case ARMneon_VMULFP:
      case ARMneon_VMINF:
      case ARMneon_VMAXF:
      case ARMneon_VPMINF:
      case ARMneon_VPMAXF:
      case ARMneon_VCGTF:
      case ARMneon_VCGEF:
      case ARMneon_VCEQF:
      case ARMneon_VRECPS:
      case ARMneon_VRSQRTS:
         return ".f";
      /* ... */
      default: vpanic("showARMNeonBinOpDataType");
   }
}

HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
   switch (op) {
      case ARMneon_COPY: return "vmov";
      case ARMneon_COPYLS: return "vmov";
      case ARMneon_COPYLU: return "vmov";
      case ARMneon_COPYN: return "vmov";
      case ARMneon_COPYQNSS: return "vqmovn";
      case ARMneon_COPYQNUS: return "vqmovun";
      case ARMneon_COPYQNUU: return "vqmovn";
      case ARMneon_NOT: return "vmvn";
      case ARMneon_EQZ: return "vceq";
      case ARMneon_CNT: return "vcnt";
      case ARMneon_CLS: return "vcls";
      case ARMneon_CLZ: return "vclz";
      case ARMneon_DUP: return "vdup";
      case ARMneon_PADDLS: return "vpaddl";
      case ARMneon_PADDLU: return "vpaddl";
      case ARMneon_VQSHLNSS: return "vqshl";
      case ARMneon_VQSHLNUU: return "vqshl";
      case ARMneon_VQSHLNUS: return "vqshlu";
      case ARMneon_REV16: return "vrev16";
      case ARMneon_REV32: return "vrev32";
      case ARMneon_REV64: return "vrev64";
      case ARMneon_VCVTFtoU: return "vcvt";
      case ARMneon_VCVTFtoS: return "vcvt";
      case ARMneon_VCVTUtoF: return "vcvt";
      case ARMneon_VCVTStoF: return "vcvt";
      case ARMneon_VCVTFtoFixedU: return "vcvt";
      case ARMneon_VCVTFtoFixedS: return "vcvt";
      case ARMneon_VCVTFixedUtoF: return "vcvt";
      case ARMneon_VCVTFixedStoF: return "vcvt";
      case ARMneon_VCVTF32toF16: return "vcvt";
      case ARMneon_VCVTF16toF32: return "vcvt";
      case ARMneon_VRECIP: return "vrecip";
      case ARMneon_VRECIPF: return "vrecipf";
      case ARMneon_VNEGF: return "vneg";
      case ARMneon_ABS: return "vabs";
      case ARMneon_VABSFP: return "vabsfp";
      case ARMneon_VRSQRTEFP: return "vrsqrtefp";
      case ARMneon_VRSQRTE: return "vrsqrte";
      /* ... */
      default: vpanic("showARMNeonUnOp");
   }
}

HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
   switch (op) {
      case ARMneon_COPY:
      case ARMneon_NOT:
         return "";
      case ARMneon_COPYN:
      case ARMneon_EQZ:
      case ARMneon_CNT:
      case ARMneon_DUP:
      case ARMneon_REV16:
      case ARMneon_REV32:
      case ARMneon_REV64:
         return ".i";
      case ARMneon_COPYLU:
      case ARMneon_PADDLU:
      case ARMneon_COPYQNUU:
      case ARMneon_VQSHLNUU:
      case ARMneon_VRECIP:
      case ARMneon_VRSQRTE:
         return ".u";
      case ARMneon_CLS:
      case ARMneon_CLZ:
      case ARMneon_COPYLS:
      case ARMneon_PADDLS:
      case ARMneon_COPYQNSS:
      case ARMneon_COPYQNUS:
      case ARMneon_VQSHLNSS:
      case ARMneon_VQSHLNUS:
      case ARMneon_ABS:
         return ".s";
      case ARMneon_VRECIPF:
      case ARMneon_VNEGF:
      case ARMneon_VABSFP:
      case ARMneon_VRSQRTEFP:
         return ".f";
      case ARMneon_VCVTFtoU: return ".u32.f32";
      case ARMneon_VCVTFtoS: return ".s32.f32";
      case ARMneon_VCVTUtoF: return ".f32.u32";
      case ARMneon_VCVTStoF: return ".f32.s32";
      case ARMneon_VCVTF16toF32: return ".f32.f16";
      case ARMneon_VCVTF32toF16: return ".f16.f32";
      case ARMneon_VCVTFtoFixedU: return ".u32.f32";
      case ARMneon_VCVTFtoFixedS: return ".s32.f32";
      case ARMneon_VCVTFixedUtoF: return ".f32.u32";
      case ARMneon_VCVTFixedStoF: return ".f32.s32";
      /* ... */
      default: vpanic("showARMNeonUnOpDataType");
   }
}

HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
   switch (op) {
      case ARMneon_SETELEM: return "vmov";
      case ARMneon_GETELEMU: return "vmov";
      case ARMneon_GETELEMS: return "vmov";
      case ARMneon_VDUP: return "vdup";
      /* ... */
      default: vpanic("showARMNeonUnarySOp");
   }
}

HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
   switch (op) {
      case ARMneon_SETELEM:
      case ARMneon_VDUP:
         return ".i";
      case ARMneon_GETELEMS:
         return ".s";
      case ARMneon_GETELEMU:
         return ".u";
      /* ... */
      default: vpanic("showARMNeonUnarySOp");
   }
}

HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
   switch (op) {
      case ARMneon_VSHL: return "vshl";
      case ARMneon_VSAL: return "vshl";
      case ARMneon_VQSHL: return "vqshl";
      case ARMneon_VQSAL: return "vqshl";
      /* ... */
      default: vpanic("showARMNeonShiftOp");
   }
}

HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
   switch (op) {
      case ARMneon_VSHL:
      case ARMneon_VQSHL:
         return ".u";
      case ARMneon_VSAL:
      case ARMneon_VQSAL:
         return ".s";
      /* ... */
      default: vpanic("showARMNeonShiftOpDataType");
   }
}

HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
   switch (op) {
      case ARMneon_TRN: return "vtrn";
      case ARMneon_ZIP: return "vzip";
      case ARMneon_UZP: return "vuzp";
      /* ... */
      default: vpanic("showARMNeonDualOp");
   }
}

HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
   switch (op) {
      case ARMneon_TRN:
      case ARMneon_ZIP:
      case ARMneon_UZP:
         return "i";
      /* ... */
      default: vpanic("showARMNeonDualOp");
   }
}

static HChar* showARMNeonDataSize_wrk ( UInt size )
{
   switch (size) {
      case 0: return "8";
      case 1: return "16";
      case 2: return "32";
      case 3: return "64";
      default: vpanic("showARMNeonDataSize");
   }
}

static HChar* showARMNeonDataSize ( ARMInstr* i )
{
   switch (i->tag) {
      case ARMin_NBinary:
         if (i->ARMin.NBinary.op == ARMneon_VEXT)
            return "8";
         if (i->ARMin.NBinary.op == ARMneon_VAND ||
             i->ARMin.NBinary.op == ARMneon_VORR ||
             i->ARMin.NBinary.op == ARMneon_VXOR)
            return "";
         return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
      case ARMin_NUnary:
         if (i->ARMin.NUnary.op == ARMneon_COPY ||
             i->ARMin.NUnary.op == ARMneon_NOT ||
             i->ARMin.NUnary.op == ARMneon_VCVTF32toF16||
             i->ARMin.NUnary.op == ARMneon_VCVTF16toF32||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
             i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
             i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
            return "";
         if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
             i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
             i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
            UInt size;
            size = i->ARMin.NUnary.size;
            if (size & 0x40)
               return "64";
            if (size & 0x20)
               return "32";
            if (size & 0x10)
               return "16";
            if (size & 0x08)
               return "8";
            vpanic("showARMNeonDataSize");
         }
         return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
      case ARMin_NUnaryS:
         if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
            int size;
            size = i->ARMin.NUnaryS.size;
            if ((size & 1) == 1)
               return "8";
            if ((size & 3) == 2)
               return "16";
            if ((size & 7) == 4)
               return "32";
            vpanic("showARMNeonDataSize");
         }
         return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
      case ARMin_NShift:
         return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
      case ARMin_NDual:
         return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
      default:
         vpanic("showARMNeonDataSize");
   }
}

ARMInstr* ARMInstr_Alu ( ARMAluOp op,
                         HReg dst, HReg argL, ARMRI84* argR ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag            = ARMin_Alu;
   i->ARMin.Alu.op   = op;
   i->ARMin.Alu.dst  = dst;
   i->ARMin.Alu.argL = argL;
   i->ARMin.Alu.argR = argR;
   return i;
}
ARMInstr* ARMInstr_Shift  ( ARMShiftOp op,
                            HReg dst, HReg argL, ARMRI5* argR ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag              = ARMin_Shift;
   i->ARMin.Shift.op   = op;
   i->ARMin.Shift.dst  = dst;
   i->ARMin.Shift.argL = argL;
   i->ARMin.Shift.argR = argR;
   return i;
}
ARMInstr* ARMInstr_Unary ( ARMUnaryOp op, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag             = ARMin_Unary;
   i->ARMin.Unary.op  = op;
   i->ARMin.Unary.dst = dst;
   i->ARMin.Unary.src = src;
   return i;
}
ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg argL, ARMRI84* argR ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                  = ARMin_CmpOrTst;
   i->ARMin.CmpOrTst.isCmp = isCmp;
   i->ARMin.CmpOrTst.argL  = argL;
   i->ARMin.CmpOrTst.argR  = argR;
   return i;
}
ARMInstr* ARMInstr_Mov ( HReg dst, ARMRI84* src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag           = ARMin_Mov;
   i->ARMin.Mov.dst = dst;
   i->ARMin.Mov.src = src;
   return i;
}
ARMInstr* ARMInstr_Imm32  ( HReg dst, UInt imm32 ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag               = ARMin_Imm32;
   i->ARMin.Imm32.dst   = dst;
   i->ARMin.Imm32.imm32 = imm32;
   return i;
}
ARMInstr* ARMInstr_LdSt32 ( Bool isLoad, HReg rD, ARMAMode1* amode ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                 = ARMin_LdSt32;
   i->ARMin.LdSt32.isLoad = isLoad;
   i->ARMin.LdSt32.rD     = rD;
   i->ARMin.LdSt32.amode  = amode;
   return i;
}
ARMInstr* ARMInstr_LdSt16 ( Bool isLoad, Bool signedLoad,
                            HReg rD, ARMAMode2* amode ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                     = ARMin_LdSt16;
   i->ARMin.LdSt16.isLoad     = isLoad;
   i->ARMin.LdSt16.signedLoad = signedLoad;
   i->ARMin.LdSt16.rD         = rD;
   i->ARMin.LdSt16.amode      = amode;
   return i;
}
ARMInstr* ARMInstr_LdSt8U ( Bool isLoad, HReg rD, ARMAMode1* amode ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                 = ARMin_LdSt8U;
   i->ARMin.LdSt8U.isLoad = isLoad;
   i->ARMin.LdSt8U.rD     = rD;
   i->ARMin.LdSt8U.amode  = amode;
   return i;
}
ARMInstr* ARMInstr_XDirect ( Addr32 dstGA, ARMAMode1* amR15T,
                             ARMCondCode cond, Bool toFastEP ) {
   ARMInstr* i               = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                    = ARMin_XDirect;
   i->ARMin.XDirect.dstGA    = dstGA;
   i->ARMin.XDirect.amR15T   = amR15T;
   i->ARMin.XDirect.cond     = cond;
   i->ARMin.XDirect.toFastEP = toFastEP;
   return i;
}
ARMInstr* ARMInstr_XIndir ( HReg dstGA, ARMAMode1* amR15T,
                            ARMCondCode cond ) {
   ARMInstr* i            = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                 = ARMin_XIndir;
   i->ARMin.XIndir.dstGA  = dstGA;
   i->ARMin.XIndir.amR15T = amR15T;
   i->ARMin.XIndir.cond   = cond;
   return i;
}
ARMInstr* ARMInstr_XAssisted ( HReg dstGA, ARMAMode1* amR15T,
                               ARMCondCode cond, IRJumpKind jk ) {
   ARMInstr* i               = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                    = ARMin_XAssisted;
   i->ARMin.XAssisted.dstGA  = dstGA;
   i->ARMin.XAssisted.amR15T = amR15T;
   i->ARMin.XAssisted.cond   = cond;
   i->ARMin.XAssisted.jk     = jk;
   return i;
}
ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag             = ARMin_CMov;
   i->ARMin.CMov.cond = cond;
   i->ARMin.CMov.dst  = dst;
   i->ARMin.CMov.src  = src;
   vassert(cond != ARMcc_AL);
   return i;
}
ARMInstr* ARMInstr_Call ( ARMCondCode cond, HWord target, Int nArgRegs ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                 = ARMin_Call;
   i->ARMin.Call.cond     = cond;
   i->ARMin.Call.target   = target;
   i->ARMin.Call.nArgRegs = nArgRegs;
   return i;
}
ARMInstr* ARMInstr_Mul ( ARMMulDivOp op ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag          = ARMin_Mul;
   i->ARMin.Mul.op = op;
   return i;
}
ARMInstr* ARMInstr_Div ( ARMMulDivOp op, HReg dst, HReg argL, HReg argR ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag            = ARMin_Div;
   i->ARMin.Div.op   = op;
   i->ARMin.Div.dst  = dst;
   i->ARMin.Div.argL = argL;
   i->ARMin.Div.argR = argR;
   return i;
}
ARMInstr* ARMInstr_LdrEX ( Int szB ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag             = ARMin_LdrEX;
   i->ARMin.LdrEX.szB = szB;
   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   return i;
}
ARMInstr* ARMInstr_StrEX ( Int szB ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag             = ARMin_StrEX;
   i->ARMin.StrEX.szB = szB;
   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   return i;
}
ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg dD, ARMAModeV* am ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                 = ARMin_VLdStD;
   i->ARMin.VLdStD.isLoad = isLoad;
   i->ARMin.VLdStD.dD     = dD;
   i->ARMin.VLdStD.amode  = am;
   return i;
}
ARMInstr* ARMInstr_VLdStS ( Bool isLoad, HReg fD, ARMAModeV* am ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                 = ARMin_VLdStS;
   i->ARMin.VLdStS.isLoad = isLoad;
   i->ARMin.VLdStS.fD     = fD;
   i->ARMin.VLdStS.amode  = am;
   return i;
}
ARMInstr* ARMInstr_VAluD ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag              = ARMin_VAluD;
   i->ARMin.VAluD.op   = op;
   i->ARMin.VAluD.dst  = dst;
   i->ARMin.VAluD.argL = argL;
   i->ARMin.VAluD.argR = argR;
   return i;
}
ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag              = ARMin_VAluS;
   i->ARMin.VAluS.op   = op;
   i->ARMin.VAluS.dst  = dst;
   i->ARMin.VAluS.argL = argL;
   i->ARMin.VAluS.argR = argR;
   return i;
}
ARMInstr* ARMInstr_VUnaryD ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag               = ARMin_VUnaryD;
   i->ARMin.VUnaryD.op  = op;
   i->ARMin.VUnaryD.dst = dst;
   i->ARMin.VUnaryD.src = src;
   return i;
}
ARMInstr* ARMInstr_VUnaryS ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag               = ARMin_VUnaryS;
   i->ARMin.VUnaryS.op  = op;
   i->ARMin.VUnaryS.dst = dst;
   i->ARMin.VUnaryS.src = src;
   return i;
}
ARMInstr* ARMInstr_VCmpD ( HReg argL, HReg argR ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag              = ARMin_VCmpD;
   i->ARMin.VCmpD.argL = argL;
   i->ARMin.VCmpD.argR = argR;
   return i;
}
ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag               = ARMin_VCMovD;
   i->ARMin.VCMovD.cond = cond;
   i->ARMin.VCMovD.dst  = dst;
   i->ARMin.VCMovD.src  = src;
   vassert(cond != ARMcc_AL);
   return i;
}
ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag               = ARMin_VCMovS;
   i->ARMin.VCMovS.cond = cond;
   i->ARMin.VCMovS.dst  = dst;
   i->ARMin.VCMovS.src  = src;
   vassert(cond != ARMcc_AL);
   return i;
}
ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag               = ARMin_VCvtSD;
   i->ARMin.VCvtSD.sToD = sToD;
   i->ARMin.VCvtSD.dst  = dst;
   i->ARMin.VCvtSD.src  = src;
   return i;
}
ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag              = ARMin_VXferD;
   i->ARMin.VXferD.toD = toD;
   i->ARMin.VXferD.dD  = dD;
   i->ARMin.VXferD.rHi = rHi;
   i->ARMin.VXferD.rLo = rLo;
   return i;
}
ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag              = ARMin_VXferS;
   i->ARMin.VXferS.toS = toS;
   i->ARMin.VXferS.fD  = fD;
   i->ARMin.VXferS.rLo = rLo;
   return i;
}
ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
                            HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                = ARMin_VCvtID;
   i->ARMin.VCvtID.iToD  = iToD;
   i->ARMin.VCvtID.syned = syned;
   i->ARMin.VCvtID.dst   = dst;
   i->ARMin.VCvtID.src   = src;
   return i;
}
ARMInstr* ARMInstr_FPSCR ( Bool toFPSCR, HReg iReg ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                 = ARMin_FPSCR;
   i->ARMin.FPSCR.toFPSCR = toFPSCR;
   i->ARMin.FPSCR.iReg    = iReg;
   return i;
}
ARMInstr* ARMInstr_MFence ( void ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag      = ARMin_MFence;
   return i;
}
ARMInstr* ARMInstr_CLREX( void ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag      = ARMin_CLREX;
   return i;
}

ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg dQ, ARMAModeN *amode ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                  = ARMin_NLdStQ;
   i->ARMin.NLdStQ.isLoad  = isLoad;
   i->ARMin.NLdStQ.dQ      = dQ;
   i->ARMin.NLdStQ.amode   = amode;
   return i;
}

ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                  = ARMin_NLdStD;
   i->ARMin.NLdStD.isLoad  = isLoad;
   i->ARMin.NLdStD.dD      = dD;
   i->ARMin.NLdStD.amode   = amode;
   return i;
}

ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
                            UInt size, Bool Q ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                = ARMin_NUnary;
   i->ARMin.NUnary.op   = op;
   i->ARMin.NUnary.src  = nQ;
   i->ARMin.NUnary.dst  = dQ;
   i->ARMin.NUnary.size = size;
   i->ARMin.NUnary.Q    = Q;
   return i;
}

ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS op, ARMNRS* dst, ARMNRS* src,
                             UInt size, Bool Q ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                = ARMin_NUnaryS;
   i->ARMin.NUnaryS.op   = op;
   i->ARMin.NUnaryS.src  = src;
   i->ARMin.NUnaryS.dst  = dst;
   i->ARMin.NUnaryS.size = size;
   i->ARMin.NUnaryS.Q    = Q;
   return i;
}

ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
                           UInt size, Bool Q ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                = ARMin_NDual;
   i->ARMin.NDual.op   = op;
   i->ARMin.NDual.arg1 = nQ;
   i->ARMin.NDual.arg2 = mQ;
   i->ARMin.NDual.size = size;
   i->ARMin.NDual.Q    = Q;
   return i;
}

ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
                             HReg dst, HReg argL, HReg argR,
                             UInt size, Bool Q ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                = ARMin_NBinary;
   i->ARMin.NBinary.op   = op;
   i->ARMin.NBinary.argL = argL;
   i->ARMin.NBinary.argR = argR;
   i->ARMin.NBinary.dst  = dst;
   i->ARMin.NBinary.size = size;
   i->ARMin.NBinary.Q    = Q;
   return i;
}

ARMInstr* ARMInstr_NeonImm (HReg dst, ARMNImm* imm ) {
   ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag         = ARMin_NeonImm;
   i->ARMin.NeonImm.dst = dst;
   i->ARMin.NeonImm.imm = imm;
   return i;
}

ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag               = ARMin_NCMovQ;
   i->ARMin.NCMovQ.cond = cond;
   i->ARMin.NCMovQ.dst  = dst;
   i->ARMin.NCMovQ.src  = src;
   vassert(cond != ARMcc_AL);
   return i;
}

ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
                            HReg dst, HReg argL, HReg argR,
                            UInt size, Bool Q ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                = ARMin_NShift;
   i->ARMin.NShift.op   = op;
   i->ARMin.NShift.argL = argL;
   i->ARMin.NShift.argR = argR;
   i->ARMin.NShift.dst  = dst;
   i->ARMin.NShift.size = size;
   i->ARMin.NShift.Q    = Q;
   return i;
}

/* Helper copy-pasted from isel.c */
static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
{
   UInt i;
   for (i = 0; i < 16; i++) {
      if (0 == (u & 0xFFFFFF00)) {
         *u8 = u;
         *u4 = i;
         return True;
      }
      u = ROR32(u, 30);
   }
   vassert(i == 16);
   return False;
}
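
/* Worked example: fitsIn8x4 tries the sixteen even rotations.  For
   u == 0x00AB0000 it succeeds at i == 8 with *u8 == 0xAB, *u4 == 8,
   since ROR32(0xAB, 2*8) == 0x00AB0000; a value such as 0x00ABCDEF
   has more than 8 significant bits under every rotation, so the
   search fails and it returns False. */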

ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
   UInt u8, u4;
   ARMInstr *i = LibVEX_Alloc(sizeof(ARMInstr));
   /* Try to generate single ADD if possible */
   if (fitsIn8x4(&u8, &u4, imm32)) {
      i->tag            = ARMin_Alu;
      i->ARMin.Alu.op   = ARMalu_ADD;
      i->ARMin.Alu.dst  = rD;
      i->ARMin.Alu.argL = rN;
      i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
   } else {
      i->tag               = ARMin_Add32;
      i->ARMin.Add32.rD    = rD;
      i->ARMin.Add32.rN    = rN;
      i->ARMin.Add32.imm32 = imm32;
   }
   return i;
}
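
/* For example, ARMInstr_Add32(rD, rN, 0x10000) collapses to the
   single instruction "add rD, rN, #0x10000" (encodable as imm8=0x1,
   imm4=8), whereas an immediate like 0x12345678 does not fit and
   keeps the ARMin_Add32 tag for the emitter to expand later. */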

ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter,
                             ARMAMode1* amFailAddr ) {
   ARMInstr* i                 = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag                      = ARMin_EvCheck;
   i->ARMin.EvCheck.amCounter  = amCounter;
   i->ARMin.EvCheck.amFailAddr = amFailAddr;
   return i;
}

ARMInstr* ARMInstr_ProfInc ( void ) {
   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
   i->tag      = ARMin_ProfInc;
   return i;
}

/* ... */

void ppARMInstr ( ARMInstr* i ) {
   switch (i->tag) {
      case ARMin_Alu:
         vex_printf("%-4s  ", showARMAluOp(i->ARMin.Alu.op));
         ppHRegARM(i->ARMin.Alu.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.Alu.argL);
         vex_printf(", ");
         ppARMRI84(i->ARMin.Alu.argR);
         return;
      case ARMin_Shift:
         vex_printf("%s   ", showARMShiftOp(i->ARMin.Shift.op));
         ppHRegARM(i->ARMin.Shift.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.Shift.argL);
         vex_printf(", ");
         ppARMRI5(i->ARMin.Shift.argR);
         return;
      case ARMin_Unary:
         vex_printf("%s   ", showARMUnaryOp(i->ARMin.Unary.op));
         ppHRegARM(i->ARMin.Unary.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.Unary.src);
         return;
      case ARMin_CmpOrTst:
         vex_printf("%s   ", i->ARMin.CmpOrTst.isCmp ? "cmp" : "tst");
         ppHRegARM(i->ARMin.CmpOrTst.argL);
         vex_printf(", ");
         ppARMRI84(i->ARMin.CmpOrTst.argR);
         return;
      case ARMin_Mov:
         vex_printf("mov   ");
         ppHRegARM(i->ARMin.Mov.dst);
         vex_printf(", ");
         ppARMRI84(i->ARMin.Mov.src);
         return;
      case ARMin_Imm32:
         vex_printf("imm   ");
         ppHRegARM(i->ARMin.Imm32.dst);
         vex_printf(", 0x%x", i->ARMin.Imm32.imm32);
         return;
      case ARMin_LdSt32:
         if (i->ARMin.LdSt32.isLoad) {
            vex_printf("ldr   ");
            ppHRegARM(i->ARMin.LdSt32.rD);
            vex_printf(", ");
            ppARMAMode1(i->ARMin.LdSt32.amode);
         } else {
            vex_printf("str   ");
            ppARMAMode1(i->ARMin.LdSt32.amode);
            vex_printf(", ");
            ppHRegARM(i->ARMin.LdSt32.rD);
         }
         return;
      case ARMin_LdSt16:
         if (i->ARMin.LdSt16.isLoad) {
            vex_printf("%s", i->ARMin.LdSt16.signedLoad
                                ? "ldrsh " : "ldrh  " );
            ppHRegARM(i->ARMin.LdSt16.rD);
            vex_printf(", ");
            ppARMAMode2(i->ARMin.LdSt16.amode);
         } else {
            vex_printf("strh  ");
            ppARMAMode2(i->ARMin.LdSt16.amode);
            vex_printf(", ");
            ppHRegARM(i->ARMin.LdSt16.rD);
         }
         return;
      case ARMin_LdSt8U:
         if (i->ARMin.LdSt8U.isLoad) {
            vex_printf("ldrb  ");
            ppHRegARM(i->ARMin.LdSt8U.rD);
            vex_printf(", ");
            ppARMAMode1(i->ARMin.LdSt8U.amode);
         } else {
            vex_printf("strb  ");
            ppARMAMode1(i->ARMin.LdSt8U.amode);
            vex_printf(", ");
            ppHRegARM(i->ARMin.LdSt8U.rD);
         }
         return;
      case ARMin_Ld8S:
         goto unhandled;
      case ARMin_XDirect:
         vex_printf("(xDirect) ");
         vex_printf("if (%%cpsr.%s) { ",
                    showARMCondCode(i->ARMin.XDirect.cond));
         vex_printf("movw r12,0x%x; ",
                    (UInt)(i->ARMin.XDirect.dstGA & 0xFFFF));
         vex_printf("movt r12,0x%x; ",
                    (UInt)((i->ARMin.XDirect.dstGA >> 16) & 0xFFFF));
         vex_printf("str r12,");
         ppARMAMode1(i->ARMin.XDirect.amR15T);
         vex_printf("; movw r12,LO16($disp_cp_chain_me_to_%sEP); ",
                    i->ARMin.XDirect.toFastEP ? "fast" : "slow");
         vex_printf("movt r12,HI16($disp_cp_chain_me_to_%sEP); ",
                    i->ARMin.XDirect.toFastEP ? "fast" : "slow");
         vex_printf("blx r12 }");
         return;
      case ARMin_XIndir:
         vex_printf("(xIndir) ");
         vex_printf("if (%%cpsr.%s) { ",
                    showARMCondCode(i->ARMin.XIndir.cond));
         vex_printf("str ");
         ppHRegARM(i->ARMin.XIndir.dstGA);
         vex_printf(",");
         ppARMAMode1(i->ARMin.XIndir.amR15T);
         vex_printf("; movw r12,LO16($disp_cp_xindir); ");
         vex_printf("movt r12,HI16($disp_cp_xindir); ");
         vex_printf("blx r12 }");
         return;
      case ARMin_XAssisted:
         vex_printf("(xAssisted) ");
         vex_printf("if (%%cpsr.%s) { ",
                    showARMCondCode(i->ARMin.XAssisted.cond));
         vex_printf("str ");
         ppHRegARM(i->ARMin.XAssisted.dstGA);
         vex_printf(",");
         ppARMAMode1(i->ARMin.XAssisted.amR15T);
         vex_printf("; movw r8,$IRJumpKind_to_TRCVAL(%d); ",
1656                    (Int)i->ARMin.XAssisted.jk);
         vex_printf("movw r12,LO16($disp_cp_xassisted); ");
         vex_printf("movt r12,HI16($disp_cp_xassisted); ");
         vex_printf("blx r12 }");
         return;
      case ARMin_CMov:
         vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond));
         ppHRegARM(i->ARMin.CMov.dst);
         vex_printf(", ");
         ppARMRI84(i->ARMin.CMov.src);
         return;
      case ARMin_Call:
         vex_printf("call%s  ",
                    i->ARMin.Call.cond==ARMcc_AL
                       ? "" : showARMCondCode(i->ARMin.Call.cond));
         vex_printf("0x%lx [nArgRegs=%d]",
                    i->ARMin.Call.target, i->ARMin.Call.nArgRegs);
         return;
      case ARMin_Mul:
         vex_printf("%-5s ", showARMMulOp(i->ARMin.Mul.op));
         if (i->ARMin.Mul.op == ARMmul_PLAIN) {
            vex_printf("r0, r2, r3");
         } else {
            vex_printf("r1:r0, r2, r3");
         }
         return;
      case ARMin_Div:
         vex_printf("%-5s ", showARMDivOp(i->ARMin.Div.op));
         ppHRegARM(i->ARMin.Div.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.Div.argL);
         vex_printf(", ");
         ppHRegARM(i->ARMin.Div.argR);
         return;
      case ARMin_LdrEX: {
         HChar* sz = "";
         switch (i->ARMin.LdrEX.szB) {
            case 1: sz = "b"; break; case 2: sz = "h"; break;
            case 8: sz = "d"; break; case 4: break;
            default: vassert(0);
         }
         vex_printf("ldrex%s %sr2, [r4]",
                    sz, i->ARMin.LdrEX.szB == 8 ? "r3:" : "");
         return;
      }
      case ARMin_StrEX: {
         HChar* sz = "";
         switch (i->ARMin.StrEX.szB) {
            case 1: sz = "b"; break; case 2: sz = "h"; break;
            case 8: sz = "d"; break; case 4: break;
            default: vassert(0);
         }
         vex_printf("strex%s r0, %sr2, [r4]",
                    sz, i->ARMin.StrEX.szB == 8 ? "r3:" : "");
         return;
      }
      case ARMin_VLdStD:
         if (i->ARMin.VLdStD.isLoad) {
            vex_printf("fldd  ");
            ppHRegARM(i->ARMin.VLdStD.dD);
            vex_printf(", ");
            ppARMAModeV(i->ARMin.VLdStD.amode);
         } else {
            vex_printf("fstd  ");
            ppARMAModeV(i->ARMin.VLdStD.amode);
            vex_printf(", ");
            ppHRegARM(i->ARMin.VLdStD.dD);
         }
         return;
      case ARMin_VLdStS:
         if (i->ARMin.VLdStS.isLoad) {
            vex_printf("flds  ");
            ppHRegARM(i->ARMin.VLdStS.fD);
            vex_printf(", ");
            ppARMAModeV(i->ARMin.VLdStS.amode);
         } else {
            vex_printf("fsts  ");
            ppARMAModeV(i->ARMin.VLdStS.amode);
            vex_printf(", ");
            ppHRegARM(i->ARMin.VLdStS.fD);
         }
         return;
      case ARMin_VAluD:
         vex_printf("f%-3sd ", showARMVfpOp(i->ARMin.VAluD.op));
         ppHRegARM(i->ARMin.VAluD.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VAluD.argL);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VAluD.argR);
         return;
      case ARMin_VAluS:
         vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
         ppHRegARM(i->ARMin.VAluS.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VAluS.argL);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VAluS.argR);
         return;
      case ARMin_VUnaryD:
         vex_printf("f%-3sd ", showARMVfpUnaryOp(i->ARMin.VUnaryD.op));
         ppHRegARM(i->ARMin.VUnaryD.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VUnaryD.src);
         return;
      case ARMin_VUnaryS:
         vex_printf("f%-3ss ", showARMVfpUnaryOp(i->ARMin.VUnaryS.op));
         ppHRegARM(i->ARMin.VUnaryS.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VUnaryS.src);
         return;
      case ARMin_VCmpD:
         vex_printf("fcmpd ");
         ppHRegARM(i->ARMin.VCmpD.argL);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VCmpD.argR);
         vex_printf(" ; fmstat");
         return;
      case ARMin_VCMovD:
         vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
         ppHRegARM(i->ARMin.VCMovD.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VCMovD.src);
         return;
      case ARMin_VCMovS:
         vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
         ppHRegARM(i->ARMin.VCMovS.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VCMovS.src);
         return;
      case ARMin_VCvtSD:
         vex_printf("fcvt%s ", i->ARMin.VCvtSD.sToD ? "ds" : "sd");
         ppHRegARM(i->ARMin.VCvtSD.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VCvtSD.src);
         return;
      case ARMin_VXferD:
         vex_printf("vmov  ");
         if (i->ARMin.VXferD.toD) {
            ppHRegARM(i->ARMin.VXferD.dD);
            vex_printf(", ");
            ppHRegARM(i->ARMin.VXferD.rLo);
            vex_printf(", ");
            ppHRegARM(i->ARMin.VXferD.rHi);
         } else {
            ppHRegARM(i->ARMin.VXferD.rLo);
            vex_printf(", ");
            ppHRegARM(i->ARMin.VXferD.rHi);
            vex_printf(", ");
            ppHRegARM(i->ARMin.VXferD.dD);
         }
         return;
      case ARMin_VXferS:
         vex_printf("vmov  ");
         if (i->ARMin.VXferS.toS) {
            ppHRegARM(i->ARMin.VXferS.fD);
            vex_printf(", ");
            ppHRegARM(i->ARMin.VXferS.rLo);
         } else {
            ppHRegARM(i->ARMin.VXferS.rLo);
            vex_printf(", ");
            ppHRegARM(i->ARMin.VXferS.fD);
         }
         return;
      case ARMin_VCvtID: {
         HChar* nm = "?";
         if (i->ARMin.VCvtID.iToD) {
            nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
         } else {
            nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
         }
         vex_printf("%s ", nm);
         ppHRegARM(i->ARMin.VCvtID.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.VCvtID.src);
         return;
      }
      case ARMin_FPSCR:
         if (i->ARMin.FPSCR.toFPSCR) {
            vex_printf("fmxr  fpscr, ");
            ppHRegARM(i->ARMin.FPSCR.iReg);
         } else {
            vex_printf("fmrx  ");
            ppHRegARM(i->ARMin.FPSCR.iReg);
            vex_printf(", fpscr");
         }
         return;
      case ARMin_MFence:
         vex_printf("(mfence) dsb sy; dmb sy; isb");
         return;
      case ARMin_CLREX:
         vex_printf("clrex");
         return;
      case ARMin_NLdStQ:
         if (i->ARMin.NLdStQ.isLoad)
            vex_printf("vld1.32 {");
         else
            vex_printf("vst1.32 {");
         ppHRegARM(i->ARMin.NLdStQ.dQ);
         vex_printf("} ");
         ppARMAModeN(i->ARMin.NLdStQ.amode);
         return;
      case ARMin_NLdStD:
         if (i->ARMin.NLdStD.isLoad)
            vex_printf("vld1.32 {");
         else
            vex_printf("vst1.32 {");
         ppHRegARM(i->ARMin.NLdStD.dD);
         vex_printf("} ");
         ppARMAModeN(i->ARMin.NLdStD.amode);
         return;
      case ARMin_NUnary:
         vex_printf("%s%s%s  ",
                    showARMNeonUnOp(i->ARMin.NUnary.op),
                    showARMNeonUnOpDataType(i->ARMin.NUnary.op),
                    showARMNeonDataSize(i));
         ppHRegARM(i->ARMin.NUnary.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.NUnary.src);
         if (i->ARMin.NUnary.op == ARMneon_EQZ)
            vex_printf(", #0");
         if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
             i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
             i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
             i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
            vex_printf(", #%d", i->ARMin.NUnary.size);
         }
         if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
             i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
             i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
            UInt size;
            size = i->ARMin.NUnary.size;
            if (size & 0x40) {
               vex_printf(", #%d", size - 64);
            } else if (size & 0x20) {
               vex_printf(", #%d", size - 32);
            } else if (size & 0x10) {
               vex_printf(", #%d", size - 16);
            } else if (size & 0x08) {
               vex_printf(", #%d", size - 8);
            }
         }
         return;
      case ARMin_NUnaryS:
         vex_printf("%s%s%s  ",
                    showARMNeonUnOpS(i->ARMin.NUnaryS.op),
                    showARMNeonUnOpSDataType(i->ARMin.NUnaryS.op),
                    showARMNeonDataSize(i));
         ppARMNRS(i->ARMin.NUnaryS.dst);
         vex_printf(", ");
         ppARMNRS(i->ARMin.NUnaryS.src);
         return;
      case ARMin_NShift:
         vex_printf("%s%s%s  ",
                    showARMNeonShiftOp(i->ARMin.NShift.op),
                    showARMNeonShiftOpDataType(i->ARMin.NShift.op),
                    showARMNeonDataSize(i));
         ppHRegARM(i->ARMin.NShift.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.NShift.argL);
         vex_printf(", ");
         ppHRegARM(i->ARMin.NShift.argR);
         return;
      case ARMin_NDual:
         vex_printf("%s%s%s  ",
                    showARMNeonDualOp(i->ARMin.NDual.op),
                    showARMNeonDualOpDataType(i->ARMin.NDual.op),
                    showARMNeonDataSize(i));
         ppHRegARM(i->ARMin.NDual.arg1);
         vex_printf(", ");
         ppHRegARM(i->ARMin.NDual.arg2);
         return;
      case ARMin_NBinary:
         vex_printf("%s%s%s",
                    showARMNeonBinOp(i->ARMin.NBinary.op),
                    showARMNeonBinOpDataType(i->ARMin.NBinary.op),
                    showARMNeonDataSize(i));
         vex_printf("  ");
         ppHRegARM(i->ARMin.NBinary.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.NBinary.argL);
         vex_printf(", ");
         ppHRegARM(i->ARMin.NBinary.argR);
         return;
      case ARMin_NeonImm:
         vex_printf("vmov  ");
         ppHRegARM(i->ARMin.NeonImm.dst);
         vex_printf(", ");
         ppARMNImm(i->ARMin.NeonImm.imm);
         return;
      case ARMin_NCMovQ:
         vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
         ppHRegARM(i->ARMin.NCMovQ.dst);
         vex_printf(", ");
         ppHRegARM(i->ARMin.NCMovQ.src);
         return;
      case ARMin_Add32:
         vex_printf("add32 ");
         ppHRegARM(i->ARMin.Add32.rD);
         vex_printf(", ");
         ppHRegARM(i->ARMin.Add32.rN);
         vex_printf(", ");
         vex_printf("%d", i->ARMin.Add32.imm32);
         return;
      case ARMin_EvCheck:
         vex_printf("(evCheck) ldr r12,");
         ppARMAMode1(i->ARMin.EvCheck.amCounter);
         vex_printf("; subs r12,r12,$1; str r12,");
         ppARMAMode1(i->ARMin.EvCheck.amCounter);
         vex_printf("; bpl nofail; ldr r12,");
         ppARMAMode1(i->ARMin.EvCheck.amFailAddr);
         vex_printf("; bx r12; nofail:");
         return;
      case ARMin_ProfInc:
         vex_printf("(profInc) movw r12,LO16($NotKnownYet); "
                    "movt r12,HI16($NotKnownYet); "
                    "ldr r11,[r12]; "
                    "adds r11,r11,$1; "
                    "str r11,[r12]; "
                    "ldr r11,[r12+4]; "
                    "adc r11,r11,$0; "
                    "str r11,[r12+4]");
         return;
      default:
      unhandled:
         vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
         vpanic("ppARMInstr(1)");
         return;
   }
}


/* --------- Helpers for register allocation. --------- */

void getRegUsage_ARMInstr ( HRegUsage* u, ARMInstr* i, Bool mode64 )
{
   vassert(mode64 == False);
   initHRegUsage(u);
   switch (i->tag) {
      case ARMin_Alu:
         addHRegUse(u, HRmWrite, i->ARMin.Alu.dst);
         addHRegUse(u, HRmRead, i->ARMin.Alu.argL);
         addRegUsage_ARMRI84(u, i->ARMin.Alu.argR);
         return;
      case ARMin_Shift:
         addHRegUse(u, HRmWrite, i->ARMin.Shift.dst);
         addHRegUse(u, HRmRead, i->ARMin.Shift.argL);
         addRegUsage_ARMRI5(u, i->ARMin.Shift.argR);
         return;
      case ARMin_Unary:
         addHRegUse(u, HRmWrite, i->ARMin.Unary.dst);
         addHRegUse(u, HRmRead, i->ARMin.Unary.src);
         return;
      case ARMin_CmpOrTst:
         addHRegUse(u, HRmRead, i->ARMin.CmpOrTst.argL);
         addRegUsage_ARMRI84(u, i->ARMin.CmpOrTst.argR);
         return;
      case ARMin_Mov:
         addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
         addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
         return;
      case ARMin_Imm32:
         addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
         return;
      case ARMin_LdSt32:
         addRegUsage_ARMAMode1(u, i->ARMin.LdSt32.amode);
         if (i->ARMin.LdSt32.isLoad) {
            addHRegUse(u, HRmWrite, i->ARMin.LdSt32.rD);
         } else {
            addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
         }
         return;
      case ARMin_LdSt16:
         addRegUsage_ARMAMode2(u, i->ARMin.LdSt16.amode);
         if (i->ARMin.LdSt16.isLoad) {
            addHRegUse(u, HRmWrite, i->ARMin.LdSt16.rD);
         } else {
            addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
         }
         return;
      case ARMin_LdSt8U:
         addRegUsage_ARMAMode1(u, i->ARMin.LdSt8U.amode);
         if (i->ARMin.LdSt8U.isLoad) {
            addHRegUse(u, HRmWrite, i->ARMin.LdSt8U.rD);
         } else {
            addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
         }
         return;
      case ARMin_Ld8S:
         goto unhandled;
      /* XDirect/XIndir/XAssisted are also a bit subtle.  They
         conditionally exit the block.  Hence we only need to list (1)
         the registers that they read, and (2) the registers that they
         write in the case where the block is not exited.  (2) is
         empty, hence only (1) is relevant here. */
      case ARMin_XDirect:
         addRegUsage_ARMAMode1(u, i->ARMin.XDirect.amR15T);
         return;
      case ARMin_XIndir:
         addHRegUse(u, HRmRead, i->ARMin.XIndir.dstGA);
         addRegUsage_ARMAMode1(u, i->ARMin.XIndir.amR15T);
         return;
      case ARMin_XAssisted:
         addHRegUse(u, HRmRead, i->ARMin.XAssisted.dstGA);
         addRegUsage_ARMAMode1(u, i->ARMin.XAssisted.amR15T);
         return;
      case ARMin_CMov:
         addHRegUse(u, HRmWrite, i->ARMin.CMov.dst);
         addHRegUse(u, HRmRead,  i->ARMin.CMov.dst);
         addRegUsage_ARMRI84(u, i->ARMin.CMov.src);
         return;
      case ARMin_Call:
         /* logic and comments copied/modified from x86 back end */
         /* This is a bit subtle. */
         /* First off, claim it trashes all the caller-saved regs
            which fall within the register allocator's jurisdiction.
            These I believe to be r0,1,2,3.  If it turns out that r9
            is also caller-saved, then we'll have to add that here
            too. */
         addHRegUse(u, HRmWrite, hregARM_R0());
         addHRegUse(u, HRmWrite, hregARM_R1());
         addHRegUse(u, HRmWrite, hregARM_R2());
         addHRegUse(u, HRmWrite, hregARM_R3());
         /* Now we have to state any parameter-carrying registers
            which might be read.  This depends on nArgRegs. */
         switch (i->ARMin.Call.nArgRegs) {
            case 4: addHRegUse(u, HRmRead, hregARM_R3()); /*fallthru*/
            case 3: addHRegUse(u, HRmRead, hregARM_R2()); /*fallthru*/
            case 2: addHRegUse(u, HRmRead, hregARM_R1()); /*fallthru*/
            case 1: addHRegUse(u, HRmRead, hregARM_R0()); break;
            case 0: break;
            default: vpanic("getRegUsage_ARM:Call:regparms");
         }
         /* Finally, there is the issue that the insn trashes a
            register because the literal target address has to be
            loaded into a register.  Fortunately, for the nArgRegs=
            0/1/2/3 case, we can use r0, r1, r2 or r3 respectively, so
            this does not cause any further damage.  For the
            nArgRegs=4 case, we'll have to choose another register
            arbitrarily since all the caller saved regs are used for
            parameters, and so we might as well choose r11.
            */
         if (i->ARMin.Call.nArgRegs == 4)
            addHRegUse(u, HRmWrite, hregARM_R11());
         /* Upshot of this is that the assembler really must observe
            the here-stated convention of which register to use as an
            address temporary, depending on nArgRegs: 0==r0,
            1==r1, 2==r2, 3==r3, 4==r11 */
         return;
      case ARMin_Mul:
         addHRegUse(u, HRmRead, hregARM_R2());
         addHRegUse(u, HRmRead, hregARM_R3());
         addHRegUse(u, HRmWrite, hregARM_R0());
         if (i->ARMin.Mul.op != ARMmul_PLAIN)
            addHRegUse(u, HRmWrite, hregARM_R1());
         return;
      case ARMin_Div:
         addHRegUse(u, HRmWrite, i->ARMin.Div.dst);
         addHRegUse(u, HRmRead, i->ARMin.Div.argL);
         addHRegUse(u, HRmRead, i->ARMin.Div.argR);
         return;
      case ARMin_LdrEX:
         addHRegUse(u, HRmRead, hregARM_R4());
         addHRegUse(u, HRmWrite, hregARM_R2());
         if (i->ARMin.LdrEX.szB == 8)
            addHRegUse(u, HRmWrite, hregARM_R3());
         return;
      case ARMin_StrEX:
         addHRegUse(u, HRmRead, hregARM_R4());
         addHRegUse(u, HRmWrite, hregARM_R0());
         addHRegUse(u, HRmRead, hregARM_R2());
         if (i->ARMin.StrEX.szB == 8)
            addHRegUse(u, HRmRead, hregARM_R3());
         return;
      case ARMin_VLdStD:
         addRegUsage_ARMAModeV(u, i->ARMin.VLdStD.amode);
         if (i->ARMin.VLdStD.isLoad) {
            addHRegUse(u, HRmWrite, i->ARMin.VLdStD.dD);
         } else {
            addHRegUse(u, HRmRead, i->ARMin.VLdStD.dD);
         }
         return;
      case ARMin_VLdStS:
         addRegUsage_ARMAModeV(u, i->ARMin.VLdStS.amode);
         if (i->ARMin.VLdStS.isLoad) {
            addHRegUse(u, HRmWrite, i->ARMin.VLdStS.fD);
         } else {
            addHRegUse(u, HRmRead, i->ARMin.VLdStS.fD);
         }
         return;
      case ARMin_VAluD:
         addHRegUse(u, HRmWrite, i->ARMin.VAluD.dst);
         addHRegUse(u, HRmRead, i->ARMin.VAluD.argL);
         addHRegUse(u, HRmRead, i->ARMin.VAluD.argR);
         return;
      case ARMin_VAluS:
         addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
         addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
         addHRegUse(u, HRmRead, i->ARMin.VAluS.argR);
         return;
      case ARMin_VUnaryD:
         addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
         addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
         return;
      case ARMin_VUnaryS:
         addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
         addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
         return;
      case ARMin_VCmpD:
         addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
         addHRegUse(u, HRmRead, i->ARMin.VCmpD.argR);
         return;
      case ARMin_VCMovD:
         addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCMovD.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCMovD.src);
         return;
      case ARMin_VCMovS:
         addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCMovS.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCMovS.src);
         return;
      case ARMin_VCvtSD:
         addHRegUse(u, HRmWrite, i->ARMin.VCvtSD.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCvtSD.src);
         return;
      case ARMin_VXferD:
         if (i->ARMin.VXferD.toD) {
            addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
            addHRegUse(u, HRmRead,  i->ARMin.VXferD.rHi);
            addHRegUse(u, HRmRead,  i->ARMin.VXferD.rLo);
         } else {
            addHRegUse(u, HRmRead,  i->ARMin.VXferD.dD);
            addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi);
            addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo);
         }
         return;
      case ARMin_VXferS:
         if (i->ARMin.VXferS.toS) {
            addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD);
            addHRegUse(u, HRmRead,  i->ARMin.VXferS.rLo);
         } else {
            addHRegUse(u, HRmRead,  i->ARMin.VXferS.fD);
            addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo);
         }
         return;
      case ARMin_VCvtID:
         addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
         addHRegUse(u, HRmRead,  i->ARMin.VCvtID.src);
         return;
      case ARMin_FPSCR:
         if (i->ARMin.FPSCR.toFPSCR)
            addHRegUse(u, HRmRead, i->ARMin.FPSCR.iReg);
         else
            addHRegUse(u, HRmWrite, i->ARMin.FPSCR.iReg);
         return;
      case ARMin_MFence:
         return;
      case ARMin_CLREX:
         return;
      case ARMin_NLdStQ:
         if (i->ARMin.NLdStQ.isLoad)
            addHRegUse(u, HRmWrite, i->ARMin.NLdStQ.dQ);
         else
            addHRegUse(u, HRmRead, i->ARMin.NLdStQ.dQ);
         addRegUsage_ARMAModeN(u, i->ARMin.NLdStQ.amode);
         return;
      case ARMin_NLdStD:
         if (i->ARMin.NLdStD.isLoad)
            addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
         else
            addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
         addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
         return;
      case ARMin_NUnary:
         addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
         addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
         return;
      case ARMin_NUnaryS:
         addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
         addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
         return;
      case ARMin_NShift:
         addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
         addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
         addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
         return;
      case ARMin_NDual:
         addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
         addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
         addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
         addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
         return;
      case ARMin_NBinary:
         addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
         /* TODO: sometimes dst is also being read! */
         // XXX fix this
         addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
         addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
         return;
      case ARMin_NeonImm:
         addHRegUse(u, HRmWrite, i->ARMin.NeonImm.dst);
         return;
      case ARMin_NCMovQ:
         addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
         addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.dst);
         addHRegUse(u, HRmRead,  i->ARMin.NCMovQ.src);
         return;
      case ARMin_Add32:
         addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
         addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
         return;
      case ARMin_EvCheck:
         /* We expect both amodes only to mention r8, so this is in
            fact pointless, since r8 isn't allocatable, but
            anyway.. */
         addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amCounter);
         addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amFailAddr);
         addHRegUse(u, HRmWrite, hregARM_R12()); /* also unavail to RA */
         return;
      case ARMin_ProfInc:
         addHRegUse(u, HRmWrite, hregARM_R12());
         addHRegUse(u, HRmWrite, hregARM_R11());
         return;
      unhandled:
      default:
         ppARMInstr(i);
         vpanic("getRegUsage_ARMInstr");
   }
}
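
/* Worked example (editor's note, not part of the original code): for
   a hypothetical "add v3, v1, v2" on virtual registers, i.e.
   i = ARMin_Alu(ARMalu_ADD, v3, v1, <reg-form RI84 holding v2>),
   the switch above records
      HRmWrite v3   (the destination),
      HRmRead  v1   (the left argument),
      HRmRead  v2   (via addRegUsage_ARMRI84 on the reg-form operand),
   which is exactly the read/write summary the register allocator
   needs in order to compute live ranges. */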


void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
{
   vassert(mode64 == False);
   switch (i->tag) {
      case ARMin_Alu:
         i->ARMin.Alu.dst = lookupHRegRemap(m, i->ARMin.Alu.dst);
         i->ARMin.Alu.argL = lookupHRegRemap(m, i->ARMin.Alu.argL);
         mapRegs_ARMRI84(m, i->ARMin.Alu.argR);
         return;
      case ARMin_Shift:
         i->ARMin.Shift.dst = lookupHRegRemap(m, i->ARMin.Shift.dst);
         i->ARMin.Shift.argL = lookupHRegRemap(m, i->ARMin.Shift.argL);
         mapRegs_ARMRI5(m, i->ARMin.Shift.argR);
         return;
      case ARMin_Unary:
         i->ARMin.Unary.dst = lookupHRegRemap(m, i->ARMin.Unary.dst);
         i->ARMin.Unary.src = lookupHRegRemap(m, i->ARMin.Unary.src);
         return;
      case ARMin_CmpOrTst:
         i->ARMin.CmpOrTst.argL = lookupHRegRemap(m, i->ARMin.CmpOrTst.argL);
         mapRegs_ARMRI84(m, i->ARMin.CmpOrTst.argR);
         return;
      case ARMin_Mov:
         i->ARMin.Mov.dst = lookupHRegRemap(m, i->ARMin.Mov.dst);
         mapRegs_ARMRI84(m, i->ARMin.Mov.src);
         return;
      case ARMin_Imm32:
         i->ARMin.Imm32.dst = lookupHRegRemap(m, i->ARMin.Imm32.dst);
         return;
      case ARMin_LdSt32:
         i->ARMin.LdSt32.rD = lookupHRegRemap(m, i->ARMin.LdSt32.rD);
         mapRegs_ARMAMode1(m, i->ARMin.LdSt32.amode);
         return;
      case ARMin_LdSt16:
         i->ARMin.LdSt16.rD = lookupHRegRemap(m, i->ARMin.LdSt16.rD);
         mapRegs_ARMAMode2(m, i->ARMin.LdSt16.amode);
         return;
      case ARMin_LdSt8U:
         i->ARMin.LdSt8U.rD = lookupHRegRemap(m, i->ARMin.LdSt8U.rD);
         mapRegs_ARMAMode1(m, i->ARMin.LdSt8U.amode);
         return;
      case ARMin_Ld8S:
         goto unhandled;
      case ARMin_XDirect:
         mapRegs_ARMAMode1(m, i->ARMin.XDirect.amR15T);
         return;
      case ARMin_XIndir:
         i->ARMin.XIndir.dstGA
            = lookupHRegRemap(m, i->ARMin.XIndir.dstGA);
         mapRegs_ARMAMode1(m, i->ARMin.XIndir.amR15T);
         return;
      case ARMin_XAssisted:
         i->ARMin.XAssisted.dstGA
            = lookupHRegRemap(m, i->ARMin.XAssisted.dstGA);
         mapRegs_ARMAMode1(m, i->ARMin.XAssisted.amR15T);
         return;
      case ARMin_CMov:
         i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst);
         mapRegs_ARMRI84(m, i->ARMin.CMov.src);
         return;
      case ARMin_Call:
         return;
      case ARMin_Mul:
         return;
      case ARMin_Div:
         i->ARMin.Div.dst = lookupHRegRemap(m, i->ARMin.Div.dst);
         i->ARMin.Div.argL = lookupHRegRemap(m, i->ARMin.Div.argL);
         i->ARMin.Div.argR = lookupHRegRemap(m, i->ARMin.Div.argR);
         return;
      case ARMin_LdrEX:
         return;
      case ARMin_StrEX:
         return;
      case ARMin_VLdStD:
         i->ARMin.VLdStD.dD = lookupHRegRemap(m, i->ARMin.VLdStD.dD);
         mapRegs_ARMAModeV(m, i->ARMin.VLdStD.amode);
         return;
      case ARMin_VLdStS:
         i->ARMin.VLdStS.fD = lookupHRegRemap(m, i->ARMin.VLdStS.fD);
         mapRegs_ARMAModeV(m, i->ARMin.VLdStS.amode);
         return;
      case ARMin_VAluD:
         i->ARMin.VAluD.dst  = lookupHRegRemap(m, i->ARMin.VAluD.dst);
         i->ARMin.VAluD.argL = lookupHRegRemap(m, i->ARMin.VAluD.argL);
         i->ARMin.VAluD.argR = lookupHRegRemap(m, i->ARMin.VAluD.argR);
         return;
      case ARMin_VAluS:
         i->ARMin.VAluS.dst  = lookupHRegRemap(m, i->ARMin.VAluS.dst);
         i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
         i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
         return;
      case ARMin_VUnaryD:
         i->ARMin.VUnaryD.dst = lookupHRegRemap(m, i->ARMin.VUnaryD.dst);
         i->ARMin.VUnaryD.src = lookupHRegRemap(m, i->ARMin.VUnaryD.src);
         return;
      case ARMin_VUnaryS:
         i->ARMin.VUnaryS.dst = lookupHRegRemap(m, i->ARMin.VUnaryS.dst);
         i->ARMin.VUnaryS.src = lookupHRegRemap(m, i->ARMin.VUnaryS.src);
         return;
      case ARMin_VCmpD:
         i->ARMin.VCmpD.argL = lookupHRegRemap(m, i->ARMin.VCmpD.argL);
         i->ARMin.VCmpD.argR = lookupHRegRemap(m, i->ARMin.VCmpD.argR);
         return;
      case ARMin_VCMovD:
         i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
         i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
         return;
      case ARMin_VCMovS:
         i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
         i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
         return;
      case ARMin_VCvtSD:
         i->ARMin.VCvtSD.dst = lookupHRegRemap(m, i->ARMin.VCvtSD.dst);
         i->ARMin.VCvtSD.src = lookupHRegRemap(m, i->ARMin.VCvtSD.src);
         return;
      case ARMin_VXferD:
         i->ARMin.VXferD.dD  = lookupHRegRemap(m, i->ARMin.VXferD.dD);
         i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
         i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
         return;
      case ARMin_VXferS:
         i->ARMin.VXferS.fD  = lookupHRegRemap(m, i->ARMin.VXferS.fD);
         i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
         return;
      case ARMin_VCvtID:
         i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
         i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
         return;
      case ARMin_FPSCR:
         i->ARMin.FPSCR.iReg = lookupHRegRemap(m, i->ARMin.FPSCR.iReg);
         return;
      case ARMin_MFence:
         return;
      case ARMin_CLREX:
         return;
      case ARMin_NLdStQ:
         i->ARMin.NLdStQ.dQ = lookupHRegRemap(m, i->ARMin.NLdStQ.dQ);
         mapRegs_ARMAModeN(m, i->ARMin.NLdStQ.amode);
         return;
      case ARMin_NLdStD:
         i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
         mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
         return;
      case ARMin_NUnary:
         i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
         i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
         return;
      case ARMin_NUnaryS:
         i->ARMin.NUnaryS.src->reg
            = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
         i->ARMin.NUnaryS.dst->reg
            = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
         return;
      case ARMin_NShift:
         i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
         i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
         i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
         return;
      case ARMin_NDual:
         i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
         i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
         return;
      case ARMin_NBinary:
         i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
         i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
         i->ARMin.NBinary.dst  = lookupHRegRemap(m, i->ARMin.NBinary.dst);
         return;
      case ARMin_NeonImm:
         i->ARMin.NeonImm.dst = lookupHRegRemap(m, i->ARMin.NeonImm.dst);
         return;
      case ARMin_NCMovQ:
         i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
         i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
         return;
      case ARMin_Add32:
         i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
         i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
         return;
      case ARMin_EvCheck:
         /* We expect both amodes only to mention r8, so this is in
            fact pointless, since r8 isn't allocatable, but
            anyway.. */
         mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amCounter);
         mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amFailAddr);
         return;
      case ARMin_ProfInc:
         /* hardwires r11 and r12 -- nothing to modify. */
         return;
      unhandled:
      default:
         ppARMInstr(i);
         vpanic("mapRegs_ARMInstr");
   }
}
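
/* Example (editor's sketch): this pass rewrites an instruction in
   place once the allocator has fixed a vreg-to-rreg assignment.  If
   the remap m says v1 -> r1 and v2 -> r2, an ARMin_Div with dst=v1,
   argL=v1, argR=v2 comes out as dst=r1, argL=r1, argR=r2; no new
   instruction is constructed. */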

/* Figure out if i represents a reg-reg move, and if so assign the
   source and destination to *src and *dst.  If in doubt say No.  Used
   by the register allocator to do move coalescing.
*/
Bool isMove_ARMInstr ( ARMInstr* i, HReg* src, HReg* dst )
{
   /* Moves between integer regs */
   switch (i->tag) {
      case ARMin_Mov:
         if (i->ARMin.Mov.src->tag == ARMri84_R) {
            *src = i->ARMin.Mov.src->ARMri84.R.reg;
            *dst = i->ARMin.Mov.dst;
            return True;
         }
         break;
      case ARMin_VUnaryD:
         if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
            *src = i->ARMin.VUnaryD.src;
            *dst = i->ARMin.VUnaryD.dst;
            return True;
         }
         break;
      case ARMin_VUnaryS:
         if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
            *src = i->ARMin.VUnaryS.src;
            *dst = i->ARMin.VUnaryS.dst;
            return True;
         }
         break;
      case ARMin_NUnary:
         if (i->ARMin.NUnary.op == ARMneon_COPY) {
            *src = i->ARMin.NUnary.src;
            *dst = i->ARMin.NUnary.dst;
            return True;
         }
         break;
      default:
         break;
   }

   return False;
}
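
/* Usage sketch (editor's illustration; the allocator-side logic here
   is an assumption, only isMove_ARMInstr itself is real):

      HReg src, dst;
      if (isMove_ARMInstr(instr, &src, &dst)) {
         // If the allocator can place src and dst in the same real
         // register, the move becomes a no-op and can be dropped
         // ("move coalescing").
      }
*/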


/* Generate arm spill/reload instructions under the direction of the
   register allocator.  Note it's critical these don't write the
   condition codes. */

void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                    HReg rreg, Int offsetB, Bool mode64 )
{
   HRegClass rclass;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == False);
   *i1 = *i2 = NULL;
   rclass = hregClass(rreg);
   switch (rclass) {
      case HRcInt32:
         vassert(offsetB <= 4095);
         *i1 = ARMInstr_LdSt32( False/*!isLoad*/,
                                rreg,
                                ARMAMode1_RI(hregARM_R8(), offsetB) );
         return;
      case HRcFlt32:
      case HRcFlt64: {
         HReg r8   = hregARM_R8();  /* baseblock */
         HReg r12  = hregARM_R12(); /* spill temp */
         HReg base = r8;
         vassert(0 == (offsetB & 3));
         if (offsetB >= 1024) {
            Int offsetKB = offsetB / 1024;
            /* r12 = r8 + (1024 * offsetKB) */
            *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
                               ARMRI84_I84(offsetKB, 11));
            offsetB -= (1024 * offsetKB);
            base = r12;
         }
         vassert(offsetB <= 1020);
         if (rclass == HRcFlt32) {
            *i2 = ARMInstr_VLdStS( False/*!isLoad*/,
                                   rreg,
                                   mkARMAModeV(base, offsetB) );
         } else {
            *i2 = ARMInstr_VLdStD( False/*!isLoad*/,
                                   rreg,
                                   mkARMAModeV(base, offsetB) );
         }
         return;
      }
      case HRcVec128: {
         HReg r8  = hregARM_R8();
         HReg r12 = hregARM_R12();
         *i1 = ARMInstr_Add32(r12, r8, offsetB);
         *i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
         return;
      }
      default:
         ppHRegClass(rclass);
         vpanic("genSpill_ARM: unimplemented regclass");
   }
}

void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                     HReg rreg, Int offsetB, Bool mode64 )
{
   HRegClass rclass;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == False);
   *i1 = *i2 = NULL;
   rclass = hregClass(rreg);
   switch (rclass) {
      case HRcInt32:
         vassert(offsetB <= 4095);
         *i1 = ARMInstr_LdSt32( True/*isLoad*/,
                                rreg,
                                ARMAMode1_RI(hregARM_R8(), offsetB) );
         return;
      case HRcFlt32:
      case HRcFlt64: {
         HReg r8   = hregARM_R8();  /* baseblock */
         HReg r12  = hregARM_R12(); /* spill temp */
         HReg base = r8;
         vassert(0 == (offsetB & 3));
         if (offsetB >= 1024) {
            Int offsetKB = offsetB / 1024;
            /* r12 = r8 + (1024 * offsetKB) */
            *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
                               ARMRI84_I84(offsetKB, 11));
            offsetB -= (1024 * offsetKB);
            base = r12;
         }
         vassert(offsetB <= 1020);
         if (rclass == HRcFlt32) {
            *i2 = ARMInstr_VLdStS( True/*isLoad*/,
                                   rreg,
                                   mkARMAModeV(base, offsetB) );
         } else {
            *i2 = ARMInstr_VLdStD( True/*isLoad*/,
                                   rreg,
                                   mkARMAModeV(base, offsetB) );
         }
         return;
      }
      case HRcVec128: {
         HReg r8  = hregARM_R8();
         HReg r12 = hregARM_R12();
         *i1 = ARMInstr_Add32(r12, r8, offsetB);
         *i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(r12));
         return;
      }
      default:
         ppHRegClass(rclass);
         vpanic("genReload_ARM: unimplemented regclass");
   }
}
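
/* Worked example (editor's note): reloading a 64-bit VFP register
   from baseblock offset 1032.  VLdStD can only encode offsets up to
   1020, so with offsetB >= 1024 the code above first materialises
      add r12, r8, #1024    (ARMRI84_I84(1, 11): 1 ror 22 == 1024)
   into *i1 and then a
      fldd dD, [r12, #+8]
   into *i2; the two-slot (i1,i2) protocol thereby covers any
   4-aligned baseblock offset. */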


/* Emit an instruction into buf and return the number of bytes used.
   Note that buf is not the insn's final place, and therefore it is
   imperative to emit position-independent code. */

static inline UChar iregNo ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcInt32);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 15);
   return toUChar(n);
}

static inline UChar dregNo ( HReg r )
{
   UInt n;
   if (hregClass(r) != HRcFlt64)
      ppHRegClass(hregClass(r));
   vassert(hregClass(r) == HRcFlt64);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 31);
   return toUChar(n);
}

static inline UChar fregNo ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcFlt32);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 31);
   return toUChar(n);
}

static inline UChar qregNo ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcVec128);
   vassert(!hregIsVirtual(r));
   n = hregNumber(r);
   vassert(n <= 15);
   return toUChar(n);
}

#define BITS4(zzb3,zzb2,zzb1,zzb0) \
   (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
#define X0000  BITS4(0,0,0,0)
#define X0001  BITS4(0,0,0,1)
#define X0010  BITS4(0,0,1,0)
#define X0011  BITS4(0,0,1,1)
#define X0100  BITS4(0,1,0,0)
#define X0101  BITS4(0,1,0,1)
#define X0110  BITS4(0,1,1,0)
#define X0111  BITS4(0,1,1,1)
#define X1000  BITS4(1,0,0,0)
#define X1001  BITS4(1,0,0,1)
#define X1010  BITS4(1,0,1,0)
#define X1011  BITS4(1,0,1,1)
#define X1100  BITS4(1,1,0,0)
#define X1101  BITS4(1,1,0,1)
#define X1110  BITS4(1,1,1,0)
#define X1111  BITS4(1,1,1,1)

#define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12))

#define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2)        \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8))

#define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0)        \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) <<  0))

#define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
  ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
   (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
   (((zzx0) & 0xF) << 0))

#define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0)  \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) |  \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) |  \
    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8) |  \
    (((zzx1) & 0xF) <<  4) | (((zzx0) & 0xF) <<  0))

#define XX______(zzx7,zzx6) \
   ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))

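/* Worked example (editor's note): these macros simply pack hex
   nibbles, most significant first.  For instance the hardwired
   "blx r12" word used by the XDirect/XIndir/XAssisted emitters below
   is
      XXXXXXXX(0xE,0x1,0x2,0xF,0xF,0xF,0x3,0xC) == 0xE12FFF3C,
   the A1 encoding of BLX r12 with cond=AL. */
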
/* Generate a skeletal insn that involves an RI84 shifter operand.
   Returns a word which is all zeroes apart from bits 25 and 11..0,
   since it is those that encode the shifter operand (at least to the
   extent that we care about it.) */
static UInt skeletal_RI84 ( ARMRI84* ri )
{
   UInt instr;
   if (ri->tag == ARMri84_I84) {
      vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
      vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
      instr = 1 << 25;
      instr |= (ri->ARMri84.I84.imm4 << 8);
      instr |= ri->ARMri84.I84.imm8;
   } else {
      instr = 0 << 25;
      instr |= iregNo(ri->ARMri84.R.reg);
   }
   return instr;
}
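
/* Worked example (editor's note): an I84 operand denotes imm8 rotated
   right by 2*imm4.  genSpill_ARM/genReload_ARM above use
   ARMRI84_I84(offsetKB, 11), a rotation of 22 bits; since imm8 < 256
   that equals imm8 << 10, i.e. the operand's value is 1024*offsetKB.
   skeletal_RI84 puts imm4 in bits 11..8, imm8 in bits 7..0, and sets
   bit 25 to select the immediate form. */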

/* Ditto for RI5.  Resulting word is zeroes apart from bit 4 and bits
   11..7. */
static UInt skeletal_RI5 ( ARMRI5* ri )
{
   UInt instr;
   if (ri->tag == ARMri5_I5) {
      UInt imm5 = ri->ARMri5.I5.imm5;
      vassert(imm5 >= 1 && imm5 <= 31);
      instr = 0 << 4;
      instr |= imm5 << 7;
   } else {
      instr = 1 << 4;
      instr |= iregNo(ri->ARMri5.R.reg) << 8;
   }
   return instr;
}
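
/* Worked example (editor's note): for a shift-by-immediate such as
   "mov rD, rM, lsl #7", skeletal_RI5 contributes imm5=7 in bits 11..7
   with bit 4 clear; for shift-by-register it instead sets bit 4 and
   places the shift register in bits 11..8.  The ARMin_Shift emitter
   below ORs in the base opcode and the shift type (bits 6..5). */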


/* Get an immediate into a register, using only that
   register.  (very lame..) */
static UInt* imm32_to_iregNo ( UInt* p, Int rD, UInt imm32 )
{
   UInt instr;
   vassert(rD >= 0 && rD <= 14); // r15 not good to mess with!
#if 0
   if (0 == (imm32 & ~0xFF)) {
      /* mov with an immediate shifter operand of (0, imm32) (??) */
      instr = XXXXXX__(X1110,X0011,X1010,X0000,rD,X0000);
      instr |= imm32;
      *p++ = instr;
   } else {
      // this is very bad; causes Dcache pollution
      // ldr  rD, [pc]
      instr = XXXXX___(X1110,X0101,X1001,X1111,rD);
      *p++ = instr;
      // b .+8
      instr = 0xEA000000;
      *p++ = instr;
      // .word imm32
      *p++ = imm32;
   }
#else
   if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
      /* Generate movw rD, #low16.  Then, if the high 16 are
         nonzero, generate movt rD, #high16. */
      UInt lo16 = imm32 & 0xFFFF;
      UInt hi16 = (imm32 >> 16) & 0xFFFF;
      instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
                       (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
                       lo16 & 0xF);
      *p++ = instr;
      if (hi16 != 0) {
         instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
                          (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
                          hi16 & 0xF);
         *p++ = instr;
      }
   } else {
      UInt imm, rot;
      UInt op = X1010;
      UInt rN = 0;
      if ((imm32 & 0xFF) || (imm32 == 0)) {
         imm = imm32 & 0xFF;
         rot = 0;
         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
         *p++ = instr;
         op = X1000;
         rN = rD;
      }
      if (imm32 & 0xFF000000) {
         imm = (imm32 >> 24) & 0xFF;
         rot = 4;
         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
         *p++ = instr;
         op = X1000;
         rN = rD;
      }
      if (imm32 & 0xFF0000) {
         imm = (imm32 >> 16) & 0xFF;
         rot = 8;
         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
         *p++ = instr;
         op = X1000;
         rN = rD;
      }
      if (imm32 & 0xFF00) {
         imm = (imm32 >> 8) & 0xFF;
         rot = 12;
         instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
         *p++ = instr;
         op = X1000;
         rN = rD;
      }
   }
#endif
   return p;
}
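
/* Worked example (editor's note): on a pre-v7 host (no movw/movt) the
   fallback path above builds the constant one byte at a time with
   MOV/ORR and rotated immediates.  For imm32 = 0x12345678 it emits
      mov rD, #0x78                    (rot field 0)
      orr rD, rD, #0x12000000          (imm8=0x12, rot field 4,  ror 8)
      orr rD, rD, #0x00340000          (imm8=0x34, rot field 8,  ror 16)
      orr rD, rD, #0x00005600          (imm8=0x56, rot field 12, ror 24)
   i.e. one to four instructions, depending on which bytes of the
   constant are nonzero. */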

/* Get an immediate into a register, using only that register, and
   generating exactly 2 instructions, regardless of the value of the
   immediate. This is used when generating sections of code that need
   to be patched later, so as to guarantee a specific size. */
static UInt* imm32_to_iregNo_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
{
   if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
      /* Generate movw rD, #low16 ;  movt rD, #high16. */
      UInt lo16 = imm32 & 0xFFFF;
      UInt hi16 = (imm32 >> 16) & 0xFFFF;
      UInt instr;
      instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
                       (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
                       lo16 & 0xF);
      *p++ = instr;
      instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
                       (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
                       hi16 & 0xF);
      *p++ = instr;
   } else {
      vassert(0); /* lose */
   }
   return p;
}
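
/* Worked example (editor's note): for rD=12, imm32=0x11223344 the two
   words produced are
      0xE303C344   movw r12, #0x3344
      0xE341C122   movt r12, #0x1122
   Since the sequence is always exactly 8 bytes, a patcher that knows
   its location can later rewrite the constant in place; that is what
   the chaining/unchaining machinery relies on. */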

/* Check whether p points at a 2-insn sequence cooked up by
   imm32_to_iregNo_EXACTLY2(). */
static Bool is_imm32_to_iregNo_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
{
   if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
      /* Check for movw rD, #low16 ;  movt rD, #high16. */
      UInt lo16 = imm32 & 0xFFFF;
      UInt hi16 = (imm32 >> 16) & 0xFFFF;
      UInt i0, i1;
      i0 = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
                    (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
                    lo16 & 0xF);
      i1 = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
                    (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
                    hi16 & 0xF);
      return p[0] == i0 && p[1] == i1;
   } else {
      vassert(0); /* lose */
   }
}


static UInt* do_load_or_store32 ( UInt* p,
                                  Bool isLoad, UInt rD, ARMAMode1* am )
{
   vassert(rD <= 12);
   vassert(am->tag == ARMam1_RI); // RR case is not handled
   UInt bB = 0;
   UInt bL = isLoad ? 1 : 0;
   Int  simm12;
   UInt instr, bP;
   if (am->ARMam1.RI.simm13 < 0) {
      bP = 0;
      simm12 = -am->ARMam1.RI.simm13;
   } else {
      bP = 1;
      simm12 = am->ARMam1.RI.simm13;
   }
   vassert(simm12 >= 0 && simm12 <= 4095);
   instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
                    iregNo(am->ARMam1.RI.reg),
                    rD);
   instr |= simm12;
   *p++ = instr;
   return p;
}
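
/* Worked example (editor's note): do_load_or_store32(p, False, 12, am)
   with am = r8 + 8 (tag ARMam1_RI, simm13 = 8) emits the single word
      0xE588C008   str r12, [r8, #+8]
   -- bP=1 selects a positive (added) offset, bB=0 means a word access,
   bL=0 means store, simm12=8.  The XDirect/XIndir/XAssisted cases
   below use exactly this to update the guest R15T slot. */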


/* Emit an instruction into buf and return the number of bytes used.
   Note that buf is not the insn's final place, and therefore it is
   imperative to emit position-independent code.  If the emitted
   instruction was a profiler inc, set *is_profInc to True, else
   leave it unchanged. */

Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc,
                    UChar* buf, Int nbuf, ARMInstr* i,
                    Bool mode64,
                    void* disp_cp_chain_me_to_slowEP,
                    void* disp_cp_chain_me_to_fastEP,
                    void* disp_cp_xindir,
                    void* disp_cp_xassisted )
{
   UInt* p = (UInt*)buf;
   vassert(nbuf >= 32);
   vassert(mode64 == False);
   vassert(0 == (((HWord)buf) & 3));

   switch (i->tag) {
      case ARMin_Alu: {
         UInt     instr, subopc;
         UInt     rD   = iregNo(i->ARMin.Alu.dst);
         UInt     rN   = iregNo(i->ARMin.Alu.argL);
         ARMRI84* argR = i->ARMin.Alu.argR;
         switch (i->ARMin.Alu.op) {
            case ARMalu_ADDS: /* fallthru */
            case ARMalu_ADD:  subopc = X0100; break;
            case ARMalu_ADC:  subopc = X0101; break;
            case ARMalu_SUBS: /* fallthru */
            case ARMalu_SUB:  subopc = X0010; break;
            case ARMalu_SBC:  subopc = X0110; break;
            case ARMalu_AND:  subopc = X0000; break;
            case ARMalu_BIC:  subopc = X1110; break;
            case ARMalu_OR:   subopc = X1100; break;
            case ARMalu_XOR:  subopc = X0001; break;
            default: goto bad;
         }
         instr = skeletal_RI84(argR);
         instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
                           (subopc << 1) & 0xF, rN, rD);
         if (i->ARMin.Alu.op == ARMalu_ADDS
             || i->ARMin.Alu.op == ARMalu_SUBS) {
            instr |= 1<<20;  /* set the S bit */
         }
         *p++ = instr;
         goto done;
      }
      case ARMin_Shift: {
         UInt    instr, subopc;
         UInt    rD   = iregNo(i->ARMin.Shift.dst);
         UInt    rM   = iregNo(i->ARMin.Shift.argL);
         ARMRI5* argR = i->ARMin.Shift.argR;
         switch (i->ARMin.Shift.op) {
            case ARMsh_SHL: subopc = X0000; break;
            case ARMsh_SHR: subopc = X0001; break;
            case ARMsh_SAR: subopc = X0010; break;
            default: goto bad;
         }
         instr = skeletal_RI5(argR);
         instr |= XXXXX__X(X1110,X0001,X1010,X0000,rD, /* _ _ */ rM);
         instr |= (subopc & 3) << 5;
         *p++ = instr;
         goto done;
      }
      case ARMin_Unary: {
         UInt instr;
         UInt rDst = iregNo(i->ARMin.Unary.dst);
         UInt rSrc = iregNo(i->ARMin.Unary.src);
         switch (i->ARMin.Unary.op) {
            case ARMun_CLZ:
               instr = XXXXXXXX(X1110,X0001,X0110,X1111,
                                rDst,X1111,X0001,rSrc);
               *p++ = instr;
               goto done;
            case ARMun_NEG: /* RSB rD,rS,#0 */
               instr = XXXXX___(X1110,0x2,0x6,rSrc,rDst);
               *p++ = instr;
               goto done;
            case ARMun_NOT: {
               UInt subopc = X1111; /* MVN */
               instr = rSrc;
               instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
                                 (subopc << 1) & 0xF, 0, rDst);
               *p++ = instr;
               goto done;
            }
            default:
               break;
         }
         goto bad;
      }
      case ARMin_CmpOrTst: {
         UInt instr  = skeletal_RI84(i->ARMin.CmpOrTst.argR);
         UInt subopc = i->ARMin.CmpOrTst.isCmp ? X1010 : X1000;
         UInt SBZ    = 0;
         instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
                           ((subopc << 1) & 0xF) | 1,
                           iregNo(i->ARMin.CmpOrTst.argL), SBZ );
         *p++ = instr;
         goto done;
      }
      case ARMin_Mov: {
         UInt instr  = skeletal_RI84(i->ARMin.Mov.src);
         UInt subopc = X1101; /* MOV */
         UInt SBZ    = 0;
         instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
                           (subopc << 1) & 0xF, SBZ,
                           iregNo(i->ARMin.Mov.dst));
         *p++ = instr;
         goto done;
      }
3037      case ARMin_Imm32: {
3038         p = imm32_to_iregNo( (UInt*)p, iregNo(i->ARMin.Imm32.dst),
3039                                        i->ARMin.Imm32.imm32 );
3040         goto done;
3041      }
3042      case ARMin_LdSt32:
3043      case ARMin_LdSt8U: {
3044         UInt       bL, bB;
3045         HReg       rD;
3046         ARMAMode1* am;
3047         if (i->tag == ARMin_LdSt32) {
3048            bB = 0;
3049            bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
3050            am = i->ARMin.LdSt32.amode;
3051            rD = i->ARMin.LdSt32.rD;
3052         } else {
3053            bB = 1;
3054            bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
3055            am = i->ARMin.LdSt8U.amode;
3056            rD = i->ARMin.LdSt8U.rD;
3057         }
3058         if (am->tag == ARMam1_RI) {
3059            Int  simm12;
3060            UInt instr, bP;
3061            if (am->ARMam1.RI.simm13 < 0) {
3062               bP = 0;
3063               simm12 = -am->ARMam1.RI.simm13;
3064            } else {
3065               bP = 1;
3066               simm12 = am->ARMam1.RI.simm13;
3067            }
3068            vassert(simm12 >= 0 && simm12 <= 4095);
3069            instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
3070                             iregNo(am->ARMam1.RI.reg),
3071                             iregNo(rD));
3072            instr |= simm12;
3073            *p++ = instr;
3074            goto done;
3075         } else {
3076            // RR case
3077            goto bad;
3078         }
3079      }
3080      case ARMin_LdSt16: {
3081         HReg       rD = i->ARMin.LdSt16.rD;
3082         UInt       bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
3083         UInt       bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
3084         ARMAMode2* am = i->ARMin.LdSt16.amode;
3085         if (am->tag == ARMam2_RI) {
3086            HReg rN = am->ARMam2.RI.reg;
3087            Int  simm8;
3088            UInt bP, imm8hi, imm8lo, instr;
3089            if (am->ARMam2.RI.simm9 < 0) {
3090               bP = 0;
3091               simm8 = -am->ARMam2.RI.simm9;
3092            } else {
3093               bP = 1;
3094               simm8 = am->ARMam2.RI.simm9;
3095            }
            vassert(simm8 >= 0 && simm8 <= 255);
            imm8hi = (simm8 >> 4) & 0xF;
            imm8lo = simm8 & 0xF;
            vassert(!(bL == 0 && bS == 1)); // signed stores don't exist
            /**/ if (bL == 0 && bS == 0) {
               // strh
               instr = XXXXXXXX(X1110,X0001, BITS4(bP,1,0,0), iregNo(rN),
                                iregNo(rD), imm8hi, X1011, imm8lo);
               *p++ = instr;
               goto done;
            }
            else if (bL == 1 && bS == 0) {
               // ldrh
               instr = XXXXXXXX(X1110,X0001, BITS4(bP,1,0,1), iregNo(rN),
                                iregNo(rD), imm8hi, X1011, imm8lo);
               *p++ = instr;
               goto done;
            }
            else if (bL == 1 && bS == 1) {
               goto bad;
            }
            else vassert(0); // ill-constructed insn
         } else {
            // RR case
            goto bad;
         }
      }
      case ARMin_Ld8S:
         goto bad;

      case ARMin_XDirect: {
         /* NB: what goes on here has to be very closely coordinated
            with the chainXDirect_ARM and unchainXDirect_ARM below. */
         /* We're generating chain-me requests here, so we need to be
            sure this is actually allowed -- no-redir translations
            can't use chain-me's.  Hence: */
         vassert(disp_cp_chain_me_to_slowEP != NULL);
         vassert(disp_cp_chain_me_to_fastEP != NULL);

         /* Use ptmp for backpatching conditional jumps. */
         UInt* ptmp = NULL;

         /* First off, if this is conditional, create a conditional
            jump over the rest of it.  Or at least, leave a space for
            it that we will shortly fill in. */
         if (i->ARMin.XDirect.cond != ARMcc_AL) {
            vassert(i->ARMin.XDirect.cond != ARMcc_NV);
            ptmp = p;
            *p++ = 0;
         }

         /* Update the guest R15T. */
         /* movw r12, lo16(dstGA) */
         /* movt r12, hi16(dstGA) */
         /* str r12, amR15T */
         p = imm32_to_iregNo(p, /*r*/12, i->ARMin.XDirect.dstGA);
         p = do_load_or_store32(p, False/*!isLoad*/,
                                /*r*/12, i->ARMin.XDirect.amR15T);

         /* --- FIRST PATCHABLE BYTE follows --- */
         /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
            calling to) backs up the return address, so as to find the
            address of the first patchable byte.  So: don't change the
            number of instructions (3) below. */
         /* movw r12, lo16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
         /* movt r12, hi16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
         /* blx  r12  (A1) */
         void* disp_cp_chain_me
                  = i->ARMin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
                                              : disp_cp_chain_me_to_slowEP;
         p = imm32_to_iregNo_EXACTLY2(p, /*r*/12,
                                      (UInt)Ptr_to_ULong(disp_cp_chain_me));
         *p++ = 0xE12FFF3C;
         /* --- END of PATCHABLE BYTES --- */

         /* Fix up the conditional jump, if there was one. */
         if (i->ARMin.XDirect.cond != ARMcc_AL) {
            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
            vassert(delta > 0 && delta < 40);
            vassert((delta & 3) == 0);
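            /* ARM condition codes come in complementary pairs that
               differ only in bit 0, so XOR with 1 inverts the
               condition.  The B<cond> displacement is in words,
               relative to PC+8, hence the >>2 and the -2. */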
            UInt notCond = 1 ^ (UInt)i->ARMin.XDirect.cond;
            vassert(notCond <= 13); /* Neither AL nor NV */
            delta = (delta >> 2) - 2;
            *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
         }
         goto done;
      }

      case ARMin_XIndir: {
         /* We're generating transfers that could lead indirectly to a
            chain-me, so we need to be sure this is actually allowed
            -- no-redir translations are not allowed to reach normal
            translations without going through the scheduler.  That
            means no XDirects or XIndirs out from no-redir
            translations.  Hence: */
         vassert(disp_cp_xindir != NULL);

         /* Use ptmp for backpatching conditional jumps. */
         UInt* ptmp = NULL;

         /* First off, if this is conditional, create a conditional
            jump over the rest of it.  Or at least, leave a space for
            it that we will shortly fill in. */
         if (i->ARMin.XIndir.cond != ARMcc_AL) {
            vassert(i->ARMin.XIndir.cond != ARMcc_NV);
            ptmp = p;
            *p++ = 0;
         }

         /* Update the guest R15T. */
         /* str r-dstGA, amR15T */
         p = do_load_or_store32(p, False/*!isLoad*/,
                                iregNo(i->ARMin.XIndir.dstGA),
                                i->ARMin.XIndir.amR15T);

         /* movw r12, lo16(VG_(disp_cp_xindir)) */
         /* movt r12, hi16(VG_(disp_cp_xindir)) */
         /* bx   r12  (A1) */
         p = imm32_to_iregNo(p, /*r*/12,
                             (UInt)Ptr_to_ULong(disp_cp_xindir));
         *p++ = 0xE12FFF1C;

         /* Fix up the conditional jump, if there was one. */
         if (i->ARMin.XIndir.cond != ARMcc_AL) {
            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
            vassert(delta > 0 && delta < 40);
            vassert((delta & 3) == 0);
            UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
            vassert(notCond <= 13); /* Neither AL nor NV */
            delta = (delta >> 2) - 2;
            *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
         }
         goto done;
      }

      case ARMin_XAssisted: {
         /* Use ptmp for backpatching conditional jumps. */
         UInt* ptmp = NULL;

         /* First off, if this is conditional, create a conditional
            jump over the rest of it.  Or at least, leave a space for
            it that we will shortly fill in. */
         if (i->ARMin.XAssisted.cond != ARMcc_AL) {
            vassert(i->ARMin.XAssisted.cond != ARMcc_NV);
            ptmp = p;
            *p++ = 0;
         }

         /* Update the guest R15T. */
         /* str r-dstGA, amR15T */
         p = do_load_or_store32(p, False/*!isLoad*/,
                                iregNo(i->ARMin.XAssisted.dstGA),
                                i->ARMin.XAssisted.amR15T);

         /* movw r8,  $magic_number */
         UInt trcval = 0;
         switch (i->ARMin.XAssisted.jk) {
            case Ijk_ClientReq:   trcval = VEX_TRC_JMP_CLIENTREQ;   break;
            case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
            //case Ijk_Sys_int128:  trcval = VEX_TRC_JMP_SYS_INT128;  break;
            //case Ijk_Yield:       trcval = VEX_TRC_JMP_YIELD;       break;
            //case Ijk_EmWarn:      trcval = VEX_TRC_JMP_EMWARN;      break;
            //case Ijk_MapFail:     trcval = VEX_TRC_JMP_MAPFAIL;     break;
            case Ijk_NoDecode:    trcval = VEX_TRC_JMP_NODECODE;    break;
            case Ijk_TInval:      trcval = VEX_TRC_JMP_TINVAL;      break;
            case Ijk_NoRedir:     trcval = VEX_TRC_JMP_NOREDIR;     break;
            //case Ijk_SigTRAP:     trcval = VEX_TRC_JMP_SIGTRAP;     break;
            //case Ijk_SigSEGV:     trcval = VEX_TRC_JMP_SIGSEGV;     break;
            case Ijk_Boring:      trcval = VEX_TRC_JMP_BORING;      break;
            /* We don't expect to see the following being assisted. */
            //case Ijk_Ret:
            //case Ijk_Call:
            /* fallthrough */
            default:
               ppIRJumpKind(i->ARMin.XAssisted.jk);
               vpanic("emit_ARMInstr.ARMin_XAssisted: unexpected jump kind");
         }
         vassert(trcval != 0);
         p = imm32_to_iregNo(p, /*r*/8, trcval);

         /* movw r12, lo16(VG_(disp_cp_xassisted)) */
         /* movt r12, hi16(VG_(disp_cp_xassisted)) */
         /* bx   r12  (A1) */
         p = imm32_to_iregNo(p, /*r*/12,
                             (UInt)Ptr_to_ULong(disp_cp_xassisted));
         *p++ = 0xE12FFF1C;

         /* Fix up the conditional jump, if there was one. */
         if (i->ARMin.XAssisted.cond != ARMcc_AL) {
            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
            vassert(delta > 0 && delta < 40);
            vassert((delta & 3) == 0);
            UInt notCond = 1 ^ (UInt)i->ARMin.XAssisted.cond;
            vassert(notCond <= 13); /* Neither AL nor NV */
            delta = (delta >> 2) - 2;
            *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
         }
         goto done;
      }

      case ARMin_CMov: {
         UInt instr  = skeletal_RI84(i->ARMin.CMov.src);
         UInt subopc = X1101; /* MOV */
         UInt SBZ    = 0;
         instr |= XXXXX___(i->ARMin.CMov.cond, (1 & (subopc >> 3)),
                           (subopc << 1) & 0xF, SBZ, i->ARMin.CMov.dst);
         *p++ = instr;
         goto done;
      }
      case ARMin_Call: {
         UInt instr;
         /* Decide on a scratch reg used to hold the call address.
            This has to be done as per the comments in getRegUsage. */
         Int scratchNo;
         switch (i->ARMin.Call.nArgRegs) {
            case 0:  scratchNo = 0;  break;
            case 1:  scratchNo = 1;  break;
            case 2:  scratchNo = 2;  break;
            case 3:  scratchNo = 3;  break;
            case 4:  scratchNo = 11; break;
            default: vassert(0);
         }
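         /* That is: the scratch is the lowest-numbered of r0..r3 not
            carrying a call argument, or r11 when all four are in
            use, so the arguments themselves are never trashed. */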
         // r"scratchNo" = &target
         p = imm32_to_iregNo( (UInt*)p,
                              scratchNo, (UInt)i->ARMin.Call.target );
         // blx{cond} r"scratchNo"
         instr = XXX___XX(i->ARMin.Call.cond, X0001, X0010, /*___*/
                          X0011, scratchNo);
         instr |= 0xFFF << 8; // stick in the SBOnes
         *p++ = instr;
         goto done;
      }
      case ARMin_Mul: {
         /* E0000392   mul     r0, r2, r3
            E0810392   umull   r0(LO), r1(HI), r2, r3
            E0C10392   smull   r0(LO), r1(HI), r2, r3
         */
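         /* As the disassembly above shows, these are fixed-register
            encodings: args in r2/r3, results in r0 (and r1 for the
            widening forms), so each insn is a literal word. */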
         switch (i->ARMin.Mul.op) {
            case ARMmul_PLAIN: *p++ = 0xE0000392; goto done;
            case ARMmul_ZX:    *p++ = 0xE0810392; goto done;
            case ARMmul_SX:    *p++ = 0xE0C10392; goto done;
            default: vassert(0);
         }
         goto bad;
      }
      case ARMin_Div: {
         UInt subopc = i->ARMin.Div.op == ARMdiv_U ? X0011 : X0001;
         UInt rD    = iregNo(i->ARMin.Div.dst);
         UInt rN    = iregNo(i->ARMin.Div.argL);
         UInt rM    = iregNo(i->ARMin.Div.argR);
         UInt instr = XXXXXXXX(X1110, X0111, subopc, rD, 0xF, rM, X0001, rN);
         *p++ = instr;
         goto done;
      }
      case ARMin_LdrEX: {
         /* E1D42F9F   ldrexb r2, [r4]
            E1F42F9F   ldrexh r2, [r4]
            E1942F9F   ldrex  r2, [r4]
            E1B42F9F   ldrexd r2, r3, [r4]
         */
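         /* Fixed-register encodings again (address in r4, data in
            r2/r3), hence the literal words; likewise for ARMin_StrEX
            below, which additionally writes its status to r0. */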
         switch (i->ARMin.LdrEX.szB) {
            case 1: *p++ = 0xE1D42F9F; goto done;
            case 2: *p++ = 0xE1F42F9F; goto done;
            case 4: *p++ = 0xE1942F9F; goto done;
            case 8: *p++ = 0xE1B42F9F; goto done;
            default: break;
         }
         goto bad;
      }
      case ARMin_StrEX: {
         /* E1C40F92   strexb r0, r2, [r4]
            E1E40F92   strexh r0, r2, [r4]
            E1840F92   strex  r0, r2, [r4]
            E1A40F92   strexd r0, r2, r3, [r4]
         */
         switch (i->ARMin.StrEX.szB) {
            case 1: *p++ = 0xE1C40F92; goto done;
            case 2: *p++ = 0xE1E40F92; goto done;
            case 4: *p++ = 0xE1840F92; goto done;
            case 8: *p++ = 0xE1A40F92; goto done;
            default: break;
         }
         goto bad;
      }
      case ARMin_VLdStD: {
         UInt dD     = dregNo(i->ARMin.VLdStD.dD);
         UInt rN     = iregNo(i->ARMin.VLdStD.amode->reg);
         Int  simm11 = i->ARMin.VLdStD.amode->simm11;
         UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
         UInt bU     = simm11 >= 0 ? 1 : 0;
         UInt bL     = i->ARMin.VLdStD.isLoad ? 1 : 0;
         UInt insn;
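         /* VFP loads/stores encode the offset as an 8-bit word
            count (imm8, scaled by 4), hence the alignment assert
            and the >>2. */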
         vassert(0 == (off8 & 3));
         off8 >>= 2;
         vassert(0 == (off8 & 0xFFFFFF00));
         insn = XXXXXX__(0xE,X1101,BITS4(bU,0,0,bL),rN,dD,X1011);
         insn |= off8;
         *p++ = insn;
         goto done;
      }
      case ARMin_VLdStS: {
         UInt fD     = fregNo(i->ARMin.VLdStS.fD);
         UInt rN     = iregNo(i->ARMin.VLdStS.amode->reg);
         Int  simm11 = i->ARMin.VLdStS.amode->simm11;
         UInt off8   = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
         UInt bU     = simm11 >= 0 ? 1 : 0;
         UInt bL     = i->ARMin.VLdStS.isLoad ? 1 : 0;
         UInt bD     = fD & 1;
         UInt insn;
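         /* An S-register number is 5 bits: its low bit travels in
            the D bit of the encoding and the top four bits in the
            Fd field.  The same fD>>1 / fD&1 split recurs in the
            other S-register cases below. */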
         vassert(0 == (off8 & 3));
         off8 >>= 2;
         vassert(0 == (off8 & 0xFFFFFF00));
         insn = XXXXXX__(0xE,X1101,BITS4(bU,bD,0,bL),rN, (fD >> 1), X1010);
         insn |= off8;
         *p++ = insn;
         goto done;
      }
      case ARMin_VAluD: {
         UInt dN = dregNo(i->ARMin.VAluD.argL);
         UInt dD = dregNo(i->ARMin.VAluD.dst);
         UInt dM = dregNo(i->ARMin.VAluD.argR);
         UInt pqrs = X1111; /* undefined */
         switch (i->ARMin.VAluD.op) {
            case ARMvfp_ADD: pqrs = X0110; break;
            case ARMvfp_SUB: pqrs = X0111; break;
            case ARMvfp_MUL: pqrs = X0100; break;
            case ARMvfp_DIV: pqrs = X1000; break;
            default: goto bad;
         }
         vassert(pqrs != X1111);
         UInt bP  = (pqrs >> 3) & 1;
         UInt bQ  = (pqrs >> 2) & 1;
         UInt bR  = (pqrs >> 1) & 1;
         UInt bS  = (pqrs >> 0) & 1;
         UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,0,bQ,bR), dN, dD,
                              X1011, BITS4(0,bS,0,0), dM);
         *p++ = insn;
         goto done;
      }
      case ARMin_VAluS: {
         UInt dN = fregNo(i->ARMin.VAluS.argL);
         UInt dD = fregNo(i->ARMin.VAluS.dst);
         UInt dM = fregNo(i->ARMin.VAluS.argR);
         UInt bN = dN & 1;
         UInt bD = dD & 1;
         UInt bM = dM & 1;
         UInt pqrs = X1111; /* undefined */
         switch (i->ARMin.VAluS.op) {
            case ARMvfp_ADD: pqrs = X0110; break;
            case ARMvfp_SUB: pqrs = X0111; break;
            case ARMvfp_MUL: pqrs = X0100; break;
            case ARMvfp_DIV: pqrs = X1000; break;
            default: goto bad;
         }
         vassert(pqrs != X1111);
         UInt bP  = (pqrs >> 3) & 1;
         UInt bQ  = (pqrs >> 2) & 1;
         UInt bR  = (pqrs >> 1) & 1;
         UInt bS  = (pqrs >> 0) & 1;
         UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
                              (dN >> 1), (dD >> 1),
                              X1010, BITS4(bN,bS,bM,0), (dM >> 1));
         *p++ = insn;
         goto done;
      }
      case ARMin_VUnaryD: {
         UInt dD   = dregNo(i->ARMin.VUnaryD.dst);
         UInt dM   = dregNo(i->ARMin.VUnaryD.src);
         UInt insn = 0;
         switch (i->ARMin.VUnaryD.op) {
            case ARMvfpu_COPY:
               insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X0100,dM);
               break;
            case ARMvfpu_ABS:
               insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X1100,dM);
               break;
            case ARMvfpu_NEG:
               insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X0100,dM);
               break;
            case ARMvfpu_SQRT:
               insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X1100,dM);
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      case ARMin_VUnaryS: {
         UInt fD   = fregNo(i->ARMin.VUnaryS.dst);
         UInt fM   = fregNo(i->ARMin.VUnaryS.src);
         UInt insn = 0;
         switch (i->ARMin.VUnaryS.op) {
            case ARMvfpu_COPY:
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
                               (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
                               (fM >> 1));
               break;
            case ARMvfpu_ABS:
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
                               (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
                               (fM >> 1));
               break;
            case ARMvfpu_NEG:
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
                               (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
                               (fM >> 1));
               break;
            case ARMvfpu_SQRT:
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
                               (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
                               (fM >> 1));
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      case ARMin_VCmpD: {
         UInt dD   = dregNo(i->ARMin.VCmpD.argL);
         UInt dM   = dregNo(i->ARMin.VCmpD.argR);
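         /* FCMPD only sets the FPSCR flags; FMSTAT (aka vmrs
            APSR_nzcv, fpscr) copies them into the APSR so that
            ordinary conditional instructions can test the result. */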
         UInt insn = XXXXXXXX(0xE, X1110, X1011, X0100, dD, X1011, X0100, dM);
         *p++ = insn;       /* FCMPD dD, dM */
         *p++ = 0xEEF1FA10; /* FMSTAT */
         goto done;
      }
      case ARMin_VCMovD: {
         UInt cc = (UInt)i->ARMin.VCMovD.cond;
         UInt dD = dregNo(i->ARMin.VCMovD.dst);
         UInt dM = dregNo(i->ARMin.VCMovD.src);
         vassert(cc < 16 && cc != ARMcc_AL);
         UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
         *p++ = insn;
         goto done;
      }
      case ARMin_VCMovS: {
         UInt cc = (UInt)i->ARMin.VCMovS.cond;
         UInt fD = fregNo(i->ARMin.VCMovS.dst);
         UInt fM = fregNo(i->ARMin.VCMovS.src);
         vassert(cc < 16 && cc != ARMcc_AL);
         UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
                              X0000,(fD >> 1),X1010,
                              BITS4(0,1,(fM & 1),0), (fM >> 1));
         *p++ = insn;
         goto done;
      }
      case ARMin_VCvtSD: {
         if (i->ARMin.VCvtSD.sToD) {
            UInt dD = dregNo(i->ARMin.VCvtSD.dst);
            UInt fM = fregNo(i->ARMin.VCvtSD.src);
            UInt insn = XXXXXXXX(0xE, X1110, X1011, X0111, dD, X1010,
                                 BITS4(1,1, (fM & 1), 0),
                                 (fM >> 1));
            *p++ = insn;
            goto done;
         } else {
            UInt fD = fregNo(i->ARMin.VCvtSD.dst);
            UInt dM = dregNo(i->ARMin.VCvtSD.src);
            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1),
                                 X0111, (fD >> 1),
                                 X1011, X1100, dM);
            *p++ = insn;
            goto done;
         }
         /*UNREACHED*/
         goto bad;
      }
      case ARMin_VXferD: {
         UInt dD  = dregNo(i->ARMin.VXferD.dD);
         UInt rHi = iregNo(i->ARMin.VXferD.rHi);
         UInt rLo = iregNo(i->ARMin.VXferD.rLo);
         /* vmov dD, rLo, rHi is
            E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
            vmov rLo, rHi, dD is
            E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
         */
         UInt insn
            = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
                       rHi, rLo, 0xB,
                       BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
         *p++ = insn;
         goto done;
      }
      case ARMin_VXferS: {
         UInt fD  = fregNo(i->ARMin.VXferS.fD);
         UInt rLo = iregNo(i->ARMin.VXferS.rLo);
         /* vmov fD, rLo is
            E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
            vmov rLo, fD is
            E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
         */
         UInt insn
            = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
                       (fD >> 1) & 0xF, rLo, 0xA,
                       BITS4((fD & 1),0,0,1), 0);
         *p++ = insn;
         goto done;
      }
      case ARMin_VCvtID: {
         Bool iToD = i->ARMin.VCvtID.iToD;
         Bool syned = i->ARMin.VCvtID.syned;
         if (iToD && syned) {
            // FSITOD: I32S-in-freg to F64-in-dreg
            UInt regF = fregNo(i->ARMin.VCvtID.src);
            UInt regD = dregNo(i->ARMin.VCvtID.dst);
            UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
                                 X1011, BITS4(1,1,(regF & 1),0),
                                 (regF >> 1) & 0xF);
            *p++ = insn;
            goto done;
         }
         if (iToD && (!syned)) {
            // FUITOD: I32U-in-freg to F64-in-dreg
            UInt regF = fregNo(i->ARMin.VCvtID.src);
            UInt regD = dregNo(i->ARMin.VCvtID.dst);
            UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
                                 X1011, BITS4(0,1,(regF & 1),0),
                                 (regF >> 1) & 0xF);
            *p++ = insn;
            goto done;
         }
         if ((!iToD) && syned) {
            // FTOSID: F64-in-dreg to I32S-in-freg
            UInt regD = dregNo(i->ARMin.VCvtID.src);
            UInt regF = fregNo(i->ARMin.VCvtID.dst);
            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
                                 X1101, (regF >> 1) & 0xF,
                                 X1011, X0100, regD);
            *p++ = insn;
            goto done;
         }
         if ((!iToD) && (!syned)) {
            // FTOUID: F64-in-dreg to I32U-in-freg
            UInt regD = dregNo(i->ARMin.VCvtID.src);
            UInt regF = fregNo(i->ARMin.VCvtID.dst);
            UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
                                 X1100, (regF >> 1) & 0xF,
                                 X1011, X0100, regD);
            *p++ = insn;
            goto done;
         }
         /*UNREACHED*/
         vassert(0);
      }
      case ARMin_FPSCR: {
         Bool toFPSCR = i->ARMin.FPSCR.toFPSCR;
         UInt iReg    = iregNo(i->ARMin.FPSCR.iReg);
         if (toFPSCR) {
            /* fmxr fpscr, iReg is EEE1 iReg A10 */
            *p++ = 0xEEE10A10 | ((iReg & 0xF) << 12);
            goto done;
         }
         goto bad; // FPSCR -> iReg case currently ATC (awaiting test case)
      }
      case ARMin_MFence: {
         // It's not clear (to me) how the pre-v7 CP15 barrier
         // encodings below relate to the ARMv7 versions, so just
         // use the v7 ones, since they are at least well documented.
         //*p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
         //*p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
         //*p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4  (ISB) */
         *p++ = 0xF57FF04F; /* DSB sy */
         *p++ = 0xF57FF05F; /* DMB sy */
         *p++ = 0xF57FF06F; /* ISB */
         goto done;
      }
      case ARMin_CLREX: {
         *p++ = 0xF57FF01F; /* clrex */
         goto done;
      }

      case ARMin_NLdStQ: {
         UInt regD = qregNo(i->ARMin.NLdStQ.dQ) << 1;
         UInt regN, regM;
         UInt D = regD >> 4;
         UInt bL = i->ARMin.NLdStQ.isLoad ? 1 : 0;
         UInt insn;
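         /* A Q register occupies a pair of D registers, hence the
            <<1.  NEON encodes 5-bit D-register numbers as a 4-bit
            field plus an extension bit (D/N/M); the regX >> 4 and
            regX &= 0xF splits here and in the other NEON cases
            perform that division.  regM == 15 selects the
            no-writeback addressing form. */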
         vassert(hregClass(i->ARMin.NLdStQ.dQ) == HRcVec128);
         regD &= 0xF;
         if (i->ARMin.NLdStQ.amode->tag == ARMamN_RR) {
            regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rN);
            regM = iregNo(i->ARMin.NLdStQ.amode->ARMamN.RR.rM);
         } else {
            regN = iregNo(i->ARMin.NLdStQ.amode->ARMamN.R.rN);
            regM = 15;
         }
         insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
                              regN, regD, X1010, X1000, regM);
         *p++ = insn;
         goto done;
      }
      case ARMin_NLdStD: {
         UInt regD = dregNo(i->ARMin.NLdStD.dD);
         UInt regN, regM;
         UInt D = regD >> 4;
         UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
         UInt insn;
         vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
         regD &= 0xF;
         if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
            regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
            regM = iregNo(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
         } else {
            regN = iregNo(i->ARMin.NLdStD.amode->ARMamN.R.rN);
            regM = 15;
         }
         insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
                              regN, regD, X0111, X1000, regM);
         *p++ = insn;
         goto done;
      }
      case ARMin_NUnaryS: {
         UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
         UInt regD, D;
         UInt regM, M;
         UInt size = i->ARMin.NUnaryS.size;
         UInt insn;
         UInt opc, opc1, opc2;
         switch (i->ARMin.NUnaryS.op) {
            case ARMneon_VDUP:
               if (i->ARMin.NUnaryS.size >= 16)
                  goto bad;
               if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
                  goto bad;
               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
                  goto bad;
               regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
                        ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1)
                        : dregNo(i->ARMin.NUnaryS.dst->reg);
               regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
                        ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1)
                        : dregNo(i->ARMin.NUnaryS.src->reg);
               D = regD >> 4;
               M = regM >> 4;
               regD &= 0xf;
               regM &= 0xf;
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
                               (i->ARMin.NUnaryS.size & 0xf), regD,
                               X1100, BITS4(0,Q,M,0), regM);
               *p++ = insn;
               goto done;
            case ARMneon_SETELEM:
               regD = Q ? (qregNo(i->ARMin.NUnaryS.dst->reg) << 1) :
                                dregNo(i->ARMin.NUnaryS.dst->reg);
               regM = iregNo(i->ARMin.NUnaryS.src->reg);
               M = regM >> 4;
               D = regD >> 4;
               regM &= 0xF;
               regD &= 0xF;
               if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
                  goto bad;
               switch (size) {
                  case 0:
                     if (i->ARMin.NUnaryS.dst->index > 7)
                        goto bad;
                     opc = X1000 | i->ARMin.NUnaryS.dst->index;
                     break;
                  case 1:
                     if (i->ARMin.NUnaryS.dst->index > 3)
                        goto bad;
                     opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
                     break;
                  case 2:
                     if (i->ARMin.NUnaryS.dst->index > 1)
                        goto bad;
                     opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
                     break;
                  default:
                     goto bad;
               }
               opc1 = (opc >> 2) & 3;
               opc2 = opc & 3;
               insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
                               regD, regM, X1011,
                               BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
               *p++ = insn;
               goto done;
            case ARMneon_GETELEMU:
               regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
                                dregNo(i->ARMin.NUnaryS.src->reg);
               regD = iregNo(i->ARMin.NUnaryS.dst->reg);
               M = regM >> 4;
               D = regD >> 4;
               regM &= 0xF;
               regD &= 0xF;
               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
                  goto bad;
               switch (size) {
                  case 0:
                     if (Q && i->ARMin.NUnaryS.src->index > 7) {
                        regM++;
                        i->ARMin.NUnaryS.src->index -= 8;
                     }
                     if (i->ARMin.NUnaryS.src->index > 7)
                        goto bad;
                     opc = X1000 | i->ARMin.NUnaryS.src->index;
                     break;
                  case 1:
                     if (Q && i->ARMin.NUnaryS.src->index > 3) {
                        regM++;
                        i->ARMin.NUnaryS.src->index -= 4;
                     }
                     if (i->ARMin.NUnaryS.src->index > 3)
                        goto bad;
                     opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
                     break;
                  case 2:
                     goto bad;
                  default:
                     goto bad;
               }
               opc1 = (opc >> 2) & 3;
               opc2 = opc & 3;
               insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
                               regM, regD, X1011,
                               BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
               *p++ = insn;
               goto done;
            case ARMneon_GETELEMS:
               regM = Q ? (qregNo(i->ARMin.NUnaryS.src->reg) << 1) :
                                dregNo(i->ARMin.NUnaryS.src->reg);
               regD = iregNo(i->ARMin.NUnaryS.dst->reg);
               M = regM >> 4;
               D = regD >> 4;
               regM &= 0xF;
               regD &= 0xF;
               if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
                  goto bad;
               switch (size) {
                  case 0:
                     if (Q && i->ARMin.NUnaryS.src->index > 7) {
                        regM++;
                        i->ARMin.NUnaryS.src->index -= 8;
                     }
                     if (i->ARMin.NUnaryS.src->index > 7)
                        goto bad;
                     opc = X1000 | i->ARMin.NUnaryS.src->index;
                     break;
                  case 1:
                     if (Q && i->ARMin.NUnaryS.src->index > 3) {
                        regM++;
                        i->ARMin.NUnaryS.src->index -= 4;
                     }
                     if (i->ARMin.NUnaryS.src->index > 3)
                        goto bad;
                     opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
                     break;
                  case 2:
                     if (Q && i->ARMin.NUnaryS.src->index > 1) {
                        regM++;
                        i->ARMin.NUnaryS.src->index -= 2;
                     }
                     if (i->ARMin.NUnaryS.src->index > 1)
                        goto bad;
                     opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
                     break;
                  default:
                     goto bad;
               }
               opc1 = (opc >> 2) & 3;
               opc2 = opc & 3;
               insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
                               regM, regD, X1011,
                               BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
               *p++ = insn;
               goto done;
            default:
               goto bad;
         }
      }
      case ARMin_NUnary: {
         UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
         UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
                       ? (qregNo(i->ARMin.NUnary.dst) << 1)
                       : dregNo(i->ARMin.NUnary.dst);
         UInt regM, M;
         UInt D = regD >> 4;
         UInt sz1 = i->ARMin.NUnary.size >> 1;
         UInt sz2 = i->ARMin.NUnary.size & 1;
         UInt sz = i->ARMin.NUnary.size;
         UInt insn;
         UInt F = 0; /* TODO: floating point EQZ ??? */
         if (i->ARMin.NUnary.op != ARMneon_DUP) {
            regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
                     ? (qregNo(i->ARMin.NUnary.src) << 1)
                     : dregNo(i->ARMin.NUnary.src);
            M = regM >> 4;
         } else {
            regM = iregNo(i->ARMin.NUnary.src);
            M = regM >> 4;
         }
         regD &= 0xF;
         regM &= 0xF;
         switch (i->ARMin.NUnary.op) {
            case ARMneon_COPY: /* VMOV reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
                               BITS4(M,Q,M,1), regM);
               break;
            case ARMneon_COPYN: /* VMOVN regD, regQ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0010, BITS4(0,0,M,0), regM);
               break;
            case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0010, BITS4(1,0,M,0), regM);
               break;
            case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0010, BITS4(0,1,M,0), regM);
               break;
            case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0010, BITS4(1,1,M,0), regM);
               break;
            case ARMneon_COPYLS: /* VMOVL regQ, regD */
               if (sz >= 3)
                  goto bad;
               insn = XXXXXXXX(0xF, X0010,
                               BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
                               BITS4((sz == 0) ? 1 : 0,0,0,0),
                               regD, X1010, BITS4(0,0,M,1), regM);
               break;
            case ARMneon_COPYLU: /* VMOVL regQ, regD */
               if (sz >= 3)
                  goto bad;
               insn = XXXXXXXX(0xF, X0011,
                               BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
                               BITS4((sz == 0) ? 1 : 0,0,0,0),
                               regD, X1010, BITS4(0,0,M,1), regM);
               break;
            case ARMneon_NOT: /* VMVN reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_EQZ:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
                               regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_CNT:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_CLZ:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, X0100, BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_CLS:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, X0100, BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_ABS:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
                               regD, X0011, BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_DUP:
               sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
               sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
               vassert(sz1 + sz2 < 2);
               insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
                               X1011, BITS4(D,0,sz2,1), X0000);
               break;
            case ARMneon_REV16:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_REV32:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_REV64:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_PADDLU:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, X0010, BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_PADDLS:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
                               regD, X0010, BITS4(0,Q,M,0), regM);
               break;
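            /* For the VQSHLN* cases below, 'size' carries a packed
               immediate-shift encoding: bit 6 is the L bit and bits
               [5:0] are imm6, scattered across three fields of the
               instruction.  The VCVT fixed-point cases further down
               use just the 6-bit imm6 part. */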
            case ARMneon_VQSHLNUU:
               insn = XXXXXXXX(0xF, X0011,
                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
                               sz & 0xf, regD, X0111,
                               BITS4(sz >> 6,Q,M,1), regM);
               break;
            case ARMneon_VQSHLNSS:
               insn = XXXXXXXX(0xF, X0010,
                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
                               sz & 0xf, regD, X0111,
                               BITS4(sz >> 6,Q,M,1), regM);
               break;
            case ARMneon_VQSHLNUS:
               insn = XXXXXXXX(0xF, X0011,
                               (1 << 3) | (D << 2) | ((sz >> 4) & 3),
                               sz & 0xf, regD, X0110,
                               BITS4(sz >> 6,Q,M,1), regM);
               break;
            case ARMneon_VCVTFtoS:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VCVTFtoU:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_VCVTStoF:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VCVTUtoF:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_VCVTFtoFixedU:
               sz1 = (sz >> 5) & 1;
               sz2 = (sz >> 4) & 1;
               sz &= 0xf;
               insn = XXXXXXXX(0xF, X0011,
                               BITS4(1,D,sz1,sz2), sz, regD, X1111,
                               BITS4(0,Q,M,1), regM);
               break;
            case ARMneon_VCVTFtoFixedS:
               sz1 = (sz >> 5) & 1;
               sz2 = (sz >> 4) & 1;
               sz &= 0xf;
               insn = XXXXXXXX(0xF, X0010,
                               BITS4(1,D,sz1,sz2), sz, regD, X1111,
                               BITS4(0,Q,M,1), regM);
               break;
            case ARMneon_VCVTFixedUtoF:
               sz1 = (sz >> 5) & 1;
               sz2 = (sz >> 4) & 1;
               sz &= 0xf;
               insn = XXXXXXXX(0xF, X0011,
                               BITS4(1,D,sz1,sz2), sz, regD, X1110,
                               BITS4(0,Q,M,1), regM);
               break;
            case ARMneon_VCVTFixedStoF:
               sz1 = (sz >> 5) & 1;
               sz2 = (sz >> 4) & 1;
               sz &= 0xf;
               insn = XXXXXXXX(0xF, X0010,
                               BITS4(1,D,sz1,sz2), sz, regD, X1110,
                               BITS4(0,Q,M,1), regM);
               break;
            case ARMneon_VCVTF32toF16:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
                               BITS4(0,0,M,0), regM);
               break;
            case ARMneon_VCVTF16toF32:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
                               BITS4(0,0,M,0), regM);
               break;
            case ARMneon_VRECIP:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VRECIPF:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VABSFP:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
                               BITS4(0,Q,M,0), regM);
               break;
            case ARMneon_VRSQRTEFP:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_VRSQRTE:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
                               BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_VNEGF:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
                               BITS4(1,Q,M,0), regM);
               break;

            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      case ARMin_NDual: {
         UInt Q = i->ARMin.NDual.Q ? 1 : 0;
         UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
                       ? (qregNo(i->ARMin.NDual.arg1) << 1)
                       : dregNo(i->ARMin.NDual.arg1);
         UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
                       ? (qregNo(i->ARMin.NDual.arg2) << 1)
                       : dregNo(i->ARMin.NDual.arg2);
         UInt D = regD >> 4;
         UInt M = regM >> 4;
         UInt sz1 = i->ARMin.NDual.size >> 1;
         UInt sz2 = i->ARMin.NDual.size & 1;
         UInt insn;
         regD &= 0xF;
         regM &= 0xF;
         switch (i->ARMin.NDual.op) {
            case ARMneon_TRN: /* VTRN reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0000, BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_ZIP: /* VZIP reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0001, BITS4(1,Q,M,0), regM);
               break;
            case ARMneon_UZP: /* VUZP reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
                               regD, X0001, BITS4(0,Q,M,0), regM);
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
      case ARMin_NBinary: {
         UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
         UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
                       ? (qregNo(i->ARMin.NBinary.dst) << 1)
                       : dregNo(i->ARMin.NBinary.dst);
         UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
                       ? (qregNo(i->ARMin.NBinary.argL) << 1)
                       : dregNo(i->ARMin.NBinary.argL);
         UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
                       ? (qregNo(i->ARMin.NBinary.argR) << 1)
                       : dregNo(i->ARMin.NBinary.argR);
         UInt sz1 = i->ARMin.NBinary.size >> 1;
         UInt sz2 = i->ARMin.NBinary.size & 1;
         UInt D = regD >> 4;
         UInt N = regN >> 4;
         UInt M = regM >> 4;
         UInt insn;
         regD &= 0xF;
         regM &= 0xF;
         regN &= 0xF;
         switch (i->ARMin.NBinary.op) {
            case ARMneon_VAND: /* VAND reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
                               BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VORR: /* VORR reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
                               BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VXOR: /* VEOR reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
                               BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VADD: /* VADD reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1000, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VSUB: /* VSUB reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1000, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0110, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0110, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0110, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0110, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0001, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0001, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0000, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0000, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0010, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0010, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0011, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0011, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0011, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0011, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1000, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4*/
               if (i->ARMin.NBinary.size >= 16)
                  goto bad;
               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
                               i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
                               regM);
               break;
            case ARMneon_VMUL:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1001, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMULLU:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
                               X1100, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VMULLS:
               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
                               X1100, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VMULP:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1001, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMULFP:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
                               X1101, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VMULLP:
               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
                               X1110, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VQDMULH:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1011, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VQRDMULH:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1011, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VQDMULL:
               insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
                               X1101, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VTBL:
               insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
                               X1000, BITS4(N,0,M,0), regM);
               break;
            case ARMneon_VPADD:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1011, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VPADDFP:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
                               X1101, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VPMINU:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1010, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VPMINS:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1010, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VPMAXU:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X1010, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VPMAXS:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X1010, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VADDFP: /* VADD reg, reg, reg */
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
                               X1101, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VSUBFP: /* VSUB reg, reg, reg */
4294               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
4295                               X1101, BITS4(N,Q,M,0), regM);
4296               break;
            case ARMneon_VABDFP: /* VABD reg, reg, reg */
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
                               X1101, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VMINF:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
                               X1111, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VMAXF:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
                               X1111, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VPMINF:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
                               X1111, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VPMAXF:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
                               X1111, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VRECPS:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
                               BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VCGTF:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
                               BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VCGEF:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
                               BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VCEQF:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
                               BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VRSQRTS:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
                               BITS4(N,Q,M,1), regM);
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
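
      /* For reference: XXXXXXXX packs eight 4-bit fields into one
         32-bit word, most significant nibble first.  A worked example
         (assuming the standard ARM NEON encoding): ARMneon_VADDFP with
         Q=1 and D/N/M/regD/regN/regM all zero gives the nibbles
         F,2,0,0,0,D,4,0, that is 0xF2000D40, which is
         VADD.F32 q0, q0, q0. */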
      case ARMin_NShift: {
         UInt Q = i->ARMin.NShift.Q ? 1 : 0;
         UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
                       ? (qregNo(i->ARMin.NShift.dst) << 1)
                       : dregNo(i->ARMin.NShift.dst);
         UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
                       ? (qregNo(i->ARMin.NShift.argL) << 1)
                       : dregNo(i->ARMin.NShift.argL);
         UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
                       ? (qregNo(i->ARMin.NShift.argR) << 1)
                       : dregNo(i->ARMin.NShift.argR);
         UInt sz1 = i->ARMin.NShift.size >> 1;
         UInt sz2 = i->ARMin.NShift.size & 1;
         UInt D = regD >> 4;
         UInt N = regN >> 4;
         UInt M = regM >> 4;
         UInt insn;
         regD &= 0xF;
         regM &= 0xF;
         regN &= 0xF;
         switch (i->ARMin.NShift.op) {
            case ARMneon_VSHL:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0100, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VSAL:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0100, BITS4(N,Q,M,0), regM);
               break;
            case ARMneon_VQSHL:
               insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
                               X0100, BITS4(N,Q,M,1), regM);
               break;
            case ARMneon_VQSAL:
               insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
                               X0100, BITS4(N,Q,M,1), regM);
               break;
            default:
               goto bad;
         }
         *p++ = insn;
         goto done;
      }
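
      /* Note on the four cases above: the X0011 (U=1) encodings are
         the unsigned/logical shifts (VSHL, VQSHL) and the X0010 (U=0)
         encodings the signed/arithmetic ones (VSAL, VQSAL).  These are
         the NEON shift-by-register forms, in which a negative count in
         the shift register shifts right rather than left. */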
      case ARMin_NeonImm: {
         UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
         UInt regD = Q ? (qregNo(i->ARMin.NeonImm.dst) << 1) :
                          dregNo(i->ARMin.NeonImm.dst);
         UInt D = regD >> 4;
         UInt imm = i->ARMin.NeonImm.imm->imm8;
         UInt tp = i->ARMin.NeonImm.imm->type;
         UInt j = imm >> 7;
         UInt imm3 = (imm >> 4) & 0x7;
         UInt imm4 = imm & 0xF;
         UInt cmode, op;
         UInt insn;
         regD &= 0xF;
         if (tp == 9)
            op = 1;
         else
            op = 0;
         switch (tp) {
            case 0:
            case 1:
            case 2:
            case 3:
            case 4:
            case 5:
               cmode = tp << 1;
               break;
            case 9:
            case 6:
               cmode = 14;
               break;
            case 7:
               cmode = 12;
               break;
            case 8:
               cmode = 13;
               break;
            case 10:
               cmode = 15;
               break;
            default:
               vpanic("ARMin_NeonImm");
         }
         insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
                         cmode, BITS4(0,Q,op,1), imm4);
         *p++ = insn;
         goto done;
      }
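
      /* The tp -> (cmode,op) mapping above appears to follow the ARM
         "expand immediate" scheme: tp 0..5 give the shifted 32- and
         16-bit forms (cmode 0,2,4,6,8,10), tp 7/8 the shifted-ones
         forms (cmode 12/13), tp 6 the replicated 8-bit form (cmode 14,
         op=0), tp 9 the 64-bit bit-to-byte expansion (cmode 14, op=1),
         and tp 10 the FP32 form (cmode 15).  The imm8 value itself
         travels in the j:imm3:imm4 fields. */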
      case ARMin_NCMovQ: {
         UInt cc = (UInt)i->ARMin.NCMovQ.cond;
         UInt qM = qregNo(i->ARMin.NCMovQ.src) << 1;
         UInt qD = qregNo(i->ARMin.NCMovQ.dst) << 1;
         UInt vM = qM & 0xF;
         UInt vD = qD & 0xF;
         UInt M  = (qM >> 4) & 1;
         UInt D  = (qD >> 4) & 1;
         vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
         /* b!cc here+8: !cc A00 0000 */
         UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
         *p++ = insn;
         /* vmov qD, qM */
         insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
                         vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
         *p++ = insn;
         goto done;
      }
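
      /* NEON data-processing insns cannot be made conditional in ARM
         state, hence the two-insn trick above: cc ^ 1 flips the bottom
         bit of the condition, giving its inverse (AL and NV having
         been excluded by the vassert), so e.g. for ARMcc_EQ we emit
         "bne .+8" followed by an unconditional "vmov qD, qM". */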
      case ARMin_Add32: {
         UInt regD = iregNo(i->ARMin.Add32.rD);
         UInt regN = iregNo(i->ARMin.Add32.rN);
         UInt imm32 = i->ARMin.Add32.imm32;
         vassert(regD != regN);
         /* MOV regD, imm32 */
         p = imm32_to_iregNo((UInt *)p, regD, imm32);
         /* ADD regD, regN, regD */
         UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
         *p++ = insn;
         goto done;
      }

      case ARMin_EvCheck: {
         /* We generate:
               ldr  r12, [r8 + #4]   4 == offsetof(host_EvC_COUNTER)
               subs r12, r12, #1  (A1)
               str  r12, [r8 + #4]   4 == offsetof(host_EvC_COUNTER)
               bpl  nofail
               ldr  r12, [r8 + #0]   0 == offsetof(host_EvC_FAILADDR)
               bx   r12
              nofail:
         */
         UInt* p0 = p;
         p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
                                i->ARMin.EvCheck.amCounter);
         *p++ = 0xE25CC001; /* subs r12, r12, #1 */
         p = do_load_or_store32(p, False/*!isLoad*/, /*r*/12,
                                i->ARMin.EvCheck.amCounter);
         *p++ = 0x5A000001; /* bpl nofail */
         p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
                                i->ARMin.EvCheck.amFailAddr);
         *p++ = 0xE12FFF1C; /* bx r12 */
         /* nofail: */

         /* Crosscheck */
         vassert(evCheckSzB_ARM() == (UChar*)p - (UChar*)p0);
         goto done;
      }
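
      /* (Sizing note: assuming do_load_or_store32 emits exactly one
         word for each of these small-offset accesses, the sequence is
         six 4-byte insns, matching the 24 returned by evCheckSzB_ARM
         and enforced by the crosscheck above.) */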

      case ARMin_ProfInc: {
         /* We generate:
              (ctrP is unknown now, so use 0x65556555 in the
              expectation that a later call to LibVEX_patchProfCtr
              will be used to fill in the immediate fields once the
              right value is known.)
            movw r12, lo16(0x65556555)
            movt r12, hi16(0x65556555)
            ldr  r11, [r12]
            adds r11, r11, #1
            str  r11, [r12]
            ldr  r11, [r12+4]
            adc  r11, r11, #0
            str  r11, [r12+4]
         */
         p = imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555);
         *p++ = 0xE59CB000;
         *p++ = 0xE29BB001;
         *p++ = 0xE58CB000;
         *p++ = 0xE59CB004;
         *p++ = 0xE2ABB000;
         *p++ = 0xE58CB004;
         /* Tell the caller .. */
         vassert(!(*is_profInc));
         *is_profInc = True;
         goto done;
      }
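
      /* (The sequence does a 64-bit increment in two 32-bit halves:
         "adds" sets the carry flag when the low word wraps, and "adc"
         folds that carry into the high word.) */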

      /* ... */
      default:
         goto bad;
   }

  bad:
   ppARMInstr(i);
   vpanic("emit_ARMInstr");
   /*NOTREACHED*/

  done:
   vassert(((UChar*)p) - &buf[0] <= 32);
   return ((UChar*)p) - &buf[0];
}


/* How big is an event check?  See case for ARMin_EvCheck in
   emit_ARMInstr just above.  That crosschecks what this returns, so
   we can tell if we're inconsistent. */
Int evCheckSzB_ARM ( void )
{
   return 24;
}


/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange chainXDirect_ARM ( void* place_to_chain,
                                 void* disp_cp_chain_me_EXPECTED,
                                 void* place_to_jump_to )
{
   /* What we're expecting to see is:
        movw r12, lo16(disp_cp_chain_me_EXPECTED)
        movt r12, hi16(disp_cp_chain_me_EXPECTED)
        blx  r12
      viz
        <8 bytes generated by imm32_to_iregNo_EXACTLY2>
        E1 2F FF 3C
   */
   UInt* p = (UInt*)place_to_chain;
   vassert(0 == (3 & (HWord)p));
   vassert(is_imm32_to_iregNo_EXACTLY2(
              p, /*r*/12, (UInt)Ptr_to_ULong(disp_cp_chain_me_EXPECTED)));
   vassert(p[2] == 0xE12FFF3C);
   /* And what we want to change it to is either:
        (general case)
          movw r12, lo16(place_to_jump_to)
          movt r12, hi16(place_to_jump_to)
          bx   r12
        viz
          <8 bytes generated by imm32_to_iregNo_EXACTLY2>
          E1 2F FF 1C
      ---OR---
        in the case where the displacement falls within 26 bits
          b disp24; undef; undef
        viz
          EA <3 bytes == disp24>
          FF 00 00 00
          FF 00 00 00

      In both cases the replacement has the same length as the original.
      To remain sane & verifiable,
      (1) limit the displacement for the short form to
          (say) +/- 30 million, so as to avoid wraparound
          off-by-ones
      (2) even if the short form is applicable, once every (say)
          1024 times use the long form anyway, so as to maintain
          verifiability
   */

   /* This is the delta we need to put into a B insn.  It's relative
      to the start of the next-but-one insn, hence the -8.  */
   Long delta   = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)8;
   Bool shortOK = delta >= -30*1000*1000 && delta < 30*1000*1000;
   vassert(0 == (delta & (Long)3));
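
   /* For example: if place_to_jump_to were p + 0x1000, then delta
      would be 0xFF8, simm24 (computed below) 0x3FE, and the short form
      0xEA000000 | 0x3FE == 0xEA0003FE, i.e. "b .+0x1000". */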

   static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
   if (shortOK) {
      shortCTR++; // thread safety bleh
      if (0 == (shortCTR & 0x3FF)) {
         shortOK = False;
         if (0)
            vex_printf("QQQ chainXDirect_ARM: shortCTR = %u, "
                       "using long form\n", shortCTR);
      }
   }

   /* And make the modifications. */
   if (shortOK) {
      Int simm24 = (Int)(delta >> 2);
      vassert(simm24 == ((simm24 << 8) >> 8));
      p[0] = 0xEA000000 | (simm24 & 0x00FFFFFF);
      p[1] = 0xFF000000;
      p[2] = 0xFF000000;
   } else {
      (void)imm32_to_iregNo_EXACTLY2(
               p, /*r*/12, (UInt)Ptr_to_ULong(place_to_jump_to));
      p[2] = 0xE12FFF1C;
   }

   VexInvalRange vir = {(HWord)p, 12};
   return vir;
}


/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange unchainXDirect_ARM ( void* place_to_unchain,
                                   void* place_to_jump_to_EXPECTED,
                                   void* disp_cp_chain_me )
{
   /* What we're expecting to see is:
        (general case)
          movw r12, lo16(place_to_jump_to_EXPECTED)
          movt r12, hi16(place_to_jump_to_EXPECTED)
          bx   r12
        viz
          <8 bytes generated by imm32_to_iregNo_EXACTLY2>
          E1 2F FF 1C
      ---OR---
        in the case where the displacement falls within 26 bits
          b disp24; undef; undef
        viz
          EA <3 bytes == disp24>
          FF 00 00 00
          FF 00 00 00
   */
   UInt* p = (UInt*)place_to_unchain;
   vassert(0 == (3 & (HWord)p));

   Bool valid = False;
   if (is_imm32_to_iregNo_EXACTLY2(
          p, /*r*/12, (UInt)Ptr_to_ULong(place_to_jump_to_EXPECTED))
       && p[2] == 0xE12FFF1C) {
      valid = True; /* it's the long form */
      if (0)
         vex_printf("QQQ unchainXDirect_ARM: found long form\n");
   } else
   if ((p[0] >> 24) == 0xEA && p[1] == 0xFF000000 && p[2] == 0xFF000000) {
      /* It's the short form.  Check the displacement is right. */
      Int simm24 = p[0] & 0x00FFFFFF;
      simm24 <<= 8; simm24 >>= 8;
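      /* (The <<8 / >>8 pair sign-extends the 24-bit field; like much
         of VEX this assumes that >> on a signed int is an arithmetic
         shift.) */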
      if ((UChar*)p + (simm24 << 2) + 8 == (UChar*)place_to_jump_to_EXPECTED) {
         valid = True;
         if (0)
            vex_printf("QQQ unchainXDirect_ARM: found short form\n");
      }
   }
   vassert(valid);

   /* And what we want to change it to is:
        movw r12, lo16(disp_cp_chain_me)
        movt r12, hi16(disp_cp_chain_me)
        blx  r12
      viz
        <8 bytes generated by imm32_to_iregNo_EXACTLY2>
        E1 2F FF 3C
   */
   (void)imm32_to_iregNo_EXACTLY2(
            p, /*r*/12, (UInt)Ptr_to_ULong(disp_cp_chain_me));
   p[2] = 0xE12FFF3C;
   VexInvalRange vir = {(HWord)p, 12};
   return vir;
}


/* Patch the counter address into a profile inc point, as previously
   created by the ARMin_ProfInc case for emit_ARMInstr. */
VexInvalRange patchProfInc_ARM ( void*  place_to_patch,
                                 ULong* location_of_counter )
{
   vassert(sizeof(ULong*) == 4);
   UInt* p = (UInt*)place_to_patch;
   vassert(0 == (3 & (HWord)p));
   vassert(is_imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555));
   vassert(p[2] == 0xE59CB000);
   vassert(p[3] == 0xE29BB001);
   vassert(p[4] == 0xE58CB000);
   vassert(p[5] == 0xE59CB004);
   vassert(p[6] == 0xE2ABB000);
   vassert(p[7] == 0xE58CB004);
   imm32_to_iregNo_EXACTLY2(p, /*r*/12,
                            (UInt)Ptr_to_ULong(location_of_counter));
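   /* Only the movw/movt pair (the first 8 bytes) has changed, hence
      the 8-byte invalidation range below. */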
   VexInvalRange vir = {(HWord)p, 8};
   return vir;
}


#undef BITS4
#undef X0000
#undef X0001
#undef X0010
#undef X0011
#undef X0100
#undef X0101
#undef X0110
#undef X0111
#undef X1000
#undef X1001
#undef X1010
#undef X1011
#undef X1100
#undef X1101
#undef X1110
#undef X1111
#undef XXXXX___
#undef XXXXXX__
#undef XXX___XX
#undef XXXXX__X
#undef XXXXXXXX
#undef XX______

/*---------------------------------------------------------------*/
/*--- end                                     host_arm_defs.c ---*/
/*---------------------------------------------------------------*/
