/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2015 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}

void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_arm64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
   regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
   regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_FP;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_mips64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS64.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS64.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS64.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_tilegx)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_pc;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r54;
   regs->misc.TILEGX.r52
      = VG_(threads)[tid].arch.vex.guest_r52;
   regs->misc.TILEGX.r55
      = VG_(threads)[tid].arch.vex.guest_r55;
#  else
#    error "Unknown arch"
#  endif
}

void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}
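
/* Illustrative only (not part of the build): a tool can pull a whole
   shadow copy of the guest state in one call, since offset 0 and size
   sizeof(VexGuestArchState) satisfy the bounds checks above:

      VexGuestArchState shadow1;
      VG_(get_shadow_regs_area)( tid, (UChar*)&shadow1, 1,
                                 0, sizeof shadow1 );

   Smaller (offset, size) pairs read individual guest registers'
   shadows in the same way. */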


static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, UWord))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
   VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %u\n", tid);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R7" , vex->guest_R7 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32) || defined(VGA_mips64)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#elif defined(VGA_arm64)
   (*f)(tid, "x0" , vex->guest_X0 );
   (*f)(tid, "x1" , vex->guest_X1 );
   (*f)(tid, "x2" , vex->guest_X2 );
   (*f)(tid, "x3" , vex->guest_X3 );
   (*f)(tid, "x4" , vex->guest_X4 );
   (*f)(tid, "x5" , vex->guest_X5 );
   (*f)(tid, "x6" , vex->guest_X6 );
   (*f)(tid, "x7" , vex->guest_X7 );
   (*f)(tid, "x8" , vex->guest_X8 );
   (*f)(tid, "x9" , vex->guest_X9 );
   (*f)(tid, "x10", vex->guest_X10);
   (*f)(tid, "x11", vex->guest_X11);
   (*f)(tid, "x12", vex->guest_X12);
   (*f)(tid, "x13", vex->guest_X13);
   (*f)(tid, "x14", vex->guest_X14);
   (*f)(tid, "x15", vex->guest_X15);
   (*f)(tid, "x16", vex->guest_X16);
   (*f)(tid, "x17", vex->guest_X17);
   (*f)(tid, "x18", vex->guest_X18);
   (*f)(tid, "x19", vex->guest_X19);
   (*f)(tid, "x20", vex->guest_X20);
   (*f)(tid, "x21", vex->guest_X21);
   (*f)(tid, "x22", vex->guest_X22);
   (*f)(tid, "x23", vex->guest_X23);
   (*f)(tid, "x24", vex->guest_X24);
   (*f)(tid, "x25", vex->guest_X25);
   (*f)(tid, "x26", vex->guest_X26);
   (*f)(tid, "x27", vex->guest_X27);
   (*f)(tid, "x28", vex->guest_X28);
   (*f)(tid, "x29", vex->guest_X29);
   (*f)(tid, "x30", vex->guest_X30);
#elif defined(VGA_tilegx)
   (*f)(tid, "r0",  vex->guest_r0 );
   (*f)(tid, "r1",  vex->guest_r1 );
   (*f)(tid, "r2",  vex->guest_r2 );
   (*f)(tid, "r3",  vex->guest_r3 );
   (*f)(tid, "r4",  vex->guest_r4 );
   (*f)(tid, "r5",  vex->guest_r5 );
   (*f)(tid, "r6",  vex->guest_r6 );
   (*f)(tid, "r7",  vex->guest_r7 );
   (*f)(tid, "r8",  vex->guest_r8 );
   (*f)(tid, "r9",  vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
   (*f)(tid, "r32", vex->guest_r32);
   (*f)(tid, "r33", vex->guest_r33);
   (*f)(tid, "r34", vex->guest_r34);
   (*f)(tid, "r35", vex->guest_r35);
   (*f)(tid, "r36", vex->guest_r36);
   (*f)(tid, "r37", vex->guest_r37);
   (*f)(tid, "r38", vex->guest_r38);
   (*f)(tid, "r39", vex->guest_r39);
   (*f)(tid, "r40", vex->guest_r40);
   (*f)(tid, "r41", vex->guest_r41);
   (*f)(tid, "r42", vex->guest_r42);
   (*f)(tid, "r43", vex->guest_r43);
   (*f)(tid, "r44", vex->guest_r44);
   (*f)(tid, "r45", vex->guest_r45);
   (*f)(tid, "r46", vex->guest_r46);
   (*f)(tid, "r47", vex->guest_r47);
   (*f)(tid, "r48", vex->guest_r48);
   (*f)(tid, "r49", vex->guest_r49);
   (*f)(tid, "r50", vex->guest_r50);
   (*f)(tid, "r51", vex->guest_r51);
   (*f)(tid, "r52", vex->guest_r52);
   (*f)(tid, "r53", vex->guest_r53);
   (*f)(tid, "r54", vex->guest_r54);
   (*f)(tid, "r55", vex->guest_r55);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)
          || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
         // live thread or thread instructed to die by another thread that
         // called exit.
         apply_to_GPs_of_tid(tid, f);
      }
   }
}
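
/* Illustrative only (not part of the build): a hypothetical callback
   for VG_(apply_to_GP_regs), here just dumping every live thread's GP
   registers via the core printf:

      static void print_gp ( ThreadId tid, const HChar* name, UWord val )
      {
         VG_(printf)("tid %u: %s = 0x%lx\n", tid, name, val);
      }
      ...
      VG_(apply_to_GP_regs)( print_gp );

   Tools use the same hook, for instance to treat register contents as
   potential pointers when scanning for reachable memory. */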

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_byte;
         return True;
      }
   }
   return False;
}

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_byte;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/
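
/* A sketch of the intended call ordering (illustrative, not normative;
   VG_(machine_get_VexArchInfo) is the accessor declared in
   pub_core_machine.h):

      VG_(machine_get_hwcaps)();                  // once, at startup
      // on ppc32/64 only: VG_(machine_ppc{32,64}_set_clszB)(...)
      VG_(machine_get_VexArchInfo)( &va, &vai );  // safe afterwards
*/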

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif


/* For hwcaps detection on ppc32/64, s390x, arm, and mips32 we'll need
   to do SIGILL testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
    || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
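
/* All the probes below share one shape (shown here in outline only):

      volatile Bool have_insn = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_insn = False;     // reached via longjmp from the handler
      } else {
         __asm__ __volatile__(".long 0x........"); // candidate insn
      }

   If the emitted instruction raises SIGILL, handler_unsup_insn
   longjmps back and the feature is recorded as absent. */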


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, that they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to the next max block size; assumes MAX_DCBZL_SZB is a
      power of 2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64
             || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64
                || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */
#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */
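/* For example, the sample line above ("machine = 2064") maps to
   VEX_S390X_MODEL_Z900 via the table below. */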

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      const HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
      { "2827", VEX_S390X_MODEL_ZEC12 },
      { "2828", VEX_S390X_MODEL_ZBC12 },
      { "2964", VEX_S390X_MODEL_Z13 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
     if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

     m = VG_(strstr)( p, "machine" );
     if (m == NULL) continue;

     p = m + sizeof "machine" - 1;
     while ( VG_(isspace)( *p ) || *p == '=') {
       if (*p == '\n') goto next_line;
       ++p;
     }

     model_name = p;
     for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
       struct model_map *mm = model_map + n;
       SizeT len = VG_(strlen)( mm->name );
       if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
            VG_(isspace)( model_name[len] )) {
         if (mm->id < model) model = mm->id;
         p = model_name + len;
         break;
       }
     }
     /* Skip until end-of-line */
     while (*p != '\n')
       ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* VGA_s390x */

#if defined(VGA_mips32) || defined(VGA_mips64)

/* Read /proc/cpuinfo and return the machine model. */
static UInt VG_(get_machine_model)(void)
{
   const char *search_MIPS_str = "MIPS";
   const char *search_Broadcom_str = "Broadcom";
   const char *search_Netlogic_str = "Netlogic";
   const char *search_Cavium_str = "Cavium";
   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return -1;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr) (file_buf, search_Broadcom_str) != NULL)
       return VEX_PRID_COMP_BROADCOM;
   if (VG_(strstr) (file_buf, search_Netlogic_str) != NULL)
       return VEX_PRID_COMP_NETLOGIC;
   if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
       return VEX_PRID_COMP_CAVIUM;
   if (VG_(strstr) (file_buf, search_MIPS_str) != NULL)
       return VEX_PRID_COMP_MIPS;

   /* Did not find string in the proc file. */
   return -1;
}

#endif

/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind.
   Also determine information about the caches on this host. */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;
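     /* E.g. on an Intel CPU, ebx:edx:ecx hold "Genu" "ineI" "ntel",
        so vstr reads "GenuineIntel". */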

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do MMXEXT. */
     have_mmxext = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        /* Some older AMD processors support an sse1 subset (Integer SSE). */
        have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
     }

     /* Figure out if this is an AMD or Intel that can do LZCNT. */
     have_lzcnt = False;
     if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
          || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Intel processors don't define the mmxext extension, but since it
        is just an sse1 subset always define it when we have sse1. */
     if (have_sse1)
        have_mmxext = True;

     va = VexArchX86;
     vai.endness = VexEndnessLE;

     if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
        VG_(machine_x86_have_mxcsr) = 0;
     } else {
       vai.hwcaps = 0; /*baseline - no sse at all*/
       VG_(machine_x86_have_mxcsr) = 0;
     }

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx, have_bmi, have_avx2;
     Bool have_rdtscp;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     ULong xgetbv_0 = 0;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     max_basic = eax;
     if (max_basic < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3   is ecx:9
     // sse41   is ecx:19
     // sse42   is ecx:20

     // xsave   is ecx:26
     // osxsave is ecx:27
     // avx     is ecx:28
     // fma     is ecx:12
     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<28)|(1<<27)|(1<<26))) == ((1<<28)|(1<<27)|(1<<26)) ) {
        /* Processor supports AVX instructions, XGETBV is enabled by the
           OS, and AVX instructions are enabled by the OS. */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx","rax");
        xgetbv_0 = w;
        if ((xgetbv_0 & 7) == 7) {
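           /* XCR0 bit 0 = x87, bit 1 = SSE/XMM, bit 2 = AVX/YMM;
              hence the mask of 7. */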
           /* Only say we have AVX if the XSAVE-allowable
              bitfield-mask allows x87, SSE and AVX state.  We could
              actually run with a more restrictive XGETBV(0) value,
              but VEX's implementation of XSAVE and XRSTOR assumes
              that all 3 bits are enabled.

              Also, the VEX implementation of XSAVE/XRSTOR assumes that
              state component [2] (the YMM high halves) are located in
              the XSAVE image at offsets 576 .. 831.  So we have to
              check that here before declaring AVX to be supported. */
           UInt eax2, ebx2, ecx2, edx2;
           VG_(cpuid)(0xD, 2, &eax2, &ebx2, &ecx2, &edx2);
           if (ebx2 == 576 && eax2 == 256) {
              have_avx = True;
           }
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this CPU can do LZCNT. */
     have_lzcnt = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Can we do RDTSCP? */
     have_rdtscp = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
     }

     /* Check for BMI1 and AVX2, but only if we have AVX1 (plus OS
        support). */
     have_bmi  = False;
     have_avx2 = False;
     if (have_avx && max_basic >= 7) {
        VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
        have_bmi  = (ebx & (1<<3)) != 0; /* True => have BMI1 */
        have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
     }

     va          = VexArchAMD64;
     vai.endness = VexEndnessLE;
     vai.hwcaps  = (have_sse3   ? VEX_HWCAPS_AMD64_SSE3   : 0)
                 | (have_cx16   ? VEX_HWCAPS_AMD64_CX16   : 0)
                 | (have_lzcnt  ? VEX_HWCAPS_AMD64_LZCNT  : 0)
                 | (have_avx    ? VEX_HWCAPS_AMD64_AVX    : 0)
                 | (have_bmi    ? VEX_HWCAPS_AMD64_BMI    : 0)
                 | (have_avx2   ? VEX_HWCAPS_AMD64_AVX2   : 0)
                 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;
     vai.endness = VexEndnessBE;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07);
     /* on ppc64, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;
#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_s390x)

#  include "libvex_s390x_common.h"

   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_STFLE;
     Int i, r, model;

     /* If the model is "unknown" don't treat this as an error. Assume
        this is a brand-new machine model for which we don't have the
        identification yet. Keeping fingers crossed. */
     model = VG_(get_machine_model)();

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
       /* BASR loads the address of the next insn into r1. Needed to avoid
          a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     /* Check availability of STFLE. If available store facility bits
        in hoststfle. */
     ULong hoststfle[S390_NUM_FACILITY_DW];

     for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
        hoststfle[i] = 0;

     have_STFLE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
         register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;

         __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                              : "=m" (hoststfle), "+d"(reg0)
                              : : "cc", "memory");
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;
     vai.endness = VexEndnessBE;

     vai.hwcaps = model;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990. For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }

     /* Detect presence of certain facilities using the STFLE insn.
        Note that these facilities were introduced at the same time as,
        or later than, STFLE, so the absence of STFLE implies the absence
        of the facility we're trying to detect. */
     struct fac_hwcaps_map {
        UInt installed;
        UInt facility_bit;
        UInt hwcaps_bit;
        const HChar name[6];   // may need adjustment for new facility names
     } fac_hwcaps[] = {
        { False, S390_FAC_EIMM,  VEX_HWCAPS_S390X_EIMM,  "EIMM"  },
        { False, S390_FAC_GIE,   VEX_HWCAPS_S390X_GIE,   "GIE"   },
        { False, S390_FAC_DFP,   VEX_HWCAPS_S390X_DFP,   "DFP"   },
        { False, S390_FAC_FPSE,  VEX_HWCAPS_S390X_FGX,   "FGX"   },
        { False, S390_FAC_ETF2,  VEX_HWCAPS_S390X_ETF2,  "ETF2"  },
        { False, S390_FAC_ETF3,  VEX_HWCAPS_S390X_ETF3,  "ETF3"  },
        { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
        { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
        { False, S390_FAC_LSC,   VEX_HWCAPS_S390X_LSC,   "LSC"   },
        { False, S390_FAC_PFPO,  VEX_HWCAPS_S390X_PFPO,  "PFPO"  },
     };

     /* Set hwcaps according to the detected facilities */
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        vg_assert(fac_hwcaps[i].facility_bit <= 63);  // for now
        if (hoststfle[0] & (1ULL << (63 - fac_hwcaps[i].facility_bit))) {
           fac_hwcaps[i].installed = True;
           vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
        }
     }
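
     /* Note on the shift above: STFLE numbers facility bits from the
        most significant bit of the first doubleword, so facility bit 0
        corresponds to the mask 1ULL << 63. */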

     /* Build up a string showing the probed-for facilities */
     HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
                   (sizeof fac_hwcaps[0].name + 3) + //  %s %d
                   7 + 1 + 4 + 2  // machine %4d
                   + 1];  // \0
     HChar *p = fac_str;
     p += VG_(sprintf)(p, "machine %4d  ", model);
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        p += VG_(sprintf)(p, " %s %1u", fac_hwcaps[i].name,
                          fac_hwcaps[i].installed);
     }
     *p++ = '\0';

     VG_(debugLog)(1, "machine", "%s\n", fac_str);
     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all arm platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

1461     VG_(sigemptyset)(&tmp_set);
1462     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1463     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1464
1465     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1466     vg_assert(r == 0);
1467
1468     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1469     vg_assert(r == 0);
1470     tmp_sigill_act = saved_sigill_act;
1471
1472     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1473     tmp_sigfpe_act = saved_sigfpe_act;
1474
     /* NODEFER: from the kernel's point of view the signal handler
        never returns (it exits via longjmp), so without the NODEFER
        flag the signal would remain blocked after the first catch.
        Since we need to catch it more than once, set NODEFER. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
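
     /* Each probe below follows the same pattern: arm the setjmp
        buffer, then attempt one representative instruction from the
        extension in question.  If the CPU lacks the extension, the
        insn raises SIGILL, handler_unsup_insn longjmps back,
        VG_MINIMAL_SETJMP returns nonzero, and the corresponding
        have_* flag is cleared. */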

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they
        differ very little, so for now we do not distinguish between
        them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
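     /* Probe levels in descending order: first try a v7-only insn
        (PLI); if that traps, fall back to the base level and try a
        v6-only insn (PKHBT).  Whichever probe survives determines
        the level reported. */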
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
           archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
           (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;
     vai.endness = VexEndnessLE;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm64)
   {
     va = VexArchARM64;
     vai.endness = VexEndnessLE;

     /* So far there are no variants. */
     vai.hwcaps = 0;

     VG_(machine_get_cache_info)(&vai);

     /* 0 denotes 'not set'.  The range of legitimate values here,
        once set, is 2 through 17 inclusive. */
     vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
     vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
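     /* CTR_EL0.DminLine (bits [19:16]) and CTR_EL0.IminLine (bits
        [3:0]) hold log2 of the smallest D- and I-cache line sizes in
        4-byte words; adding 2 converts each to log2 of the size in
        bytes. */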
     ULong ctr_el0;
     __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
     vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
     vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >>  0) & 0xF) + 2;
     VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
                      "ctr_el0.iMinLine_szB = %d\n",
                   1 << vai.arm64_dMinLine_lg2_szB,
                   1 << vai.arm64_iMinLine_lg2_szB);

     return True;
   }

#elif defined(VGA_mips32)
   {
     /* Position of the F64 bit in the FIR register. */
#    define FP64 22
     va = VexArchMIPS32;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
         return False;

     vai.hwcaps = model;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     /* Same instruction set detection algorithm as for ppc32/arm... */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t   tmp_sigill_act;

     volatile Bool have_DSP, have_DSPr2;
     Int r;

     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: from the kernel's point of view the signal handler
        never returns (it exits via longjmp), so without the NODEFER
        flag the signal would remain blocked after the first catch.
        Since we need to catch it more than once, set NODEFER. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

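     /* Probe for the DSP ASEs only on MIPS Technologies cores.  Try
        DSPr2 first and, if present, assume a 74K core; otherwise try
        the original DSP ASE and, if present, assume a 34K.  The
        probes use the same SIGILL/setjmp pattern as the ARM case
        above. */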
     if (model == VEX_PRID_COMP_MIPS) {
        /* DSPr2 instructions. */
        have_DSPr2 = True;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_DSPr2 = False;
        } else {
           __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
        }
        if (have_DSPr2) {
           /* We assume it's 74K, since it can run DSPr2. */
           vai.hwcaps |= VEX_PRID_IMP_74K;
        } else {
           /* DSP instructions. */
           have_DSP = True;
           if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
              have_DSP = False;
           } else {
              __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
           }
           if (have_DSP) {
              /* We assume it's 34K, since it has support for DSP. */
              vai.hwcaps |= VEX_PRID_IMP_34K;
           }
        }
     }

     /* Check whether the CPU has an FPU with 32 double-precision FP
        registers, indicated by the F64 bit of the FIR (the FP
        Implementation Register, read via "cfc1" from FP control
        register 0). */
     int FIR = 0;
     __asm__ __volatile__(
        "cfc1 %0, $0"  "\n\t"
        : "=r" (FIR)
     );
     if (FIR & (1 << FP64)) {
        vai.hwcaps |= VEX_PRID_CPU_32FPR;
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_mips64)
   {
     va = VexArchMIPS64;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
         return False;

     vai.hwcaps = model;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_tilegx)
   {
     va = VexArchTILEGX;
     vai.hwcaps = VEX_HWCAPS_TILEGX_BASE;
     vai.endness = VexEndnessLE;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Record the host CPU's instruction cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Record the host CPU's instruction cache line size. */
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Record whether the host can execute NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}


/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (e.g. AVX or non-AVX, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   what the minimum cache line size they are prepared to simulate is.
   Basically require that the minimum cache line size is at least as
   large as the largest register that might get transferred to/from
   memory, so as to guarantee that any such transaction can straddle
   at most 2 cache lines.  For example, a 32-byte AVX transfer against
   64-byte cache lines touches at most 2 lines, whereas against
   16-byte lines an unaligned transfer could touch 3.
*/
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends whether or not we have NEON, but let's just
      assume we always do. */
   return 16;

#  elif defined(VGA_arm64)
   /* ARM64 always has Neon, AFAICS. */
   return 16;

#  elif defined(VGA_mips32)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  elif defined(VGA_mips64)
   return 8;

#  elif defined(VGA_tilegx)
   return 8;

#  else
#    error "Unknown arch"
#  endif
}


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
      || defined(VGP_arm_linux) || defined(VGO_darwin)          \
      || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
      || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
      || defined(VGP_mips64_linux) || defined(VGP_arm64_linux) \
      || defined(VGP_tilegx_linux) || defined(VGP_x86_solaris) \
      || defined(VGP_amd64_solaris)
   return f;
#  elif defined(VGP_ppc64be_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
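   /* For reference, the ELFv1 function descriptor layout is:
         descr[0] = function entry point
         descr[1] = TOC (r2) value for the function
         descr[2] = environment pointer (unused by C code) */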
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/
