m_machine.c revision 732fb4e38991c4f0cef40c5c7e210010360d05eb
/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2013 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_libcsetjmp.h"   // setjmp facilities
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)
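
/* VG_INSTR_PTR, VG_STACK_PTR and VG_FRAME_PTR are per-architecture
   macros (see pub_core_machine.h) naming the corresponding VEX guest
   state field, e.g. guest_RIP/guest_RSP/guest_RBP on amd64. */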

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}

void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_arm64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
   regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
   regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_r11;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_r14;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_mips64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS64.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS64.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS64.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  else
#    error "Unknown arch"
#  endif
}
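
/* A stack unwinder typically seeds itself from these values; as a
   sketch (field names as defined above):

      UnwindStartRegs srs;
      VG_(get_UnwindStartRegs)(&srs, tid);
      // then walk the stack starting at srs.r_pc / srs.r_sp, using the
      // arch-specific srs.misc fields (frame pointer, link register)
      // as extra roots where the unwind info is incomplete.
*/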

void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}
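
/* Bank 0 is the real guest state; banks 1 and 2 are the tool-visible
   shadow copies.  As an illustrative (not authoritative) sketch, a
   tool could fetch the shadow-1 word backing the amd64 guest RAX via

      UWord sh;
      VG_(get_shadow_regs_area)(tid, (UChar*)&sh, 1,
                                offsetof(VexGuestAMD64State, guest_RAX),
                                sizeof(sh));

   VG_(set_shadow_regs_area) below is the exact mirror image. */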

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}


static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, Addr))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
   VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %d\n", tid);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
258   (*f)(tid, "R8" , vex->guest_R8 );
259   (*f)(tid, "R9" , vex->guest_R9 );
260   (*f)(tid, "R10", vex->guest_R10);
261   (*f)(tid, "R11", vex->guest_R11);
262   (*f)(tid, "R12", vex->guest_R12);
263   (*f)(tid, "R13", vex->guest_R13);
264   (*f)(tid, "R14", vex->guest_R14);
265#elif defined(VGA_s390x)
266   (*f)(tid, "r0" , vex->guest_r0 );
267   (*f)(tid, "r1" , vex->guest_r1 );
268   (*f)(tid, "r2" , vex->guest_r2 );
269   (*f)(tid, "r3" , vex->guest_r3 );
270   (*f)(tid, "r4" , vex->guest_r4 );
271   (*f)(tid, "r5" , vex->guest_r5 );
272   (*f)(tid, "r6" , vex->guest_r6 );
273   (*f)(tid, "r7" , vex->guest_r7 );
274   (*f)(tid, "r8" , vex->guest_r8 );
275   (*f)(tid, "r9" , vex->guest_r9 );
276   (*f)(tid, "r10", vex->guest_r10);
277   (*f)(tid, "r11", vex->guest_r11);
278   (*f)(tid, "r12", vex->guest_r12);
279   (*f)(tid, "r13", vex->guest_r13);
280   (*f)(tid, "r14", vex->guest_r14);
281   (*f)(tid, "r15", vex->guest_r15);
282#elif defined(VGA_mips32) || defined(VGA_mips64)
283   (*f)(tid, "r0" , vex->guest_r0 );
284   (*f)(tid, "r1" , vex->guest_r1 );
285   (*f)(tid, "r2" , vex->guest_r2 );
286   (*f)(tid, "r3" , vex->guest_r3 );
287   (*f)(tid, "r4" , vex->guest_r4 );
288   (*f)(tid, "r5" , vex->guest_r5 );
289   (*f)(tid, "r6" , vex->guest_r6 );
290   (*f)(tid, "r7" , vex->guest_r7 );
291   (*f)(tid, "r8" , vex->guest_r8 );
292   (*f)(tid, "r9" , vex->guest_r9 );
293   (*f)(tid, "r10", vex->guest_r10);
294   (*f)(tid, "r11", vex->guest_r11);
295   (*f)(tid, "r12", vex->guest_r12);
296   (*f)(tid, "r13", vex->guest_r13);
297   (*f)(tid, "r14", vex->guest_r14);
298   (*f)(tid, "r15", vex->guest_r15);
299   (*f)(tid, "r16", vex->guest_r16);
300   (*f)(tid, "r17", vex->guest_r17);
301   (*f)(tid, "r18", vex->guest_r18);
302   (*f)(tid, "r19", vex->guest_r19);
303   (*f)(tid, "r20", vex->guest_r20);
304   (*f)(tid, "r21", vex->guest_r21);
305   (*f)(tid, "r22", vex->guest_r22);
306   (*f)(tid, "r23", vex->guest_r23);
307   (*f)(tid, "r24", vex->guest_r24);
308   (*f)(tid, "r25", vex->guest_r25);
309   (*f)(tid, "r26", vex->guest_r26);
310   (*f)(tid, "r27", vex->guest_r27);
311   (*f)(tid, "r28", vex->guest_r28);
312   (*f)(tid, "r29", vex->guest_r29);
313   (*f)(tid, "r30", vex->guest_r30);
314   (*f)(tid, "r31", vex->guest_r31);
315#elif defined(VGA_arm64)
316   (*f)(tid, "x0" , vex->guest_X0 );
317   (*f)(tid, "x1" , vex->guest_X1 );
318   (*f)(tid, "x2" , vex->guest_X2 );
319   (*f)(tid, "x3" , vex->guest_X3 );
320   (*f)(tid, "x4" , vex->guest_X4 );
321   (*f)(tid, "x5" , vex->guest_X5 );
322   (*f)(tid, "x6" , vex->guest_X6 );
323   (*f)(tid, "x7" , vex->guest_X7 );
324   (*f)(tid, "x8" , vex->guest_X8 );
325   (*f)(tid, "x9" , vex->guest_X9 );
326   (*f)(tid, "x10", vex->guest_X10);
327   (*f)(tid, "x11", vex->guest_X11);
328   (*f)(tid, "x12", vex->guest_X12);
329   (*f)(tid, "x13", vex->guest_X13);
330   (*f)(tid, "x14", vex->guest_X14);
331   (*f)(tid, "x15", vex->guest_X15);
332   (*f)(tid, "x16", vex->guest_X16);
333   (*f)(tid, "x17", vex->guest_X17);
334   (*f)(tid, "x18", vex->guest_X18);
335   (*f)(tid, "x19", vex->guest_X19);
336   (*f)(tid, "x20", vex->guest_X20);
337   (*f)(tid, "x21", vex->guest_X21);
338   (*f)(tid, "x22", vex->guest_X22);
339   (*f)(tid, "x23", vex->guest_X23);
340   (*f)(tid, "x24", vex->guest_X24);
341   (*f)(tid, "x25", vex->guest_X25);
342   (*f)(tid, "x26", vex->guest_X26);
343   (*f)(tid, "x27", vex->guest_X27);
344   (*f)(tid, "x28", vex->guest_X28);
345   (*f)(tid, "x29", vex->guest_X29);
346   (*f)(tid, "x30", vex->guest_X30);
347#else
348#  error Unknown arch
349#endif
350}


void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)
          || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
         // live thread or thread instructed to die by another thread that
         // called exit.
         apply_to_GPs_of_tid(tid, f);
      }
   }
}

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_byte;
         return True;
      }
   }
   return False;
}
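
/* Typical use of the iterator above (a sketch):

      ThreadId tid;
      Addr stack_min, stack_max;
      VG_(thread_stack_reset_iter)(&tid);
      while (VG_(thread_stack_next)(&tid, &stack_min, &stack_max)) {
         // [stack_min, stack_max] covers the live part of tid's stack
      }
*/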

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_byte;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif


/* For hwcaps detection on ppc32/64, s390x, arm, and mips32 we'll need
   to do SIGILL testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
    || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
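
/* The feature probes below all follow the same setjmp/SIGILL pattern;
   in outline (a sketch, not additional functionality):

      have_FOO = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_FOO = False;            // probe insn raised SIGILL/SIGFPE
      } else {
         __asm__ __volatile__(...);   // candidate instruction
      }

   handler_unsup_insn longjmps straight back, so have_FOO ends up False
   exactly when the CPU rejects the candidate instruction. */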


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by the dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */

#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      const HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
      { "2827", VEX_S390X_MODEL_ZEC12 },
      { "2828", VEX_S390X_MODEL_ZBC12 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
     if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

     m = VG_(strstr)( p, "machine" );
     if (m == NULL) continue;

     p = m + sizeof "machine" - 1;
     while ( VG_(isspace)( *p ) || *p == '=') {
       if (*p == '\n') goto next_line;
       ++p;
     }

     model_name = p;
     for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
       struct model_map *mm = model_map + n;
       SizeT len = VG_(strlen)( mm->name );
       if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
            VG_(isspace)( model_name[len] )) {
         if (mm->id < model) model = mm->id;
         p = model_name + len;
         break;
       }
     }
     /* Skip until end-of-line */
     while (*p != '\n')
       ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* VGA_s390x */

#if defined(VGA_mips32) || defined(VGA_mips64)

/* Read /proc/cpuinfo and return the machine model. */
static UInt VG_(get_machine_model)(void)
{
   const char *search_MIPS_str = "MIPS";
   const char *search_Broadcom_str = "Broadcom";
   const char *search_Netlogic_str = "Netlogic";
   const char *search_Cavium_str = "Cavium";
   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return -1;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr) (file_buf, search_Broadcom_str) != NULL)
       return VEX_PRID_COMP_BROADCOM;
   if (VG_(strstr) (file_buf, search_Netlogic_str) != NULL)
       return VEX_PRID_COMP_NETLOGIC;
   if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
       return VEX_PRID_COMP_CAVIUM;
   if (VG_(strstr) (file_buf, search_MIPS_str) != NULL)
       return VEX_PRID_COMP_MIPS;

   /* Did not find string in the proc file. */
   return -1;
}

#endif

/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind.
   Also determine information about the caches on this host. */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do MMXEXT. */
     have_mmxext = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        /* Some older AMD processors support a sse1 subset (Integer SSE). */
        have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
     }

     /* Figure out if this is an AMD or Intel that can do LZCNT. */
     have_lzcnt = False;
     if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
          || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Intel processors don't define the mmxext extension, but since it
        is just a sse1 subset, always define it when we have sse1. */
     if (have_sse1)
        have_mmxext = True;

     va = VexArchX86;
     vai.endness = VexEndnessLE;

     if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
        VG_(machine_x86_have_mxcsr) = 0;
     } else {
       vai.hwcaps = 0; /*baseline - no sse at all*/
       VG_(machine_x86_have_mxcsr) = 0;
     }

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx, have_bmi, have_avx2;
     Bool have_rdtscp;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     max_basic = eax;
     if (max_basic < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3   is ecx:9
     // sse41   is ecx:19
     // sse42   is ecx:20

     // osxsave is ecx:27
     // avx     is ecx:28
     // fma     is ecx:12
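
     // xgetbv (below) reads XCR0: bit 1 = XMM state, bit 2 = YMM state;
     // the OS must enable both before AVX can actually be used.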
     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<27)|(1<<28))) == ((1<<27)|(1<<28)) ) {
        /* processor supports AVX instructions and XGETBV is enabled
           by OS */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx");
        if ((w & 6) == 6) {
           /* OS has enabled both XMM and YMM state support */
           have_avx = True;
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this CPU can do LZCNT. */
     have_lzcnt = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Can we do RDTSCP? */
     have_rdtscp = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
     }

     /* Check for BMI1 and AVX2, but only if we have AVX1 (plus OS
        support). */
     have_bmi = False;
     have_avx2 = False;
     if (have_avx && max_basic >= 7) {
        VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
        have_bmi = (ebx & (1<<3)) != 0; /* True => have BMI1 */
        have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
     }

     va          = VexArchAMD64;
     vai.endness = VexEndnessLE;
     vai.hwcaps  = (have_sse3   ? VEX_HWCAPS_AMD64_SSE3   : 0)
                 | (have_cx16   ? VEX_HWCAPS_AMD64_CX16   : 0)
                 | (have_lzcnt  ? VEX_HWCAPS_AMD64_LZCNT  : 0)
                 | (have_avx    ? VEX_HWCAPS_AMD64_AVX    : 0)
                 | (have_bmi    ? VEX_HWCAPS_AMD64_BMI    : 0)
                 | (have_avx2   ? VEX_HWCAPS_AMD64_AVX2   : 0)
                 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;
     vai.endness = VexEndnessBE;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07);
     /* on ppc64, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;
#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_s390x)

#  include "libvex_s390x_common.h"

   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_STFLE;
     Int i, r, model;

     /* If the model is "unknown" don't treat this as an error. Assume
        this is a brand-new machine model for which we don't have the
        identification yet. Keeping fingers crossed. */
     model = VG_(get_machine_model)();

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
       /* BASR loads the address of the next insn into r1. Needed to avoid
          a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     /* Check availability of STFLE. If available store facility bits
        in hoststfle. */
     ULong hoststfle[S390_NUM_FACILITY_DW];

     for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
        hoststfle[i] = 0;

     have_STFLE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
         register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;

         __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                              : "=m" (hoststfle), "+d"(reg0)
                              : : "cc", "memory");
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;
     vai.endness = VexEndnessBE;

     vai.hwcaps = model;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990. For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }

     /* Detect presence of certain facilities using the STFLE insn.
        Note that these facilities were introduced at the same time as
        or later than STFLE, so the absence of STFLE implies the
        absence of the facilities we're trying to detect. */
     struct fac_hwcaps_map {
        UInt installed;
        UInt facility_bit;
        UInt hwcaps_bit;
        const HChar name[6];   // may need adjustment for new facility names
     } fac_hwcaps[] = {
        { False, S390_FAC_EIMM,  VEX_HWCAPS_S390X_EIMM,  "EIMM"  },
        { False, S390_FAC_GIE,   VEX_HWCAPS_S390X_GIE,   "GIE"   },
        { False, S390_FAC_DFP,   VEX_HWCAPS_S390X_DFP,   "DFP"   },
        { False, S390_FAC_FPSE,  VEX_HWCAPS_S390X_FGX,   "FGX"   },
        { False, S390_FAC_ETF2,  VEX_HWCAPS_S390X_ETF2,  "ETF2"  },
        { False, S390_FAC_ETF3,  VEX_HWCAPS_S390X_ETF3,  "ETF3"  },
        { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
        { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
        { False, S390_FAC_LSC,   VEX_HWCAPS_S390X_LSC,   "LSC"   },
        { False, S390_FAC_PFPO,  VEX_HWCAPS_S390X_PFPO,  "PFPO"  },
     };
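
     /* Facility bits are numbered MSB-first within each doubleword of
        the STFLE result, hence the (63 - facility_bit) shift below. */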

     /* Set hwcaps according to the detected facilities */
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        vg_assert(fac_hwcaps[i].facility_bit <= 63);  // for now
        if (hoststfle[0] & (1ULL << (63 - fac_hwcaps[i].facility_bit))) {
           fac_hwcaps[i].installed = True;
           vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
        }
     }

     /* Build up a string showing the probed-for facilities */
     HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
                   (sizeof fac_hwcaps[0].name + 3) + //  %s %d
                   7 + 1 + 4 + 2  // machine %4d
                   + 1];  // \0
     HChar *p = fac_str;
     p += VG_(sprintf)(p, "machine %4d  ", model);
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        p += VG_(sprintf)(p, " %s %1d", fac_hwcaps[i].name,
                          fac_hwcaps[i].installed);
     }
     *p++ = '\0';

     VG_(debugLog)(1, "machine", "%s\n", fac_str);
     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all arm platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they
        differ very little, so for now we do not distinguish between
        them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }
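
     /* PLI first appears in ARMv7 and PKHBT in ARMv6, so the two
        probes above act as canaries for those architecture levels. */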

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
           archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
           (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;
     vai.endness = VexEndnessLE;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm64)
   {
     va = VexArchARM64;
     vai.endness = VexEndnessLE;

     /* So far there are no variants. */
     vai.hwcaps = 0;

     VG_(machine_get_cache_info)(&vai);

     /* 0 denotes 'not set'.  The range of legitimate values here,
        after being set that is, is 2 through 17 inclusive. */
     vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
     vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
     ULong ctr_el0;
     __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
     vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
     vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >>  0) & 0xF) + 2;
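     /* CTR_EL0.DminLine (bits 19:16) and CTR_EL0.IminLine (bits 3:0)
        hold log2 of the line size in 4-byte words, hence the "+ 2"
        above to convert to log2 of the size in bytes.  Worked example
        (an illustrative value, not from any particular core): with
        DminLine = 4, the minimum D-cache line is 1 << (4+2) = 64
        bytes. */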
     VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
                      "ctr_el0.iMinLine_szB = %d\n",
                   1 << vai.arm64_dMinLine_lg2_szB,
                   1 << vai.arm64_iMinLine_lg2_szB);

     return True;
   }

#elif defined(VGA_mips32)
   {
     /* Define the position of the F64 bit in the FIR register. */
#    define FP64 22
     va = VexArchMIPS32;
     UInt model = VG_(get_machine_model)();
     if (model == -1)   /* i.e. (UInt)-1: the model is unknown */
         return False;

     vai.hwcaps = model;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     /* Same instruction set detection algorithm as for ppc32/arm... */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t   tmp_sigill_act;

     volatile Bool have_DSP, have_DSPr2;
     Int r;

     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     if (model == VEX_PRID_COMP_MIPS) {
        /* DSPr2 instructions. */
        have_DSPr2 = True;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_DSPr2 = False;
        } else {
           __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
        }
        if (have_DSPr2) {
           /* We assume it's 74K, since it can run DSPr2. */
           vai.hwcaps |= VEX_PRID_IMP_74K;
        } else {
           /* DSP instructions. */
           have_DSP = True;
           if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
              have_DSP = False;
           } else {
              __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
           }
           if (have_DSP) {
              /* We assume it's 34K, since it has support for DSP. */
              vai.hwcaps |= VEX_PRID_IMP_34K;
           }
        }
     }

     /* Check whether the CPU has an FPU with 32 double-precision FP
        registers, by reading the F64 bit of the FIR (FP Implementation
        Register), obtained with cfc1. */
     Int FIR = 0;
     __asm__ __volatile__(
        "cfc1 %0, $0"  "\n\t"
        : "=r" (FIR)
     );
     if (FIR & (1 << FP64)) {
        vai.hwcaps |= VEX_PRID_CPU_32FPR;
     }
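     /* Worked example (an illustrative FIR value, not from any
        particular core): FIR = 0x00C30000 has bit 22 (F64) set, so
        the CPU has a 64-bit FPU and VEX_PRID_CPU_32FPR is added. */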

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_mips64)
   {
     va = VexArchMIPS64;
     UInt model = VG_(get_machine_model)();
     if (model == -1)   /* i.e. (UInt)-1: the model is unknown */
         return False;

     vai.hwcaps = model;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify the core of the host CPU's instruction cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Notify the core of the host CPU's instruction cache line size. */
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif
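
/* Usage sketch for the two setters above (the caller shown here is
   hypothetical; the real call sites live elsewhere in the core):

      VG_(machine_ppc64_set_clszB)( 128 );  // once the line size is known
      VG_(machine_ppc64_set_clszB)( 128 );  // repeating the same value is ok

   Passing a different non-zero value the second time would trip the
   first assert. */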

/* Notify the core of the host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}


/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (eg, AVX or non-AVX, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   the minimum cache line size they are prepared to simulate.
   Basically we require that the minimum cache line size is at least
   as large as the largest register that might get transferred to or
   from memory, so as to guarantee that any such transfer straddles
   at most 2 cache lines.
*/
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends on whether or not we have NEON, but let's
      just assume we always do. */
   return 16;

#  elif defined(VGA_arm64)
   /* ARM64 always has NEON, AFAICS. */
   return 16;

#  elif defined(VGA_mips32)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  elif defined(VGA_mips64)
   return 8;

#  else
#    error "Unknown arch"
#  endif
}
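
/* A sketch of how a cache-simulating tool might consume this value
   (hypothetical caller; not code from Cachegrind itself):

      Int minLine = VG_(machine_get_size_of_largest_guest_register)();
      if (line_szB < minLine)
         ... reject this cache configuration ...

   With line_szB >= minLine, a transfer of up to minLine bytes, at
   any alignment, touches at most 2 lines. */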


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
      || defined(VGP_arm_linux) || defined(VGO_darwin)          \
      || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
      || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
      || defined(VGP_mips64_linux) || defined(VGP_arm64_linux)
   return f;
#  elif defined(VGP_ppc64be_linux)
   /* ppc64be-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}
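
/* Illustrative layout of a big-endian ppc64 (ELFv1/AIX-style)
   function descriptor; a sketch for exposition, not a type used in
   this file:

      typedef struct {
         UWord entry;  // word 0: address of the first instruction
         UWord toc;    // word 1: TOC (r2) value for the callee
         UWord env;    // word 2: environment pointer, normally unused
      } FnDescr;

   VG_(fnptr_to_fnentry) simply returns word 0. */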

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/
