/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2013 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_libcsetjmp.h"   // setjmp facilities
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}
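
/* Illustrative use of the accessors above -- a minimal, hypothetical
   sketch, not code from this file.  A tool that wants to log where each
   thread is executing could do, for a known-valid tid:

      Addr ip = VG_(get_IP)(tid);
      Addr sp = VG_(get_SP)(tid);
      VG_(debugLog)(1, "mytool", "tid %d: ip=%#lx sp=%#lx\n",
                    (Int)tid, ip, sp);

   The accessors simply read or write the guest state in
   VG_(threads)[tid].arch.vex; they do not pause the thread. */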

void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_arm64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
   regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
   regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_r11;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_r14;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_mips64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS64.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS64.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS64.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  else
#    error "Unknown arch"
#  endif
}

void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}
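
/* Example (a sketch; the offset shown is only illustrative): a tool
   wanting the shadow1 value of a guest register computes the register's
   offset in the guest state -- e.g. on amd64,
   offsetof(VexGuestAMD64State, guest_RAX) -- and then does

      UWord sh;
      VG_(get_shadow_regs_area)( tid, (UChar*)&sh, 1,
                                 offsetof(VexGuestAMD64State, guest_RAX),
                                 sizeof(UWord) );

   This function is exported to tools via pub_tool_machine.h. */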

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}


static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, Addr))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
   VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %d\n", tid);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R7" , vex->guest_R7 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32) || defined(VGA_mips64)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#elif defined(VGA_arm64)
   (*f)(tid, "x0" , vex->guest_X0 );
   (*f)(tid, "x1" , vex->guest_X1 );
   (*f)(tid, "x2" , vex->guest_X2 );
   (*f)(tid, "x3" , vex->guest_X3 );
   (*f)(tid, "x4" , vex->guest_X4 );
   (*f)(tid, "x5" , vex->guest_X5 );
   (*f)(tid, "x6" , vex->guest_X6 );
   (*f)(tid, "x7" , vex->guest_X7 );
   (*f)(tid, "x8" , vex->guest_X8 );
   (*f)(tid, "x9" , vex->guest_X9 );
   (*f)(tid, "x10", vex->guest_X10);
   (*f)(tid, "x11", vex->guest_X11);
   (*f)(tid, "x12", vex->guest_X12);
   (*f)(tid, "x13", vex->guest_X13);
   (*f)(tid, "x14", vex->guest_X14);
   (*f)(tid, "x15", vex->guest_X15);
   (*f)(tid, "x16", vex->guest_X16);
   (*f)(tid, "x17", vex->guest_X17);
   (*f)(tid, "x18", vex->guest_X18);
   (*f)(tid, "x19", vex->guest_X19);
   (*f)(tid, "x20", vex->guest_X20);
   (*f)(tid, "x21", vex->guest_X21);
   (*f)(tid, "x22", vex->guest_X22);
   (*f)(tid, "x23", vex->guest_X23);
   (*f)(tid, "x24", vex->guest_X24);
   (*f)(tid, "x25", vex->guest_X25);
   (*f)(tid, "x26", vex->guest_X26);
   (*f)(tid, "x27", vex->guest_X27);
   (*f)(tid, "x28", vex->guest_X28);
   (*f)(tid, "x29", vex->guest_X29);
   (*f)(tid, "x30", vex->guest_X30);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)
          || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
         // live thread or thread instructed to die by another thread that
         // called exit.
         apply_to_GPs_of_tid(tid, f);
      }
   }
}
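
/* Example callback (hypothetical names): a tool could count register
   values that look like pointers into the client address space, e.g.

      static UWord n_reg_ptrs = 0;   // made-up counter for illustration
      static void count_if_ptr(ThreadId tid, const HChar* name, UWord val)
      {
         if (VG_(am_is_valid_for_client)(val, 1, VKI_PROT_READ))
            n_reg_ptrs++;
      }
      ...
      VG_(apply_to_GP_regs)(count_if_ptr);

   This is the mechanism memcheck's leak checker uses to treat values
   held in guest registers as potential roots. */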

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_word;
         return True;
      }
   }
   return False;
}
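
/* Typical iteration over all live thread stacks -- a sketch of the
   intended calling protocol for the two functions above:

      ThreadId tid;
      Addr     stack_min, stack_max;
      VG_(thread_stack_reset_iter)(&tid);
      while ( VG_(thread_stack_next)(&tid, &stack_min, &stack_max) ) {
         // [stack_min, stack_max] brackets tid's client stack
      }
*/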

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_word;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif


/* For hwcaps detection on ppc32/64, s390x, arm and mips32 we'll need to
   do SIGILL testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64) \
    || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
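
/* The SIGILL-probe pattern used repeatedly below: install
   handler_unsup_insn for SIGILL (and SIGFPE where relevant), then for
   each candidate instruction do

      have_FOO = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_FOO = False;   // the probe insn trapped; we longjmp'd back
      } else {
         __asm__ __volatile__(".long 0x........");  // emit the probe insn
      }

   If the CPU lacks the instruction, the kernel delivers SIGILL, the
   handler longjmps, and VG_MINIMAL_SETJMP returns nonzero.  "FOO" and
   the instruction word are placeholders here. */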


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, that they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64) */

#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
      { "2827", VEX_S390X_MODEL_ZEC12 },
      { "2828", VEX_S390X_MODEL_ZBC12 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
     if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

     m = VG_(strstr)( p, "machine" );
     if (m == NULL) continue;

     p = m + sizeof "machine" - 1;
     while ( VG_(isspace)( *p ) || *p == '=') {
       if (*p == '\n') goto next_line;
       ++p;
     }

     model_name = p;
     for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
       struct model_map *mm = model_map + n;
       SizeT len = VG_(strlen)( mm->name );
       if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
            VG_(isspace)( model_name[len] )) {
         if (mm->id < model) model = mm->id;
         p = model_name + len;
         break;
       }
     }
     /* Skip until end-of-line */
     while (*p != '\n')
       ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* VGA_s390x */

#if defined(VGA_mips32) || defined(VGA_mips64)

/* Read /proc/cpuinfo and return the machine model. */
static UInt VG_(get_machine_model)(void)
{
   const char *search_MIPS_str = "MIPS";
   const char *search_Broadcom_str = "Broadcom";
   const char *search_Netlogic_str = "Netlogic";
   const char *search_Cavium_str = "Cavium";
   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return -1;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr) (file_buf, search_Broadcom_str) != NULL)
       return VEX_PRID_COMP_BROADCOM;
   if (VG_(strstr) (file_buf, search_Netlogic_str) != NULL)
       return VEX_PRID_COMP_NETLOGIC;
   if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
       return VEX_PRID_COMP_CAVIUM;
   if (VG_(strstr) (file_buf, search_MIPS_str) != NULL)
       return VEX_PRID_COMP_MIPS;

   /* Did not find string in the proc file. */
   return -1;
}

#endif

/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind.
   Also determine information about the caches on this host. */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_cx8, have_lzcnt, have_mmxext;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;
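     /* CPUID leaf 0 returns the 12-byte vendor string in EBX:EDX:ECX
        order, so vstr is now e.g. "GenuineIntel" or "AuthenticAMD". */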

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do MMXEXT. */
     have_mmxext = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        /* Some older AMD processors support a sse1 subset (Integer SSE). */
        have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
     }

     /* Figure out if this is an AMD or Intel that can do LZCNT. */
     have_lzcnt = False;
     if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
          || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Intel processors don't define the mmxext extension, but since it
        is just a sse1 subset always define it when we have sse1. */
     if (have_sse1)
        have_mmxext = True;

     va = VexArchX86;
     if (have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
        VG_(machine_x86_have_mxcsr) = 0;
     } else {
       vai.hwcaps = 0; /*baseline - no sse at all*/
       VG_(machine_x86_have_mxcsr) = 0;
     }

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx, have_bmi, have_avx2;
     Bool have_rdtscp;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     max_basic = eax;
     if (max_basic < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3   is ecx:9
     // sse41   is ecx:19
     // sse42   is ecx:20

     // osxsave is ecx:27
     // avx     is ecx:28
     // fma     is ecx:12
     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<27)|(1<<28))) == ((1<<27)|(1<<28)) ) {
        /* processor supports AVX instructions and XGETBV is enabled
           by OS */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx");
        if ((w & 6) == 6) {
           /* OS has enabled both XMM and YMM state support */
           have_avx = True;
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this CPU can do LZCNT. */
     have_lzcnt = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Can we do RDTSCP? */
     have_rdtscp = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
     }

     /* Check for BMI1 and AVX2.  We only report them if we also have
        AVX1 (plus OS support). */
     have_bmi = False;
     have_avx2 = False;
     if (have_avx && max_basic >= 7) {
        VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
        have_bmi = (ebx & (1<<3)) != 0; /* True => have BMI1 */
        have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
     }

     va         = VexArchAMD64;
     vai.hwcaps = (have_sse3   ? VEX_HWCAPS_AMD64_SSE3   : 0)
                | (have_cx16   ? VEX_HWCAPS_AMD64_CX16   : 0)
                | (have_lzcnt  ? VEX_HWCAPS_AMD64_LZCNT  : 0)
                | (have_avx    ? VEX_HWCAPS_AMD64_AVX    : 0)
                | (have_bmi    ? VEX_HWCAPS_AMD64_BMI    : 0)
                | (have_avx2   ? VEX_HWCAPS_AMD64_AVX2   : 0)
                | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07);
     /* on ppc64, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_s390x)

#  include "libvex_s390x_common.h"

   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_STFLE;
     Int i, r, model;

     /* If the model is "unknown" don't treat this as an error. Assume
        this is a brand-new machine model for which we don't have the
        identification yet. Keeping fingers crossed. */
     model = VG_(get_machine_model)();

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
       /* BASR loads the address of the next insn into r1. Needed to avoid
          a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }
     /* Check availability of STFLE. If available, store facility bits
        in hoststfle. */
     ULong hoststfle[S390_NUM_FACILITY_DW];

     for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
        hoststfle[i] = 0;

     have_STFLE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
         register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;

         __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                              : "=m" (hoststfle), "+d"(reg0)
                              : : "cc", "memory");
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;

     vai.hwcaps = model;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990. For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }

     /* Detect presence of certain facilities using the STFLE insn.
        Note that these facilities were introduced at the same time as or
        later than STFLE itself, so the absence of STFLE implies the
        absence of the facility we're trying to detect. */
     struct fac_hwcaps_map {
        UInt installed;
        UInt facility_bit;
        UInt hwcaps_bit;
        const HChar name[6];   // may need adjustment for new facility names
     } fac_hwcaps[] = {
        { False, S390_FAC_EIMM,  VEX_HWCAPS_S390X_EIMM,  "EIMM"  },
        { False, S390_FAC_GIE,   VEX_HWCAPS_S390X_GIE,   "GIE"   },
        { False, S390_FAC_DFP,   VEX_HWCAPS_S390X_DFP,   "DFP"   },
        { False, S390_FAC_FPSE,  VEX_HWCAPS_S390X_FGX,   "FGX"   },
        { False, S390_FAC_ETF2,  VEX_HWCAPS_S390X_ETF2,  "ETF2"  },
        { False, S390_FAC_ETF3,  VEX_HWCAPS_S390X_ETF3,  "ETF3"  },
        { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
        { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
        { False, S390_FAC_LSC,   VEX_HWCAPS_S390X_LSC,   "LSC"   },
        { False, S390_FAC_PFPO,  VEX_HWCAPS_S390X_PFPO,  "PFPO"  },
     };

     /* Set hwcaps according to the detected facilities */
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        vg_assert(fac_hwcaps[i].facility_bit <= 63);  // for now
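        /* STFLE numbers facility bits from the most significant end:
           facility bit 0 is the leftmost bit of the first doubleword,
           hence the (63 - bit) shift below. */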
        if (hoststfle[0] & (1ULL << (63 - fac_hwcaps[i].facility_bit))) {
           fac_hwcaps[i].installed = True;
           vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
        }
     }

     /* Build up a string showing the probed-for facilities */
     HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
                   (sizeof fac_hwcaps[0].name + 3) + //  %s %d
                   7 + 1 + 4 + 2  // machine %4d
                   + 1];  // \0
     HChar *p = fac_str;
     p += VG_(sprintf)(p, "machine %4d  ", model);
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        p += VG_(sprintf)(p, " %s %1d", fac_hwcaps[i].name,
                          fac_hwcaps[i].installed);
     }
     *p++ = '\0';

     VG_(debugLog)(1, "machine", "%s\n", fac_str);
     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all arm platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they differ
        very little, so for now we will not distinguish between them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
           archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
           (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm64)
   {
     va = VexArchARM64;

     /* So far there are no variants. */
     vai.hwcaps = 0;

     VG_(machine_get_cache_info)(&vai);

     /* 0 denotes 'not set'.  The range of legitimate values here,
        after being set that is, is 2 through 17 inclusive. */
1464     vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
1465     vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
1466     ULong ctr_el0;
1467     __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
1468     vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
1469     vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >>  0) & 0xF) + 2;
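     /* CTR_EL0.IminLine is bits [3:0] and CTR_EL0.DminLine is bits
        [19:16]; each field is log2 of the line size in 4-byte words, so
        the +2 above converts it to log2 of the line size in bytes. */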
     VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
                      "ctr_el0.iMinLine_szB = %d\n",
                   1 << vai.arm64_dMinLine_lg2_szB,
                   1 << vai.arm64_iMinLine_lg2_szB);

     return True;
   }

1478#elif defined(VGA_mips32)
1479   {
1480     /* Define the position of F64 bit in FIR register. */
#    define FP64 22
     va = VexArchMIPS32;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
         return False;

     vai.hwcaps = model;

     /* Same instruction set detection algorithm as for ppc32/arm... */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t   tmp_sigill_act;

     volatile Bool have_DSP, have_DSPr2;
     Int r;

     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     if (model == VEX_PRID_COMP_MIPS) {
        /* DSPr2 instructions. */
        have_DSPr2 = True;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_DSPr2 = False;
        } else {
           __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
        }
        if (have_DSPr2) {
           /* We assume it's 74K, since it can run DSPr2. */
           vai.hwcaps |= VEX_PRID_IMP_74K;
        } else {
           /* DSP instructions. */
           have_DSP = True;
           if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
              have_DSP = False;
           } else {
              __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
           }
           if (have_DSP) {
              /* We assume it's 34K, since it has support for DSP. */
              vai.hwcaps |= VEX_PRID_IMP_34K;
           }
        }
     }

     /* Check whether the CPU has an FPU and 32 double-precision FP
        registers. */
     Int FIR = 0;
     __asm__ __volatile__(
        "cfc1 %0, $0"  "\n\t"
        : "=r" (FIR)
     );
     if (FIR & (1 << FP64)) {
        vai.hwcaps |= VEX_PRID_CPU_32FPR;
     }
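     /* Worked example (values illustrative only): with FP64 == 22,
        (1 << FP64) == 0x00400000, so a FIR reading of, say,
        0x00770000 has the F64 bit set and VEX_PRID_CPU_32FPR is
        added, whereas a reading of 0x00330000 does not. */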

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_mips64)
   {
     va = VexArchMIPS64;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
         return False;

     vai.hwcaps = model;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc64)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}


/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   the minimum cache line size they are prepared to simulate.
   Basically we require that the minimum cache line size is at least
   as large as the largest register that might get transferred
   to/from memory, so as to guarantee that any such transaction can
   straddle at most 2 cache lines.
*/
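/* Worked example of the straddling rule: on an AVX-capable amd64
   host the largest transfer is 32 bytes.  With a simulated line size
   of 32 bytes a misaligned 32-byte access touches at most 2 lines,
   whereas with a 16-byte line it could touch 3.  Hence the tools
   must not simulate lines smaller than the value returned here. */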
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends on whether or not we have NEON, but let's
      just assume we always do. */
   return 16;

#  elif defined(VGA_arm64)
   /* ARM64 always has Neon, AFAICS. */
   return 16;

#  elif defined(VGA_mips32)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  elif defined(VGA_mips64)
   return 8;

#  else
#    error "Unknown arch"
#  endif
}


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
      || defined(VGP_arm_linux)                           \
      || defined(VGP_ppc32_linux) || defined(VGO_darwin)  \
      || defined(VGP_s390x_linux) || defined(VGP_mips32_linux) \
      || defined(VGP_mips64_linux) || defined(VGP_arm64_linux)
   return f;
#  elif defined(VGP_ppc64_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
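   /* Sketch of the descriptor layout (the struct name is hypothetical,
      for illustration only):
         struct Ppc64FnDescr { UWord entry; UWord toc; UWord env; };
      The code below is thus equivalent to
         return (void*)(((struct Ppc64FnDescr*)f)->entry);
   */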
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/
