/* m_machine.c revision c7ffc94890e120c72d5802487b1092425323dc02 */

/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2010 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}
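
/* Illustrative use (a sketch, not part of this file): the scheduler or
   a debugger-style agent can redirect a stopped thread by rewriting its
   guest program counter.  'new_target' is a hypothetical address. */
#if 0
   VG_(set_IP)( tid, new_target );
   /* the thread resumes execution at new_target when next scheduled */
#endif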

void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_r11;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_r14;
#  else
#    error "Unknown arch"
#  endif
}
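
/* Sketch of a typical caller (assumed, not defined in this file): the
   stack unwinder seeds itself from these registers.  Note that r_pc and
   r_sp are widened to ULong regardless of the guest word size. */
#if 0
   UnwindStartRegs startRegs;
   VG_(get_UnwindStartRegs)( &startRegs, tid );
   /* startRegs.r_pc / startRegs.r_sp now hold the guest PC and SP;
      startRegs.misc carries the extra per-arch unwind registers. */
#endif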


void VG_(set_syscall_return_shadows) ( ThreadId tid,
                                       /* shadow vals for the result */
                                       UWord s1res, UWord s2res,
                                       /* shadow vals for the error val */
                                       UWord s1err, UWord s2err )
{
#  if defined(VGP_x86_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_EAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_EAX = s2res;
#  elif defined(VGP_amd64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_RAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_RAX = s2res;
#  elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res;
#  elif defined(VGP_arm_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_R0 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_R0 = s2res;
#  elif defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
   VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res;
   VG_(threads)[tid].arch.vex_shadow1.guest_GPR4 = s1err;
   VG_(threads)[tid].arch.vex_shadow2.guest_GPR4 = s2err;
#  elif defined(VGO_darwin)
   // GrP fixme darwin syscalls may return more values (2 registers plus error)
#  elif defined(VGP_s390x_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
#  else
#    error "Unknown plat"
#  endif
}
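
/* Example (illustrative only): a memcheck-style tool whose shadow value
   zero means "fully defined" could mark a syscall result as defined in
   both shadow banks like this.  The call site shown is hypothetical. */
#if 0
   VG_(set_syscall_return_shadows)( tid, /*s1res*/0, /*s2res*/0,
                                         /*s1err*/0, /*s2err*/0 );
#endif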

void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}
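
/* Example (a sketch; the offsetof-based addressing is an assumption
   about how callers compute 'offset', not something defined here):
   fetching shadow #1 of the x86 EAX register, assuming an offsetof
   macro is available. */
#if 0
   UInt shadow_eax;
   VG_(get_shadow_regs_area)( tid, (UChar*)&shadow_eax, /*shadowNo*/1,
                              offsetof(VexGuestX86State, guest_EAX),
                              sizeof(shadow_eax) );
#endif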


static void apply_to_GPs_of_tid(VexGuestArchState* vex, void (*f)(Addr))
{
#if defined(VGA_x86)
   (*f)(vex->guest_EAX);
   (*f)(vex->guest_ECX);
   (*f)(vex->guest_EDX);
   (*f)(vex->guest_EBX);
   (*f)(vex->guest_ESI);
   (*f)(vex->guest_EDI);
   (*f)(vex->guest_ESP);
   (*f)(vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(vex->guest_RAX);
   (*f)(vex->guest_RCX);
   (*f)(vex->guest_RDX);
   (*f)(vex->guest_RBX);
   (*f)(vex->guest_RSI);
   (*f)(vex->guest_RDI);
   (*f)(vex->guest_RSP);
   (*f)(vex->guest_RBP);
   (*f)(vex->guest_R8);
   (*f)(vex->guest_R9);
   (*f)(vex->guest_R10);
   (*f)(vex->guest_R11);
   (*f)(vex->guest_R12);
   (*f)(vex->guest_R13);
   (*f)(vex->guest_R14);
   (*f)(vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64)
   (*f)(vex->guest_GPR0);
   (*f)(vex->guest_GPR1);
   (*f)(vex->guest_GPR2);
   (*f)(vex->guest_GPR3);
   (*f)(vex->guest_GPR4);
   (*f)(vex->guest_GPR5);
   (*f)(vex->guest_GPR6);
   (*f)(vex->guest_GPR7);
   (*f)(vex->guest_GPR8);
   (*f)(vex->guest_GPR9);
   (*f)(vex->guest_GPR10);
   (*f)(vex->guest_GPR11);
   (*f)(vex->guest_GPR12);
   (*f)(vex->guest_GPR13);
   (*f)(vex->guest_GPR14);
   (*f)(vex->guest_GPR15);
   (*f)(vex->guest_GPR16);
   (*f)(vex->guest_GPR17);
   (*f)(vex->guest_GPR18);
   (*f)(vex->guest_GPR19);
   (*f)(vex->guest_GPR20);
   (*f)(vex->guest_GPR21);
   (*f)(vex->guest_GPR22);
   (*f)(vex->guest_GPR23);
   (*f)(vex->guest_GPR24);
   (*f)(vex->guest_GPR25);
   (*f)(vex->guest_GPR26);
   (*f)(vex->guest_GPR27);
   (*f)(vex->guest_GPR28);
   (*f)(vex->guest_GPR29);
   (*f)(vex->guest_GPR30);
   (*f)(vex->guest_GPR31);
   (*f)(vex->guest_CTR);
   (*f)(vex->guest_LR);
#elif defined(VGA_arm)
   (*f)(vex->guest_R0);
   (*f)(vex->guest_R1);
   (*f)(vex->guest_R2);
   (*f)(vex->guest_R3);
   (*f)(vex->guest_R4);
   (*f)(vex->guest_R5);
   (*f)(vex->guest_R6);
   (*f)(vex->guest_R7);
   (*f)(vex->guest_R8);
   (*f)(vex->guest_R9);
   (*f)(vex->guest_R10);
   (*f)(vex->guest_R11);
   (*f)(vex->guest_R12);
   (*f)(vex->guest_R13);
   (*f)(vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(vex->guest_r0);
   (*f)(vex->guest_r1);
   (*f)(vex->guest_r2);
   (*f)(vex->guest_r3);
   (*f)(vex->guest_r4);
   (*f)(vex->guest_r5);
   (*f)(vex->guest_r6);
   (*f)(vex->guest_r7);
   (*f)(vex->guest_r8);
   (*f)(vex->guest_r9);
   (*f)(vex->guest_r10);
   (*f)(vex->guest_r11);
   (*f)(vex->guest_r12);
   (*f)(vex->guest_r13);
   (*f)(vex->guest_r14);
   (*f)(vex->guest_r15);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)) {
         ThreadState* tst = VG_(get_ThreadState)(tid);
         apply_to_GPs_of_tid(&(tst->arch.vex), f);
      }
   }
}
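
/* Example callback (hypothetical): a leak checker might pass something
   like this to VG_(apply_to_GP_regs) so that every guest GP register
   value is considered as a potential root pointer. */
#if 0
static void maybe_mark_root ( UWord regval )
{
   /* treat regval as a candidate pointer into the client heap */
}
/* ... and then:  VG_(apply_to_GP_regs)( maybe_mark_root ); */
#endif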

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_word;
         return True;
      }
   }
   return False;
}
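
/* Typical iteration over all live thread stacks (sketch): */
#if 0
   ThreadId tid;
   Addr stack_min, stack_max;
   VG_(thread_stack_reset_iter)( &tid );
   while ( VG_(thread_stack_next)( &tid, &stack_min, &stack_max ) ) {
      /* [stack_min, stack_max] spans tid's current stack */
   }
#endif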

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_word;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}
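
/* The two stack getters above relate as follows (a sketch; this is an
   approximation, since the stack may not be fully committed): */
#if 0
   Addr stack_min = VG_(thread_get_stack_max)(tid)
                    - VG_(thread_get_stack_size)(tid);
#endif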

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.

   Current dependencies are:

   x86:   initially:  call VG_(machine_get_hwcaps)

          then safe to use VG_(machine_get_VexArchInfo)
                       and VG_(machine_x86_have_mxcsr)
   -------------
   amd64: initially:  call VG_(machine_get_hwcaps)

          then safe to use VG_(machine_get_VexArchInfo)
   -------------
   ppc32: initially:  call VG_(machine_get_hwcaps)
                      call VG_(machine_ppc32_set_clszB)

          then safe to use VG_(machine_get_VexArchInfo)
                       and VG_(machine_ppc32_has_FP)
                       and VG_(machine_ppc32_has_VMX)
   -------------
   ppc64: initially:  call VG_(machine_get_hwcaps)
                      call VG_(machine_ppc64_set_clszB)

          then safe to use VG_(machine_get_VexArchInfo)
                       and VG_(machine_ppc64_has_VMX)

   -------------
   s390x: initially:  call VG_(machine_get_hwcaps)

          then safe to use VG_(machine_get_VexArchInfo)

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/
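
/* The dependencies above amount to the following startup order.  This
   is only a sketch: the real sequencing lives in m_main.c, and
   'clszB_from_auxv' is a hypothetical name for the cache line size
   obtained from the auxiliary vector. */
#if 0
   vg_assert( VG_(machine_get_hwcaps)() );
#  if defined(VGA_ppc32)
   VG_(machine_ppc32_set_clszB)( clszB_from_auxv );
#  endif
   /* only now is VG_(machine_get_VexArchInfo) safe to call */
#endif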

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif

/* fixs390: anything for s390x here ? */

/* For hwcaps detection on ppc32/64, arm, and s390x we'll need to do
   SIGILL testing, so we need a jmp_buf. */
#if defined(VGA_ppc32) || defined(VGA_ppc64) || defined(VGA_arm) || defined(VGA_s390x)
#include <setjmp.h> // For jmp_buf
static jmp_buf env_unsup_insn;
static void handler_unsup_insn ( Int x ) { __builtin_longjmp(env_unsup_insn,1); }
#endif
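
/* The probing pattern used throughout VG_(machine_get_hwcaps), in
   miniature (a sketch): with handler_unsup_insn installed for SIGILL,
   executing a candidate instruction either succeeds or longjmps back. */
#if 0
   volatile Bool have_insn = True;
   if (__builtin_setjmp(env_unsup_insn)) {
      have_insn = False;   /* insn raised SIGILL: not supported */
   } else {
      /* emit the candidate instruction here; on SIGILL the handler
         longjmps to the setjmp above */
   }
#endif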


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, that they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (__builtin_setjmp(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64) */


/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind. */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_cx8, have_lzcnt;
     UInt eax, ebx, ecx, edx, max_extended;
     UChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     if (have_sse2 && have_sse1) {
        va          = VexArchX86;
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
        return True;
     }

     if (have_sse1) {
        va          = VexArchX86;
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
        return True;
     }

     va         = VexArchX86;
     vai.hwcaps = 0; /*baseline - no sse at all*/
     VG_(machine_x86_have_mxcsr) = 0;
     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt;
     UInt eax, ebx, ecx, edx, max_extended;
     UChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3  is ecx:9
     // sse41  is ecx:19
     // sse42  is ecx:20

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     va         = VexArchAMD64;
     vai.hwcaps = (have_sse3 ? VEX_HWCAPS_AMD64_SSE3 : 0)
                  | (have_cx16 ? VEX_HWCAPS_AMD64_CX16 : 0)
                  | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0);
     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX, (Int)have_GX);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX, (Int)have_GX);
     /* on ppc64, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_s390x)
   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_EIMM, have_GIE, have_DFP;
     Int r;

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_LDISP = False;
     } else {
       /* BASR loads the address of the next insn into r1. Needed to avoid
          a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     have_EIMM = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_EIMM = False;
     } else {
        __asm__ __volatile__(".long  0xc0090000\n\t"  /* iilf r0,0 */
                             ".short 0x0000" : : : "r0", "memory");
     }

     have_GIE = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_GIE = False;
     } else {
        __asm__ __volatile__(".long  0xc2010000\n\t"  /* msfi r0,0 */
                             ".short 0x0000" : : : "r0", "memory");
     }

     have_DFP = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xb3d20000"
                               : : : "r0", "cc", "memory");  /* adtr r0,r0,r0 */
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "LDISP %d EIMM %d GIE %d DFP %d\n",
                   have_LDISP, have_EIMM, have_GIE, have_DFP);

     /* Check for long displacement facility which is required */
     if (! have_LDISP) return False;

     va = VexArchS390X;

     vai.hwcaps = 0;
     if (have_LDISP) vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     if (have_EIMM)  vai.hwcaps |= VEX_HWCAPS_S390X_EIMM;
     if (have_GIE)   vai.hwcaps |= VEX_HWCAPS_S390X_GIE;
     if (have_DFP)   vai.hwcaps |= VEX_HWCAPS_S390X_DFP;

     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all arm platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they differ
        very little, so for now we do not distinguish between them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (__builtin_setjmp(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (__builtin_setjmp(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (__builtin_setjmp(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
           archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
           (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host cpu cache line size. */
#if defined(VGA_ppc64)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}
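
/* Example use (sketch): querying the host once hwcaps detection has
   completed.  The locals shown are hypothetical. */
#if 0
   VexArch     host_va;
   VexArchInfo host_vai;
   VG_(machine_get_VexArchInfo)( &host_va, &host_vai );
   if (host_va == VexArchAMD64
       && (host_vai.hwcaps & VEX_HWCAPS_AMD64_SSE3)) {
      /* host can execute SSE3 */
   }
#endif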


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
    || defined(VGP_arm_linux)                           \
    || defined(VGP_ppc32_linux) || defined(VGO_darwin)  \
    || defined(VGP_s390x_linux)
   return f;
#elif defined(VGP_ppc64_linux) || defined(VGP_ppc32_aix5) \
                               || defined(VGP_ppc64_aix5)
   /* All other ppc variants use the AIX scheme, in which f is a
      pointer to a 3-word function descriptor, of which the first word
      is the entry address. */
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#else
#  error "Unknown platform"
#endif
}
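
/* On the ppc64/AIX-style platforms above, a C function "pointer" really
   points at a 3-word descriptor.  Illustrative layout (an assumption
   spelled out for clarity; the struct name is hypothetical): */
#if 0
typedef struct { UWord entry; UWord toc; UWord env; } FnDescr;
/* so VG_(fnptr_to_fnentry)(f) == (void*)(((FnDescr*)f)->entry) */
#endif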

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/