1/*--------------------------------------------------------------------*/
2/*--- Machine-related stuff.                           m_machine.c ---*/
3/*--------------------------------------------------------------------*/
4
5/*
6   This file is part of Valgrind, a dynamic binary instrumentation
7   framework.
8
9   Copyright (C) 2000-2011 Julian Seward
10      jseward@acm.org
11
12   This program is free software; you can redistribute it and/or
13   modify it under the terms of the GNU General Public License as
14   published by the Free Software Foundation; either version 2 of the
15   License, or (at your option) any later version.
16
17   This program is distributed in the hope that it will be useful, but
18   WITHOUT ANY WARRANTY; without even the implied warranty of
19   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20   General Public License for more details.
21
22   You should have received a copy of the GNU General Public License
23   along with this program; if not, write to the Free Software
24   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
25   02111-1307, USA.
26
27   The GNU General Public License is contained in the file COPYING.
28*/
29
30#include "pub_core_basics.h"
31#include "pub_core_vki.h"
32#include "pub_core_libcsetjmp.h"   // setjmp facilities
33#include "pub_core_threadstate.h"
34#include "pub_core_libcassert.h"
35#include "pub_core_libcbase.h"
36#include "pub_core_libcfile.h"
37#include "pub_core_mallocfree.h"
38#include "pub_core_machine.h"
39#include "pub_core_cpuid.h"
40#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
41#include "pub_core_debuglog.h"
42
43
/* Convenience accessors for a ThreadArchState's guest program
   counter, stack pointer and frame pointer.  VG_INSTR_PTR etc. are
   per-architecture guest register field names supplied by the
   machine headers. */
#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)
47
48Addr VG_(get_IP) ( ThreadId tid ) {
49   return INSTR_PTR( VG_(threads)[tid].arch );
50}
51Addr VG_(get_SP) ( ThreadId tid ) {
52   return STACK_PTR( VG_(threads)[tid].arch );
53}
54Addr VG_(get_FP) ( ThreadId tid ) {
55   return FRAME_PTR( VG_(threads)[tid].arch );
56}
57
58void VG_(set_IP) ( ThreadId tid, Addr ip ) {
59   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
60}
61void VG_(set_SP) ( ThreadId tid, Addr sp ) {
62   STACK_PTR( VG_(threads)[tid].arch ) = sp;
63}
64
/* Fill *regs with the register values a stack unwinder needs to start
   unwinding thread TID: the guest program counter, the guest stack
   pointer, and whatever extra per-architecture registers (frame
   pointer, link register, ...) the unwinder for that arch uses.
   Values are copied straight out of the thread's guest state. */
void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   /* R15T is the program counter (with Thumb bit); R13 is SP. */
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_r11;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_r14;
#  else
#    error "Unknown arch"
#  endif
}
110
111
/* Store shadow values for a syscall's return value into thread TID's
   shadow guest states (shadow1 gets s1res, shadow2 gets s2res), using
   each platform's syscall-result register.  NOTE(review): the error
   shadows s1err/s2err are accepted but not stored by any of the cases
   visible here -- presumably reserved for platforms whose syscall ABI
   carries a separate error register. */
void VG_(set_syscall_return_shadows) ( ThreadId tid,
                                       /* shadow vals for the result */
                                       UWord s1res, UWord s2res,
                                       /* shadow vals for the error val */
                                       UWord s1err, UWord s2err )
{
#  if defined(VGP_x86_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_EAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_EAX = s2res;
#  elif defined(VGP_amd64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_RAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_RAX = s2res;
#  elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res;
#  elif defined(VGP_arm_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_R0 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_R0 = s2res;
#  elif defined(VGO_darwin)
   // GrP fixme darwin syscalls may return more values (2 registers plus error)
#  elif defined(VGP_s390x_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
#  else
#    error "Unknown plat"
#  endif
}
139
140void
141VG_(get_shadow_regs_area) ( ThreadId tid,
142                            /*DST*/UChar* dst,
143                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
144{
145   void*        src;
146   ThreadState* tst;
147   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
148   vg_assert(VG_(is_valid_tid)(tid));
149   // Bounds check
150   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
151   vg_assert(offset + size <= sizeof(VexGuestArchState));
152   // Copy
153   tst = & VG_(threads)[tid];
154   src = NULL;
155   switch (shadowNo) {
156      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
157      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
158      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
159   }
160   tl_assert(src != NULL);
161   VG_(memcpy)( dst, src, size);
162}
163
164void
165VG_(set_shadow_regs_area) ( ThreadId tid,
166                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
167                            /*SRC*/const UChar* src )
168{
169   void*        dst;
170   ThreadState* tst;
171   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
172   vg_assert(VG_(is_valid_tid)(tid));
173   // Bounds check
174   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
175   vg_assert(offset + size <= sizeof(VexGuestArchState));
176   // Copy
177   tst = & VG_(threads)[tid];
178   dst = NULL;
179   switch (shadowNo) {
180      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
181      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
182      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
183   }
184   tl_assert(dst != NULL);
185   VG_(memcpy)( dst, src, size);
186}
187
188
189static void apply_to_GPs_of_tid(VexGuestArchState* vex, void (*f)(Addr))
190{
191#if defined(VGA_x86)
192   (*f)(vex->guest_EAX);
193   (*f)(vex->guest_ECX);
194   (*f)(vex->guest_EDX);
195   (*f)(vex->guest_EBX);
196   (*f)(vex->guest_ESI);
197   (*f)(vex->guest_EDI);
198   (*f)(vex->guest_ESP);
199   (*f)(vex->guest_EBP);
200#elif defined(VGA_amd64)
201   (*f)(vex->guest_RAX);
202   (*f)(vex->guest_RCX);
203   (*f)(vex->guest_RDX);
204   (*f)(vex->guest_RBX);
205   (*f)(vex->guest_RSI);
206   (*f)(vex->guest_RDI);
207   (*f)(vex->guest_RSP);
208   (*f)(vex->guest_RBP);
209   (*f)(vex->guest_R8);
210   (*f)(vex->guest_R9);
211   (*f)(vex->guest_R10);
212   (*f)(vex->guest_R11);
213   (*f)(vex->guest_R12);
214   (*f)(vex->guest_R13);
215   (*f)(vex->guest_R14);
216   (*f)(vex->guest_R15);
217#elif defined(VGA_ppc32) || defined(VGA_ppc64)
218   (*f)(vex->guest_GPR0);
219   (*f)(vex->guest_GPR1);
220   (*f)(vex->guest_GPR2);
221   (*f)(vex->guest_GPR3);
222   (*f)(vex->guest_GPR4);
223   (*f)(vex->guest_GPR5);
224   (*f)(vex->guest_GPR6);
225   (*f)(vex->guest_GPR7);
226   (*f)(vex->guest_GPR8);
227   (*f)(vex->guest_GPR9);
228   (*f)(vex->guest_GPR10);
229   (*f)(vex->guest_GPR11);
230   (*f)(vex->guest_GPR12);
231   (*f)(vex->guest_GPR13);
232   (*f)(vex->guest_GPR14);
233   (*f)(vex->guest_GPR15);
234   (*f)(vex->guest_GPR16);
235   (*f)(vex->guest_GPR17);
236   (*f)(vex->guest_GPR18);
237   (*f)(vex->guest_GPR19);
238   (*f)(vex->guest_GPR20);
239   (*f)(vex->guest_GPR21);
240   (*f)(vex->guest_GPR22);
241   (*f)(vex->guest_GPR23);
242   (*f)(vex->guest_GPR24);
243   (*f)(vex->guest_GPR25);
244   (*f)(vex->guest_GPR26);
245   (*f)(vex->guest_GPR27);
246   (*f)(vex->guest_GPR28);
247   (*f)(vex->guest_GPR29);
248   (*f)(vex->guest_GPR30);
249   (*f)(vex->guest_GPR31);
250   (*f)(vex->guest_CTR);
251   (*f)(vex->guest_LR);
252#elif defined(VGA_arm)
253   (*f)(vex->guest_R0);
254   (*f)(vex->guest_R1);
255   (*f)(vex->guest_R2);
256   (*f)(vex->guest_R3);
257   (*f)(vex->guest_R4);
258   (*f)(vex->guest_R5);
259   (*f)(vex->guest_R6);
260   (*f)(vex->guest_R8);
261   (*f)(vex->guest_R9);
262   (*f)(vex->guest_R10);
263   (*f)(vex->guest_R11);
264   (*f)(vex->guest_R12);
265   (*f)(vex->guest_R13);
266   (*f)(vex->guest_R14);
267#elif defined(VGA_s390x)
268   (*f)(vex->guest_r0);
269   (*f)(vex->guest_r1);
270   (*f)(vex->guest_r2);
271   (*f)(vex->guest_r3);
272   (*f)(vex->guest_r4);
273   (*f)(vex->guest_r5);
274   (*f)(vex->guest_r6);
275   (*f)(vex->guest_r7);
276   (*f)(vex->guest_r8);
277   (*f)(vex->guest_r9);
278   (*f)(vex->guest_r10);
279   (*f)(vex->guest_r11);
280   (*f)(vex->guest_r12);
281   (*f)(vex->guest_r13);
282   (*f)(vex->guest_r14);
283   (*f)(vex->guest_r15);
284#else
285#  error Unknown arch
286#endif
287}
288
289
290void VG_(apply_to_GP_regs)(void (*f)(UWord))
291{
292   ThreadId tid;
293
294   for (tid = 1; tid < VG_N_THREADS; tid++) {
295      if (VG_(is_valid_tid)(tid)) {
296         ThreadState* tst = VG_(get_ThreadState)(tid);
297         apply_to_GPs_of_tid(&(tst->arch.vex), f);
298      }
299   }
300}
301
/* Initialise an iterator for VG_(thread_stack_next): set *tid to -1
   so that the first _next call starts scanning from thread 0
   ((ThreadId)(-1) + 1 == 0). */
void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}
306
307Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
308                            /*OUT*/Addr* stack_min,
309                            /*OUT*/Addr* stack_max)
310{
311   ThreadId i;
312   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
313      if (i == VG_INVALID_THREADID)
314         continue;
315      if (VG_(threads)[i].status != VgTs_Empty) {
316         *tid       = i;
317         *stack_min = VG_(get_SP)(i);
318         *stack_max = VG_(threads)[i].client_stack_highest_word;
319         return True;
320      }
321   }
322   return False;
323}
324
325Addr VG_(thread_get_stack_max)(ThreadId tid)
326{
327   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
328   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
329   return VG_(threads)[tid].client_stack_highest_word;
330}
331
332SizeT VG_(thread_get_stack_size)(ThreadId tid)
333{
334   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
335   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
336   return VG_(threads)[tid].client_stack_szB;
337}
338
339Addr VG_(thread_get_altstack_min)(ThreadId tid)
340{
341   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
342   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
343   return (Addr)VG_(threads)[tid].altstack.ss_sp;
344}
345
346SizeT VG_(thread_get_altstack_size)(ThreadId tid)
347{
348   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
349   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
350   return VG_(threads)[tid].altstack.ss_size;
351}
352
353//-------------------------------------------------------------
354/* Details about the capabilities of the underlying (host) CPU.  These
355   details are acquired by (1) enquiring with the CPU at startup, or
356   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
357   line size).  It's a bit nasty in the sense that there's no obvious
358   way to stop uses of some of this info before it's ready to go.
359
360   Current dependencies are:
361
362   x86:   initially:  call VG_(machine_get_hwcaps)
363
364          then safe to use VG_(machine_get_VexArchInfo)
365                       and VG_(machine_x86_have_mxcsr)
366   -------------
367   amd64: initially:  call VG_(machine_get_hwcaps)
368
369          then safe to use VG_(machine_get_VexArchInfo)
370   -------------
371   ppc32: initially:  call VG_(machine_get_hwcaps)
372                      call VG_(machine_ppc32_set_clszB)
373
374          then safe to use VG_(machine_get_VexArchInfo)
375                       and VG_(machine_ppc32_has_FP)
376                       and VG_(machine_ppc32_has_VMX)
377   -------------
378   ppc64: initially:  call VG_(machine_get_hwcaps)
379                      call VG_(machine_ppc64_set_clszB)
380
381          then safe to use VG_(machine_get_VexArchInfo)
382                       and VG_(machine_ppc64_has_VMX)
383
384   -------------
385   s390x: initially:  call VG_(machine_get_hwcaps)
386
387          then safe to use VG_(machine_get_VexArchInfo)
388
389   VG_(machine_get_hwcaps) may use signals (although it attempts to
390   leave signal state unchanged) and therefore should only be
391   called before m_main sets up the client's signal state.
392*/
393
394/* --------- State --------- */
395static Bool hwcaps_done = False;
396
397/* --- all archs --- */
398static VexArch     va;
399static VexArchInfo vai;
400
401#if defined(VGA_x86)
402UInt VG_(machine_x86_have_mxcsr) = 0;
403#endif
404#if defined(VGA_ppc32)
405UInt VG_(machine_ppc32_has_FP)  = 0;
406UInt VG_(machine_ppc32_has_VMX) = 0;
407#endif
408#if defined(VGA_ppc64)
409ULong VG_(machine_ppc64_has_VMX) = 0;
410#endif
411#if defined(VGA_arm)
412Int VG_(machine_arm_archlevel) = 4;
413#endif
414
415/* fixs390: anything for s390x here ? */
416
417/* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
418   testing, so we need a VG_MINIMAL_JMP_BUF. */
419#if defined(VGA_ppc32) || defined(VGA_ppc64) \
420    || defined(VGA_arm) || defined(VGA_s390x)
421#include "pub_tool_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
/* SIGILL/SIGFPE handler used during instruction-set probing: the
   probed instruction was unsupported, so longjmp back to the probe
   site.  The signal number X is ignored. */
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
426#endif
427
428
429/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
431 * and dcbzl instructions and updates the given VexArchInfo structure
432 * accordingly.
433 *
434 * Not very defensive: assumes that as long as the dcbz/dcbzl
435 * instructions don't raise a SIGILL, that they will zero an aligned,
436 * contiguous block of memory of a sensible size. */
437#if defined(VGA_ppc32) || defined(VGA_ppc64)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   /* 4x the max block size so that, after rounding the pointer up to
      a MAX_DCBZL_SZB boundary, a full aligned block still fits. */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   /* The cleared size is inferred by counting the bytes dcbz zeroed
      in the 0xff-filled buffer. */
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
490#endif /* defined(VGA_ppc32) || defined(VGA_ppc64) */
491
492#ifdef VGA_s390x
493
494/* Read /proc/cpuinfo. Look for lines like these
495
496   processor 0: version = FF,  identification = 0117C9,  machine = 2064
497
498   and return the machine model or VEX_S390X_MODEL_INVALID on error. */
499
500static UInt VG_(get_machine_model)(void)
501{
502   static struct model_map {
503      HChar name[5];
504      UInt  id;
505   } model_map[] = {
506      { "2064", VEX_S390X_MODEL_Z900 },
507      { "2066", VEX_S390X_MODEL_Z800 },
508      { "2084", VEX_S390X_MODEL_Z990 },
509      { "2086", VEX_S390X_MODEL_Z890 },
510      { "2094", VEX_S390X_MODEL_Z9_EC },
511      { "2096", VEX_S390X_MODEL_Z9_BC },
512      { "2097", VEX_S390X_MODEL_Z10_EC },
513      { "2098", VEX_S390X_MODEL_Z10_BC },
514      { "2817", VEX_S390X_MODEL_Z196 },
515      { "2818", VEX_S390X_MODEL_Z114 },
516   };
517
518   Int    model, n, fh;
519   SysRes fd;
520   SizeT  num_bytes, file_buf_size;
521   HChar *p, *m, *model_name, *file_buf;
522
523   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
524   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
525   if ( sr_isError(fd) ) return VEX_S390X_MODEL_INVALID;
526
527   fh  = sr_Res(fd);
528
529   /* Determine the size of /proc/cpuinfo.
530      Work around broken-ness in /proc file system implementation.
531      fstat returns a zero size for /proc/cpuinfo although it is
532      claimed to be a regular file. */
533   num_bytes = 0;
534   file_buf_size = 1000;
535   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
536   while (42) {
537      n = VG_(read)(fh, file_buf, file_buf_size);
538      if (n < 0) break;
539
540      num_bytes += n;
541      if (n < file_buf_size) break;  /* reached EOF */
542   }
543
544   if (n < 0) num_bytes = 0;   /* read error; ignore contents */
545
546   if (num_bytes > file_buf_size) {
547      VG_(free)( file_buf );
548      VG_(lseek)( fh, 0, VKI_SEEK_SET );
549      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
550      n = VG_(read)( fh, file_buf, num_bytes );
551      if (n < 0) num_bytes = 0;
552   }
553
554   file_buf[num_bytes] = '\0';
555   VG_(close)(fh);
556
557   /* Parse file */
558   model = VEX_S390X_MODEL_INVALID;
559   for (p = file_buf; *p; ++p) {
560      /* Beginning of line */
561     if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;
562
563     m = VG_(strstr)( p, "machine" );
564     if (m == NULL) continue;
565
566     p = m + sizeof "machine" - 1;
567     while ( VG_(isspace)( *p ) || *p == '=') {
568       if (*p == '\n') goto next_line;
569       ++p;
570     }
571
572     model_name = p;
573     for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
574       struct model_map *mm = model_map + n;
575       SizeT len = VG_(strlen)( mm->name );
576       if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
577            VG_(isspace)( model_name[len] )) {
578         if (mm->id < model) model = mm->id;
579         p = model_name + len;
580         break;
581       }
582     }
583     /* Skip until end-of-line */
584     while (*p != '\n')
585       ++p;
586   next_line: ;
587   }
588
589   VG_(free)( file_buf );
590   VG_(debugLog)(1, "machine", "model = %s\n", model_map[model].name);
591
592   return model;
593}
594
595#endif /* VGA_s390x */
596
597/* Determine what insn set and insn set variant the host has, and
598   record it.  To be called once at system startup.  Returns False if
599   this a CPU incapable of running Valgrind. */
600
601Bool VG_(machine_get_hwcaps)( void )
602{
603   vg_assert(hwcaps_done == False);
604   hwcaps_done = True;
605
606   // Whack default settings into vai, so that we only need to fill in
607   // any interesting bits.
608   LibVEX_default_VexArchInfo(&vai);
609
610#if defined(VGA_x86)
611   { Bool have_sse1, have_sse2, have_cx8, have_lzcnt;
612     UInt eax, ebx, ecx, edx, max_extended;
613     UChar vstr[13];
614     vstr[0] = 0;
615
616     if (!VG_(has_cpuid)())
617        /* we can't do cpuid at all.  Give up. */
618        return False;
619
620     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
621     if (eax < 1)
622        /* we can't ask for cpuid(x) for x > 0.  Give up. */
623        return False;
624
625     /* Get processor ID string, and max basic/extended index
626        values. */
627     VG_(memcpy)(&vstr[0], &ebx, 4);
628     VG_(memcpy)(&vstr[4], &edx, 4);
629     VG_(memcpy)(&vstr[8], &ecx, 4);
630     vstr[12] = 0;
631
632     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
633     max_extended = eax;
634
635     /* get capabilities bits into edx */
636     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);
637
638     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
639     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
640
641     /* cmpxchg8b is a minimum requirement now; if we don't have it we
642        must simply give up.  But all CPUs since Pentium-I have it, so
643        that doesn't seem like much of a restriction. */
644     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
645     if (!have_cx8)
646        return False;
647
648     /* Figure out if this is an AMD that can do LZCNT. */
649     have_lzcnt = False;
650     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
651         && max_extended >= 0x80000001) {
652        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
653        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
654     }
655
656     if (have_sse2 && have_sse1) {
657        va          = VexArchX86;
658        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
659        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
660        if (have_lzcnt)
661           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
662        VG_(machine_x86_have_mxcsr) = 1;
663        return True;
664     }
665
666     if (have_sse1) {
667        va          = VexArchX86;
668        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
669        VG_(machine_x86_have_mxcsr) = 1;
670        return True;
671     }
672
673     va         = VexArchX86;
674     vai.hwcaps = 0; /*baseline - no sse at all*/
675     VG_(machine_x86_have_mxcsr) = 0;
676     return True;
677   }
678
679#elif defined(VGA_amd64)
680   { Bool have_sse3, have_cx8, have_cx16;
681     Bool have_lzcnt;
682     UInt eax, ebx, ecx, edx, max_extended;
683     UChar vstr[13];
684     vstr[0] = 0;
685
686     if (!VG_(has_cpuid)())
687        /* we can't do cpuid at all.  Give up. */
688        return False;
689
690     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
691     if (eax < 1)
692        /* we can't ask for cpuid(x) for x > 0.  Give up. */
693        return False;
694
695     /* Get processor ID string, and max basic/extended index
696        values. */
697     VG_(memcpy)(&vstr[0], &ebx, 4);
698     VG_(memcpy)(&vstr[4], &edx, 4);
699     VG_(memcpy)(&vstr[8], &ecx, 4);
700     vstr[12] = 0;
701
702     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
703     max_extended = eax;
704
705     /* get capabilities bits into edx */
706     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);
707
708     // we assume that SSE1 and SSE2 are available by default
709     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
710     // ssse3  is ecx:9
711     // sse41  is ecx:19
712     // sse42  is ecx:20
713
714     /* cmpxchg8b is a minimum requirement now; if we don't have it we
715        must simply give up.  But all CPUs since Pentium-I have it, so
716        that doesn't seem like much of a restriction. */
717     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
718     if (!have_cx8)
719        return False;
720
721     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
722     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */
723
724     /* Figure out if this is an AMD that can do LZCNT. */
725     have_lzcnt = False;
726     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
727         && max_extended >= 0x80000001) {
728        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
729        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
730     }
731
732     va         = VexArchAMD64;
733     vai.hwcaps = (have_sse3 ? VEX_HWCAPS_AMD64_SSE3 : 0)
734                  | (have_cx16 ? VEX_HWCAPS_AMD64_CX16 : 0)
735                  | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0);
736     return True;
737   }
738
739#elif defined(VGA_ppc32)
740   {
741     /* Find out which subset of the ppc32 instruction set is supported by
742        verifying whether various ppc32 instructions generate a SIGILL
743        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
744        AT_PLATFORM entries in the ELF auxiliary table -- see also
745        the_iifii.client_auxv in m_main.c.
746      */
747     vki_sigset_t          saved_set, tmp_set;
748     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
749     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;
750
751     volatile Bool have_F, have_V, have_FX, have_GX, have_VX;
752     Int r;
753
754     /* This is a kludge.  Really we ought to back-convert saved_act
755        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
756        since that's a no-op on all ppc32 platforms so far supported,
757        it's not worth the typing effort.  At least include most basic
758        sanity check: */
759     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
760
761     VG_(sigemptyset)(&tmp_set);
762     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
763     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
764
765     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
766     vg_assert(r == 0);
767
768     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
769     vg_assert(r == 0);
770     tmp_sigill_act = saved_sigill_act;
771
772     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
773     vg_assert(r == 0);
774     tmp_sigfpe_act = saved_sigfpe_act;
775
776     /* NODEFER: signal handler does not return (from the kernel's point of
777        view), hence if it is to successfully catch a signal more than once,
778        we need the NODEFER flag. */
779     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
780     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
781     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
782     tmp_sigill_act.ksa_handler = handler_unsup_insn;
783     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
784     vg_assert(r == 0);
785
786     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
787     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
788     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
789     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
790     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
791     vg_assert(r == 0);
792
793     /* standard FP insns */
794     have_F = True;
795     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
796        have_F = False;
797     } else {
798        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
799     }
800
801     /* Altivec insns */
802     have_V = True;
803     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
804        have_V = False;
805     } else {
806        /* Unfortunately some older assemblers don't speak Altivec (or
807           choose not to), so to be safe we directly emit the 32-bit
808           word corresponding to "vor 0,0,0".  This fixes a build
809           problem that happens on Debian 3.1 (ppc32), and probably
810           various other places. */
811        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
812     }
813
814     /* General-Purpose optional (fsqrt, fsqrts) */
815     have_FX = True;
816     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
817        have_FX = False;
818     } else {
819        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
820     }
821
822     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
823     have_GX = True;
824     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
825        have_GX = False;
826     } else {
827        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
828     }
829
830     /* VSX support implies Power ISA 2.06 */
831     have_VX = True;
832     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
833        have_VX = False;
834     } else {
835        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
836     }
837
838
839     /* determine dcbz/dcbzl sizes while we still have the signal
840      * handlers registered */
841     find_ppc_dcbz_sz(&vai);
842
843     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
844     vg_assert(r == 0);
845     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
846     vg_assert(r == 0);
847     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
848     vg_assert(r == 0);
849     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d\n",
850                    (Int)have_F, (Int)have_V, (Int)have_FX,
851                    (Int)have_GX, (Int)have_VX);
852     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
853     if (have_V && !have_F)
854        have_V = False;
855     if (have_FX && !have_F)
856        have_FX = False;
857     if (have_GX && !have_F)
858        have_GX = False;
859
860     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
861     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;
862
863     va = VexArchPPC32;
864
865     vai.hwcaps = 0;
866     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
867     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
868     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
869     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
870     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
871
872     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
873        called before we're ready to go. */
874     return True;
875   }
876
877#elif defined(VGA_ppc64)
878   {
879     /* Same instruction set detection algorithm as for ppc32. */
880     vki_sigset_t          saved_set, tmp_set;
881     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
882     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;
883
884     volatile Bool have_F, have_V, have_FX, have_GX, have_VX;
885     Int r;
886
887     /* This is a kludge.  Really we ought to back-convert saved_act
888        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
889        since that's a no-op on all ppc64 platforms so far supported,
890        it's not worth the typing effort.  At least include most basic
891        sanity check: */
892     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
893
894     VG_(sigemptyset)(&tmp_set);
895     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
896     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
897
898     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
899     vg_assert(r == 0);
900
901     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
902     vg_assert(r == 0);
903     tmp_sigill_act = saved_sigill_act;
904
905     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
906     tmp_sigfpe_act = saved_sigfpe_act;
907
908     /* NODEFER: signal handler does not return (from the kernel's point of
909        view), hence if it is to successfully catch a signal more than once,
910        we need the NODEFER flag. */
911     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
912     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
913     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
914     tmp_sigill_act.ksa_handler = handler_unsup_insn;
915     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
916
917     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
918     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
919     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
920     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
921     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
922
923     /* standard FP insns */
924     have_F = True;
925     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
926        have_F = False;
927     } else {
928        __asm__ __volatile__("fmr 0,0");
929     }
930
931     /* Altivec insns */
932     have_V = True;
933     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
934        have_V = False;
935     } else {
936        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
937     }
938
939     /* General-Purpose optional (fsqrt, fsqrts) */
940     have_FX = True;
941     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
942        have_FX = False;
943     } else {
944        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
945     }
946
947     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
948     have_GX = True;
949     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
950        have_GX = False;
951     } else {
952        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
953     }
954
955     /* VSX support implies Power ISA 2.06 */
956     have_VX = True;
957     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
958        have_VX = False;
959     } else {
960        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
961     }
962
963     /* determine dcbz/dcbzl sizes while we still have the signal
964      * handlers registered */
965     find_ppc_dcbz_sz(&vai);
966
967     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
968     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
969     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
970     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d\n",
971                    (Int)have_F, (Int)have_V, (Int)have_FX,
972                    (Int)have_GX, (Int)have_VX);
973     /* on ppc64, if we don't even have FP, just give up. */
974     if (!have_F)
975        return False;
976
977     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;
978
979     va = VexArchPPC64;
980
981     vai.hwcaps = 0;
982     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
983     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
984     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
985     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
986
987     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
988        called before we're ready to go. */
989     return True;
990   }
991
992#elif defined(VGA_s390x)
993   {
994     /* Instruction set detection code borrowed from ppc above. */
995     vki_sigset_t          saved_set, tmp_set;
996     vki_sigaction_fromK_t saved_sigill_act;
997     vki_sigaction_toK_t     tmp_sigill_act;
998
999     volatile Bool have_LDISP, have_EIMM, have_GIE, have_DFP, have_FGX;
1000     Int r, model;
1001
1002     /* Unblock SIGILL and stash away the old action for that signal */
1003     VG_(sigemptyset)(&tmp_set);
1004     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1005
1006     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1007     vg_assert(r == 0);
1008
1009     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1010     vg_assert(r == 0);
1011     tmp_sigill_act = saved_sigill_act;
1012
1013     /* NODEFER: signal handler does not return (from the kernel's point of
1014        view), hence if it is to successfully catch a signal more than once,
1015        we need the NODEFER flag. */
1016     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1017     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1018     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
1019     tmp_sigill_act.ksa_handler = handler_unsup_insn;
1020     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1021
1022     /* Determine hwcaps. Note, we cannot use the stfle insn because it
1023        is not supported on z900. */
1024
1025     have_LDISP = True;
1026     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1027        have_LDISP = False;
1028     } else {
1029       /* BASR loads the address of the next insn into r1. Needed to avoid
1030          a segfault in XY. */
1031        __asm__ __volatile__("basr %%r1,%%r0\n\t"
1032                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
1033                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
1034     }
1035
1036     have_EIMM = True;
1037     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1038        have_EIMM = False;
1039     } else {
1040        __asm__ __volatile__(".long  0xc0090000\n\t"  /* iilf r0,0 */
1041                             ".short 0x0000" : : : "r0", "memory");
1042     }
1043
1044     have_GIE = True;
1045     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1046        have_GIE = False;
1047     } else {
1048        __asm__ __volatile__(".long  0xc2010000\n\t"  /* msfi r0,0 */
1049                             ".short 0x0000" : : : "r0", "memory");
1050     }
1051
1052     have_DFP = True;
1053     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1054        have_DFP = False;
1055     } else {
1056        __asm__ __volatile__(".long 0xb3d20000"
1057                               : : : "r0", "cc", "memory");  /* adtr r0,r0,r0 */
1058     }
1059
1060     have_FGX = True;
1061     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1062        have_FGX = False;
1063     } else {
1064        __asm__ __volatile__(".long 0xb3cd0000" : : : "r0");  /* lgdr r0,f0 */
1065     }
1066
1067     /* Restore signals */
1068     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
1069     vg_assert(r == 0);
1070     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1071     vg_assert(r == 0);
1072     va = VexArchS390X;
1073
1074     model = VG_(get_machine_model)();
1075
1076     VG_(debugLog)(1, "machine", "machine %d  LDISP %d EIMM %d GIE %d DFP %d "
1077                   "FGX %d\n", model, have_LDISP, have_EIMM, have_GIE,
1078                   have_DFP, have_FGX);
1079
1080     if (model == VEX_S390X_MODEL_INVALID) return False;
1081
1082     vai.hwcaps = model;
1083     if (have_LDISP) {
1084        /* Use long displacement only on machines >= z990. For all other machines
1085           it is millicoded and therefore slow. */
1086        if (model >= VEX_S390X_MODEL_Z990)
1087           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
1088     }
1089     if (have_EIMM)  vai.hwcaps |= VEX_HWCAPS_S390X_EIMM;
1090     if (have_GIE)   vai.hwcaps |= VEX_HWCAPS_S390X_GIE;
1091     if (have_DFP)   vai.hwcaps |= VEX_HWCAPS_S390X_DFP;
1092     if (have_FGX)   vai.hwcaps |= VEX_HWCAPS_S390X_FGX;
1093
1094     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
1095
1096     return True;
1097   }
1098
1099#elif defined(VGA_arm)
1100   {
1101     /* Same instruction set detection algorithm as for ppc32. */
1102     vki_sigset_t          saved_set, tmp_set;
1103     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
1104     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;
1105
1106     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
1107     volatile Int archlevel;
1108     Int r;
1109
     /* Unlike the ppc cases above, the restore path below properly
        back-converts saved_*_act into toK_t structs using
        VG_(convert_sigaction_fromK_to_toK).  Still include the most
        basic sanity check on the two layouts: */
1115     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));
1116
1117     VG_(sigemptyset)(&tmp_set);
1118     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
1119     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);
1120
1121     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
1122     vg_assert(r == 0);
1123
1124     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
1125     vg_assert(r == 0);
1126     tmp_sigill_act = saved_sigill_act;
1127
1128     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
1129     tmp_sigfpe_act = saved_sigfpe_act;
1130
1131     /* NODEFER: signal handler does not return (from the kernel's point of
1132        view), hence if it is to successfully catch a signal more than once,
1133        we need the NODEFER flag. */
1134     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
1135     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
1136     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
1137     tmp_sigill_act.ksa_handler = handler_unsup_insn;
1138     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1139
1140     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
1141     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
1142     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
1143     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
1144     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1145
1146     /* VFP insns */
1147     have_VFP = True;
1148     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1149        have_VFP = False;
1150     } else {
1151        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
1152     }
     /* There are several generations of the VFP extension but they
        differ very little, so for now we do not distinguish them. */
1155     have_VFP2 = have_VFP;
1156     have_VFP3 = have_VFP;
1157
1158     /* NEON insns */
1159     have_NEON = True;
1160     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1161        have_NEON = False;
1162     } else {
1163        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
1164     }
1165
1166     /* ARM architecture level */
1167     archlevel = 5; /* v5 will be base level */
1168     if (archlevel < 7) {
1169        archlevel = 7;
1170        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1171           archlevel = 5;
1172        } else {
1173           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
1174        }
1175     }
1176     if (archlevel < 6) {
1177        archlevel = 6;
1178        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
1179           archlevel = 5;
1180        } else {
1181           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
1182        }
1183     }
1184
1185     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
1186     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
1187     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
1188     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
1189     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
1190
1191     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
1192           archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
1193           (Int)have_NEON);
1194
1195     VG_(machine_arm_archlevel) = archlevel;
1196
1197     va = VexArchARM;
1198
1199     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
1200     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
1201     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
1202     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
1203     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
1204
1205     return True;
1206   }
1207
1208#else
1209#  error "Unknown arch"
1210#endif
1211}
1212
1213/* Notify host cpu cache line size. */
1214#if defined(VGA_ppc32)
1215void VG_(machine_ppc32_set_clszB)( Int szB )
1216{
1217   vg_assert(hwcaps_done);
1218
1219   /* Either the value must not have been set yet (zero) or we can
1220      tolerate it being set to the same value multiple times, as the
1221      stack scanning logic in m_main is a bit stupid. */
1222   vg_assert(vai.ppc_cache_line_szB == 0
1223             || vai.ppc_cache_line_szB == szB);
1224
1225   vg_assert(szB == 32 || szB == 64 || szB == 128);
1226   vai.ppc_cache_line_szB = szB;
1227}
1228#endif
1229
1230
1231/* Notify host cpu cache line size. */
1232#if defined(VGA_ppc64)
1233void VG_(machine_ppc64_set_clszB)( Int szB )
1234{
1235   vg_assert(hwcaps_done);
1236
1237   /* Either the value must not have been set yet (zero) or we can
1238      tolerate it being set to the same value multiple times, as the
1239      stack scanning logic in m_main is a bit stupid. */
1240   vg_assert(vai.ppc_cache_line_szB == 0
1241             || vai.ppc_cache_line_szB == szB);
1242
1243   vg_assert(szB == 32 || szB == 64 || szB == 128);
1244   vai.ppc_cache_line_szB = szB;
1245}
1246#endif
1247
1248
1249/* Notify host's ability to handle NEON instructions. */
1250#if defined(VGA_arm)
1251void VG_(machine_arm_set_has_NEON)( Bool has_neon )
1252{
1253   vg_assert(hwcaps_done);
1254   /* There's nothing else we can sanity check. */
1255
1256   if (has_neon) {
1257      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
1258   } else {
1259      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
1260   }
1261}
1262#endif
1263
1264
1265/* Fetch host cpu info, once established. */
1266void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
1267                                   /*OUT*/VexArchInfo* pVai )
1268{
1269   vg_assert(hwcaps_done);
1270   if (pVa)  *pVa  = va;
1271   if (pVai) *pVai = vai;
1272}
1273
1274
1275// Given a pointer to a function as obtained by "& functionname" in C,
1276// produce a pointer to the actual entry point for the function.
1277void* VG_(fnptr_to_fnentry)( void* f )
1278{
1279#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
1280      || defined(VGP_arm_linux)                           \
1281      || defined(VGP_ppc32_linux) || defined(VGO_darwin)  \
1282      || defined(VGP_s390x_linux)
1283   return f;
1284#  elif defined(VGP_ppc64_linux)
1285   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
1286      3-word function descriptor, of which the first word is the entry
1287      address. */
1288   UWord* descr = (UWord*)f;
1289   return (void*)(descr[0]);
1290#  else
1291#    error "Unknown platform"
1292#  endif
1293}
1294
1295/*--------------------------------------------------------------------*/
1296/*--- end                                                          ---*/
1297/*--------------------------------------------------------------------*/
1298