m_machine.c revision 68aba524d0de0532c29e60f7b8140e6e39fbac30
/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2011 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_libcsetjmp.h"   // setjmp facilities
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}

void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_r11;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_r14;
#  else
#    error "Unknown arch"
#  endif
}
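
/* Illustrative sketch, not part of the original file: how a caller
   might seed a stack walk from these registers on an x86 host.  The
   function name 'start_unwind_for_tid' is made up for illustration;
   the field names match the VGA_x86 case above. */
#if 0
static void start_unwind_for_tid ( ThreadId tid )
{
   UnwindStartRegs srs;
   VG_(get_UnwindStartRegs)( &srs, tid );
   /* srs.r_pc, srs.r_sp and srs.misc.X86.r_ebp now hold the guest's
      EIP, ESP and EBP respectively, ready to seed an unwinder. */
}
#endif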


void VG_(set_syscall_return_shadows) ( ThreadId tid,
                                       /* shadow vals for the result */
                                       UWord s1res, UWord s2res,
                                       /* shadow vals for the error val */
                                       UWord s1err, UWord s2err )
{
#  if defined(VGP_x86_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_EAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_EAX = s2res;
#  elif defined(VGP_amd64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_RAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_RAX = s2res;
#  elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res;
#  elif defined(VGP_arm_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_R0 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_R0 = s2res;
#  elif defined(VGO_darwin)
   // GrP fixme darwin syscalls may return more values (2 registers plus error)
#  elif defined(VGP_s390x_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
#  else
#    error "Unknown plat"
#  endif
}
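
/* Illustrative sketch, not original code: a syscall post-handler
   might use this to mark the result register's shadow state after a
   syscall completes.  The function name and the "zero shadow bits
   mean fully defined" encoding are assumptions about the tool, not
   facts about this interface.  Note that no case above currently
   uses s1err/s2err. */
#if 0
static void mark_syscall_retval_defined ( ThreadId tid )
{
   VG_(set_syscall_return_shadows)( tid,
                                    0, 0,   /* shadow1/2 for result */
                                    0, 0 ); /* shadow1/2 for error */
}
#endif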

void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}
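
/* Illustrative sketch, not original code: a tool could round-trip the
   shadow1 state of a 4-byte guest register through these two calls as
   below.  'roundtrip_shadow1' and the offset parameter 'o' are made-up
   names; 'o' would typically be offsetof(VexGuestArchState, guest_XXX)
   for the register of interest, and must satisfy the bounds checks
   asserted above. */
#if 0
static void roundtrip_shadow1 ( ThreadId tid, PtrdiffT o )
{
   UChar buf[4];
   VG_(get_shadow_regs_area)( tid, buf, 1/*shadowNo*/, o, 4 );
   /* ... inspect or adjust buf here ... */
   VG_(set_shadow_regs_area)( tid, 1/*shadowNo*/, o, 4, buf );
}
#endif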


static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId, HChar*, UWord))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R7" , vex->guest_R7 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(ThreadId, HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)) {
         apply_to_GPs_of_tid(tid, f);
      }
   }
}
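
/* Illustrative sketch, not original code: dumping every live thread's
   integer registers with a callback.  'print_gp' and
   'print_all_gp_regs' are made-up names. */
#if 0
static void print_gp ( ThreadId tid, HChar* name, UWord val )
{
   VG_(debugLog)(1, "machine", "tid %d: %s = 0x%lx\n",
                 (Int)tid, name, val);
}
static void print_all_gp_regs ( void )
{
   VG_(apply_to_GP_regs)( print_gp );
}
#endif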

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_word;
         return True;
      }
   }
   return False;
}
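
/* Illustrative sketch, not original code: the intended use of the
   iterator pair above, which visits every non-empty thread slot
   exactly once.  'show_client_stacks' is a made-up name. */
#if 0
static void show_client_stacks ( void )
{
   ThreadId tid;
   Addr stack_min, stack_max;
   VG_(thread_stack_reset_iter)( &tid );
   while (VG_(thread_stack_next)( &tid, &stack_min, &stack_max )) {
      /* [stack_min, stack_max] brackets tid's client stack */
      VG_(debugLog)(1, "machine", "tid %d stack 0x%lx .. 0x%lx\n",
                    (Int)tid, stack_min, stack_max);
   }
}
#endif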

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_word;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}
//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.

   Current dependencies are:

   x86:   initially:  call VG_(machine_get_hwcaps)

          then safe to use VG_(machine_get_VexArchInfo)
                       and VG_(machine_x86_have_mxcsr)
   -------------
   amd64: initially:  call VG_(machine_get_hwcaps)

          then safe to use VG_(machine_get_VexArchInfo)
   -------------
   ppc32: initially:  call VG_(machine_get_hwcaps)
                      call VG_(machine_ppc32_set_clszB)

          then safe to use VG_(machine_get_VexArchInfo)
                       and VG_(machine_ppc32_has_FP)
                       and VG_(machine_ppc32_has_VMX)
   -------------
   ppc64: initially:  call VG_(machine_get_hwcaps)
                      call VG_(machine_ppc64_set_clszB)

          then safe to use VG_(machine_get_VexArchInfo)
                       and VG_(machine_ppc64_has_VMX)

   -------------
   s390x: initially:  call VG_(machine_get_hwcaps)

          then safe to use VG_(machine_get_VexArchInfo)

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/
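
/* Illustrative sketch, not original code: for a ppc32 host the
   startup ordering described above amounts to

      if (!VG_(machine_get_hwcaps)())
         .. give up: CPU cannot run Valgrind ..
      VG_(machine_ppc32_set_clszB)( clszB );
      .. only now is it safe to call ..
      VG_(machine_get_VexArchInfo)( &va, &vai );

   where 'clszB' stands for the cache line size the kernel supplied
   via the auxiliary vector (see m_main). */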

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif

/* fixs390: anything for s390x here ? */

/* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
   testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64) \
    || defined(VGA_arm) || defined(VGA_s390x)
#include "pub_tool_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
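
/* Illustrative sketch, not original code: with handler_unsup_insn
   installed for SIGILL, every feature probe below follows this
   setjmp/longjmp pattern:

      volatile Bool have_feature = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_feature = False;    .. the insn trapped: unsupported ..
      } else {
         __asm__ __volatile__( .. candidate instruction .. );
      }

   If the candidate instruction raises SIGILL, the handler longjmps
   back, VG_MINIMAL_SETJMP returns nonzero, and the 'unsupported'
   branch is taken. */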


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by the dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to the next MAX_DCBZL_SZB boundary, assuming
      MAX_DCBZL_SZB is a power of 2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64) */

#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model or VEX_S390X_MODEL_INVALID on error. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_INVALID;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_INVALID;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
     if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

     m = VG_(strstr)( p, "machine" );
     if (m == NULL) continue;

     p = m + sizeof "machine" - 1;
     while ( VG_(isspace)( *p ) || *p == '=') {
       if (*p == '\n') goto next_line;
       ++p;
     }

     model_name = p;
     for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
       struct model_map *mm = model_map + n;
       SizeT len = VG_(strlen)( mm->name );
       if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
            VG_(isspace)( model_name[len] )) {
         if (mm->id < model) model = mm->id;
         p = model_name + len;
         break;
       }
     }
     /* Skip until end-of-line */
     while (*p != '\n')
       ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_INVALID ? "UNKNOWN"
                                                  : model_map[model].name);

   return model;
}

#endif /* VGA_s390x */

/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind. */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_cx8, have_lzcnt;
     UInt eax, ebx, ecx, edx, max_extended;
     UChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     if (have_sse2 && have_sse1) {
        va          = VexArchX86;
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
        return True;
     }

     if (have_sse1) {
        va          = VexArchX86;
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
        return True;
     }

     va         = VexArchX86;
     vai.hwcaps = 0; /*baseline - no sse at all*/
     VG_(machine_x86_have_mxcsr) = 0;
     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt;
     UInt eax, ebx, ecx, edx, max_extended;
     UChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3  is ecx:9
     // sse41  is ecx:19
     // sse42  is ecx:20

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     va         = VexArchAMD64;
     vai.hwcaps = (have_sse3 ? VEX_HWCAPS_AMD64_SSE3 : 0)
                  | (have_cx16 ? VEX_HWCAPS_AMD64_CX16 : 0)
                  | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0);
     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP);
     /* on ppc64, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_s390x)
   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_EIMM, have_GIE, have_DFP, have_FGX;
     volatile Bool have_STFLE, have_ETF2;
     Int r, model;

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
       /* BASR loads the address of the next insn into r1. Needed to avoid
          a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     have_EIMM = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_EIMM = False;
     } else {
        __asm__ __volatile__(".long  0xc0090000\n\t"  /* iilf r0,0 */
                             ".short 0x0000" : : : "r0", "memory");
     }

     have_GIE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GIE = False;
     } else {
        __asm__ __volatile__(".long  0xc2010000\n\t"  /* msfi r0,0 */
                             ".short 0x0000" : : : "r0", "memory");
     }

     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xb3d20000"
                               : : : "r0", "cc", "memory");  /* adtr r0,r0,r0 */
     }

     have_FGX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FGX = False;
     } else {
        __asm__ __volatile__(".long 0xb3cd0000" : : : "r0");  /* lgdr r0,f0 */
     }

     /* Detect presence of the ETF2-enhancement facility using the
        STFLE insn. Note that STFLE and ETF2 were introduced at the same
        time, so the absence of STFLE implies the absence of ETF2. */
     have_STFLE = True;
     have_ETF2 = False;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
         ULong hoststfle[1];
         register ULong reg0 asm("0") = 0; /* one double word available */

         __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                              : "=m" (hoststfle), "+d"(reg0)
                              : : "cc", "memory");
         if (hoststfle[0] & (1ULL << (63 - 24)))
             have_ETF2 = True;
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;

     model = VG_(get_machine_model)();

     VG_(debugLog)(1, "machine", "machine %d  LDISP %d EIMM %d GIE %d DFP %d "
                   "FGX %d STFLE %d ETF2 %d\n", model, have_LDISP, have_EIMM,
                   have_GIE, have_DFP, have_FGX, have_STFLE, have_ETF2);

     if (model == VEX_S390X_MODEL_INVALID) return False;

     vai.hwcaps = model;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990. For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }
     if (have_EIMM)  vai.hwcaps |= VEX_HWCAPS_S390X_EIMM;
     if (have_GIE)   vai.hwcaps |= VEX_HWCAPS_S390X_GIE;
     if (have_DFP)   vai.hwcaps |= VEX_HWCAPS_S390X_DFP;
     if (have_FGX)   vai.hwcaps |= VEX_HWCAPS_S390X_FGX;
     if (have_ETF2)  vai.hwcaps |= VEX_HWCAPS_S390X_ETF2;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all arm platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension but they
        differ very little, so for now we do not distinguish them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
           archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
           (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host cpu cache line size. */
#if defined(VGA_ppc64)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
      || defined(VGP_arm_linux)                           \
      || defined(VGP_ppc32_linux) || defined(VGO_darwin)  \
      || defined(VGP_s390x_linux)
   return f;
#  elif defined(VGP_ppc64_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}
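
/* Illustrative sketch, not original code: the distinction matters when
   taking the address of a function in C and handing it to generated
   code, e.g.

      void* entry = VG_(fnptr_to_fnentry)( (void*)&VG_(get_IP) );

   On ppc64-linux 'entry' is the first word of the function descriptor,
   i.e. the actual machine-code address; on all the other platforms it
   is simply the pointer that was passed in. */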

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/
