/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2012 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_libcsetjmp.h"   // setjmp facilities
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}

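/* Fill *regs with the values needed to begin a stack unwind for
   thread tid: the program counter, the stack pointer, and whatever
   additional registers the unwinder for this architecture consults
   (frame pointer, link register, and so on). */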
void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_r11;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_r14;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  else
#    error "Unknown arch"
#  endif
}


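/* Record the given shadow values for the guest register(s) that carry
   a syscall's return value.  The error-value shadows are accepted for
   symmetry but are unused on the platforms handled below. */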
void VG_(set_syscall_return_shadows) ( ThreadId tid,
                                       /* shadow vals for the result */
                                       UWord s1res, UWord s2res,
                                       /* shadow vals for the error val */
                                       UWord s1err, UWord s2err )
{
#  if defined(VGP_x86_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_EAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_EAX = s2res;
#  elif defined(VGP_amd64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_RAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_RAX = s2res;
#  elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res;
#  elif defined(VGP_arm_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_R0 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_R0 = s2res;
#  elif defined(VGO_darwin)
   // GrP fixme darwin syscalls may return more values (2 registers plus error)
#  elif defined(VGP_s390x_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
#  elif defined(VGP_mips32_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
#  else
#    error "Unknown plat"
#  endif
}

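/* Copy a byte range of thread tid's guest state (shadowNo == 0) or of
   one of its two shadow states (shadowNo == 1 or 2) into a
   caller-supplied buffer; the counterpart below copies in the other
   direction.  offset and size are in bytes within VexGuestArchState
   and are bounds-checked. */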
void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}


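/* Call f once for each integer general-purpose register of thread
   tid, passing the register's name and its current guest value. */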
static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, Addr))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)) {
         apply_to_GPs_of_tid(tid, f);
      }
   }
}
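
/* Example use of VG_(apply_to_GP_regs), a minimal sketch (not part of
   the original file): a tool callback that scans every thread's GP
   registers for a particular value.  "check_reg" and "needle" are
   hypothetical names. */
#if 0
static UWord needle;
static void check_reg ( ThreadId tid, const HChar* name, UWord val )
{
   if (val == needle)
      VG_(debugLog)(1, "machine", "tid %d: %s holds the needle\n",
                    (Int)tid, name);
}
/* ... and at the call site: VG_(apply_to_GP_regs)(check_reg); */
#endif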

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_word;
         return True;
      }
   }
   return False;
}
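
/* Iterating over all live thread stacks, a minimal sketch:

      ThreadId tid;
      Addr stack_min, stack_max;
      VG_(thread_stack_reset_iter)(&tid);
      while (VG_(thread_stack_next)(&tid, &stack_min, &stack_max)) {
         // use [stack_min, stack_max] for thread tid
      }
*/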

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_word;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif

/* fixs390: anything for s390x here ? */

/* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
   testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64) \
    || defined(VGA_arm) || defined(VGA_s390x)
#include "pub_tool_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
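
/* The detection idiom used repeatedly below: with handler_unsup_insn
   installed for SIGILL (and SIGFPE where relevant),

      have_FOO = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_FOO = False;   // the insn trapped; we longjmp'd back
      } else {
         __asm__ __volatile__(...);  // try executing a FOO insn
      }

   If the CPU lacks the instruction, the kernel delivers SIGILL, the
   handler longjmps back, and SETJMP returns nonzero. */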


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, that they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to next max block size, assumes MAX_DCBZL_SZB is pof2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64) */

#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
      { "2827", VEX_S390X_MODEL_ZEC12 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
     if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

     m = VG_(strstr)( p, "machine" );
     if (m == NULL) continue;

     p = m + sizeof "machine" - 1;
     while ( VG_(isspace)( *p ) || *p == '=') {
       if (*p == '\n') goto next_line;
       ++p;
     }

     model_name = p;
     for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
       struct model_map *mm = model_map + n;
       SizeT len = VG_(strlen)( mm->name );
       if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
            VG_(isspace)( model_name[len] )) {
         if (mm->id < model) model = mm->id;
         p = model_name + len;
         break;
       }
     }
     /* Skip until end-of-line */
     while (*p != '\n')
       ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* VGA_s390x */

#ifdef VGA_mips32

/* Read /proc/cpuinfo and return the machine model. */
static UInt VG_(get_machine_model)(void)
{
   char *search_MIPS_str = "MIPS";
   char *search_Broadcom_str = "Broadcom";
   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return -1;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr) (file_buf, search_Broadcom_str) != NULL)
       return VEX_PRID_COMP_BROADCOM;
   if (VG_(strstr) (file_buf, search_MIPS_str) != NULL)
       return VEX_PRID_COMP_MIPS;

   /* Did not find string in the proc file. */
   return -1;
}

#endif

/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind.
   Also determine information about the caches on this host. */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_cx8, have_lzcnt;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     va = VexArchX86;
     if (have_sse2 && have_sse1) {
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse1) {
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
     } else {
       vai.hwcaps = 0; /*baseline - no sse at all*/
       VG_(machine_x86_have_mxcsr) = 0;
     }

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx /*, have_fma*/;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3   is ecx:9
     // sse41   is ecx:19
     // sse42   is ecx:20

     // osxsave is ecx:27
     // avx     is ecx:28
     // fma     is ecx:12
     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<27)|(1<<28))) == ((1<<27)|(1<<28)) ) {
        /* processor supports AVX instructions and XGETBV is enabled
           by OS */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx");
        if ((w & 6) == 6) {
           /* OS has enabled both XMM and YMM state support */
           have_avx = True;
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }


     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     va         = VexArchAMD64;
     vai.hwcaps = (have_sse3  ? VEX_HWCAPS_AMD64_SSE3  : 0)
                | (have_cx16  ? VEX_HWCAPS_AMD64_CX16  : 0)
                | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0)
                | (have_avx   ? VEX_HWCAPS_AMD64_AVX   : 0);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP);
     /* on ppc64, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_s390x)
   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_EIMM, have_GIE, have_DFP, have_FGX;
     volatile Bool have_STFLE, have_ETF2, have_ETF3, have_STCKF, have_FPEXT;
     Int r, model;

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
       /* BASR loads the address of the next insn into r1. Needed to avoid
          a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     have_EIMM = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_EIMM = False;
     } else {
        __asm__ __volatile__(".long  0xc0090000\n\t"  /* iilf r0,0 */
                             ".short 0x0000" : : : "r0", "memory");
     }

     have_GIE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GIE = False;
     } else {
        __asm__ __volatile__(".long  0xc2010000\n\t"  /* msfi r0,0 */
                             ".short 0x0000" : : : "r0", "memory");
     }

     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xb3d20000"
                               : : : "r0", "cc", "memory");  /* adtr r0,r0,r0 */
     }

     have_FGX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FGX = False;
     } else {
        __asm__ __volatile__(".long 0xb3cd0000" : : : "r0");  /* lgdr r0,f0 */
     }

     /* Detect presence of certain facilities using the STFLE insn.
        Note that these facilities were introduced at the same time as or
        later than STFLE, so the absence of STFLE implies the absence of
        the facilities we're trying to detect. */
     have_STFLE = True;
     have_ETF2 = False;
     have_ETF3 = False;
     have_STCKF = False;
     have_FPEXT = False;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
         ULong hoststfle[1];
         register ULong reg0 asm("0") = 0; /* one double word available */

         __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                              : "=m" (hoststfle), "+d"(reg0)
                              : : "cc", "memory");
         if (hoststfle[0] & (1ULL << (63 - 24)))
             have_ETF2 = True;
         if (hoststfle[0] & (1ULL << (63 - 30)))
             have_ETF3 = True;
         if (hoststfle[0] & (1ULL << (63 - 25)))
             have_STCKF = True;
         if (hoststfle[0] & (1ULL << (63 - 37)))
             have_FPEXT = True;
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;

     model = VG_(get_machine_model)();

     /* If the model is "unknown" don't treat this as an error. Assume
        this is a brand-new machine model for which we don't have the
        identification yet. Keeping fingers crossed. */

     VG_(debugLog)(1, "machine", "machine %d  LDISP %d EIMM %d GIE %d DFP %d "
                   "FGX %d STFLE %d ETF2 %d ETF3 %d STCKF %d\n",
                   model, have_LDISP, have_EIMM, have_GIE, have_DFP, have_FGX,
                   have_STFLE, have_ETF2, have_ETF3, have_STCKF);

     vai.hwcaps = model;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990. For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }
     if (have_EIMM)  vai.hwcaps |= VEX_HWCAPS_S390X_EIMM;
     if (have_GIE)   vai.hwcaps |= VEX_HWCAPS_S390X_GIE;
     if (have_DFP)   vai.hwcaps |= VEX_HWCAPS_S390X_DFP;
     if (have_FGX)   vai.hwcaps |= VEX_HWCAPS_S390X_FGX;
     if (have_ETF2)  vai.hwcaps |= VEX_HWCAPS_S390X_ETF2;
     if (have_ETF3)  vai.hwcaps |= VEX_HWCAPS_S390X_ETF3;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
     if (have_STCKF) vai.hwcaps |= VEX_HWCAPS_S390X_STCKF;
     if (have_FPEXT) vai.hwcaps |= VEX_HWCAPS_S390X_FPEXT;

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all arm platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they
        differ very little, so for now we do not distinguish between
        them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
           archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
           (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_mips32)
   {
     va = VexArchMIPS32;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
         return False;

     vai.hwcaps = model;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host cpu cache line size. */
#if defined(VGA_ppc64)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}
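
/* A minimal usage sketch:

      VexArch va;
      VexArchInfo vai;
      VG_(machine_get_VexArchInfo)( &va, &vai );
      // va names the host ISA; vai.hwcaps holds the capability bits

   Either pointer may be NULL if only the other piece is wanted. */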


/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   the minimum cache line size they are prepared to simulate.
   Basically require that the minimum cache line size is at least as
   large as the largest register that might get transferred to/from
   memory, so as to guarantee that any such transaction can straddle
   at most 2 cache lines.
*/
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends whether or not we have NEON, but let's just
      assume we always do. */
   return 16;

#  elif defined(VGA_mips32)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  else
#    error "Unknown arch"
#  endif
}


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
      || defined(VGP_arm_linux)                           \
      || defined(VGP_ppc32_linux) || defined(VGO_darwin)  \
      || defined(VGP_s390x_linux) || defined(VGP_mips32_linux)
   return f;
#  elif defined(VGP_ppc64_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}
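
/* Usage note, a minimal sketch: on ppc64-linux "&fn" denotes a
   function descriptor rather than code, so anything that needs the
   real entry address must go through this function, e.g.

      void* entry = VG_(fnptr_to_fnentry)( (void*)&some_fn );

   where "some_fn" is any function; on the other platforms listed
   above this is simply the identity. */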

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/
