/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2012 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_libcsetjmp.h"   // setjmp facilities
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"

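/* These accessors resolve, via the VG_INSTR_PTR / VG_STACK_PTR /
   VG_FRAME_PTR aliases from pub_core_machine.h, to the
   architecture-specific guest register fields in the VEX guest state
   (eg guest_EIP/guest_ESP/guest_EBP on x86). */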
#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}

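/* Collect the minimal register set needed to start unwinding the stack
   of thread TID: the program counter and stack pointer, plus whatever
   architecture-specific extras (frame pointer, link register, ...) the
   unwinder for that architecture consumes. */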
void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_r11;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_r14;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  else
#    error "Unknown arch"
#  endif
}

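/* Record the shadow (tool metadata) values for the register(s) that
   carry a syscall's return value on this platform.  Shadow banks 1 and
   2 correspond to the vex_shadow1/vex_shadow2 copies of the guest
   state kept in each thread's ThreadArchState. */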
void VG_(set_syscall_return_shadows) ( ThreadId tid,
                                       /* shadow vals for the result */
                                       UWord s1res, UWord s2res,
                                       /* shadow vals for the error val */
                                       UWord s1err, UWord s2err )
{
#  if defined(VGP_x86_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_EAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_EAX = s2res;
#  elif defined(VGP_amd64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_RAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_RAX = s2res;
#  elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res;
#  elif defined(VGP_arm_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_R0 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_R0 = s2res;
#  elif defined(VGO_darwin)
   // GrP fixme darwin syscalls may return more values (2 registers plus error)
#  elif defined(VGP_s390x_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
#  elif defined(VGP_mips32_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
#  else
#    error "Unknown plat"
#  endif
}

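/* Copy SIZE bytes, starting at OFFSET within register bank SHADOWNO of
   thread TID, into DST.  Bank 0 is the real guest state; banks 1 and 2
   are the two shadow copies maintained on behalf of tools. */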
void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}
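
/* A minimal usage sketch (hypothetical caller; a real one would take
   the offset from VEX's guest state layout, eg the OFFSET_x86_EAX
   style constants in pub_core_machine.h):

      UWord sh1;
      VG_(get_shadow_regs_area)( tid, (UChar*)&sh1, 1,
                                 OFFSET_x86_EAX, sizeof(UWord) );

   reads the shadow-1 bits of the x86 guest EAX into sh1. */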

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}

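/* Call F once for each integer general-purpose register of thread TID,
   passing F the register's name and its current guest value.  Used by
   VG_(apply_to_GP_regs) below to scan the registers of every live
   thread. */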
static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId, HChar*, Addr))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R7" , vex->guest_R7 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(ThreadId, HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)) {
         apply_to_GPs_of_tid(tid, f);
      }
   }
}

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_word;
         return True;
      }
   }
   return False;
}
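
/* A minimal sketch of the iteration protocol for the two functions
   above (hypothetical caller):

      ThreadId tid;
      Addr     stack_min, stack_max;
      VG_(thread_stack_reset_iter)( &tid );
      while ( VG_(thread_stack_next)( &tid, &stack_min, &stack_max ) ) {
         // process [stack_min, stack_max] for thread tid
      }
*/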

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_word;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif

/* fixs390: anything for s390x here ? */

/* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
   testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64) \
    || defined(VGA_arm) || defined(VGA_s390x)
#include "pub_tool_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
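
/* All the hwcaps probes below follow the same pattern (sketch):

      feature = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         feature = False;   // insn trapped; the handler longjmp'd back
      } else {
         __asm__ __volatile__(...);   // execute one candidate instruction
      }

   If the CPU lacks the instruction, the kernel delivers SIGILL,
   handler_unsup_insn longjmps back to the setjmp point, and SETJMP
   returns nonzero, so the feature is marked as absent. */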


/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that, as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to the next max block size; assumes MAX_DCBZL_SZB is a
      power of 2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64) */

#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
     if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

     m = VG_(strstr)( p, "machine" );
     if (m == NULL) continue;

     p = m + sizeof "machine" - 1;
     while ( VG_(isspace)( *p ) || *p == '=') {
       if (*p == '\n') goto next_line;
       ++p;
     }

     model_name = p;
     for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
       struct model_map *mm = model_map + n;
       SizeT len = VG_(strlen)( mm->name );
       if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
            VG_(isspace)( model_name[len] )) {
         if (mm->id < model) model = mm->id;
         p = model_name + len;
         break;
       }
     }
     /* Skip until end-of-line */
     while (*p != '\n')
       ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* VGA_s390x */

#ifdef VGA_mips32

/* Read /proc/cpuinfo and return the machine model. */
static UInt VG_(get_machine_model)(void)
{
   char *search_MIPS_str = "MIPS";
   char *search_Broadcom_str = "Broadcom";
   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return -1;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr) (file_buf, search_Broadcom_str) != NULL)
       return VEX_PRID_COMP_BROADCOM;
   if (VG_(strstr) (file_buf, search_MIPS_str) != NULL)
       return VEX_PRID_COMP_MIPS;

   /* Did not find string in the proc file. */
   return -1;
}

#endif

/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind. */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_cx8, have_lzcnt;
     UInt eax, ebx, ecx, edx, max_extended;
     UChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     if (have_sse2 && have_sse1) {
        va          = VexArchX86;
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
        return True;
     }

     if (have_sse1) {
        va          = VexArchX86;
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
        return True;
     }

     va         = VexArchX86;
     vai.hwcaps = 0; /*baseline - no sse at all*/
     VG_(machine_x86_have_mxcsr) = 0;
     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx /*, have_fma*/;
     UInt eax, ebx, ecx, edx, max_extended;
     UChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3   is ecx:9
     // sse41   is ecx:19
     // sse42   is ecx:20

     // osxsave is ecx:27
     // avx     is ecx:28
     // fma     is ecx:12
     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<27)|(1<<28))) == ((1<<27)|(1<<28)) ) {
        /* processor supports AVX instructions and XGETBV is enabled
           by OS */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx");
        if ((w & 6) == 6) {
           /* OS has enabled both XMM and YMM state support */
           have_avx = True;
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }


     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     va         = VexArchAMD64;
     vai.hwcaps = (have_sse3  ? VEX_HWCAPS_AMD64_SSE3  : 0)
                | (have_cx16  ? VEX_HWCAPS_AMD64_CX16  : 0)
                | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0)
                | (have_avx   ? VEX_HWCAPS_AMD64_AVX   : 0);
     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;


     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP);
     /* on ppc64, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_s390x)
   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_EIMM, have_GIE, have_DFP, have_FGX;
     volatile Bool have_STFLE, have_ETF2, have_ETF3;
     Int r, model;

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note that we cannot use the stfle insn because
        it is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
       /* BASR loads the address of the next insn into r1. Needed to avoid
          a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     have_EIMM = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_EIMM = False;
     } else {
        __asm__ __volatile__(".long  0xc0090000\n\t"  /* iilf r0,0 */
                             ".short 0x0000" : : : "r0", "memory");
     }

     have_GIE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GIE = False;
     } else {
        __asm__ __volatile__(".long  0xc2010000\n\t"  /* msfi r0,0 */
                             ".short 0x0000" : : : "r0", "memory");
     }

     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xb3d20000"
                               : : : "r0", "cc", "memory");  /* adtr r0,r0,r0 */
     }

     have_FGX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FGX = False;
     } else {
        __asm__ __volatile__(".long 0xb3cd0000" : : : "r0");  /* lgdr r0,f0 */
     }

     /* Detect presence of the ETF2-enhancement facility using the
        STFLE insn. Note that STFLE and ETF2 were introduced at the same
        time, so the absence of STFLE implies the absence of ETF2. */
     have_STFLE = True;
     have_ETF2 = False;
     have_ETF3 = False;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
         ULong hoststfle[1];
         register ULong reg0 asm("0") = 0; /* one double word available */

         __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                              : "=m" (hoststfle), "+d"(reg0)
                              : : "cc", "memory");
         if (hoststfle[0] & (1ULL << (63 - 24)))
             have_ETF2 = True;
         if (hoststfle[0] & (1ULL << (63 - 30)))
             have_ETF3 = True;
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;

     model = VG_(get_machine_model)();

     /* If the model is "unknown" don't treat this as an error. Assume
        this is a brand-new machine model for which we don't have the
        identification yet. Keeping fingers crossed. */

     VG_(debugLog)(1, "machine", "machine %d  LDISP %d EIMM %d GIE %d DFP %d "
                   "FGX %d STFLE %d ETF2 %d ETF3 %d\n", model, have_LDISP,
                   have_EIMM, have_GIE, have_DFP, have_FGX, have_STFLE,
                   have_ETF2, have_ETF3);

     vai.hwcaps = model;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990. For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }
     if (have_EIMM)  vai.hwcaps |= VEX_HWCAPS_S390X_EIMM;
     if (have_GIE)   vai.hwcaps |= VEX_HWCAPS_S390X_GIE;
     if (have_DFP)   vai.hwcaps |= VEX_HWCAPS_S390X_DFP;
     if (have_FGX)   vai.hwcaps |= VEX_HWCAPS_S390X_FGX;
     if (have_ETF2)  vai.hwcaps |= VEX_HWCAPS_S390X_ETF2;
     if (have_ETF3)  vai.hwcaps |= VEX_HWCAPS_S390X_ETF3;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all arm platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they
        differ very little, so for now we do not distinguish between
        them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
           archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
           (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     return True;
   }

#elif defined(VGA_mips32)
   {
     va = VexArchMIPS32;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
         return False;

     vai.hwcaps = model;
     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host cpu cache line size. */
#if defined(VGA_ppc64)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}
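
/* Typical call (sketch); either out-pointer may be NULL if the caller
   wants only one of the two values:

      VexArch     va_here;
      VexArchInfo vai_here;
      VG_(machine_get_VexArchInfo)( &va_here, &vai_here );
*/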


/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (eg, AVX or non-AVX, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   what the minimum cache line size they are prepared to simulate is.
   Basically require that the minimum cache line size is at least as
   large as the largest register that might get transferred to/from
   memory, so as to guarantee that any such transaction can straddle
   at most 2 cache lines.
*/
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends whether or not we have NEON, but let's just
      assume we always do. */
   return 16;

#  elif defined(VGA_mips32)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  else
#    error "Unknown arch"
#  endif
}


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
      || defined(VGP_arm_linux)                           \
      || defined(VGP_ppc32_linux) || defined(VGO_darwin)  \
      || defined(VGP_s390x_linux) || defined(VGP_mips32_linux)
   return f;
#  elif defined(VGP_ppc64_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/