m_machine.c revision 6bd9dc18c043927c1196caba20a327238a179c42
/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2012 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_libcsetjmp.h"   // setjmp facilities
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}
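
/* Example (illustrative sketch only): callers use the accessors above
   rather than poking the VEX guest state directly, e.g.

      Addr ip = VG_(get_IP)(tid);   // 'tid' assumed to be a valid ThreadId
      VG_(set_IP)(tid, ip);         // write it back unchanged
*/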

void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_r11;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_r14;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  else
#    error "Unknown arch"
#  endif
}


void VG_(set_syscall_return_shadows) ( ThreadId tid,
                                       /* shadow vals for the result */
                                       UWord s1res, UWord s2res,
                                       /* shadow vals for the error val */
                                       UWord s1err, UWord s2err )
{
#  if defined(VGP_x86_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_EAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_EAX = s2res;
#  elif defined(VGP_amd64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_RAX = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_RAX = s2res;
#  elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_GPR3 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_GPR3 = s2res;
#  elif defined(VGP_arm_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_R0 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_R0 = s2res;
#  elif defined(VGO_darwin)
   // GrP fixme darwin syscalls may return more values (2 registers plus error)
#  elif defined(VGP_s390x_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
#  elif defined(VGP_mips32_linux)
   VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
   VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
#  else
#    error "Unknown plat"
#  endif
}

void
VG_(get_shadow_regs_area) ( ThreadId tid,
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}
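
/* Example (illustrative sketch): a tool wanting the first shadow of a
   particular guest register computes the byte offset of that register
   within the VEX guest state; e.g. on x86, reading shadow 1 of
   guest_EAX might look like

      UWord sh1;
      VG_(get_shadow_regs_area)( tid, (UChar*)&sh1, 1,
                                 offsetof(VexGuestX86State, guest_EAX),
                                 sizeof(UWord) );

   where offsetof comes from <stddef.h>. */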

void
VG_(set_shadow_regs_area) ( ThreadId tid,
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   tl_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}


static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, Addr))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R7" , vex->guest_R7 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#else
#  error Unknown arch
#endif
}


void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)) {
         apply_to_GPs_of_tid(tid, f);
      }
   }
}

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_word;
         return True;
      }
   }
   return False;
}

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_word;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}
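
/* Example (illustrative): the reset/next pair above is intended to be
   used as a simple iterator over all live thread stacks:

      ThreadId tid;
      Addr stack_min, stack_max;
      VG_(thread_stack_reset_iter)(&tid);
      while ( VG_(thread_stack_next)(&tid, &stack_min, &stack_max) ) {
         // [stack_min, stack_max] bounds one thread's client stack
      }
*/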

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif

/* fixs390: anything for s390x here ? */

/* For hwcaps detection on ppc32/64, s390x, and arm we'll need to do SIGILL
   testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64) \
    || defined(VGA_arm) || defined(VGA_s390x)
#include "pub_tool_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
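
/* The feature probes further down all follow the same SIGILL-trampoline
   pattern: with handler_unsup_insn installed, each candidate insn is
   executed once, and if it traps, VG_MINIMAL_LONGJMP returns control
   with a nonzero setjmp result.  In outline:

      have_feature = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_feature = False;         // insn raised SIGILL
      } else {
         __asm__ __volatile__(...);    // candidate instruction
      }
*/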

/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128)   /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to the next max block size; assumes MAX_DCBZL_SZB is a
      power of 2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64) */
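
/* A worked instance of the round-up used in find_ppc_dcbz_sz above:
   with MAX_DCBZL_SZB = 128 and a test_block starting at, say, 0x1004,
   (0x1004 + 128) & ~127 = 0x1080, the next 128-byte boundary, so the
   probed dcbz/dcbzl stores stay inside the 4*128-byte buffer. */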

#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
      { "2827", VEX_S390X_MODEL_ZEC12 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
      if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

      m = VG_(strstr)( p, "machine" );
      if (m == NULL) continue;

      p = m + sizeof "machine" - 1;
      while ( VG_(isspace)( *p ) || *p == '=') {
         if (*p == '\n') goto next_line;
         ++p;
      }

      model_name = p;
      for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
         struct model_map *mm = model_map + n;
         SizeT len = VG_(strlen)( mm->name );
         if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
              VG_(isspace)( model_name[len] )) {
            if (mm->id < model) model = mm->id;
            p = model_name + len;
            break;
         }
      }
      /* Skip until end-of-line */
      while (*p != '\n')
         ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* VGA_s390x */

#ifdef VGA_mips32

/* Read /proc/cpuinfo and return the machine model. */
static UInt VG_(get_machine_model)(void)
{
   char *search_MIPS_str = "MIPS";
   char *search_Broadcom_str = "Broadcom";
   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return -1;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr) (file_buf, search_Broadcom_str) != NULL)
       return VEX_PRID_COMP_BROADCOM;
   if (VG_(strstr) (file_buf, search_MIPS_str) != NULL)
       return VEX_PRID_COMP_MIPS;

   /* Did not find string in the proc file. */
   return -1;
}

#endif

/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind.
   Also determine information about the caches on this host. */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_cx8, have_lzcnt;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     va = VexArchX86;
     if (have_sse2 && have_sse1) {
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse1) {
        vai.hwcaps  = VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
     } else {
        vai.hwcaps = 0; /*baseline - no sse at all*/
        VG_(machine_x86_have_mxcsr) = 0;
     }

     VG_(machine_get_cache_info)(&vai);

     return True;
   }
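
   /* Note on the vendor-string assembly above (repeated in the amd64
      case below): CPUID leaf 0 returns the 12-character vendor id split
      across EBX, EDX, ECX in that order, so "AuthenticAMD" arrives as
      EBX = "Auth", EDX = "enti", ECX = "cAMD". */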

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx /*, have_fma*/;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3   is ecx:9
     // sse41   is ecx:19
     // sse42   is ecx:20

     // osxsave is ecx:27
     // avx     is ecx:28
     // fma     is ecx:12
     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<27)|(1<<28))) == ((1<<27)|(1<<28)) ) {
        /* processor supports AVX instructions and XGETBV is enabled
           by OS */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx");
        if ((w & 6) == 6) {
           /* OS has enabled both XMM and YMM state support */
           have_avx = True;
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this is an AMD that can do LZCNT. */
     have_lzcnt = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     va = VexArchAMD64;
     vai.hwcaps = (have_sse3  ? VEX_HWCAPS_AMD64_SSE3  : 0)
                | (have_cx16  ? VEX_HWCAPS_AMD64_CX16  : 0)
                | (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0)
                | (have_avx   ? VEX_HWCAPS_AMD64_AVX   : 0);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }
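
   /* Note on the XGETBV probe above: XCR0 bit 1 covers XMM (SSE) state
      and bit 2 covers YMM (AVX) state, so (w & 6) == 6 means the OS
      context-switches both register files, which is required before AVX
      may safely be used. */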

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;

     vai.hwcaps = 0;
     if (have_F)   vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)   vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX)  vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX)  vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX)  vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP);
     /* on ppc64, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;

     vai.hwcaps = 0;
     if (have_V)   vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX)  vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX)  vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX)  vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_s390x)
   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_EIMM, have_GIE, have_DFP, have_FGX;
     volatile Bool have_STFLE, have_ETF2, have_ETF3, have_STCKF, have_FPEXT;
     Int r, model;

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
        /* BASR loads the address of the next insn into r1.  Needed to avoid
           a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     have_EIMM = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_EIMM = False;
     } else {
        __asm__ __volatile__(".long  0xc0090000\n\t"  /* iilf r0,0 */
                             ".short 0x0000" : : : "r0", "memory");
     }

     have_GIE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GIE = False;
     } else {
        __asm__ __volatile__(".long  0xc2010000\n\t"  /* msfi r0,0 */
                             ".short 0x0000" : : : "r0", "memory");
     }

     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xb3d20000"
                             : : : "r0", "cc", "memory");  /* adtr r0,r0,r0 */
     }

     have_FGX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FGX = False;
     } else {
        __asm__ __volatile__(".long 0xb3cd0000" : : : "r0");  /* lgdr r0,f0 */
     }

     /* Detect presence of certain facilities using the STFLE insn.
        Note that these facilities were introduced at the same time as or
        later than STFLE itself, so the absence of STFLE implies the
        absence of the facility we're trying to detect. */
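
     /* z/Architecture numbers facility bits from the leftmost (most
        significant) bit of the STFLE result, so facility bit N of the
        first doubleword is tested as (1ULL << (63 - N)) below. */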
     have_STFLE = True;
     have_ETF2 = False;
     have_ETF3 = False;
     have_STCKF = False;
     have_FPEXT = False;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
        ULong hoststfle[1];
        register ULong reg0 asm("0") = 0; /* one double word available */

        __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                             : "=m" (hoststfle), "+d"(reg0)
                             : : "cc", "memory");
        if (hoststfle[0] & (1ULL << (63 - 24)))
            have_ETF2 = True;
        if (hoststfle[0] & (1ULL << (63 - 30)))
            have_ETF3 = True;
        if (hoststfle[0] & (1ULL << (63 - 25)))
            have_STCKF = True;
        if (hoststfle[0] & (1ULL << (63 - 37)))
            have_FPEXT = True;
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;

     model = VG_(get_machine_model)();

     /* If the model is "unknown" don't treat this as an error. Assume
        this is a brand-new machine model for which we don't have the
        identification yet. Keeping fingers crossed. */

     VG_(debugLog)(1, "machine", "machine %d  LDISP %d EIMM %d GIE %d DFP %d "
                   "FGX %d STFLE %d ETF2 %d ETF3 %d STCKF %d\n",
                   model, have_LDISP, have_EIMM, have_GIE, have_DFP, have_FGX,
                   have_STFLE, have_ETF2, have_ETF3, have_STCKF);

     vai.hwcaps = model;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990. For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }
     if (have_EIMM)  vai.hwcaps |= VEX_HWCAPS_S390X_EIMM;
     if (have_GIE)   vai.hwcaps |= VEX_HWCAPS_S390X_GIE;
     if (have_DFP)   vai.hwcaps |= VEX_HWCAPS_S390X_DFP;
     if (have_FGX)   vai.hwcaps |= VEX_HWCAPS_S390X_FGX;
     if (have_ETF2)  vai.hwcaps |= VEX_HWCAPS_S390X_ETF2;
     if (have_ETF3)  vai.hwcaps |= VEX_HWCAPS_S390X_ETF3;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
     if (have_STCKF) vai.hwcaps |= VEX_HWCAPS_S390X_STCKF;
     if (have_FPEXT) vai.hwcaps |= VEX_HWCAPS_S390X_FPEXT;

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all arm platforms so far supported,
        it's not worth the typing effort.  At least include the most
        basic sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they differ
        very little, so for now we do not distinguish between them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
           archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
           (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_mips32)
   {
     va = VexArchMIPS32;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
         return False;

     vai.hwcaps = model;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host cpu cache line size. */
#if defined(VGA_ppc64)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_cache_line_szB == 0
             || vai.ppc_cache_line_szB == szB);

   vg_assert(szB == 32 || szB == 64 || szB == 128);
   vai.ppc_cache_line_szB = szB;
}
#endif


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}


/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   what the minimum cache line size they are prepared to simulate is.
   Basically require that the minimum cache line size is at least as
   large as the largest register that might get transferred to/from
   memory, so as to guarantee that any such transaction can straddle
   at most 2 cache lines. */
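
/* Worked example of the "at most 2 cache lines" guarantee: with a
   16-byte register and a 16-byte minimum line size, an access at line
   offset 8 touches bytes 8..23, i.e. exactly two adjacent lines; with
   lines smaller than the register, the same access could span three. */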
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V)   return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX)  return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V)   return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX)  return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends whether or not we have NEON, but let's just
      assume we always do. */
   return 16;

#  elif defined(VGA_mips32)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  else
#    error "Unknown arch"
#  endif
}


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
      || defined(VGP_arm_linux)                           \
      || defined(VGP_ppc32_linux) || defined(VGO_darwin)  \
      || defined(VGP_s390x_linux) || defined(VGP_mips32_linux)
   return f;
#  elif defined(VGP_ppc64_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/