/*--------------------------------------------------------------------*/
/*--- Machine-related stuff.                           m_machine.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2013 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_libcsetjmp.h"   // setjmp facilities
#include "pub_core_threadstate.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcfile.h"
#include "pub_core_libcprint.h"
#include "pub_core_mallocfree.h"
#include "pub_core_machine.h"
#include "pub_core_cpuid.h"
#include "pub_core_libcsignal.h"   // for ppc32 messing with SIGILL and SIGFPE
#include "pub_core_debuglog.h"


#define INSTR_PTR(regs)    ((regs).vex.VG_INSTR_PTR)
#define STACK_PTR(regs)    ((regs).vex.VG_STACK_PTR)
#define FRAME_PTR(regs)    ((regs).vex.VG_FRAME_PTR)

Addr VG_(get_IP) ( ThreadId tid ) {
   return INSTR_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_SP) ( ThreadId tid ) {
   return STACK_PTR( VG_(threads)[tid].arch );
}
Addr VG_(get_FP) ( ThreadId tid ) {
   return FRAME_PTR( VG_(threads)[tid].arch );
}

void VG_(set_IP) ( ThreadId tid, Addr ip ) {
   INSTR_PTR( VG_(threads)[tid].arch ) = ip;
}
void VG_(set_SP) ( ThreadId tid, Addr sp ) {
   STACK_PTR( VG_(threads)[tid].arch ) = sp;
}
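
/* Collect the minimal register set needed to start unwinding a
   thread's stack: the program counter, the stack pointer, and a small
   arch-specific set of extra registers (frame pointer, link register,
   and suchlike), gathered per-architecture below. */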
void VG_(get_UnwindStartRegs) ( /*OUT*/UnwindStartRegs* regs,
                                ThreadId tid )
{
#  if defined(VGA_x86)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_EIP;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_ESP;
   regs->misc.X86.r_ebp
      = VG_(threads)[tid].arch.vex.guest_EBP;
#  elif defined(VGA_amd64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_RIP;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_RSP;
   regs->misc.AMD64.r_rbp
      = VG_(threads)[tid].arch.vex.guest_RBP;
#  elif defined(VGA_ppc32)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC32.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_CIA;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_GPR1;
   regs->misc.PPC64.r_lr
      = VG_(threads)[tid].arch.vex.guest_LR;
#  elif defined(VGA_arm)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_R15T;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_R13;
   regs->misc.ARM.r14
      = VG_(threads)[tid].arch.vex.guest_R14;
   regs->misc.ARM.r12
      = VG_(threads)[tid].arch.vex.guest_R12;
   regs->misc.ARM.r11
      = VG_(threads)[tid].arch.vex.guest_R11;
   regs->misc.ARM.r7
      = VG_(threads)[tid].arch.vex.guest_R7;
#  elif defined(VGA_arm64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_XSP;
   regs->misc.ARM64.x29 = VG_(threads)[tid].arch.vex.guest_X29;
   regs->misc.ARM64.x30 = VG_(threads)[tid].arch.vex.guest_X30;
#  elif defined(VGA_s390x)
   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
   regs->misc.S390X.r_fp
      = VG_(threads)[tid].arch.vex.guest_r11;
   regs->misc.S390X.r_lr
      = VG_(threads)[tid].arch.vex.guest_r14;
#  elif defined(VGA_mips32)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS32.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS32.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS32.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  elif defined(VGA_mips64)
   regs->r_pc = VG_(threads)[tid].arch.vex.guest_PC;
   regs->r_sp = VG_(threads)[tid].arch.vex.guest_r29;
   regs->misc.MIPS64.r30
      = VG_(threads)[tid].arch.vex.guest_r30;
   regs->misc.MIPS64.r31
      = VG_(threads)[tid].arch.vex.guest_r31;
   regs->misc.MIPS64.r28
      = VG_(threads)[tid].arch.vex.guest_r28;
#  else
#    error "Unknown arch"
#  endif
}
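
/* The guest state exists in three copies: the real register bank
   (shadowNo == 0) and two shadow banks (shadowNo == 1 and 2) in which
   tools can keep per-register metadata.  As a purely illustrative
   (hypothetical) example, a tool wanting the shadow1 copy of the
   amd64 guest RAX could do:

      UChar buf[8];
      VG_(get_shadow_regs_area)( tid, buf, 1,
                                 offsetof(VexGuestAMD64State, guest_RAX),
                                 8 );
*/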
void
VG_(get_shadow_regs_area) ( ThreadId tid, 
                            /*DST*/UChar* dst,
                            /*SRC*/Int shadowNo, PtrdiffT offset, SizeT size )
{
   void*        src;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   src = NULL;
   switch (shadowNo) {
      case 0: src = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: src = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: src = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(src != NULL);
   VG_(memcpy)( dst, src, size);
}

void
VG_(set_shadow_regs_area) ( ThreadId tid, 
                            /*DST*/Int shadowNo, PtrdiffT offset, SizeT size,
                            /*SRC*/const UChar* src )
{
   void*        dst;
   ThreadState* tst;
   vg_assert(shadowNo == 0 || shadowNo == 1 || shadowNo == 2);
   vg_assert(VG_(is_valid_tid)(tid));
   // Bounds check
   vg_assert(0 <= offset && offset < sizeof(VexGuestArchState));
   vg_assert(offset + size <= sizeof(VexGuestArchState));
   // Copy
   tst = & VG_(threads)[tid];
   dst = NULL;
   switch (shadowNo) {
      case 0: dst = (void*)(((Addr)&(tst->arch.vex)) + offset); break;
      case 1: dst = (void*)(((Addr)&(tst->arch.vex_shadow1)) + offset); break;
      case 2: dst = (void*)(((Addr)&(tst->arch.vex_shadow2)) + offset); break;
   }
   vg_assert(dst != NULL);
   VG_(memcpy)( dst, src, size);
}


static void apply_to_GPs_of_tid(ThreadId tid, void (*f)(ThreadId,
                                                        const HChar*, Addr))
{
   VexGuestArchState* vex = &(VG_(get_ThreadState)(tid)->arch.vex);
   VG_(debugLog)(2, "machine", "apply_to_GPs_of_tid %d\n", tid);
#if defined(VGA_x86)
   (*f)(tid, "EAX", vex->guest_EAX);
   (*f)(tid, "ECX", vex->guest_ECX);
   (*f)(tid, "EDX", vex->guest_EDX);
   (*f)(tid, "EBX", vex->guest_EBX);
   (*f)(tid, "ESI", vex->guest_ESI);
   (*f)(tid, "EDI", vex->guest_EDI);
   (*f)(tid, "ESP", vex->guest_ESP);
   (*f)(tid, "EBP", vex->guest_EBP);
#elif defined(VGA_amd64)
   (*f)(tid, "RAX", vex->guest_RAX);
   (*f)(tid, "RCX", vex->guest_RCX);
   (*f)(tid, "RDX", vex->guest_RDX);
   (*f)(tid, "RBX", vex->guest_RBX);
   (*f)(tid, "RSI", vex->guest_RSI);
   (*f)(tid, "RDI", vex->guest_RDI);
   (*f)(tid, "RSP", vex->guest_RSP);
   (*f)(tid, "RBP", vex->guest_RBP);
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
   (*f)(tid, "R15", vex->guest_R15);
#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
   (*f)(tid, "GPR0" , vex->guest_GPR0 );
   (*f)(tid, "GPR1" , vex->guest_GPR1 );
   (*f)(tid, "GPR2" , vex->guest_GPR2 );
   (*f)(tid, "GPR3" , vex->guest_GPR3 );
   (*f)(tid, "GPR4" , vex->guest_GPR4 );
   (*f)(tid, "GPR5" , vex->guest_GPR5 );
   (*f)(tid, "GPR6" , vex->guest_GPR6 );
   (*f)(tid, "GPR7" , vex->guest_GPR7 );
   (*f)(tid, "GPR8" , vex->guest_GPR8 );
   (*f)(tid, "GPR9" , vex->guest_GPR9 );
   (*f)(tid, "GPR10", vex->guest_GPR10);
   (*f)(tid, "GPR11", vex->guest_GPR11);
   (*f)(tid, "GPR12", vex->guest_GPR12);
   (*f)(tid, "GPR13", vex->guest_GPR13);
   (*f)(tid, "GPR14", vex->guest_GPR14);
   (*f)(tid, "GPR15", vex->guest_GPR15);
   (*f)(tid, "GPR16", vex->guest_GPR16);
   (*f)(tid, "GPR17", vex->guest_GPR17);
   (*f)(tid, "GPR18", vex->guest_GPR18);
   (*f)(tid, "GPR19", vex->guest_GPR19);
   (*f)(tid, "GPR20", vex->guest_GPR20);
   (*f)(tid, "GPR21", vex->guest_GPR21);
   (*f)(tid, "GPR22", vex->guest_GPR22);
   (*f)(tid, "GPR23", vex->guest_GPR23);
   (*f)(tid, "GPR24", vex->guest_GPR24);
   (*f)(tid, "GPR25", vex->guest_GPR25);
   (*f)(tid, "GPR26", vex->guest_GPR26);
   (*f)(tid, "GPR27", vex->guest_GPR27);
   (*f)(tid, "GPR28", vex->guest_GPR28);
   (*f)(tid, "GPR29", vex->guest_GPR29);
   (*f)(tid, "GPR30", vex->guest_GPR30);
   (*f)(tid, "GPR31", vex->guest_GPR31);
   (*f)(tid, "CTR"  , vex->guest_CTR  );
   (*f)(tid, "LR"   , vex->guest_LR   );
#elif defined(VGA_arm)
   (*f)(tid, "R0" , vex->guest_R0 );
   (*f)(tid, "R1" , vex->guest_R1 );
   (*f)(tid, "R2" , vex->guest_R2 );
   (*f)(tid, "R3" , vex->guest_R3 );
   (*f)(tid, "R4" , vex->guest_R4 );
   (*f)(tid, "R5" , vex->guest_R5 );
   (*f)(tid, "R6" , vex->guest_R6 );
   (*f)(tid, "R8" , vex->guest_R8 );
   (*f)(tid, "R9" , vex->guest_R9 );
   (*f)(tid, "R10", vex->guest_R10);
   (*f)(tid, "R11", vex->guest_R11);
   (*f)(tid, "R12", vex->guest_R12);
   (*f)(tid, "R13", vex->guest_R13);
   (*f)(tid, "R14", vex->guest_R14);
#elif defined(VGA_s390x)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
#elif defined(VGA_mips32) || defined(VGA_mips64)
   (*f)(tid, "r0" , vex->guest_r0 );
   (*f)(tid, "r1" , vex->guest_r1 );
   (*f)(tid, "r2" , vex->guest_r2 );
   (*f)(tid, "r3" , vex->guest_r3 );
   (*f)(tid, "r4" , vex->guest_r4 );
   (*f)(tid, "r5" , vex->guest_r5 );
   (*f)(tid, "r6" , vex->guest_r6 );
   (*f)(tid, "r7" , vex->guest_r7 );
   (*f)(tid, "r8" , vex->guest_r8 );
   (*f)(tid, "r9" , vex->guest_r9 );
   (*f)(tid, "r10", vex->guest_r10);
   (*f)(tid, "r11", vex->guest_r11);
   (*f)(tid, "r12", vex->guest_r12);
   (*f)(tid, "r13", vex->guest_r13);
   (*f)(tid, "r14", vex->guest_r14);
   (*f)(tid, "r15", vex->guest_r15);
   (*f)(tid, "r16", vex->guest_r16);
   (*f)(tid, "r17", vex->guest_r17);
   (*f)(tid, "r18", vex->guest_r18);
   (*f)(tid, "r19", vex->guest_r19);
   (*f)(tid, "r20", vex->guest_r20);
   (*f)(tid, "r21", vex->guest_r21);
   (*f)(tid, "r22", vex->guest_r22);
   (*f)(tid, "r23", vex->guest_r23);
   (*f)(tid, "r24", vex->guest_r24);
   (*f)(tid, "r25", vex->guest_r25);
   (*f)(tid, "r26", vex->guest_r26);
   (*f)(tid, "r27", vex->guest_r27);
   (*f)(tid, "r28", vex->guest_r28);
   (*f)(tid, "r29", vex->guest_r29);
   (*f)(tid, "r30", vex->guest_r30);
   (*f)(tid, "r31", vex->guest_r31);
#elif defined(VGA_arm64)
   (*f)(tid, "x0" , vex->guest_X0 );
   (*f)(tid, "x1" , vex->guest_X1 );
   (*f)(tid, "x2" , vex->guest_X2 );
   (*f)(tid, "x3" , vex->guest_X3 );
   (*f)(tid, "x4" , vex->guest_X4 );
   (*f)(tid, "x5" , vex->guest_X5 );
   (*f)(tid, "x6" , vex->guest_X6 );
   (*f)(tid, "x7" , vex->guest_X7 );
   (*f)(tid, "x8" , vex->guest_X8 );
   (*f)(tid, "x9" , vex->guest_X9 );
   (*f)(tid, "x10", vex->guest_X10);
   (*f)(tid, "x11", vex->guest_X11);
   (*f)(tid, "x12", vex->guest_X12);
   (*f)(tid, "x13", vex->guest_X13);
   (*f)(tid, "x14", vex->guest_X14);
   (*f)(tid, "x15", vex->guest_X15);
   (*f)(tid, "x16", vex->guest_X16);
   (*f)(tid, "x17", vex->guest_X17);
   (*f)(tid, "x18", vex->guest_X18);
   (*f)(tid, "x19", vex->guest_X19);
   (*f)(tid, "x20", vex->guest_X20);
   (*f)(tid, "x21", vex->guest_X21);
   (*f)(tid, "x22", vex->guest_X22);
   (*f)(tid, "x23", vex->guest_X23);
   (*f)(tid, "x24", vex->guest_X24);
   (*f)(tid, "x25", vex->guest_X25);
   (*f)(tid, "x26", vex->guest_X26);
   (*f)(tid, "x27", vex->guest_X27);
   (*f)(tid, "x28", vex->guest_X28);
   (*f)(tid, "x29", vex->guest_X29);
   (*f)(tid, "x30", vex->guest_X30);
#else
#  error Unknown arch
#endif
}

void VG_(apply_to_GP_regs)(void (*f)(ThreadId, const HChar*, UWord))
{
   ThreadId tid;

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (VG_(is_valid_tid)(tid)
          || VG_(threads)[tid].exitreason == VgSrc_ExitProcess) {
         // live thread or thread instructed to die by another thread that
         // called exit.
         apply_to_GPs_of_tid(tid, f);
      }
   }
}

void VG_(thread_stack_reset_iter)(/*OUT*/ThreadId* tid)
{
   *tid = (ThreadId)(-1);
}

Bool VG_(thread_stack_next)(/*MOD*/ThreadId* tid,
                            /*OUT*/Addr* stack_min,
                            /*OUT*/Addr* stack_max)
{
   ThreadId i;
   for (i = (*tid)+1; i < VG_N_THREADS; i++) {
      if (i == VG_INVALID_THREADID)
         continue;
      if (VG_(threads)[i].status != VgTs_Empty) {
         *tid       = i;
         *stack_min = VG_(get_SP)(i);
         *stack_max = VG_(threads)[i].client_stack_highest_byte;
         return True;
      }
   }
   return False;
}

Addr VG_(thread_get_stack_max)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_highest_byte;
}

SizeT VG_(thread_get_stack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].client_stack_szB;
}

Addr VG_(thread_get_altstack_min)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return (Addr)VG_(threads)[tid].altstack.ss_sp;
}

SizeT VG_(thread_get_altstack_size)(ThreadId tid)
{
   vg_assert(0 <= tid && tid < VG_N_THREADS && tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].status != VgTs_Empty);
   return VG_(threads)[tid].altstack.ss_size;
}

//-------------------------------------------------------------
/* Details about the capabilities of the underlying (host) CPU.  These
   details are acquired by (1) enquiring with the CPU at startup, or
   (2) from the AT_SYSINFO entries the kernel gave us (ppc32 cache
   line size).  It's a bit nasty in the sense that there's no obvious
   way to stop uses of some of this info before it's ready to go.
   See pub_core_machine.h for more information about that.

   VG_(machine_get_hwcaps) may use signals (although it attempts to
   leave signal state unchanged) and therefore should only be
   called before m_main sets up the client's signal state.
*/

/* --------- State --------- */
static Bool hwcaps_done = False;

/* --- all archs --- */
static VexArch     va = VexArch_INVALID;
static VexArchInfo vai;

#if defined(VGA_x86)
UInt VG_(machine_x86_have_mxcsr) = 0;
#endif
#if defined(VGA_ppc32)
UInt VG_(machine_ppc32_has_FP)  = 0;
UInt VG_(machine_ppc32_has_VMX) = 0;
#endif
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
ULong VG_(machine_ppc64_has_VMX) = 0;
#endif
#if defined(VGA_arm)
Int VG_(machine_arm_archlevel) = 4;
#endif


/* For hwcaps detection on ppc32/64, s390x, arm and mips32 we'll need
   to do SIGILL testing, so we need a VG_MINIMAL_JMP_BUF. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
    || defined(VGA_arm) || defined(VGA_s390x) || defined(VGA_mips32)
#include "pub_core_libcsetjmp.h"
static VG_MINIMAL_JMP_BUF(env_unsup_insn);
static void handler_unsup_insn ( Int x ) {
   VG_MINIMAL_LONGJMP(env_unsup_insn);
}
#endif
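
/* All the instruction-set probes further down follow the same shape:
   arm a VG_MINIMAL_SETJMP, then try to execute the candidate
   instruction.  If the host lacks it, the kernel delivers SIGILL,
   handler_unsup_insn longjmps back, and the setjmp returns nonzero:

      have_feature = True;
      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
         have_feature = False;   // SIGILL (or SIGFPE) was delivered
      } else {
         __asm__ __volatile__( ... candidate instruction ... );
      }

   (Schematic only; "have_feature" stands for the various have_*
   flags used below.) */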

/* Helper function for VG_(machine_get_hwcaps), assumes the SIGILL/etc
 * handlers are installed.  Determines the sizes affected by dcbz
 * and dcbzl instructions and updates the given VexArchInfo structure
 * accordingly.
 *
 * Not very defensive: assumes that as long as the dcbz/dcbzl
 * instructions don't raise a SIGILL, that they will zero an aligned,
 * contiguous block of memory of a sensible size. */
#if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
static void find_ppc_dcbz_sz(VexArchInfo *arch_info)
{
   Int dcbz_szB = 0;
   Int dcbzl_szB;
#  define MAX_DCBZL_SZB (128) /* largest known effect of dcbzl */
   char test_block[4*MAX_DCBZL_SZB];
   char *aligned = test_block;
   Int i;

   /* round up to next max block size, assumes MAX_DCBZL_SZB is a
      power of 2 */
   aligned = (char *)(((HWord)aligned + MAX_DCBZL_SZB) & ~(MAX_DCBZL_SZB - 1));
   vg_assert((aligned + MAX_DCBZL_SZB) <= &test_block[sizeof(test_block)]);

   /* dcbz often clears 32B, although sometimes whatever the native cache
    * block size is */
   VG_(memset)(test_block, 0xff, sizeof(test_block));
   __asm__ __volatile__("dcbz 0,%0"
                        : /*out*/
                        : "r" (aligned) /*in*/
                        : "memory" /*clobber*/);
   for (dcbz_szB = 0, i = 0; i < sizeof(test_block); ++i) {
      if (!test_block[i])
         ++dcbz_szB;
   }
   vg_assert(dcbz_szB == 16 || dcbz_szB == 32 || dcbz_szB == 64 || dcbz_szB == 128);

   /* dcbzl clears 128B on G5/PPC970, and usually 32B on other platforms */
   if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
      dcbzl_szB = 0; /* indicates unsupported */
   }
   else {
      VG_(memset)(test_block, 0xff, sizeof(test_block));
      /* some older assemblers won't understand the dcbzl instruction
       * variant, so we directly emit the instruction ourselves */
      __asm__ __volatile__("mr 9, %0 ; .long 0x7C204FEC" /*dcbzl 0,9*/
                           : /*out*/
                           : "r" (aligned) /*in*/
                           : "memory", "r9" /*clobber*/);
      for (dcbzl_szB = 0, i = 0; i < sizeof(test_block); ++i) {
         if (!test_block[i])
            ++dcbzl_szB;
      }
      vg_assert(dcbzl_szB == 16 || dcbzl_szB == 32 || dcbzl_szB == 64 || dcbzl_szB == 128);
   }

   arch_info->ppc_dcbz_szB  = dcbz_szB;
   arch_info->ppc_dcbzl_szB = dcbzl_szB;

   VG_(debugLog)(1, "machine", "dcbz_szB=%d dcbzl_szB=%d\n",
                 dcbz_szB, dcbzl_szB);
#  undef MAX_DCBZL_SZB
}
#endif /* defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) */
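
/* For example, on a PPC970/G5 the counting loops above would
   typically report dcbz_szB == 32 and dcbzl_szB == 128, per the
   comments in find_ppc_dcbz_sz. */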

#ifdef VGA_s390x

/* Read /proc/cpuinfo. Look for lines like these

   processor 0: version = FF,  identification = 0117C9,  machine = 2064

   and return the machine model. If the machine model could not be determined
   or it is an unknown model, return VEX_S390X_MODEL_UNKNOWN. */

static UInt VG_(get_machine_model)(void)
{
   static struct model_map {
      const HChar name[5];
      UInt  id;
   } model_map[] = {
      { "2064", VEX_S390X_MODEL_Z900 },
      { "2066", VEX_S390X_MODEL_Z800 },
      { "2084", VEX_S390X_MODEL_Z990 },
      { "2086", VEX_S390X_MODEL_Z890 },
      { "2094", VEX_S390X_MODEL_Z9_EC },
      { "2096", VEX_S390X_MODEL_Z9_BC },
      { "2097", VEX_S390X_MODEL_Z10_EC },
      { "2098", VEX_S390X_MODEL_Z10_BC },
      { "2817", VEX_S390X_MODEL_Z196 },
      { "2818", VEX_S390X_MODEL_Z114 },
      { "2827", VEX_S390X_MODEL_ZEC12 },
      { "2828", VEX_S390X_MODEL_ZBC12 },
   };

   Int    model, n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar *p, *m, *model_name, *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return VEX_S390X_MODEL_UNKNOWN;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   model = VEX_S390X_MODEL_UNKNOWN;
   for (p = file_buf; *p; ++p) {
      /* Beginning of line */
      if (VG_(strncmp)( p, "processor", sizeof "processor" - 1 ) != 0) continue;

      m = VG_(strstr)( p, "machine" );
      if (m == NULL) continue;

      p = m + sizeof "machine" - 1;
      while ( VG_(isspace)( *p ) || *p == '=') {
         if (*p == '\n') goto next_line;
         ++p;
      }

      model_name = p;
      for (n = 0; n < sizeof model_map / sizeof model_map[0]; ++n) {
         struct model_map *mm = model_map + n;
         SizeT len = VG_(strlen)( mm->name );
         if ( VG_(strncmp)( mm->name, model_name, len ) == 0 &&
              VG_(isspace)( model_name[len] )) {
            if (mm->id < model) model = mm->id;
            p = model_name + len;
            break;
         }
      }
      /* Skip until end-of-line */
      while (*p != '\n')
         ++p;
   next_line: ;
   }

   VG_(free)( file_buf );
   VG_(debugLog)(1, "machine", "model = %s\n",
                 model == VEX_S390X_MODEL_UNKNOWN ? "UNKNOWN"
                                                  : model_map[model].name);
   return model;
}

#endif /* VGA_s390x */

#if defined(VGA_mips32) || defined(VGA_mips64)

/* Read /proc/cpuinfo and return the machine model. */
static UInt VG_(get_machine_model)(void)
{
   const char *search_MIPS_str = "MIPS";
   const char *search_Broadcom_str = "Broadcom";
   const char *search_Netlogic_str = "Netlogic";
   const char *search_Cavium_str= "Cavium";
   Int    n, fh;
   SysRes fd;
   SizeT  num_bytes, file_buf_size;
   HChar  *file_buf;

   /* Slurp contents of /proc/cpuinfo into FILE_BUF */
   fd = VG_(open)( "/proc/cpuinfo", 0, VKI_S_IRUSR );
   if ( sr_isError(fd) ) return -1;

   fh  = sr_Res(fd);

   /* Determine the size of /proc/cpuinfo.
      Work around broken-ness in /proc file system implementation.
      fstat returns a zero size for /proc/cpuinfo although it is
      claimed to be a regular file. */
   num_bytes = 0;
   file_buf_size = 1000;
   file_buf = VG_(malloc)("cpuinfo", file_buf_size + 1);
   while (42) {
      n = VG_(read)(fh, file_buf, file_buf_size);
      if (n < 0) break;

      num_bytes += n;
      if (n < file_buf_size) break;  /* reached EOF */
   }

   if (n < 0) num_bytes = 0;   /* read error; ignore contents */

   if (num_bytes > file_buf_size) {
      VG_(free)( file_buf );
      VG_(lseek)( fh, 0, VKI_SEEK_SET );
      file_buf = VG_(malloc)( "cpuinfo", num_bytes + 1 );
      n = VG_(read)( fh, file_buf, num_bytes );
      if (n < 0) num_bytes = 0;
   }

   file_buf[num_bytes] = '\0';
   VG_(close)(fh);

   /* Parse file */
   if (VG_(strstr) (file_buf, search_Broadcom_str) != NULL)
      return VEX_PRID_COMP_BROADCOM;
   if (VG_(strstr) (file_buf, search_Netlogic_str) != NULL)
      return VEX_PRID_COMP_NETLOGIC;
   if (VG_(strstr)(file_buf, search_Cavium_str) != NULL)
      return VEX_PRID_COMP_CAVIUM;
   if (VG_(strstr) (file_buf, search_MIPS_str) != NULL)
      return VEX_PRID_COMP_MIPS;

   /* Did not find string in the proc file. */
   return -1;
}

#endif

/* Determine what insn set and insn set variant the host has, and
   record it.  To be called once at system startup.  Returns False if
   this is a CPU incapable of running Valgrind.
   Also determine information about the caches on this host. */

Bool VG_(machine_get_hwcaps)( void )
{
   vg_assert(hwcaps_done == False);
   hwcaps_done = True;

   // Whack default settings into vai, so that we only need to fill in
   // any interesting bits.
   LibVEX_default_VexArchInfo(&vai);

#if defined(VGA_x86)
   { Bool have_sse1, have_sse2, have_sse3, have_cx8, have_lzcnt, have_mmxext;
     UInt eax, ebx, ecx, edx, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     if (eax < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     have_sse1 = (edx & (1<<25)) != 0; /* True => have sse insns */
     have_sse2 = (edx & (1<<26)) != 0; /* True => have sse2 insns */
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* Figure out if this is an AMD that can do MMXEXT. */
     have_mmxext = False;
     if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        /* Some older AMD processors support an sse1 subset (Integer SSE). */
        have_mmxext = !have_sse1 && ((edx & (1<<22)) != 0);
     }

     /* Figure out if this is an AMD or Intel that can do LZCNT. */
     have_lzcnt = False;
     if ((0 == VG_(strcmp)(vstr, "AuthenticAMD")
          || 0 == VG_(strcmp)(vstr, "GenuineIntel"))
         && max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Intel processors don't define the mmxext extension, but since it
        is just an sse1 subset, always define it when we have sse1. */
     if (have_sse1)
        have_mmxext = True;

     va = VexArchX86;
     vai.endness = VexEndnessLE;

     if (have_sse3 && have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE3;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse2 && have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE2;
        if (have_lzcnt)
           vai.hwcaps |= VEX_HWCAPS_X86_LZCNT;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_sse1 && have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT;
        vai.hwcaps |= VEX_HWCAPS_X86_SSE1;
        VG_(machine_x86_have_mxcsr) = 1;
     } else if (have_mmxext) {
        vai.hwcaps  = VEX_HWCAPS_X86_MMXEXT; /*integer only sse1 subset*/
        VG_(machine_x86_have_mxcsr) = 0;
     } else {
        vai.hwcaps = 0; /*baseline - no sse at all*/
        VG_(machine_x86_have_mxcsr) = 0;
     }

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_amd64)
   { Bool have_sse3, have_cx8, have_cx16;
     Bool have_lzcnt, have_avx, have_bmi, have_avx2;
     Bool have_rdtscp;
     UInt eax, ebx, ecx, edx, max_basic, max_extended;
     HChar vstr[13];
     vstr[0] = 0;

     if (!VG_(has_cpuid)())
        /* we can't do cpuid at all.  Give up. */
        return False;

     VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
     max_basic = eax;
     if (max_basic < 1)
        /* we can't ask for cpuid(x) for x > 0.  Give up. */
        return False;

     /* Get processor ID string, and max basic/extended index
        values. */
     VG_(memcpy)(&vstr[0], &ebx, 4);
     VG_(memcpy)(&vstr[4], &edx, 4);
     VG_(memcpy)(&vstr[8], &ecx, 4);
     vstr[12] = 0;

     VG_(cpuid)(0x80000000, 0, &eax, &ebx, &ecx, &edx);
     max_extended = eax;

     /* get capabilities bits into edx */
     VG_(cpuid)(1, 0, &eax, &ebx, &ecx, &edx);

     // we assume that SSE1 and SSE2 are available by default
     have_sse3 = (ecx & (1<<0)) != 0;  /* True => have sse3 insns */
     // ssse3   is ecx:9
     // sse41   is ecx:19
     // sse42   is ecx:20

     // osxsave is ecx:27
     // avx     is ecx:28
     // fma     is ecx:12
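     /* AVX is only usable if the CPU reports both OSXSAVE and AVX,
        and XGETBV(0) (which reads XCR0) then confirms that the OS
        actually saves/restores the relevant state: XCR0 bit 1 covers
        the XMM registers and bit 2 the upper YMM halves, hence the
        (w & 6) == 6 test below. */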
     have_avx = False;
     /* have_fma = False; */
     if ( (ecx & ((1<<27)|(1<<28))) == ((1<<27)|(1<<28)) ) {
        /* processor supports AVX instructions and XGETBV is enabled
           by OS */
        ULong w;
        __asm__ __volatile__("movq $0,%%rcx ; "
                             ".byte 0x0F,0x01,0xD0 ; " /* xgetbv */
                             "movq %%rax,%0"
                             :/*OUT*/"=r"(w) :/*IN*/
                             :/*TRASH*/"rdx","rcx");
        if ((w & 6) == 6) {
           /* OS has enabled both XMM and YMM state support */
           have_avx = True;
           /* have_fma = (ecx & (1<<12)) != 0; */
           /* have_fma: Probably correct, but gcc complains due to
              unusedness. */
        }
     }

     /* cmpxchg8b is a minimum requirement now; if we don't have it we
        must simply give up.  But all CPUs since Pentium-I have it, so
        that doesn't seem like much of a restriction. */
     have_cx8 = (edx & (1<<8)) != 0; /* True => have cmpxchg8b */
     if (!have_cx8)
        return False;

     /* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
     have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */

     /* Figure out if this CPU can do LZCNT. */
     have_lzcnt = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
     }

     /* Can we do RDTSCP? */
     have_rdtscp = False;
     if (max_extended >= 0x80000001) {
        VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
        have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSCP */
     }

     /* Check for BMI1 and AVX2, but only if we have AVX1 (plus OS
        support). */
     have_bmi  = False;
     have_avx2 = False;
     if (have_avx && max_basic >= 7) {
        VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
        have_bmi  = (ebx & (1<<3)) != 0; /* True => have BMI1 */
        have_avx2 = (ebx & (1<<5)) != 0; /* True => have AVX2 */
     }

     va          = VexArchAMD64;
     vai.endness = VexEndnessLE;
     vai.hwcaps  = (have_sse3   ? VEX_HWCAPS_AMD64_SSE3   : 0)
                 | (have_cx16   ? VEX_HWCAPS_AMD64_CX16   : 0)
                 | (have_lzcnt  ? VEX_HWCAPS_AMD64_LZCNT  : 0)
                 | (have_avx    ? VEX_HWCAPS_AMD64_AVX    : 0)
                 | (have_bmi    ? VEX_HWCAPS_AMD64_BMI    : 0)
                 | (have_avx2   ? VEX_HWCAPS_AMD64_AVX2   : 0)
                 | (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_ppc32)
   {
     /* Find out which subset of the ppc32 instruction set is supported by
        verifying whether various ppc32 instructions generate a SIGILL
        or a SIGFPE. An alternative approach is to check the AT_HWCAP and
        AT_PLATFORM entries in the ELF auxiliary table -- see also
        the_iifii.client_auxv in m_main.c.
      */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc32 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     r = VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     vg_assert(r == 0);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     vg_assert(r == 0);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     r = VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     vg_assert(r == 0);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__(".long 0xFC000090"); /*fmr 0,0 */
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        /* Unfortunately some older assemblers don't speak Altivec (or
           choose not to), so to be safe we directly emit the 32-bit
           word corresponding to "vor 0,0,0".  This fixes a build
           problem that happens on Debian 3.1 (ppc32), and probably
           various other places. */
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0 */
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte 0,0 */
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07);
     /* Make FP a prerequisite for VMX (bogusly so), and for FX and GX. */
     if (have_V && !have_F)
        have_V = False;
     if (have_FX && !have_F)
        have_FX = False;
     if (have_GX && !have_F)
        have_GX = False;

     VG_(machine_ppc32_has_FP)  = have_F ? 1 : 0;
     VG_(machine_ppc32_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC32;
     vai.endness = VexEndnessBE;

     vai.hwcaps = 0;
     if (have_F)  vai.hwcaps |= VEX_HWCAPS_PPC32_F;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC32_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC32_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC32_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC32_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC32_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC32_ISA2_07;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc32_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_F, have_V, have_FX, have_GX, have_VX, have_DFP;
     volatile Bool have_isa_2_07;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all ppc64 platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* standard FP insns */
     have_F = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_F = False;
     } else {
        __asm__ __volatile__("fmr 0,0");
     }

     /* Altivec insns */
     have_V = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_V = False;
     } else {
        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
     }

     /* General-Purpose optional (fsqrt, fsqrts) */
     have_FX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_FX = False;
     } else {
        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
     }

     /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
     have_GX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_GX = False;
     } else {
        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
     }

     /* VSX support implies Power ISA 2.06 */
     have_VX = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VX = False;
     } else {
        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
     }

     /* Check for Decimal Floating Point (DFP) support. */
     have_DFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_DFP = False;
     } else {
        __asm__ __volatile__(".long 0xee4e8005"); /* dadd FRT,FRA, FRB */
     }

     /* Check for ISA 2.07 support. */
     have_isa_2_07 = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_isa_2_07 = False;
     } else {
        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
     }

     /* determine dcbz/dcbzl sizes while we still have the signal
      * handlers registered */
     find_ppc_dcbz_sz(&vai);

     VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &saved_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     VG_(debugLog)(1, "machine", "F %d V %d FX %d GX %d VX %d DFP %d ISA2.07 %d\n",
                    (Int)have_F, (Int)have_V, (Int)have_FX,
                    (Int)have_GX, (Int)have_VX, (Int)have_DFP,
                    (Int)have_isa_2_07);
     /* on ppc64be, if we don't even have FP, just give up. */
     if (!have_F)
        return False;

     VG_(machine_ppc64_has_VMX) = have_V ? 1 : 0;

     va = VexArchPPC64;
#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     vai.hwcaps = 0;
     if (have_V)  vai.hwcaps |= VEX_HWCAPS_PPC64_V;
     if (have_FX) vai.hwcaps |= VEX_HWCAPS_PPC64_FX;
     if (have_GX) vai.hwcaps |= VEX_HWCAPS_PPC64_GX;
     if (have_VX) vai.hwcaps |= VEX_HWCAPS_PPC64_VX;
     if (have_DFP) vai.hwcaps |= VEX_HWCAPS_PPC64_DFP;
     if (have_isa_2_07) vai.hwcaps |= VEX_HWCAPS_PPC64_ISA2_07;

     VG_(machine_get_cache_info)(&vai);

     /* But we're not done yet: VG_(machine_ppc64_set_clszB) must be
        called before we're ready to go. */
     return True;
   }

#elif defined(VGA_s390x)

#  include "libvex_s390x_common.h"

   {
     /* Instruction set detection code borrowed from ppc above. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t     tmp_sigill_act;

     volatile Bool have_LDISP, have_STFLE;
     Int i, r, model;

     /* If the model is "unknown" don't treat this as an error. Assume
        this is a brand-new machine model for which we don't have the
        identification yet. Keeping fingers crossed. */
     model = VG_(get_machine_model)();

     /* Unblock SIGILL and stash away the old action for that signal */
     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     /* Determine hwcaps. Note, we cannot use the stfle insn because it
        is not supported on z900. */

     have_LDISP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_LDISP = False;
     } else {
       /* BASR loads the address of the next insn into r1.  Needed to avoid
          a segfault in XY. */
        __asm__ __volatile__("basr %%r1,%%r0\n\t"
                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
     }

     /* Check availability of STFLE. If available store facility bits
        in hoststfle. */
     ULong hoststfle[S390_NUM_FACILITY_DW];

     for (i = 0; i < S390_NUM_FACILITY_DW; ++i)
        hoststfle[i] = 0;

     have_STFLE = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_STFLE = False;
     } else {
        register ULong reg0 asm("0") = S390_NUM_FACILITY_DW - 1;

        __asm__ __volatile__(" .insn s,0xb2b00000,%0\n"   /* stfle */
                             : "=m" (hoststfle), "+d"(reg0)
                             : : "cc", "memory");
     }

     /* Restore signals */
     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
     vg_assert(r == 0);
     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
     vg_assert(r == 0);
     va = VexArchS390X;
     vai.endness = VexEndnessBE;

     vai.hwcaps = model;
     if (have_STFLE) vai.hwcaps |= VEX_HWCAPS_S390X_STFLE;
     if (have_LDISP) {
        /* Use long displacement only on machines >= z990. For all other
           machines it is millicoded and therefore slow. */
        if (model >= VEX_S390X_MODEL_Z990)
           vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
     }

     /* Detect presence of certain facilities using the STFLE insn.
        Note that these facilities were introduced at the same time or
        later than STFLE, so the absence of STFLE implies the absence
        of the facility we're trying to detect. */
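     /* Facility bits as stored by STFLE are numbered starting from
        the most-significant bit of the first doubleword, which is why
        the detection loop below shifts by (63 - facility_bit). */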
     struct fac_hwcaps_map {
        UInt installed;
        UInt facility_bit;
        UInt hwcaps_bit;
        const HChar name[6];   // may need adjustment for new facility names
     } fac_hwcaps[] = {
        { False, S390_FAC_EIMM,  VEX_HWCAPS_S390X_EIMM,  "EIMM"  },
        { False, S390_FAC_GIE,   VEX_HWCAPS_S390X_GIE,   "GIE"   },
        { False, S390_FAC_DFP,   VEX_HWCAPS_S390X_DFP,   "DFP"   },
        { False, S390_FAC_FPSE,  VEX_HWCAPS_S390X_FGX,   "FGX"   },
        { False, S390_FAC_ETF2,  VEX_HWCAPS_S390X_ETF2,  "ETF2"  },
        { False, S390_FAC_ETF3,  VEX_HWCAPS_S390X_ETF3,  "ETF3"  },
        { False, S390_FAC_STCKF, VEX_HWCAPS_S390X_STCKF, "STCKF" },
        { False, S390_FAC_FPEXT, VEX_HWCAPS_S390X_FPEXT, "FPEXT" },
        { False, S390_FAC_LSC,   VEX_HWCAPS_S390X_LSC,   "LSC"   },
        { False, S390_FAC_PFPO,  VEX_HWCAPS_S390X_PFPO,  "PFPO"  },
     };

     /* Set hwcaps according to the detected facilities */
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        vg_assert(fac_hwcaps[i].facility_bit <= 63);  // for now
        if (hoststfle[0] & (1ULL << (63 - fac_hwcaps[i].facility_bit))) {
           fac_hwcaps[i].installed = True;
           vai.hwcaps |= fac_hwcaps[i].hwcaps_bit;
        }
     }

     /* Build up a string showing the probed-for facilities */
     HChar fac_str[(sizeof fac_hwcaps / sizeof fac_hwcaps[0]) *
                   (sizeof fac_hwcaps[0].name + 3) +  //  %s %d
                   7 + 1 + 4 + 2    // machine %4d
                   + 1];  // \0
     HChar *p = fac_str;
     p += VG_(sprintf)(p, "machine %4d  ", model);
     for (i=0; i < sizeof fac_hwcaps / sizeof fac_hwcaps[0]; ++i) {
        p += VG_(sprintf)(p, " %s %1d", fac_hwcaps[i].name,
                          fac_hwcaps[i].installed);
     }
     *p++ = '\0';

     VG_(debugLog)(1, "machine", "%s\n", fac_str);
     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm)
   {
     /* Same instruction set detection algorithm as for ppc32. */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act, saved_sigfpe_act;
     vki_sigaction_toK_t     tmp_sigill_act,   tmp_sigfpe_act;

     volatile Bool have_VFP, have_VFP2, have_VFP3, have_NEON;
     volatile Int archlevel;
     Int r;

     /* This is a kludge.  Really we ought to back-convert saved_act
        into a toK_t using VG_(convert_sigaction_fromK_to_toK), but
        since that's a no-op on all arm platforms so far supported,
        it's not worth the typing effort.  At least include most basic
        sanity check: */
     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
     VG_(sigaddset)(&tmp_set, VKI_SIGFPE);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     VG_(sigaction)(VKI_SIGFPE, NULL, &saved_sigfpe_act);
     tmp_sigfpe_act = saved_sigfpe_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     tmp_sigfpe_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigfpe_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigfpe_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigfpe_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);

     /* VFP insns */
     have_VFP = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_VFP = False;
     } else {
        __asm__ __volatile__(".word 0xEEB02B42"); /* VMOV.F64 d2, d2 */
     }
     /* There are several generations of the VFP extension, but they
        differ very little, so for now we do not distinguish them. */
     have_VFP2 = have_VFP;
     have_VFP3 = have_VFP;

     /* NEON insns */
     have_NEON = True;
     if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
        have_NEON = False;
     } else {
        __asm__ __volatile__(".word 0xF2244154"); /* VMOV q2, q2 */
     }

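     /* Architecture level: PLI first appears in ARMv7 and PKHBT in
        ARMv6, so a SIGILL from either probe caps the detected level
        at v5 accordingly. */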
     /* ARM architecture level */
     archlevel = 5; /* v5 will be base level */
     if (archlevel < 7) {
        archlevel = 7;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xF45FF000"); /* PLI [PC,#-0] */
        }
     }
     if (archlevel < 6) {
        archlevel = 6;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           archlevel = 5;
        } else {
           __asm__ __volatile__(".word 0xE6822012"); /* PKHBT r2, r2, r2 */
        }
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(convert_sigaction_fromK_to_toK)(&saved_sigfpe_act, &tmp_sigfpe_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigaction)(VKI_SIGFPE, &tmp_sigfpe_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "ARMv%d VFP %d VFP2 %d VFP3 %d NEON %d\n",
           archlevel, (Int)have_VFP, (Int)have_VFP2, (Int)have_VFP3,
           (Int)have_NEON);

     VG_(machine_arm_archlevel) = archlevel;

     va = VexArchARM;
     vai.endness = VexEndnessLE;

     vai.hwcaps = VEX_ARM_ARCHLEVEL(archlevel);
     if (have_VFP3) vai.hwcaps |= VEX_HWCAPS_ARM_VFP3;
     if (have_VFP2) vai.hwcaps |= VEX_HWCAPS_ARM_VFP2;
     if (have_VFP)  vai.hwcaps |= VEX_HWCAPS_ARM_VFP;
     if (have_NEON) vai.hwcaps |= VEX_HWCAPS_ARM_NEON;

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_arm64)
   {
     va = VexArchARM64;
     vai.endness = VexEndnessLE;

     /* So far there are no variants. */
     vai.hwcaps = 0;

     VG_(machine_get_cache_info)(&vai);

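     /* CTR_EL0's DminLine field (bits [19:16]) and IminLine field
        (bits [3:0]) hold log2 of the smallest data/instruction cache
        line size in 4-byte words; the "+ 2" below converts that to
        log2 of the size in bytes. */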
     /* 0 denotes 'not set'.  The range of legitimate values here,
        after being set that is, is 2 through 17 inclusive. */
     vg_assert(vai.arm64_dMinLine_lg2_szB == 0);
     vg_assert(vai.arm64_iMinLine_lg2_szB == 0);
     ULong ctr_el0;
     __asm__ __volatile__("mrs %0, ctr_el0" : "=r"(ctr_el0));
     vai.arm64_dMinLine_lg2_szB = ((ctr_el0 >> 16) & 0xF) + 2;
     vai.arm64_iMinLine_lg2_szB = ((ctr_el0 >>  0) & 0xF) + 2;
     VG_(debugLog)(1, "machine", "ARM64: ctr_el0.dMinLine_szB = %d, "
                      "ctr_el0.iMinLine_szB = %d\n",
                   1 << vai.arm64_dMinLine_lg2_szB,
                   1 << vai.arm64_iMinLine_lg2_szB);

     return True;
   }

#elif defined(VGA_mips32)
   {
     /* Define the position of F64 bit in FIR register. */
#    define FP64 22
     va = VexArchMIPS32;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
         return False;

     vai.hwcaps = model;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     /* Same instruction set detection algorithm as for ppc32/arm... */
     vki_sigset_t          saved_set, tmp_set;
     vki_sigaction_fromK_t saved_sigill_act;
     vki_sigaction_toK_t   tmp_sigill_act;

     volatile Bool have_DSP, have_DSPr2;
     Int r;

     vg_assert(sizeof(vki_sigaction_fromK_t) == sizeof(vki_sigaction_toK_t));

     VG_(sigemptyset)(&tmp_set);
     VG_(sigaddset)(&tmp_set, VKI_SIGILL);

     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
     vg_assert(r == 0);

     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
     vg_assert(r == 0);
     tmp_sigill_act = saved_sigill_act;

     /* NODEFER: signal handler does not return (from the kernel's point of
        view), hence if it is to successfully catch a signal more than once,
        we need the NODEFER flag. */
     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
     tmp_sigill_act.ksa_handler = handler_unsup_insn;
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);

     if (model == VEX_PRID_COMP_MIPS) {
        /* DSPr2 instructions. */
        have_DSPr2 = True;
        if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
           have_DSPr2 = False;
        } else {
           __asm__ __volatile__(".word 0x7d095351"); /* precr.qb.ph t2, t0, t1 */
        }
        if (have_DSPr2) {
           /* We assume it's 74K, since it can run DSPr2. */
           vai.hwcaps |= VEX_PRID_IMP_74K;
        } else {
           /* DSP instructions. */
           have_DSP = True;
           if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
              have_DSP = False;
           } else {
              __asm__ __volatile__(".word 0x7c3f44b8"); /* rddsp t0, 0x3f */
           }
           if (have_DSP) {
              /* We assume it's 34K, since it has support for DSP. */
              vai.hwcaps |= VEX_PRID_IMP_34K;
           }
        }
     }

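     /* "cfc1 %0, $0" reads coprocessor-1 control register 0, the FP
        Implementation Register (FIR); its F64 bit (bit 22, as defined
        by FP64 above) is set when the FPU has 64-bit registers. */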
     /* Check if CPU has FPU and 32 dbl. prec. FP registers */
     int FIR = 0;
     __asm__ __volatile__(
        "cfc1 %0, $0"  "\n\t"
        : "=r" (FIR)
     );
     if (FIR & (1 << FP64)) {
        vai.hwcaps |= VEX_PRID_CPU_32FPR;
     }

     VG_(convert_sigaction_fromK_to_toK)(&saved_sigill_act, &tmp_sigill_act);
     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
     VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);

     VG_(debugLog)(1, "machine", "hwcaps = 0x%x\n", vai.hwcaps);
     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#elif defined(VGA_mips64)
   {
     va = VexArchMIPS64;
     UInt model = VG_(get_machine_model)();
     if (model == -1)
         return False;

     vai.hwcaps = model;

#    if defined(VKI_LITTLE_ENDIAN)
     vai.endness = VexEndnessLE;
#    elif defined(VKI_BIG_ENDIAN)
     vai.endness = VexEndnessBE;
#    else
     vai.endness = VexEndness_INVALID;
#    endif

     VG_(machine_get_cache_info)(&vai);

     return True;
   }

#else
#  error "Unknown arch"
#endif
}

/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc32)
void VG_(machine_ppc32_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Notify host cpu instruction cache line size. */
#if defined(VGA_ppc64be) || defined(VGA_ppc64le)
void VG_(machine_ppc64_set_clszB)( Int szB )
{
   vg_assert(hwcaps_done);

   /* Either the value must not have been set yet (zero) or we can
      tolerate it being set to the same value multiple times, as the
      stack scanning logic in m_main is a bit stupid. */
   vg_assert(vai.ppc_icache_line_szB == 0
             || vai.ppc_icache_line_szB == szB);

   vg_assert(szB == 16 || szB == 32 || szB == 64 || szB == 128);
   vai.ppc_icache_line_szB = szB;
}
#endif


/* Notify host's ability to handle NEON instructions. */
#if defined(VGA_arm)
void VG_(machine_arm_set_has_NEON)( Bool has_neon )
{
   vg_assert(hwcaps_done);
   /* There's nothing else we can sanity check. */

   if (has_neon) {
      vai.hwcaps |= VEX_HWCAPS_ARM_NEON;
   } else {
      vai.hwcaps &= ~VEX_HWCAPS_ARM_NEON;
   }
}
#endif


/* Fetch host cpu info, once established. */
void VG_(machine_get_VexArchInfo)( /*OUT*/VexArch* pVa,
                                   /*OUT*/VexArchInfo* pVai )
{
   vg_assert(hwcaps_done);
   if (pVa)  *pVa  = va;
   if (pVai) *pVai = vai;
}
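
/* Illustrative (hypothetical) caller, not part of this file: code
   that needs the host's capabilities after startup would do

      VexArch     arch;
      VexArchInfo info;
      VG_(machine_get_VexArchInfo)( &arch, &info );
      if (info.hwcaps & VEX_HWCAPS_AMD64_AVX) { ... }
*/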

/* Returns the size of the largest guest register that we will
   simulate in this run.  This depends on both the guest architecture
   and on the specific capabilities we are simulating for that guest
   (eg, AVX or non-AVX ?, for amd64).  Should return either 4, 8, 16
   or 32.  General rule: if in doubt, return a value larger than
   reality.

   This information is needed by Cachegrind and Callgrind to decide
   what the minimum cache line size they are prepared to simulate is.
   Basically require that the minimum cache line size is at least as
   large as the largest register that might get transferred to/from
   memory, so as to guarantee that any such transaction can straddle
   at most 2 cache lines.
*/
Int VG_(machine_get_size_of_largest_guest_register) ( void )
{
   vg_assert(hwcaps_done);
   /* Once hwcaps_done is True, we can fish around inside va/vai to
      find the information we need. */

#  if defined(VGA_x86)
   vg_assert(va == VexArchX86);
   /* We don't support AVX, so 32 is out.  At the other end, even if
      we don't support any SSE, the X87 can generate 10 byte
      transfers, so let's say 16 to be on the safe side.  Hence the
      answer is always 16. */
   return 16;

#  elif defined(VGA_amd64)
   /* if AVX then 32 else 16 */
   return (vai.hwcaps & VEX_HWCAPS_AMD64_AVX) ? 32 : 16;

#  elif defined(VGA_ppc32)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC32_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC32_DFP) return 16;
   return 8;

#  elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* 8 if boring; 16 if signs of Altivec or other exotic stuff */
   if (vai.hwcaps & VEX_HWCAPS_PPC64_V) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_VX) return 16;
   if (vai.hwcaps & VEX_HWCAPS_PPC64_DFP) return 16;
   return 8;

#  elif defined(VGA_s390x)
   return 8;

#  elif defined(VGA_arm)
   /* Really it depends whether or not we have NEON, but let's just
      assume we always do. */
   return 16;

#  elif defined(VGA_arm64)
   /* ARM64 always has Neon, AFAICS. */
   return 16;

#  elif defined(VGA_mips32)
   /* The guest state implies 4, but that can't really be true, can
      it? */
   return 8;

#  elif defined(VGA_mips64)
   return 8;

#  else
#    error "Unknown arch"
#  endif
}


// Given a pointer to a function as obtained by "& functionname" in C,
// produce a pointer to the actual entry point for the function.
void* VG_(fnptr_to_fnentry)( void* f )
{
#  if defined(VGP_x86_linux) || defined(VGP_amd64_linux)        \
      || defined(VGP_arm_linux) || defined(VGO_darwin)          \
      || defined(VGP_ppc32_linux) || defined(VGP_ppc64le_linux) \
      || defined(VGP_s390x_linux) || defined(VGP_mips32_linux)  \
      || defined(VGP_mips64_linux) || defined(VGP_arm64_linux)
   return f;
#  elif defined(VGP_ppc64be_linux)
   /* ppc64-linux uses the AIX scheme, in which f is a pointer to a
      3-word function descriptor, of which the first word is the entry
      address. */
   UWord* descr = (UWord*)f;
   return (void*)(descr[0]);
#  else
#    error "Unknown platform"
#  endif
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/