1ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 2ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/ 3ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--- x86- and AMD64-specific definitions. cg-x86-amd64.c ---*/ 4ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/ 5ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 6ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* 7ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown This file is part of Cachegrind, a Valgrind tool for cache 8ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown profiling programs. 9ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 10b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov Copyright (C) 2002-2011 Nicholas Nethercote 11ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown njn@valgrind.org 12ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 13ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown This program is free software; you can redistribute it and/or 14ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown modify it under the terms of the GNU General Public License as 15ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown published by the Free Software Foundation; either version 2 of the 16ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown License, or (at your option) any later version. 17ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 18ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown This program is distributed in the hope that it will be useful, but 19ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown WITHOUT ANY WARRANTY; without even the implied warranty of 20ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown General Public License for more details. 22ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 23ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown You should have received a copy of the GNU General Public License 24ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown along with this program; if not, write to the Free Software 25ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 26ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 02111-1307, USA. 27ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 28ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown The GNU General Public License is contained in the file COPYING. 29ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/ 30ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 31ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#if defined(VGA_x86) || defined(VGA_amd64) 32ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 33ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "pub_tool_basics.h" 34ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "pub_tool_cpuid.h" 35ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "pub_tool_libcbase.h" 36ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "pub_tool_libcassert.h" 37ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "pub_tool_libcprint.h" 38ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 39ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "cg_arch.h" 40ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 41ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown// All CPUID info taken from sandpile.org/ia32/cpuid.htm */ 42ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown// Probably only works for Intel and AMD chips, and probably only for some of 43ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown// them. 44ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 45ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic void micro_ops_warn(Int actual_size, Int used_size, Int line_size) 46ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 47ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown VG_(dmsg)("warning: Pentium 4 with %d KB micro-op instruction trace cache\n", 48ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown actual_size); 49ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown VG_(dmsg)(" Simulating a %d KB I-cache with %d B lines\n", 50ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown used_size, line_size); 51ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 52ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 53ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Intel method is truly wretched. We have to do an insane indexing into an 54ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * array of pre-defined configurations for various parts of the memory 55ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * hierarchy. 56ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * According to Intel Processor Identification, App Note 485. 57ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * 58ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * If a L3 cache is found, then data for it rather than the L2 59ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * is returned via *LLc. 60ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */ 61ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic 62ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff BrownInt Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc) 63ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 64ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Int cpuid1_eax; 65ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Int cpuid1_ignore; 66ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Int family; 67ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Int model; 68ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UChar info[16]; 69b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov Int i, j, trials; 70ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Bool L2_found = False; 71ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* If we see L3 cache info, copy it into L3c. Then, at the end, 72ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown copy it into *LLc. Hence if a L3 cache is specified, *LLc will 73ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown eventually contain a description of it rather than the L2 cache. 74ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown The use of the L3c intermediary makes this process independent 75ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown of the order in which the cache specifications appear in 76ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown info[]. */ 77ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Bool L3_found = False; 78ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown cache_t L3c = { 0, 0, 0 }; 79ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 80ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (level < 2) { 81ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown VG_(dmsg)("warning: CPUID level < 2 for Intel processor (%d)\n", level); 82ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return -1; 83ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 84ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 85ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* family/model needed to distinguish code reuse (currently 0x49) */ 86b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov VG_(cpuid)(1, 0, &cpuid1_eax, &cpuid1_ignore, 87ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown &cpuid1_ignore, &cpuid1_ignore); 88ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown family = (((cpuid1_eax >> 20) & 0xff) << 4) + ((cpuid1_eax >> 8) & 0xf); 89ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown model = (((cpuid1_eax >> 16) & 0xf) << 4) + ((cpuid1_eax >> 4) & 0xf); 90ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 91b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov VG_(cpuid)(2, 0, (Int*)&info[0], (Int*)&info[4], 92b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov (Int*)&info[8], (Int*)&info[12]); 93ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown trials = info[0] - 1; /* AL register - bits 0..7 of %eax */ 94ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown info[0] = 0x0; /* reset AL */ 95ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 96ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (0 != trials) { 97ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown VG_(dmsg)("warning: non-zero CPUID trials for Intel processor (%d)\n", 98ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown trials); 99ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return -1; 100ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 101ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 102ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (i = 0; i < 16; i++) { 103ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 104ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown switch (info[i]) { 105ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 106ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x0: /* ignore zeros */ 107ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown break; 108ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 109ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* TLB info, ignore */ 110ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: 111b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov case 0x0b: 112ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x4f: case 0x50: case 0x51: case 0x52: case 0x55: 113ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x56: case 0x57: case 0x59: 114ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x5a: case 0x5b: case 0x5c: case 0x5d: 115b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov case 0x76: 116ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xb0: case 0xb1: case 0xb2: 117ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xb3: case 0xb4: case 0xba: case 0xc0: 118ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xca: 119ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown break; 120ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 121ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break; 122ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break; 123ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x09: *I1c = (cache_t) { 32, 4, 64 }; break; 124ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x30: *I1c = (cache_t) { 32, 8, 64 }; break; 125ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 126ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break; 127ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break; 128b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov case 0x0d: *D1c = (cache_t) { 16, 4, 64 }; break; 129ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x0e: *D1c = (cache_t) { 24, 6, 64 }; break; 130ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x2c: *D1c = (cache_t) { 32, 8, 64 }; break; 131ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 132ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* IA-64 info -- panic! */ 133ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x10: case 0x15: case 0x1a: 134ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x88: case 0x89: case 0x8a: case 0x8d: 135ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x90: case 0x96: case 0x9b: 136ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown VG_(tool_panic)("IA-64 cache detected?!"); 137ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 138ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* L3 cache info. */ 139ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x22: L3c = (cache_t) { 512, 4, 64 }; L3_found = True; break; 140ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x23: L3c = (cache_t) { 1024, 8, 64 }; L3_found = True; break; 141ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x25: L3c = (cache_t) { 2048, 8, 64 }; L3_found = True; break; 142ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x29: L3c = (cache_t) { 4096, 8, 64 }; L3_found = True; break; 143ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x46: L3c = (cache_t) { 4096, 4, 64 }; L3_found = True; break; 144ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x47: L3c = (cache_t) { 8192, 8, 64 }; L3_found = True; break; 145ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x4a: L3c = (cache_t) { 6144, 12, 64 }; L3_found = True; break; 146ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x4b: L3c = (cache_t) { 8192, 16, 64 }; L3_found = True; break; 147ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x4c: L3c = (cache_t) { 12288, 12, 64 }; L3_found = True; break; 148ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x4d: L3c = (cache_t) { 16384, 16, 64 }; L3_found = True; break; 149ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xd0: L3c = (cache_t) { 512, 4, 64 }; L3_found = True; break; 150ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xd1: L3c = (cache_t) { 1024, 4, 64 }; L3_found = True; break; 151ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xd2: L3c = (cache_t) { 2048, 4, 64 }; L3_found = True; break; 152ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xd6: L3c = (cache_t) { 1024, 8, 64 }; L3_found = True; break; 153ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xd7: L3c = (cache_t) { 2048, 8, 64 }; L3_found = True; break; 154ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xd8: L3c = (cache_t) { 4096, 8, 64 }; L3_found = True; break; 155ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xdc: L3c = (cache_t) { 1536, 12, 64 }; L3_found = True; break; 156ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xdd: L3c = (cache_t) { 3072, 12, 64 }; L3_found = True; break; 157ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xde: L3c = (cache_t) { 6144, 12, 64 }; L3_found = True; break; 158ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xe2: L3c = (cache_t) { 2048, 16, 64 }; L3_found = True; break; 159ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xe3: L3c = (cache_t) { 4096, 16, 64 }; L3_found = True; break; 160ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xe4: L3c = (cache_t) { 8192, 16, 64 }; L3_found = True; break; 161ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xea: L3c = (cache_t) { 12288, 24, 64 }; L3_found = True; break; 162ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xeb: L3c = (cache_t) { 18432, 24, 64 }; L3_found = True; break; 163ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xec: L3c = (cache_t) { 24576, 24, 64 }; L3_found = True; break; 164ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 165ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Described as "MLC" in Intel documentation */ 166ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x21: *LLc = (cache_t) { 256, 8, 64 }; L2_found = True; break; 167ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 168ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* These are sectored, whatever that means */ 169ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x39: *LLc = (cache_t) { 128, 4, 64 }; L2_found = True; break; 170ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x3c: *LLc = (cache_t) { 256, 4, 64 }; L2_found = True; break; 171ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 172ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* If a P6 core, this means "no L2 cache". 173ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown If a P4 core, this means "no L3 cache". 174ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown We don't know what core it is, so don't issue a warning. To detect 175ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown a missing L2 cache, we use 'L2_found'. */ 176ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x40: 177ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown break; 178ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 179ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x41: *LLc = (cache_t) { 128, 4, 32 }; L2_found = True; break; 180ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x42: *LLc = (cache_t) { 256, 4, 32 }; L2_found = True; break; 181ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x43: *LLc = (cache_t) { 512, 4, 32 }; L2_found = True; break; 182ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x44: *LLc = (cache_t) { 1024, 4, 32 }; L2_found = True; break; 183ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x45: *LLc = (cache_t) { 2048, 4, 32 }; L2_found = True; break; 184ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x48: *LLc = (cache_t) { 3072, 12, 64 }; L2_found = True; break; 185ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x4e: *LLc = (cache_t) { 6144, 24, 64 }; L2_found = True; break; 186ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x49: 187ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (family == 15 && model == 6) { 188ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* On Xeon MP (family F, model 6), this is for L3 */ 189ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown L3c = (cache_t) { 4096, 16, 64 }; L3_found = True; 190ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } else { 191ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *LLc = (cache_t) { 4096, 16, 64 }; L2_found = True; 192ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 193ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown break; 194ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 195ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* These are sectored, whatever that means */ 196ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x60: *D1c = (cache_t) { 16, 8, 64 }; break; /* sectored */ 197ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */ 198ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */ 199ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */ 200ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 201ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based. 202ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * conversion to byte size is a total guess; treat the 12K and 16K 203ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * cases the same since the cache byte size must be a power of two for 204ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * everything to work!. Also guessing 32 bytes for the line size... 205ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */ 206ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x70: /* 12K micro-ops, 8-way */ 207ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *I1c = (cache_t) { 16, 8, 32 }; 208ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown micro_ops_warn(12, 16, 32); 209ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown break; 210ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x71: /* 16K micro-ops, 8-way */ 211ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *I1c = (cache_t) { 16, 8, 32 }; 212ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown micro_ops_warn(16, 16, 32); 213ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown break; 214ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x72: /* 32K micro-ops, 8-way */ 215ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *I1c = (cache_t) { 32, 8, 32 }; 216ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown micro_ops_warn(32, 32, 32); 217ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown break; 218ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 219ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* not sectored, whatever that might mean */ 220ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x78: *LLc = (cache_t) { 1024, 4, 64 }; L2_found = True; break; 221ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 222ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* These are sectored, whatever that means */ 223ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x79: *LLc = (cache_t) { 128, 8, 64 }; L2_found = True; break; 224ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x7a: *LLc = (cache_t) { 256, 8, 64 }; L2_found = True; break; 225ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x7b: *LLc = (cache_t) { 512, 8, 64 }; L2_found = True; break; 226ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x7c: *LLc = (cache_t) { 1024, 8, 64 }; L2_found = True; break; 227ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x7d: *LLc = (cache_t) { 2048, 8, 64 }; L2_found = True; break; 228ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x7e: *LLc = (cache_t) { 256, 8, 128 }; L2_found = True; break; 229ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x7f: *LLc = (cache_t) { 512, 2, 64 }; L2_found = True; break; 230ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x80: *LLc = (cache_t) { 512, 8, 64 }; L2_found = True; break; 231ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x81: *LLc = (cache_t) { 128, 8, 32 }; L2_found = True; break; 232ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x82: *LLc = (cache_t) { 256, 8, 32 }; L2_found = True; break; 233ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x83: *LLc = (cache_t) { 512, 8, 32 }; L2_found = True; break; 234ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x84: *LLc = (cache_t) { 1024, 8, 32 }; L2_found = True; break; 235ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x85: *LLc = (cache_t) { 2048, 8, 32 }; L2_found = True; break; 236ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x86: *LLc = (cache_t) { 512, 4, 64 }; L2_found = True; break; 237ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0x87: *LLc = (cache_t) { 1024, 8, 64 }; L2_found = True; break; 238ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 239ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Ignore prefetch information */ 240ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xf0: case 0xf1: 241ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown break; 242ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 243b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov case 0xff: 244b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov j = 0; 245b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov VG_(cpuid)(4, j++, (Int*)&info[0], (Int*)&info[4], 246b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov (Int*)&info[8], (Int*)&info[12]); 247b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 248b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov while ((info[0] & 0x1f) != 0) { 249b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov UInt assoc = ((*(UInt *)&info[4] >> 22) & 0x3ff) + 1; 250b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov UInt parts = ((*(UInt *)&info[4] >> 12) & 0x3ff) + 1; 251b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov UInt line_size = (*(UInt *)&info[4] & 0x7ff) + 1; 252b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov UInt sets = *(UInt *)&info[8] + 1; 253b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov cache_t c; 254b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 255b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov c.size = assoc * parts * line_size * sets / 1024; 256b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov c.assoc = assoc; 257b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov c.line_size = line_size; 258b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 259b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov switch ((info[0] & 0xe0) >> 5) 260b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov { 261b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov case 1: 262b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov switch (info[0] & 0x1f) 263b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov { 264b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov case 1: *D1c = c; break; 265b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov case 2: *I1c = c; break; 266b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov case 3: VG_(dmsg)("warning: L1 unified cache ignored\n"); break; 267b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov default: VG_(dmsg)("warning: L1 cache of unknown type ignored\n"); break; 268b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov } 269b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov break; 270b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov case 2: 271b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov switch (info[0] & 0x1f) 272b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov { 273b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov case 1: VG_(dmsg)("warning: L2 data cache ignored\n"); break; 274b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov case 2: VG_(dmsg)("warning: L2 instruction cache ignored\n"); break; 275b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov case 3: *LLc = c; L2_found = True; break; 276b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov default: VG_(dmsg)("warning: L2 cache of unknown type ignored\n"); break; 277b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov } 278b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov break; 279b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov case 3: 280b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov switch (info[0] & 0x1f) 281b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov { 282b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov case 1: VG_(dmsg)("warning: L3 data cache ignored\n"); break; 283b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov case 2: VG_(dmsg)("warning: L3 instruction cache ignored\n"); break; 284b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov case 3: L3c = c; L3_found = True; break; 285b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov default: VG_(dmsg)("warning: L3 cache of unknown type ignored\n"); break; 286b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov } 287b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov break; 288b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov default: 289b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov VG_(dmsg)("warning: L%u cache ignored\n", (info[0] & 0xe0) >> 5); 290b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov break; 291b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov } 292b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 293b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov VG_(cpuid)(4, j++, (Int*)&info[0], (Int*)&info[4], 294b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov (Int*)&info[8], (Int*)&info[12]); 295b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov } 296b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov break; 297b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 298ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown default: 299ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown VG_(dmsg)("warning: Unknown Intel cache config value (0x%x), ignoring\n", 300ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown info[i]); 301ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown break; 302ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 303ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 304ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 305ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* If we found a L3 cache, throw away the L2 data and use the L3's instead. */ 306ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (L3_found) { 307ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown VG_(dmsg)("warning: L3 cache found, using its data for the LL simulation.\n"); 308ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *LLc = L3c; 309ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown L2_found = True; 310ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 311ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 312ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (!L2_found) 313ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown VG_(dmsg)("warning: L2 cache not installed, ignore LL results.\n"); 314ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 315ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return 0; 316ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 317ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 318ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* AMD method is straightforward, just extract appropriate bits from the 319ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * result registers. 320ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * 321ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * Bits, for D1 and I1: 322ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * 31..24 data L1 cache size in KBs 323ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * 23..16 data L1 cache associativity (FFh=full) 324ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * 15.. 8 data L1 cache lines per tag 325ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * 7.. 0 data L1 cache line size in bytes 326ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * 327ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * Bits, for L2: 328ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * 31..16 unified L2 cache size in KBs 329ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * 15..12 unified L2 cache associativity (0=off, FFh=full) 330ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * 11.. 8 unified L2 cache lines per tag 331ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * 7.. 0 unified L2 cache line size in bytes 332ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * 333ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * #3 The AMD K7 processor's L2 cache must be configured prior to relying 334ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * upon this information. (Whatever that means -- njn) 335ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * 336ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model 337ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB), 338ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * so we detect that. 339ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * 340ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * Returns 0 on success, non-zero on failure. As with the Intel code 341ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * above, if a L3 cache is found, then data for it rather than the L2 342ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * is returned via *LLc. 343ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */ 344ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 345ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* A small helper */ 346ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic Int decode_AMD_cache_L2_L3_assoc ( Int bits_15_12 ) 347ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 348ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Decode a L2/L3 associativity indication. It is encoded 349ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown differently from the I1/D1 associativity. Returns 1 350ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown (direct-map) as a safe but suboptimal result for unknown 351ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown encodings. */ 352ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown switch (bits_15_12 & 0xF) { 353ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 1: return 1; case 2: return 2; 354ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 4: return 4; case 6: return 8; 355ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 8: return 16; case 0xA: return 32; 356ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xB: return 48; case 0xC: return 64; 357ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xD: return 96; case 0xE: return 128; 358ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0xF: /* fully associative */ 359ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0: /* L2/L3 cache or TLB is disabled */ 360ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown default: 361ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return 1; 362ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 363ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 364ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 365ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic 366ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff BrownInt AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* LLc) 367ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 368ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt ext_level; 369ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt dummy, model; 370ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt I1i, D1i, L2i, L3i; 371ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 372b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov VG_(cpuid)(0x80000000, 0, &ext_level, &dummy, &dummy, &dummy); 373ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 374ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) { 375ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown VG_(dmsg)("warning: ext_level < 0x80000006 for AMD processor (0x%x)\n", 376ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown ext_level); 377ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return -1; 378ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 379ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 380b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov VG_(cpuid)(0x80000005, 0, &dummy, &dummy, &D1i, &I1i); 381b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov VG_(cpuid)(0x80000006, 0, &dummy, &dummy, &L2i, &L3i); 382ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 383b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov VG_(cpuid)(0x1, 0, &model, &dummy, &dummy, &dummy); 384ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 385ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Check for Duron bug */ 386ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (model == 0x630) { 387ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown VG_(dmsg)("warning: Buggy Duron stepping A0. Assuming L2 size=65536 bytes\n"); 388ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown L2i = (64 << 16) | (L2i & 0xffff); 389ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 390ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 391ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown D1c->size = (D1i >> 24) & 0xff; 392ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown D1c->assoc = (D1i >> 16) & 0xff; 393ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown D1c->line_size = (D1i >> 0) & 0xff; 394ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 395ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown I1c->size = (I1i >> 24) & 0xff; 396ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown I1c->assoc = (I1i >> 16) & 0xff; 397ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown I1c->line_size = (I1i >> 0) & 0xff; 398ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 399ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown LLc->size = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */ 400ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown LLc->assoc = decode_AMD_cache_L2_L3_assoc((L2i >> 12) & 0xf); 401ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown LLc->line_size = (L2i >> 0) & 0xff; 402ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 403ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (((L3i >> 18) & 0x3fff) > 0) { 404ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* There's an L3 cache. Replace *LLc contents with this info. */ 405ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* NB: the test in the if is "if L3 size > 0 ". I don't know if 406ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown this is the right way to test presence-vs-absence of L3. I 407ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown can't see any guidance on this in the AMD documentation. */ 408ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown LLc->size = ((L3i >> 18) & 0x3fff) * 512; 409ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown LLc->assoc = decode_AMD_cache_L2_L3_assoc((L3i >> 12) & 0xf); 410ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown LLc->line_size = (L3i >> 0) & 0xff; 411ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown VG_(dmsg)("warning: L3 cache found, using its data for the L2 simulation.\n"); 412ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 413ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 414ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return 0; 415ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 416ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 417ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic 418ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff BrownInt get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* LLc) 419ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 420ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Int level, ret; 421ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Char vendor_id[13]; 422ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 423ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (!VG_(has_cpuid)()) { 424ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown VG_(dmsg)("CPUID instruction not supported\n"); 425ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return -1; 426ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 427ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 428b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov VG_(cpuid)(0, 0, &level, (int*)&vendor_id[0], 429ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown (int*)&vendor_id[8], (int*)&vendor_id[4]); 430ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown vendor_id[12] = '\0'; 431ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 432ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (0 == level) { 433ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown VG_(dmsg)("CPUID level is 0, early Pentium?\n"); 434ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return -1; 435ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 436ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 437ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */ 438ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) { 439ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown ret = Intel_cache_info(level, I1c, D1c, LLc); 440ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 441ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) { 442ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown ret = AMD_cache_info(I1c, D1c, LLc); 443ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 444ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } else if (0 == VG_(strcmp)(vendor_id, "CentaurHauls")) { 445ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Total kludge. Pretend to be a VIA Nehemiah. */ 446ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown D1c->size = 64; 447ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown D1c->assoc = 16; 448ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown D1c->line_size = 16; 449ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown I1c->size = 64; 450ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown I1c->assoc = 4; 451ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown I1c->line_size = 16; 452ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown LLc->size = 64; 453ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown LLc->assoc = 16; 454ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown LLc->line_size = 16; 455ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown ret = 0; 456ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 457ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } else { 458ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown VG_(dmsg)("CPU vendor ID not recognised (%s)\n", vendor_id); 459ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return -1; 460ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 461ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 462ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Successful! Convert sizes from KB to bytes */ 463ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown I1c->size *= 1024; 464ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown D1c->size *= 1024; 465ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown LLc->size *= 1024; 466b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 467b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov /* If the LL cache config isn't something the simulation functions 468b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov can handle, try to adjust it so it is. Caches are characterised 469b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov by (total size T, line size L, associativity A), and then we 470b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov have 471b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 472b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov number of sets S = T / (L * A) 473b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 474b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov The required constraints are: 475b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 476b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov * L must be a power of 2, but it always is in practice, so 477b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov no problem there 478b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 479b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov * A can be any value >= 1 480b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 481b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov * T can be any value, but .. 482b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 483b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov * S must be a power of 2. 484b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 485b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov That sometimes gives a problem. For example, some Core iX based 486b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov Intel CPUs have T = 12MB, A = 16, L = 64, which gives 12288 487b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov sets. The "fix" in this case is to increase the associativity 488b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov by 50% to 24, which reduces the number of sets to 8192, making 489b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov it a power of 2. That's what the following code does (handing 490b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov the "3/2 rescaling case".) We might need to deal with other 491b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov ratios later (5/4 ?). 492b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 493b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov The "fix" is "justified" (cough, cough) by alleging that 494b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov increases of associativity above about 4 have very little effect 495b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov on the actual miss rate. It would be far more inaccurate to 496b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov fudge this by changing the size of the simulated cache -- 497b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov changing the associativity is a much better option. 498b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov */ 499b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov if (LLc->size > 0 && LLc->assoc > 0 && LLc->line_size > 0) { 500b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov Long nSets = (Long)LLc->size / (Long)(LLc->line_size * LLc->assoc); 501b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov if (/* stay sane */ 502b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov nSets >= 4 503b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov /* nSets is not a power of 2 */ 504b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov && VG_(log2_64)( (ULong)nSets ) == -1 505b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov /* nSets is 50% above a power of 2 */ 506b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov && VG_(log2_64)( (ULong)((2 * nSets) / (Long)3) ) != -1 507b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov /* associativity can be increased by exactly 50% */ 508b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov && (LLc->assoc % 2) == 0 509b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov ) { 510b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov /* # sets is 1.5 * a power of two, but the associativity is 511b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov even, so we can increase that up by 50% and implicitly 512b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov scale the # sets down accordingly. */ 513b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov Int new_assoc = LLc->assoc + (LLc->assoc / 2); 514b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov VG_(dmsg)("warning: pretending that LL cache has associativity" 515b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov " %d instead of actual %d\n", new_assoc, LLc->assoc); 516b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov LLc->assoc = new_assoc; 517b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov } 518b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov } 519b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 520ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return ret; 521ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 522ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 523ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 524ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownvoid VG_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* LLc, 525ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Bool all_caches_clo_defined) 526ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 527ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Int res; 528ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 529ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // Set caches to default. 530ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *I1c = (cache_t) { 65536, 2, 64 }; 531ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *D1c = (cache_t) { 65536, 2, 64 }; 532ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *LLc = (cache_t) { 262144, 8, 64 }; 533ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 534ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // Then replace with any info we can get from CPUID. 535ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res = get_caches_from_CPUID(I1c, D1c, LLc); 536ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 537ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // Warn if CPUID failed and config not completely specified from cmd line. 538ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (res != 0 && !all_caches_clo_defined) { 539ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown VG_(dmsg)("Warning: Couldn't auto-detect cache config, using one " 540ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown "or more defaults \n"); 541ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 542ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 543ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 544ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#endif // defined(VGA_x86) || defined(VGA_amd64) 545ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 546ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/ 547ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--- end ---*/ 548ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/ 549