1ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
2ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/
3ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--- x86- and AMD64-specific definitions.          cg-x86-amd64.c ---*/
4ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/
5ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
6ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*
7ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   This file is part of Cachegrind, a Valgrind tool for cache
8ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   profiling programs.
9ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
10b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   Copyright (C) 2002-2011 Nicholas Nethercote
11ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      njn@valgrind.org
12ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
13ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   This program is free software; you can redistribute it and/or
14ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   modify it under the terms of the GNU General Public License as
15ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   published by the Free Software Foundation; either version 2 of the
16ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   License, or (at your option) any later version.
17ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
18ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   This program is distributed in the hope that it will be useful, but
19ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   WITHOUT ANY WARRANTY; without even the implied warranty of
20ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   General Public License for more details.
22ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
23ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   You should have received a copy of the GNU General Public License
24ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   along with this program; if not, write to the Free Software
25ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   02111-1307, USA.
27ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
28ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   The GNU General Public License is contained in the file COPYING.
29ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/
30ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
31ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#if defined(VGA_x86) || defined(VGA_amd64)
32ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
33ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "pub_tool_basics.h"
34ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "pub_tool_cpuid.h"
35ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "pub_tool_libcbase.h"
36ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "pub_tool_libcassert.h"
37ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "pub_tool_libcprint.h"
38ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
39ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "cg_arch.h"
40ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
// All CPUID info taken from sandpile.org/ia32/cpuid.htm
// Probably only works for Intel and AMD chips, and probably only for some of
// them.
44ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
45ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic void micro_ops_warn(Int actual_size, Int used_size, Int line_size)
46ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
47ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   VG_(dmsg)("warning: Pentium 4 with %d KB micro-op instruction trace cache\n",
48ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown             actual_size);
49ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   VG_(dmsg)("         Simulating a %d KB I-cache with %d B lines\n",
50ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown             used_size, line_size);
51ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
52ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
53ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Intel method is truly wretched.  We have to do an insane indexing into an
54ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * array of pre-defined configurations for various parts of the memory
55ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * hierarchy.
56ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * According to Intel Processor Identification, App Note 485.
57ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *
58ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * If a L3 cache is found, then data for it rather than the L2
59ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * is returned via *LLc.
60ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */
61ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic
62ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff BrownInt Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* LLc)
63ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
64ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Int cpuid1_eax;
65ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Int cpuid1_ignore;
66ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Int family;
67ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Int model;
68ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   UChar info[16];
69b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   Int   i, j, trials;
70ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Bool  L2_found = False;
71ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* If we see L3 cache info, copy it into L3c.  Then, at the end,
72ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      copy it into *LLc.  Hence if a L3 cache is specified, *LLc will
73ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      eventually contain a description of it rather than the L2 cache.
74ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      The use of the L3c intermediary makes this process independent
75ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      of the order in which the cache specifications appear in
76ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      info[]. */
77ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Bool  L3_found = False;
78ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   cache_t L3c = { 0, 0, 0 };
79ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
80ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (level < 2) {
81ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      VG_(dmsg)("warning: CPUID level < 2 for Intel processor (%d)\n", level);
82ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return -1;
83ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
84ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
85ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* family/model needed to distinguish code reuse (currently 0x49) */
86b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   VG_(cpuid)(1, 0, &cpuid1_eax, &cpuid1_ignore,
87ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown	      &cpuid1_ignore, &cpuid1_ignore);
88ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   family = (((cpuid1_eax >> 20) & 0xff) << 4) + ((cpuid1_eax >> 8) & 0xf);
89ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   model =  (((cpuid1_eax >> 16) & 0xf) << 4) + ((cpuid1_eax >> 4) & 0xf);
90ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
91b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   VG_(cpuid)(2, 0, (Int*)&info[0], (Int*)&info[4],
92b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov                    (Int*)&info[8], (Int*)&info[12]);
93ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   trials  = info[0] - 1;   /* AL register - bits 0..7 of %eax */
94ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   info[0] = 0x0;           /* reset AL */
95ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
96ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (0 != trials) {
97ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      VG_(dmsg)("warning: non-zero CPUID trials for Intel processor (%d)\n",
98ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                trials);
99ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return -1;
100ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
101ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
102ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   for (i = 0; i < 16; i++) {
103ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
104ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      switch (info[i]) {
105ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
106ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x0:       /* ignore zeros */
107ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown          break;
108ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
109ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* TLB info, ignore */
110ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x01: case 0x02: case 0x03: case 0x04: case 0x05:
111b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      case 0x0b:
112ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x4f: case 0x50: case 0x51: case 0x52: case 0x55:
113ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x56: case 0x57: case 0x59:
114ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x5a: case 0x5b: case 0x5c: case 0x5d:
115b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      case 0x76:
116ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xb0: case 0xb1: case 0xb2:
117ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xb3: case 0xb4: case 0xba: case 0xc0:
118ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xca:
119ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown          break;
120ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
121ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x06: *I1c = (cache_t) {  8, 4, 32 }; break;
122ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break;
123ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x09: *I1c = (cache_t) { 32, 4, 64 }; break;
124ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x30: *I1c = (cache_t) { 32, 8, 64 }; break;
125ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
126ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x0a: *D1c = (cache_t) {  8, 2, 32 }; break;
127ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break;
128b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      case 0x0d: *D1c = (cache_t) { 16, 4, 64 }; break;
129ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x0e: *D1c = (cache_t) { 24, 6, 64 }; break;
130ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x2c: *D1c = (cache_t) { 32, 8, 64 }; break;
131ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
132ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* IA-64 info -- panic! */
133ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x10: case 0x15: case 0x1a:
134ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x88: case 0x89: case 0x8a: case 0x8d:
135ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x90: case 0x96: case 0x9b:
136ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         VG_(tool_panic)("IA-64 cache detected?!");
137ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
138ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* L3 cache info. */
139ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x22: L3c = (cache_t) { 512,    4, 64 }; L3_found = True; break;
140ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x23: L3c = (cache_t) { 1024,   8, 64 }; L3_found = True; break;
141ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x25: L3c = (cache_t) { 2048,   8, 64 }; L3_found = True; break;
142ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x29: L3c = (cache_t) { 4096,   8, 64 }; L3_found = True; break;
143ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x46: L3c = (cache_t) { 4096,   4, 64 }; L3_found = True; break;
144ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x47: L3c = (cache_t) { 8192,   8, 64 }; L3_found = True; break;
145ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x4a: L3c = (cache_t) { 6144,  12, 64 }; L3_found = True; break;
146ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x4b: L3c = (cache_t) { 8192,  16, 64 }; L3_found = True; break;
147ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x4c: L3c = (cache_t) { 12288, 12, 64 }; L3_found = True; break;
148ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x4d: L3c = (cache_t) { 16384, 16, 64 }; L3_found = True; break;
149ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xd0: L3c = (cache_t) { 512,    4, 64 }; L3_found = True; break;
150ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xd1: L3c = (cache_t) { 1024,   4, 64 }; L3_found = True; break;
151ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xd2: L3c = (cache_t) { 2048,   4, 64 }; L3_found = True; break;
152ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xd6: L3c = (cache_t) { 1024,   8, 64 }; L3_found = True; break;
153ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xd7: L3c = (cache_t) { 2048,   8, 64 }; L3_found = True; break;
154ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xd8: L3c = (cache_t) { 4096,   8, 64 }; L3_found = True; break;
155ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xdc: L3c = (cache_t) { 1536,  12, 64 }; L3_found = True; break;
156ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xdd: L3c = (cache_t) { 3072,  12, 64 }; L3_found = True; break;
157ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xde: L3c = (cache_t) { 6144,  12, 64 }; L3_found = True; break;
158ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xe2: L3c = (cache_t) { 2048,  16, 64 }; L3_found = True; break;
159ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xe3: L3c = (cache_t) { 4096,  16, 64 }; L3_found = True; break;
160ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xe4: L3c = (cache_t) { 8192,  16, 64 }; L3_found = True; break;
161ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xea: L3c = (cache_t) { 12288, 24, 64 }; L3_found = True; break;
162ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xeb: L3c = (cache_t) { 18432, 24, 64 }; L3_found = True; break;
163ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xec: L3c = (cache_t) { 24576, 24, 64 }; L3_found = True; break;
164ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
165ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* Described as "MLC" in Intel documentation */
166ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x21: *LLc = (cache_t) {  256, 8, 64 }; L2_found = True; break;
167ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
168ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* These are sectored, whatever that means */
169ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x39: *LLc = (cache_t) {  128, 4, 64 }; L2_found = True; break;
170ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x3c: *LLc = (cache_t) {  256, 4, 64 }; L2_found = True; break;
171ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
172ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* If a P6 core, this means "no L2 cache".
173ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         If a P4 core, this means "no L3 cache".
174ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         We don't know what core it is, so don't issue a warning.  To detect
175ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         a missing L2 cache, we use 'L2_found'. */
176ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x40:
177ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown          break;
178ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
179ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x41: *LLc = (cache_t) {  128,  4, 32 }; L2_found = True; break;
180ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x42: *LLc = (cache_t) {  256,  4, 32 }; L2_found = True; break;
181ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x43: *LLc = (cache_t) {  512,  4, 32 }; L2_found = True; break;
182ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x44: *LLc = (cache_t) { 1024,  4, 32 }; L2_found = True; break;
183ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x45: *LLc = (cache_t) { 2048,  4, 32 }; L2_found = True; break;
184ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x48: *LLc = (cache_t) { 3072, 12, 64 }; L2_found = True; break;
185ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x4e: *LLc = (cache_t) { 6144, 24, 64 }; L2_found = True; break;
186ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x49:
187ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         if (family == 15 && model == 6) {
188ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            /* On Xeon MP (family F, model 6), this is for L3 */
189ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            L3c = (cache_t) { 4096, 16, 64 }; L3_found = True;
190ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         } else {
191ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown	    *LLc = (cache_t) { 4096, 16, 64 }; L2_found = True;
192ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         }
193ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         break;
194ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
195ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* These are sectored, whatever that means */
196ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x60: *D1c = (cache_t) { 16, 8, 64 };  break;      /* sectored */
197ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x66: *D1c = (cache_t) {  8, 4, 64 };  break;      /* sectored */
198ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x67: *D1c = (cache_t) { 16, 4, 64 };  break;      /* sectored */
199ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x68: *D1c = (cache_t) { 32, 4, 64 };  break;      /* sectored */
200ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
201ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based.
202ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown       * conversion to byte size is a total guess;  treat the 12K and 16K
203ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown       * cases the same since the cache byte size must be a power of two for
204ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown       * everything to work!.  Also guessing 32 bytes for the line size...
205ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown       */
206ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x70:    /* 12K micro-ops, 8-way */
207ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         *I1c = (cache_t) { 16, 8, 32 };
208ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         micro_ops_warn(12, 16, 32);
209ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         break;
210ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x71:    /* 16K micro-ops, 8-way */
211ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         *I1c = (cache_t) { 16, 8, 32 };
212ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         micro_ops_warn(16, 16, 32);
213ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         break;
214ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x72:    /* 32K micro-ops, 8-way */
215ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         *I1c = (cache_t) { 32, 8, 32 };
216ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         micro_ops_warn(32, 32, 32);
217ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         break;
218ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
219ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* not sectored, whatever that might mean */
220ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x78: *LLc = (cache_t) { 1024, 4,  64 }; L2_found = True;  break;
221ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
222ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* These are sectored, whatever that means */
223ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x79: *LLc = (cache_t) {  128, 8,  64 }; L2_found = True;  break;
224ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x7a: *LLc = (cache_t) {  256, 8,  64 }; L2_found = True;  break;
225ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x7b: *LLc = (cache_t) {  512, 8,  64 }; L2_found = True;  break;
226ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x7c: *LLc = (cache_t) { 1024, 8,  64 }; L2_found = True;  break;
227ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x7d: *LLc = (cache_t) { 2048, 8,  64 }; L2_found = True;  break;
228ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x7e: *LLc = (cache_t) {  256, 8, 128 }; L2_found = True;  break;
229ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x7f: *LLc = (cache_t) {  512, 2,  64 }; L2_found = True;  break;
230ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x80: *LLc = (cache_t) {  512, 8,  64 }; L2_found = True;  break;
231ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x81: *LLc = (cache_t) {  128, 8,  32 }; L2_found = True;  break;
232ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x82: *LLc = (cache_t) {  256, 8,  32 }; L2_found = True;  break;
233ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x83: *LLc = (cache_t) {  512, 8,  32 }; L2_found = True;  break;
234ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x84: *LLc = (cache_t) { 1024, 8,  32 }; L2_found = True;  break;
235ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x85: *LLc = (cache_t) { 2048, 8,  32 }; L2_found = True;  break;
236ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x86: *LLc = (cache_t) {  512, 4,  64 }; L2_found = True;  break;
237ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0x87: *LLc = (cache_t) { 1024, 8,  64 }; L2_found = True;  break;
238ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
239ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* Ignore prefetch information */
240ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xf0: case 0xf1:
241ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         break;
242ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
243b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      case 0xff:
244b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         j = 0;
245b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         VG_(cpuid)(4, j++, (Int*)&info[0], (Int*)&info[4],
246b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov                            (Int*)&info[8], (Int*)&info[12]);
247b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
248b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         while ((info[0] & 0x1f) != 0) {
249b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov            UInt assoc = ((*(UInt *)&info[4] >> 22) & 0x3ff) + 1;
250b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov            UInt parts = ((*(UInt *)&info[4] >> 12) & 0x3ff) + 1;
251b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov            UInt line_size = (*(UInt *)&info[4] & 0x7ff) + 1;
252b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov            UInt sets = *(UInt *)&info[8] + 1;
253b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov            cache_t c;
254b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
255b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov            c.size = assoc * parts * line_size * sets / 1024;
256b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov            c.assoc = assoc;
257b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov            c.line_size = line_size;
258b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
259b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov            switch ((info[0] & 0xe0) >> 5)
260b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov            {
261b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov            case 1:
262b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               switch (info[0] & 0x1f)
263b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               {
264b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               case 1: *D1c = c; break;
265b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               case 2: *I1c = c; break;
266b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               case 3: VG_(dmsg)("warning: L1 unified cache ignored\n"); break;
267b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               default: VG_(dmsg)("warning: L1 cache of unknown type ignored\n"); break;
268b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               }
269b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               break;
270b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov            case 2:
271b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               switch (info[0] & 0x1f)
272b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               {
273b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               case 1: VG_(dmsg)("warning: L2 data cache ignored\n"); break;
274b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               case 2: VG_(dmsg)("warning: L2 instruction cache ignored\n"); break;
275b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               case 3: *LLc = c; L2_found = True; break;
276b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               default: VG_(dmsg)("warning: L2 cache of unknown type ignored\n"); break;
277b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               }
278b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               break;
279b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov            case 3:
280b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               switch (info[0] & 0x1f)
281b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               {
282b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               case 1: VG_(dmsg)("warning: L3 data cache ignored\n"); break;
283b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               case 2: VG_(dmsg)("warning: L3 instruction cache ignored\n"); break;
284b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               case 3: L3c = c; L3_found = True; break;
285b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               default: VG_(dmsg)("warning: L3 cache of unknown type ignored\n"); break;
286b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               }
287b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               break;
288b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov            default:
289b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               VG_(dmsg)("warning: L%u cache ignored\n", (info[0] & 0xe0) >> 5);
290b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov               break;
291b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov            }
292b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
293b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov            VG_(cpuid)(4, j++, (Int*)&info[0], (Int*)&info[4],
294b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov                               (Int*)&info[8], (Int*)&info[12]);
295b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         }
296b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         break;
297b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
298ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      default:
299ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         VG_(dmsg)("warning: Unknown Intel cache config value (0x%x), ignoring\n",
300ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                   info[i]);
301ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         break;
302ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
303ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
304ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
305ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* If we found a L3 cache, throw away the L2 data and use the L3's instead. */
306ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (L3_found) {
307ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      VG_(dmsg)("warning: L3 cache found, using its data for the LL simulation.\n");
308ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      *LLc = L3c;
309ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      L2_found = True;
310ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
311ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
312ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (!L2_found)
313ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      VG_(dmsg)("warning: L2 cache not installed, ignore LL results.\n");
314ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
315ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   return 0;
316ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
317ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
/* AMD method is straightforward, just extract appropriate bits from the
 * result registers.
 *
 * Bits, for D1 and I1:
 *  31..24  data L1 cache size in KBs
 *  23..16  data L1 cache associativity (FFh=full)
 *  15.. 8  data L1 cache lines per tag
 *   7.. 0  data L1 cache line size in bytes
 *
 * Bits, for L2:
 *  31..16  unified L2 cache size in KBs
 *  15..12  unified L2 cache associativity (0=off, Fh=full)
 *  11.. 8  unified L2 cache lines per tag
 *   7.. 0  unified L2 cache line size in bytes
 *
 * #3  The AMD K7 processor's L2 cache must be configured prior to relying
 *     upon this information. (Whatever that means -- njn)
 *
 * Also, according to Cyrille Chepelov, Duron stepping A0 processors (model
 * 0x630) have a bug and misreport their L2 size as 1KB (it's really 64KB),
 * so we detect that.
 *
 * Returns 0 on success, non-zero on failure.  As with the Intel code
 * above, if a L3 cache is found, then data for it rather than the L2
 * is returned via *LLc.
 */
344ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
345ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* A small helper */
346ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic Int decode_AMD_cache_L2_L3_assoc ( Int bits_15_12 )
347ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
348ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* Decode a L2/L3 associativity indication.  It is encoded
349ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      differently from the I1/D1 associativity.  Returns 1
350ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      (direct-map) as a safe but suboptimal result for unknown
351ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      encodings. */
352ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   switch (bits_15_12 & 0xF) {
353ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 1: return 1;    case 2: return 2;
354ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 4: return 4;    case 6: return 8;
355ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 8: return 16;   case 0xA: return 32;
356ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xB: return 48; case 0xC: return 64;
357ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xD: return 96; case 0xE: return 128;
358ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0xF: /* fully associative */
359ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0: /* L2/L3 cache or TLB is disabled */
360ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      default:
361ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown        return 1;
362ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
363ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
364ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
365ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic
366ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff BrownInt AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* LLc)
367ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
368ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   UInt ext_level;
369ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   UInt dummy, model;
370ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   UInt I1i, D1i, L2i, L3i;
371ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
372b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   VG_(cpuid)(0x80000000, 0, &ext_level, &dummy, &dummy, &dummy);
373ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
374ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) {
375ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      VG_(dmsg)("warning: ext_level < 0x80000006 for AMD processor (0x%x)\n",
376ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                ext_level);
377ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return -1;
378ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
379ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
380b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   VG_(cpuid)(0x80000005, 0, &dummy, &dummy, &D1i, &I1i);
381b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   VG_(cpuid)(0x80000006, 0, &dummy, &dummy, &L2i, &L3i);
382ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
383b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   VG_(cpuid)(0x1, 0, &model, &dummy, &dummy, &dummy);
384ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
385ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* Check for Duron bug */
386ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (model == 0x630) {
387ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      VG_(dmsg)("warning: Buggy Duron stepping A0. Assuming L2 size=65536 bytes\n");
388ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      L2i = (64 << 16) | (L2i & 0xffff);
389ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
390ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
391ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   D1c->size      = (D1i >> 24) & 0xff;
392ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   D1c->assoc     = (D1i >> 16) & 0xff;
393ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   D1c->line_size = (D1i >>  0) & 0xff;
394ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
395ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   I1c->size      = (I1i >> 24) & 0xff;
396ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   I1c->assoc     = (I1i >> 16) & 0xff;
397ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   I1c->line_size = (I1i >>  0) & 0xff;
398ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
399ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   LLc->size      = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */
400ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   LLc->assoc     = decode_AMD_cache_L2_L3_assoc((L2i >> 12) & 0xf);
401ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   LLc->line_size = (L2i >>  0) & 0xff;
402ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
403ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (((L3i >> 18) & 0x3fff) > 0) {
404ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* There's an L3 cache.  Replace *LLc contents with this info. */
405ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* NB: the test in the if is "if L3 size > 0 ".  I don't know if
406ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         this is the right way to test presence-vs-absence of L3.  I
407ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         can't see any guidance on this in the AMD documentation. */
408ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      LLc->size      = ((L3i >> 18) & 0x3fff) * 512;
409ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      LLc->assoc     = decode_AMD_cache_L2_L3_assoc((L3i >> 12) & 0xf);
410ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      LLc->line_size = (L3i >>  0) & 0xff;
411ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      VG_(dmsg)("warning: L3 cache found, using its data for the L2 simulation.\n");
412ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
413ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
414ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   return 0;
415ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
416ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
417ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic
418ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff BrownInt get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* LLc)
419ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
420ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Int  level, ret;
421ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Char vendor_id[13];
422ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
423ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (!VG_(has_cpuid)()) {
424ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      VG_(dmsg)("CPUID instruction not supported\n");
425ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return -1;
426ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
427ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
428b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   VG_(cpuid)(0, 0, &level, (int*)&vendor_id[0],
429ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown	      (int*)&vendor_id[8], (int*)&vendor_id[4]);
430ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   vendor_id[12] = '\0';
431ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
432ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (0 == level) {
433ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      VG_(dmsg)("CPUID level is 0, early Pentium?\n");
434ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return -1;
435ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
436ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
437ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */
438ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) {
439ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      ret = Intel_cache_info(level, I1c, D1c, LLc);
440ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
441ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) {
442ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      ret = AMD_cache_info(I1c, D1c, LLc);
443ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
444ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   } else if (0 == VG_(strcmp)(vendor_id, "CentaurHauls")) {
445ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* Total kludge.  Pretend to be a VIA Nehemiah. */
446ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      D1c->size      = 64;
447ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      D1c->assoc     = 16;
448ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      D1c->line_size = 16;
449ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      I1c->size      = 64;
450ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      I1c->assoc     = 4;
451ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      I1c->line_size = 16;
452ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      LLc->size      = 64;
453ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      LLc->assoc     = 16;
454ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      LLc->line_size = 16;
455ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      ret = 0;
456ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
457ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   } else {
458ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      VG_(dmsg)("CPU vendor ID not recognised (%s)\n", vendor_id);
459ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return -1;
460ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
461ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
462ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* Successful!  Convert sizes from KB to bytes */
463ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   I1c->size *= 1024;
464ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   D1c->size *= 1024;
465ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   LLc->size *= 1024;
466b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
467b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   /* If the LL cache config isn't something the simulation functions
468b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      can handle, try to adjust it so it is.  Caches are characterised
469b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      by (total size T, line size L, associativity A), and then we
470b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      have
471b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
472b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov        number of sets S = T / (L * A)
473b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
474b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      The required constraints are:
475b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
476b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      * L must be a power of 2, but it always is in practice, so
477b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov        no problem there
478b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
479b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      * A can be any value >= 1
480b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
481b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      * T can be any value, but ..
482b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
483b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      * S must be a power of 2.
484b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
485b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      That sometimes gives a problem.  For example, some Core iX based
486b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      Intel CPUs have T = 12MB, A = 16, L = 64, which gives 12288
487b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      sets.  The "fix" in this case is to increase the associativity
488b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      by 50% to 24, which reduces the number of sets to 8192, making
489b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      it a power of 2.  That's what the following code does (handing
490b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      the "3/2 rescaling case".)  We might need to deal with other
491b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      ratios later (5/4 ?).
492b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
493b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      The "fix" is "justified" (cough, cough) by alleging that
494b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      increases of associativity above about 4 have very little effect
495b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      on the actual miss rate.  It would be far more inaccurate to
496b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      fudge this by changing the size of the simulated cache --
497b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      changing the associativity is a much better option.
498b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   */
499b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   if (LLc->size > 0 && LLc->assoc > 0 && LLc->line_size > 0) {
500b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      Long nSets = (Long)LLc->size / (Long)(LLc->line_size * LLc->assoc);
501b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      if (/* stay sane */
502b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov          nSets >= 4
503b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov          /* nSets is not a power of 2 */
504b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov          && VG_(log2_64)( (ULong)nSets ) == -1
505b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov          /* nSets is 50% above a power of 2 */
506b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov          && VG_(log2_64)( (ULong)((2 * nSets) / (Long)3) ) != -1
507b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov          /* associativity can be increased by exactly 50% */
508b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov          && (LLc->assoc % 2) == 0
509b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         ) {
510b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         /* # sets is 1.5 * a power of two, but the associativity is
511b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov            even, so we can increase that up by 50% and implicitly
512b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov            scale the # sets down accordingly. */
513b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         Int new_assoc = LLc->assoc + (LLc->assoc / 2);
514b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         VG_(dmsg)("warning: pretending that LL cache has associativity"
515b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov                   " %d instead of actual %d\n", new_assoc, LLc->assoc);
516b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov         LLc->assoc = new_assoc;
517b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov      }
518b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   }
519b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
520ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   return ret;
521ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
522ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
523ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
524ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownvoid VG_(configure_caches)(cache_t* I1c, cache_t* D1c, cache_t* LLc,
525ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                           Bool all_caches_clo_defined)
526ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
527ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Int res;
528ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
529ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   // Set caches to default.
530ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   *I1c = (cache_t) {  65536, 2, 64 };
531ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   *D1c = (cache_t) {  65536, 2, 64 };
532ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   *LLc = (cache_t) { 262144, 8, 64 };
533ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
534ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   // Then replace with any info we can get from CPUID.
535ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res = get_caches_from_CPUID(I1c, D1c, LLc);
536ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
537ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   // Warn if CPUID failed and config not completely specified from cmd line.
538ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (res != 0 && !all_caches_clo_defined) {
539ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      VG_(dmsg)("Warning: Couldn't auto-detect cache config, using one "
540ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                "or more defaults \n");
541ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
542ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
543ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
544ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#endif // defined(VGA_x86) || defined(VGA_amd64)
545ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
546ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/
547ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--- end                                                          ---*/
548ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--------------------------------------------------------------------*/
549