intel_cacheinfo.c revision cb19060abfdecac0d1eb2d2f0e7d6b7a3f8bc4f4
/*
 * Routines to identify caches on Intel CPU.
 *
 * Changes:
 * Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 * Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
 */

#include <linux/init.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/pci.h>

#include <asm/processor.h>
#include <linux/smp.h>
#include <asm/k8.h>
#include <asm/smp.h>

#define LVL_1_INST	1
#define LVL_1_DATA	2
#define LVL_2		3
#define LVL_3		4
#define LVL_TRACE	5

struct _cache_table {
	unsigned char descriptor;
	char cache_type;
	short size;
};

/* All the cache descriptor types we care about (no TLB or
   trace cache entries) */

static const struct _cache_table __cpuinitconst cache_table[] =
{
	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
	{ 0x21, LVL_2,      256 },	/* 8-way set assoc, 64 byte line size */
	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x23, LVL_3,      1024 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, LVL_3,      2048 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, LVL_3,      4096 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3f, LVL_2,      256 },	/* 2-way set assoc, 64 byte line size */
	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
	{ 0x44, LVL_2,      1024 },	/* 4-way set assoc, 32 byte line size */
	{ 0x45, LVL_2,      2048 },	/* 4-way set assoc, 32 byte line size */
	{ 0x46, LVL_3,      4096 },	/* 4-way set assoc, 64 byte line size */
	{ 0x47, LVL_3,      8192 },	/* 8-way set assoc, 64 byte line size */
	{ 0x49, LVL_3,      4096 },	/* 16-way set assoc, 64 byte line size */
	{ 0x4a, LVL_3,      6144 },	/* 12-way set assoc, 64 byte line size */
	{ 0x4b, LVL_3,      8192 },	/* 16-way set assoc, 64 byte line size */
	{ 0x4c, LVL_3,      12288 },	/* 12-way set assoc, 64 byte line size */
	{ 0x4d, LVL_3,      16384 },	/* 16-way set assoc, 64 byte line size */
	{ 0x4e, LVL_2,      6144 },	/* 24-way set assoc, 64 byte line size */
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
	{ 0x78, LVL_2,      1024 },	/* 4-way set assoc, 64 byte line size */
	{ 0x79, LVL_2,      128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, LVL_2,      256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, LVL_2,      512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, LVL_2,      1024 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, LVL_2,      2048 },	/* 8-way set assoc, 64 byte line size */
	{ 0x7f, LVL_2,      512 },	/* 2-way set assoc, 64 byte line size */
	{ 0x82, LVL_2,      256 },	/* 8-way set assoc, 32 byte line size */
	{ 0x83, LVL_2,      512 },	/* 8-way set assoc, 32 byte line size */
	{ 0x84, LVL_2,      1024 },	/* 8-way set assoc, 32 byte line size */
	{ 0x85, LVL_2,      2048 },	/* 8-way set assoc, 32 byte line size */
	{ 0x86, LVL_2,      512 },	/* 4-way set assoc, 64 byte line size */
	{ 0x87, LVL_2,      1024 },	/* 8-way set assoc, 64 byte line size */
	{ 0xd0, LVL_3,      512 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd1, LVL_3,      1024 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd2, LVL_3,      2048 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd6, LVL_3,      1024 },	/* 8-way set assoc, 64 byte line size */
	{ 0xd7, LVL_3,      2048 },	/* 8-way set assoc, 64 byte line size */
	{ 0xd8, LVL_3,      4096 },	/* 12-way set assoc, 64 byte line size */
	{ 0xdc, LVL_3,      2048 },	/* 12-way set assoc, 64 byte line size */
	{ 0xdd, LVL_3,      4096 },	/* 12-way set assoc, 64 byte line size */
	{ 0xde, LVL_3,      8192 },	/* 12-way set assoc, 64 byte line size */
	{ 0xe2, LVL_3,      2048 },	/* 16-way set assoc, 64 byte line size */
	{ 0xe3, LVL_3,      4096 },	/* 16-way set assoc, 64 byte line size */
	{ 0xe4, LVL_3,      8192 },	/* 16-way set assoc, 64 byte line size */
	{ 0xea, LVL_3,      12288 },	/* 24-way set assoc, 64 byte line size */
	{ 0xeb, LVL_3,      18432 },	/* 24-way set assoc, 64 byte line size */
	{ 0xec, LVL_3,      24576 },	/* 24-way set assoc, 64 byte line size */
	{ 0x00, 0, 0}
};


enum _cache_type {
	CACHE_TYPE_NULL	= 0,
	CACHE_TYPE_DATA = 1,
	CACHE_TYPE_INST = 2,
	CACHE_TYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
	struct {
		enum _cache_type	type:5;
		unsigned int		level:3;
		unsigned int		is_self_initializing:1;
		unsigned int		is_fully_associative:1;
		unsigned int		reserved:4;
		unsigned int		num_threads_sharing:12;
		unsigned int		num_cores_on_die:6;
	} split;
	u32 full;
};

union _cpuid4_leaf_ebx {
	struct {
		unsigned int		coherency_line_size:12;
		unsigned int		physical_line_partition:10;
		unsigned int		ways_of_associativity:10;
	} split;
	u32 full;
};

union _cpuid4_leaf_ecx {
	struct {
		unsigned int		number_of_sets:32;
	} split;
	u32 full;
};
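/*
 * The fields above are reported biased by one, so the total cache size
 * in bytes is (ways + 1) * (partitions + 1) * (line_size + 1) * (sets + 1);
 * see cpuid4_cache_lookup_regs() below.  For example, a hypothetical
 * 32KB, 8-way cache with 64-byte lines and 64 sets would be encoded as
 * ways_of_associativity = 7, physical_line_partition = 0,
 * coherency_line_size = 63, number_of_sets = 63:
 * 8 * 1 * 64 * 64 = 32768 bytes.
 */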
struct _cpuid4_info {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned long size;
	bool can_disable;
	unsigned int l3_indices;
	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
};

/* subset of above _cpuid4_info w/o shared_cpu_map */
struct _cpuid4_info_regs {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned long size;
	bool can_disable;
	unsigned int l3_indices;
};

unsigned short			num_cache_leaves;

/* AMD doesn't have CPUID4. Emulate it here to report the same
   information to the user. This makes some assumptions about the machine:
   L2 not shared, no SMT etc. that is currently true on AMD CPUs.

   In theory the TLBs could be reported as fake type (they are in "dummy").
   Maybe later */
union l1_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:8;
		unsigned assoc:8;
		unsigned size_in_kb:8;
	};
	unsigned val;
};

union l2_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned size_in_kb:16;
	};
	unsigned val;
};

union l3_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned res:2;
		unsigned size_encoded:14;
	};
	unsigned val;
};

static const unsigned short __cpuinitconst assocs[] = {
	[1] = 1,
	[2] = 2,
	[4] = 4,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};

static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };

static void __cpuinit
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
		     union _cpuid4_leaf_ebx *ebx,
		     union _cpuid4_leaf_ecx *ecx)
{
	unsigned dummy;
	unsigned line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d;
	union l2_cache l2;
	union l3_cache l3;
	union l1_cache *l1 = &l1d;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

	switch (leaf) {
	case 1:
		l1 = &l1i;
		/* fall through: leaves 0 and 1 share the L1 decode logic */
	case 0:
		if (!l1->val)
			return;
		assoc = assocs[l1->assoc];
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
		break;
	case 2:
		if (!l2.val)
			return;
		assoc = assocs[l2.assoc];
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		/* cpu_data has errata corrections for K7 applied */
		size_in_kb = current_cpu_data.x86_cache_size;
		break;
	case 3:
		if (!l3.val)
			return;
		assoc = assocs[l3.assoc];
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
		size_in_kb = l3.size_encoded * 512;
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc = assoc >> 1;
		}
		break;
	default:
		return;
	}

	eax->split.is_self_initializing = 1;
	eax->split.type = types[leaf];
	eax->split.level = levels[leaf];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;


	if (assoc == 0xffff)
		eax->split.is_fully_associative = 1;
	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assoc - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
}

struct _cache_attr {
	struct attribute attr;
	ssize_t (*show)(struct _cpuid4_info *, char *);
	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
};
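/*
 * The CONFIG_CPU_SUP_AMD block below exposes the L3 index-disable
 * registers of AMD family 0x10 northbridges (PCI config offsets 0x1BC
 * and 0x1C0, addressed as 0x1BC + index * 4) as cache_disable_0/1
 * sysfs attributes on the L3 leaf.  amd_check_l3_disable() gates the
 * feature on family/model/stepping because of the errata cited there.
 */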
#ifdef CONFIG_CPU_SUP_AMD
static unsigned int __cpuinit
amd_calc_l3_indices(void)
{
	/*
	 * We're called over smp_call_function_single() and therefore
	 * are on the correct cpu.
	 */
	int cpu = smp_processor_id();
	int node = cpu_to_node(cpu);
	struct pci_dev *dev = node_to_k8_nb_misc(node);
	unsigned int sc0, sc1, sc2, sc3;
	u32 val = 0;

	pci_read_config_dword(dev, 0x1C4, &val);

	/* calculate subcache sizes */
	sc0 = !(val & BIT(0));
	sc1 = !(val & BIT(4));
	sc2 = !(val & BIT(8)) + !(val & BIT(9));
	sc3 = !(val & BIT(12)) + !(val & BIT(13));

	return (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
}

static void __cpuinit
amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
{
	if (index < 3)
		return;

	if (boot_cpu_data.x86 == 0x11)
		return;

	/* see errata #382 and #388 */
	if ((boot_cpu_data.x86 == 0x10) &&
	    ((boot_cpu_data.x86_model < 0x8) ||
	     (boot_cpu_data.x86_mask < 0x1)))
		return;

	this_leaf->can_disable = true;
	this_leaf->l3_indices = amd_calc_l3_indices();
}

static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
				  unsigned int index)
{
	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
	int node = amd_get_nb_id(cpu);
	struct pci_dev *dev = node_to_k8_nb_misc(node);
	unsigned int reg = 0;

	if (!this_leaf->can_disable)
		return -EINVAL;

	if (!dev)
		return -EINVAL;

	pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
	return sprintf(buf, "0x%08x\n", reg);
}

#define SHOW_CACHE_DISABLE(index)					\
static ssize_t								\
show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf)	\
{									\
	return show_cache_disable(this_leaf, buf, index);		\
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)

static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
	const char *buf, size_t count, unsigned int index)
{
	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
	int node = amd_get_nb_id(cpu);
	struct pci_dev *dev = node_to_k8_nb_misc(node);
	unsigned long val = 0;

#define SUBCACHE_MASK	(3UL << 20)
#define SUBCACHE_INDEX	0xfff

	if (!this_leaf->can_disable)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!dev)
		return -EINVAL;

	if (strict_strtoul(buf, 10, &val) < 0)
		return -EINVAL;

	/* do not allow writes outside of allowed bits */
	if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
	    ((val & SUBCACHE_INDEX) > this_leaf->l3_indices))
		return -EINVAL;

	val |= BIT(30);
	pci_write_config_dword(dev, 0x1BC + index * 4, val);
	/*
	 * We need to WBINVD on a core on the node containing the L3
	 * cache whose indices we disable; therefore a simple wbinvd()
	 * is not sufficient.
	 */
	wbinvd_on_cpu(cpu);
	pci_write_config_dword(dev, 0x1BC + index * 4, val | BIT(31));
	return count;
}

#define STORE_CACHE_DISABLE(index)					\
static ssize_t								\
store_cache_disable_##index(struct _cpuid4_info *this_leaf,		\
			    const char *buf, size_t count)		\
{									\
	return store_cache_disable(this_leaf, buf, count, index);	\
}
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)

static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
		show_cache_disable_0, store_cache_disable_0);
static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
		show_cache_disable_1, store_cache_disable_1);
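/*
 * Illustrative usage (the path assumes the L3 is leaf index3; the value
 * is an arbitrary example -- input is parsed as decimal and the index
 * part must not exceed l3_indices):
 *
 *	echo 42 > /sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0
 *
 * Reading the file back returns the raw register contents in hex.
 */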

#else	/* CONFIG_CPU_SUP_AMD */
static void __cpuinit
amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
{
};
#endif /* CONFIG_CPU_SUP_AMD */

static int
__cpuinit cpuid4_cache_lookup_regs(int index,
				   struct _cpuid4_info_regs *this_leaf)
{
	union _cpuid4_leaf_eax	eax;
	union _cpuid4_leaf_ebx	ebx;
	union _cpuid4_leaf_ecx	ecx;
	unsigned		edx;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		amd_cpuid4(index, &eax, &ebx, &ecx);
		if (boot_cpu_data.x86 >= 0x10)
			amd_check_l3_disable(index, this_leaf);
	} else {
		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
	}

	if (eax.split.type == CACHE_TYPE_NULL)
		return -EIO; /* better error ? */

	this_leaf->eax = eax;
	this_leaf->ebx = ebx;
	this_leaf->ecx = ecx;
	this_leaf->size = (ecx.split.number_of_sets          + 1) *
			  (ebx.split.coherency_line_size     + 1) *
			  (ebx.split.physical_line_partition + 1) *
			  (ebx.split.ways_of_associativity   + 1);
	return 0;
}

static int __cpuinit find_num_cache_leaves(void)
{
	unsigned int		eax, ebx, ecx, edx;
	union _cpuid4_leaf_eax	cache_eax;
	int			i = -1;

	do {
		++i;
		/* Do cpuid(4) loop to find out num_cache_leaves */
		cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CACHE_TYPE_NULL);
	return i;
}
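/*
 * init_intel_cacheinfo() prefers the deterministic cpuid(4) leaves and
 * falls back to the legacy cpuid(2) descriptor bytes, which are matched
 * against cache_table[] above -- e.g. descriptor 0x2c decodes to a 32KB
 * L1 data cache.  On P4 (family 15) cpuid(2) is still consulted, but
 * only for the trace cache descriptors.
 */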
unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	/* Cache sizes */
	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
#ifdef CONFIG_X86_HT
	unsigned int cpu = c->cpu_index;
#endif

	if (c->cpuid_level > 3) {
		static int is_initialized;

		if (is_initialized == 0) {
			/* Init num_cache_leaves from boot CPU */
			num_cache_leaves = find_num_cache_leaves();
			is_initialized++;
		}

		/*
		 * Whenever possible use cpuid(4), deterministic cache
		 * parameters cpuid leaf to find the cache details
		 */
		for (i = 0; i < num_cache_leaves; i++) {
			struct _cpuid4_info_regs this_leaf;
			int retval;

			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
			if (retval >= 0) {
				switch (this_leaf.eax.split.level) {
				case 1:
					if (this_leaf.eax.split.type ==
							CACHE_TYPE_DATA)
						new_l1d = this_leaf.size/1024;
					else if (this_leaf.eax.split.type ==
							CACHE_TYPE_INST)
						new_l1i = this_leaf.size/1024;
					break;
				case 2:
					new_l2 = this_leaf.size/1024;
					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
					index_msb = get_count_order(num_threads_sharing);
					l2_id = c->apicid >> index_msb;
					break;
				case 3:
					new_l3 = this_leaf.size/1024;
					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
					index_msb = get_count_order(
							num_threads_sharing);
					l3_id = c->apicid >> index_msb;
					break;
				default:
					break;
				}
			}
		}
	}
	/*
	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
	 * trace cache
	 */
	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
		/* supports eax=2  call */
		int j, n;
		unsigned int regs[4];
		unsigned char *dp = (unsigned char *)regs;
		int only_trace = 0;

		if (num_cache_leaves != 0 && c->x86 == 15)
			only_trace = 1;

		/* Number of times to iterate */
		n = cpuid_eax(2) & 0xFF;

		for (i = 0 ; i < n ; i++) {
			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

			/* If bit 31 is set, this is an unknown format */
			for (j = 0 ; j < 3 ; j++)
				if (regs[j] & (1 << 31))
					regs[j] = 0;

			/* Byte 0 is level count, not a descriptor */
			for (j = 1 ; j < 16 ; j++) {
				unsigned char des = dp[j];
				unsigned char k = 0;

				/* look up this descriptor in the table */
				while (cache_table[k].descriptor != 0) {
					if (cache_table[k].descriptor == des) {
						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
							break;
						switch (cache_table[k].cache_type) {
						case LVL_1_INST:
							l1i += cache_table[k].size;
							break;
						case LVL_1_DATA:
							l1d += cache_table[k].size;
							break;
						case LVL_2:
							l2 += cache_table[k].size;
							break;
						case LVL_3:
							l3 += cache_table[k].size;
							break;
						case LVL_TRACE:
							trace += cache_table[k].size;
							break;
						}

						break;
					}

					k++;
				}
			}
		}
	}

	if (new_l1d)
		l1d = new_l1d;

	if (new_l1i)
		l1i = new_l1i;

	if (new_l2) {
		l2 = new_l2;
#ifdef CONFIG_X86_HT
		per_cpu(cpu_llc_id, cpu) = l2_id;
#endif
	}

	if (new_l3) {
		l3 = new_l3;
#ifdef CONFIG_X86_HT
		per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
	}

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

	return l2;
}

#ifdef CONFIG_SYSFS

/* pointer to _cpuid4_info array (for each cache leaf) */
static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
#define CPUID4_INFO_IDX(x, y)	(&((per_cpu(ici_cpuid4_info, x))[y]))

#ifdef CONFIG_SMP
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
	struct _cpuid4_info	*this_leaf, *sibling_leaf;
	unsigned long num_threads_sharing;
	int index_msb, i, sibling;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
		for_each_cpu(i, c->llc_shared_map) {
			if (!per_cpu(ici_cpuid4_info, i))
				continue;
			this_leaf = CPUID4_INFO_IDX(i, index);
			for_each_cpu(sibling, c->llc_shared_map) {
				if (!cpu_online(sibling))
					continue;
				set_bit(sibling, this_leaf->shared_cpu_map);
			}
		}
		return;
	}
	this_leaf = CPUID4_INFO_IDX(cpu, index);
	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;

	if (num_threads_sharing == 1)
		cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
	else {
		index_msb = get_count_order(num_threads_sharing);

		for_each_online_cpu(i) {
			if (cpu_data(i).apicid >> index_msb ==
			    c->apicid >> index_msb) {
				cpumask_set_cpu(i,
					to_cpumask(this_leaf->shared_cpu_map));
				if (i != cpu && per_cpu(ici_cpuid4_info, i)) {
					sibling_leaf =
						CPUID4_INFO_IDX(i, index);
					cpumask_set_cpu(cpu, to_cpumask(
						sibling_leaf->shared_cpu_map));
				}
			}
		}
	}
}
static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
{
	struct _cpuid4_info	*this_leaf, *sibling_leaf;
	int sibling;

	this_leaf = CPUID4_INFO_IDX(cpu, index);
	for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
		sibling_leaf = CPUID4_INFO_IDX(sibling, index);
		cpumask_clear_cpu(cpu,
				  to_cpumask(sibling_leaf->shared_cpu_map));
	}
}
#else
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
}

static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
{
}
#endif

static void __cpuinit free_cache_attributes(unsigned int cpu)
{
	int i;

	for (i = 0; i < num_cache_leaves; i++)
		cache_remove_shared_cpu_map(cpu, i);

	kfree(per_cpu(ici_cpuid4_info, cpu));
	per_cpu(ici_cpuid4_info, cpu) = NULL;
}

static int
__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
	struct _cpuid4_info_regs *leaf_regs =
		(struct _cpuid4_info_regs *)this_leaf;

	return cpuid4_cache_lookup_regs(index, leaf_regs);
}

static void __cpuinit get_cpu_leaves(void *_retval)
{
	int j, *retval = _retval, cpu = smp_processor_id();

	/* Do cpuid and store the results */
	for (j = 0; j < num_cache_leaves; j++) {
		struct _cpuid4_info *this_leaf;
		this_leaf = CPUID4_INFO_IDX(cpu, j);
		*retval = cpuid4_cache_lookup(j, this_leaf);
		if (unlikely(*retval < 0)) {
			int i;

			for (i = 0; i < j; i++)
				cache_remove_shared_cpu_map(cpu, i);
			break;
		}
		cache_shared_cpu_map_setup(cpu, j);
	}
}

static int __cpuinit detect_cache_attributes(unsigned int cpu)
{
	int			retval;

	if (num_cache_leaves == 0)
		return -ENOENT;

	per_cpu(ici_cpuid4_info, cpu) = kzalloc(
	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
		return -ENOMEM;

	smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
	if (retval) {
		kfree(per_cpu(ici_cpuid4_info, cpu));
		per_cpu(ici_cpuid4_info, cpu) = NULL;
	}

	return retval;
}
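/*
 * The remainder of this file builds the sysfs tree for the data
 * collected above; for example (leaf count and attribute set vary
 * by CPU):
 *
 *	/sys/devices/system/cpu/cpu0/cache/index0/{type,level,size,...}
 *	/sys/devices/system/cpu/cpu0/cache/index1/...
 */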
#include <linux/kobject.h>
#include <linux/sysfs.h>

extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */

/* pointer to kobject for cpuX/cache */
static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);

struct _index_kobject {
	struct kobject kobj;
	unsigned int cpu;
	unsigned short index;
};

/* pointer to array of kobjects for cpuX/cache/indexY */
static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
#define INDEX_KOBJECT_PTR(x, y)		(&((per_cpu(ici_index_kobject, x))[y]))

#define show_one_plus(file_name, object, val)				\
static ssize_t show_##file_name						\
			(struct _cpuid4_info *this_leaf, char *buf)	\
{									\
	return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
}

show_one_plus(level, eax.split.level, 0);
show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);

static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
{
	return sprintf(buf, "%luK\n", this_leaf->size / 1024);
}

static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
					int type, char *buf)
{
	ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
	int n = 0;

	if (len > 1) {
		const struct cpumask *mask;

		mask = to_cpumask(this_leaf->shared_cpu_map);
		n = type ?
			cpulist_scnprintf(buf, len-2, mask) :
			cpumask_scnprintf(buf, len-2, mask);
		buf[n++] = '\n';
		buf[n] = '\0';
	}
	return n;
}
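/*
 * show_shared_cpu_map() below prints the mask as a hex bitmask
 * (cpumask_scnprintf()), while show_shared_cpu_list() prints it as a
 * human-readable range list, e.g. "0-1" for two sharing CPUs
 * (cpulist_scnprintf()).
 */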
static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf)
{
	return show_shared_cpu_map_func(leaf, 0, buf);
}

static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf)
{
	return show_shared_cpu_map_func(leaf, 1, buf);
}

static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
{
	switch (this_leaf->eax.split.type) {
	case CACHE_TYPE_DATA:
		return sprintf(buf, "Data\n");
	case CACHE_TYPE_INST:
		return sprintf(buf, "Instruction\n");
	case CACHE_TYPE_UNIFIED:
		return sprintf(buf, "Unified\n");
	default:
		return sprintf(buf, "Unknown\n");
	}
}

#define to_object(k)	container_of(k, struct _index_kobject, kobj)
#define to_attr(a)	container_of(a, struct _cache_attr, attr)

#define define_one_ro(_name) \
static struct _cache_attr _name = \
	__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(level);
define_one_ro(type);
define_one_ro(coherency_line_size);
define_one_ro(physical_line_partition);
define_one_ro(ways_of_associativity);
define_one_ro(number_of_sets);
define_one_ro(size);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);

#define DEFAULT_SYSFS_CACHE_ATTRS	\
	&type.attr,			\
	&level.attr,			\
	&coherency_line_size.attr,	\
	&physical_line_partition.attr,	\
	&ways_of_associativity.attr,	\
	&number_of_sets.attr,		\
	&size.attr,			\
	&shared_cpu_map.attr,		\
	&shared_cpu_list.attr

static struct attribute *default_attrs[] = {
	DEFAULT_SYSFS_CACHE_ATTRS,
	NULL
};

static struct attribute *default_l3_attrs[] = {
	DEFAULT_SYSFS_CACHE_ATTRS,
#ifdef CONFIG_CPU_SUP_AMD
	&cache_disable_0.attr,
	&cache_disable_1.attr,
#endif
	NULL
};

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct _cache_attr *fattr = to_attr(attr);
	struct _index_kobject *this_leaf = to_object(kobj);
	ssize_t ret;

	ret = fattr->show ?
		fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
			buf) :
		0;
	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct _cache_attr *fattr = to_attr(attr);
	struct _index_kobject *this_leaf = to_object(kobj);
	ssize_t ret;

	ret = fattr->store ?
		fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
			buf, count) :
		0;
	return ret;
}
static struct sysfs_ops sysfs_ops = {
	.show   = show,
	.store  = store,
};

static struct kobj_type ktype_cache = {
	.sysfs_ops	= &sysfs_ops,
	.default_attrs	= default_attrs,
};

static struct kobj_type ktype_percpu_entry = {
	.sysfs_ops	= &sysfs_ops,
};

static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
{
	kfree(per_cpu(ici_cache_kobject, cpu));
	kfree(per_cpu(ici_index_kobject, cpu));
	per_cpu(ici_cache_kobject, cpu) = NULL;
	per_cpu(ici_index_kobject, cpu) = NULL;
	free_cache_attributes(cpu);
}

static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
{
	int err;

	if (num_cache_leaves == 0)
		return -ENOENT;

	err = detect_cache_attributes(cpu);
	if (err)
		return err;

	/* Allocate all required memory */
	per_cpu(ici_cache_kobject, cpu) =
		kzalloc(sizeof(struct kobject), GFP_KERNEL);
	if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
		goto err_out;

	per_cpu(ici_index_kobject, cpu) = kzalloc(
	    sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
	if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
		goto err_out;

	return 0;

err_out:
	cpuid4_cache_sysfs_exit(cpu);
	return -ENOMEM;
}

static DECLARE_BITMAP(cache_dev_map, NR_CPUS);

/* Add/Remove cache interface for CPU device */
static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long i, j;
	struct _index_kobject *this_object;
	struct _cpuid4_info   *this_leaf;
	int retval;

	retval = cpuid4_cache_sysfs_init(cpu);
	if (unlikely(retval < 0))
		return retval;

	retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
				      &ktype_percpu_entry,
				      &sys_dev->kobj, "%s", "cache");
	if (retval < 0) {
		cpuid4_cache_sysfs_exit(cpu);
		return retval;
	}

	for (i = 0; i < num_cache_leaves; i++) {
		this_object = INDEX_KOBJECT_PTR(cpu, i);
		this_object->cpu = cpu;
		this_object->index = i;

		this_leaf = CPUID4_INFO_IDX(cpu, i);

		if (this_leaf->can_disable)
			ktype_cache.default_attrs = default_l3_attrs;
		else
			ktype_cache.default_attrs = default_attrs;

		retval = kobject_init_and_add(&(this_object->kobj),
					      &ktype_cache,
					      per_cpu(ici_cache_kobject, cpu),
					      "index%1lu", i);
		if (unlikely(retval)) {
			for (j = 0; j < i; j++)
				kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
			kobject_put(per_cpu(ici_cache_kobject, cpu));
			cpuid4_cache_sysfs_exit(cpu);
			return retval;
		}
		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
	}
	cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));

	kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
	return 0;
}

static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long i;

	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
		return;
	if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
		return;
	cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));

	for (i = 0; i < num_cache_leaves; i++)
		kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
	kobject_put(per_cpu(ici_cache_kobject, cpu));
	cpuid4_cache_sysfs_exit(cpu);
}
static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
					    unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct sys_device *sys_dev;

	sys_dev = get_cpu_sysdev(cpu);
	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		cache_add_dev(sys_dev);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		cache_remove_dev(sys_dev);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
	.notifier_call = cacheinfo_cpu_callback,
};

static int __cpuinit cache_sysfs_init(void)
{
	int i;

	if (num_cache_leaves == 0)
		return 0;

	for_each_online_cpu(i) {
		int err;
		struct sys_device *sys_dev = get_cpu_sysdev(i);

		err = cache_add_dev(sys_dev);
		if (err)
			return err;
	}
	register_hotcpu_notifier(&cacheinfo_cpu_notifier);
	return 0;
}

device_initcall(cache_sysfs_init);

#endif