op_model_p4.c revision cc2ee177dbb3befca43e36cfc56778b006c3d050
1/** 2 * @file op_model_p4.c 3 * P4 model-specific MSR operations 4 * 5 * @remark Copyright 2002 OProfile authors 6 * @remark Read the file COPYING 7 * 8 * @author Graydon Hoare 9 */ 10 11#include "op_x86_model.h" 12#include "op_msr.h" 13#include "op_apic.h" 14#include "op_arch.h" 15 16#define NUM_EVENTS 39 17 18#define NUM_COUNTERS_NON_HT 8 19#define NUM_ESCRS_NON_HT 45 20#define NUM_CCCRS_NON_HT 18 21#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT) 22 23#define NUM_COUNTERS_HT2 4 24#define NUM_ESCRS_HT2 23 25#define NUM_CCCRS_HT2 9 26#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) 27 28static unsigned int num_counters = NUM_COUNTERS_NON_HT; 29 30 31/* this has to be checked dynamically since the 32 hyper-threadedness of a chip is discovered at 33 kernel boot-time. */ 34static inline void setup_num_counters(void) 35{ 36#ifdef HT_SUPPORT 37 if (smp_num_siblings == 2) 38 num_counters = NUM_COUNTERS_HT2; 39#endif 40} 41 42static int inline addr_increment(void) 43{ 44#ifdef HT_SUPPORT 45 return smp_num_siblings == 2 ? 2 : 1; 46#else 47 return 1; 48#endif 49} 50 51 52/* tables to simulate simplified hardware view of p4 registers */ 53struct p4_counter_binding { 54 int virt_counter; 55 int counter_address; 56 int cccr_address; 57}; 58 59struct p4_event_binding { 60 int escr_select; /* value to put in CCCR */ 61 int event_select; /* value to put in ESCR */ 62 struct { 63 int virt_counter; /* for this counter... */ 64 int escr_address; /* use this ESCR */ 65 } bindings[2]; 66}; 67 68/* nb: these CTR_* defines are a duplicate of defines in 69 event/i386.p4*events. */ 70 71 72#define CTR_BPU_0 (1 << 0) 73#define CTR_MS_0 (1 << 1) 74#define CTR_FLAME_0 (1 << 2) 75#define CTR_IQ_4 (1 << 3) 76#define CTR_BPU_2 (1 << 4) 77#define CTR_MS_2 (1 << 5) 78#define CTR_FLAME_2 (1 << 6) 79#define CTR_IQ_5 (1 << 7) 80 81static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = { 82 { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 }, 83 { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 }, 84 { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 }, 85 { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 }, 86 { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 }, 87 { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 }, 88 { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 }, 89 { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 } 90}; 91 92#define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT 93 94/* All cccr we don't use. */ 95static int p4_unused_cccr[NUM_UNUSED_CCCRS] = { 96 MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3, 97 MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3, 98 MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3, 99 MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR1, 100 MSR_P4_IQ_CCCR2, MSR_P4_IQ_CCCR3 101}; 102 103/* p4 event codes in libop/op_event.h are indices into this table. */ 104 105static struct p4_event_binding p4_events[NUM_EVENTS] = { 106 107 { /* BRANCH_RETIRED */ 108 0x05, 0x06, 109 { {CTR_IQ_4, MSR_P4_CRU_ESCR2}, 110 {CTR_IQ_5, MSR_P4_CRU_ESCR3} } 111 }, 112 113 { /* MISPRED_BRANCH_RETIRED */ 114 0x04, 0x03, 115 { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, 116 { CTR_IQ_5, MSR_P4_CRU_ESCR1} } 117 }, 118 119 { /* TC_DELIVER_MODE */ 120 0x01, 0x01, 121 { { CTR_MS_0, MSR_P4_TC_ESCR0}, 122 { CTR_MS_2, MSR_P4_TC_ESCR1} } 123 }, 124 125 { /* BPU_FETCH_REQUEST */ 126 0x00, 0x03, 127 { { CTR_BPU_0, MSR_P4_BPU_ESCR0}, 128 { CTR_BPU_2, MSR_P4_BPU_ESCR1} } 129 }, 130 131 { /* ITLB_REFERENCE */ 132 0x03, 0x18, 133 { { CTR_BPU_0, MSR_P4_ITLB_ESCR0}, 134 { CTR_BPU_2, MSR_P4_ITLB_ESCR1} } 135 }, 136 137 { /* MEMORY_CANCEL */ 138 0x05, 0x02, 139 { { CTR_FLAME_0, MSR_P4_DAC_ESCR0}, 140 { CTR_FLAME_2, MSR_P4_DAC_ESCR1} } 141 }, 142 143 { /* MEMORY_COMPLETE */ 144 0x02, 0x08, 145 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, 146 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } 147 }, 148 149 { /* LOAD_PORT_REPLAY */ 150 0x02, 0x04, 151 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, 152 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } 153 }, 154 155 { /* STORE_PORT_REPLAY */ 156 0x02, 0x05, 157 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, 158 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } 159 }, 160 161 { /* MOB_LOAD_REPLAY */ 162 0x02, 0x03, 163 { { CTR_BPU_0, MSR_P4_MOB_ESCR0}, 164 { CTR_BPU_2, MSR_P4_MOB_ESCR1} } 165 }, 166 167 { /* PAGE_WALK_TYPE */ 168 0x04, 0x01, 169 { { CTR_BPU_0, MSR_P4_PMH_ESCR0}, 170 { CTR_BPU_2, MSR_P4_PMH_ESCR1} } 171 }, 172 173 { /* BSQ_CACHE_REFERENCE */ 174 0x07, 0x0c, 175 { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, 176 { CTR_BPU_2, MSR_P4_BSU_ESCR1} } 177 }, 178 179 /* intel doc vol 3 table A-1: P4 and xeon with cpuid signature < 0xf27 180 * doen't allow MSR_FSB_ESCR1 so only counter 0 is available */ 181 { /* IOQ_ALLOCATION */ 182 0x06, 0x03, 183 { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, 184 { 0, 0 } } 185 }, 186 187 { /* IOQ_ACTIVE_ENTRIES */ 188 0x06, 0x1a, 189 { { CTR_BPU_2, MSR_P4_FSB_ESCR1}, 190 { 0, 0 } } 191 }, 192 193 { /* FSB_DATA_ACTIVITY */ 194 0x06, 0x17, 195 { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, 196 { CTR_BPU_2, MSR_P4_FSB_ESCR1} } 197 }, 198 199 { /* BSQ_ALLOCATION */ 200 0x07, 0x05, 201 { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, 202 { 0, 0 } } 203 }, 204 205 { /* BSQ_ACTIVE_ENTRIES */ 206 0x07, 0x06, 207 /* FIXME intel doc don't say which ESCR1 to use, using 208 BSU_ESCR1 is a sensible guess but will need validation */ 209 { { CTR_BPU_2, MSR_P4_BSU_ESCR1 }, 210 { 0, 0 } } 211 }, 212 213 { /* X87_ASSIST */ 214 0x05, 0x03, 215 { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, 216 { CTR_IQ_5, MSR_P4_CRU_ESCR3} } 217 }, 218 219 { /* SSE_INPUT_ASSIST */ 220 0x01, 0x34, 221 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 222 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 223 }, 224 225 { /* PACKED_SP_UOP */ 226 0x01, 0x08, 227 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 228 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 229 }, 230 231 { /* PACKED_DP_UOP */ 232 0x01, 0x0c, 233 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 234 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 235 }, 236 237 { /* SCALAR_SP_UOP */ 238 0x01, 0x0a, 239 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 240 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 241 }, 242 243 { /* SCALAR_DP_UOP */ 244 0x01, 0x0e, 245 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 246 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 247 }, 248 249 { /* 64BIT_MMX_UOP */ 250 0x01, 0x02, 251 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 252 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 253 }, 254 255 { /* 128BIT_MMX_UOP */ 256 0x01, 0x1a, 257 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 258 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 259 }, 260 261 { /* X87_FP_UOP */ 262 0x01, 0x04, 263 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 264 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 265 }, 266 267 { /* X87_SIMD_MOVES_UOP */ 268 0x01, 0x2e, 269 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, 270 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } 271 }, 272 273 { /* MACHINE_CLEAR */ 274 0x05, 0x02, 275 { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, 276 { CTR_IQ_5, MSR_P4_CRU_ESCR3} } 277 }, 278 279 { /* GLOBAL_POWER_EVENTS */ 280 0x06, 0x13 /* older manual says 0x05, newer 0x13 */, 281 { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, 282 { CTR_BPU_2, MSR_P4_FSB_ESCR1} } 283 }, 284 285 { /* TC_MS_XFER */ 286 0x00, 0x05, 287 { { CTR_MS_0, MSR_P4_MS_ESCR0}, 288 { CTR_MS_2, MSR_P4_MS_ESCR1} } 289 }, 290 291 { /* UOP_QUEUE_WRITES */ 292 0x00, 0x09, 293 { { CTR_MS_0, MSR_P4_MS_ESCR0}, 294 { CTR_MS_2, MSR_P4_MS_ESCR1} } 295 }, 296 297 { /* FRONT_END_EVENT */ 298 0x05, 0x08, 299 { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, 300 { CTR_IQ_5, MSR_P4_CRU_ESCR3} } 301 }, 302 303 { /* EXECUTION_EVENT */ 304 0x05, 0x0c, 305 { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, 306 { CTR_IQ_5, MSR_P4_CRU_ESCR3} } 307 }, 308 309 { /* REPLAY_EVENT */ 310 0x05, 0x09, 311 { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, 312 { CTR_IQ_5, MSR_P4_CRU_ESCR3} } 313 }, 314 315 { /* INSTR_RETIRED */ 316 0x04, 0x02, 317 { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, 318 { CTR_IQ_5, MSR_P4_CRU_ESCR1} } 319 }, 320 321 { /* UOPS_RETIRED */ 322 0x04, 0x01, 323 { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, 324 { CTR_IQ_5, MSR_P4_CRU_ESCR1} } 325 }, 326 327 { /* UOP_TYPE */ 328 0x02, 0x02, 329 { { CTR_IQ_4, MSR_P4_RAT_ESCR0}, 330 { CTR_IQ_5, MSR_P4_RAT_ESCR1} } 331 }, 332 333 { /* RETIRED_MISPRED_BRANCH_TYPE */ 334 0x02, 0x05, 335 { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, 336 { CTR_MS_2, MSR_P4_TBPU_ESCR1} } 337 }, 338 339 { /* RETIRED_BRANCH_TYPE */ 340 0x02, 0x04, 341 { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, 342 { CTR_MS_2, MSR_P4_TBPU_ESCR1} } 343 } 344}; 345 346 347#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7) 348 349#define ESCR_RESERVED_BITS 0x80000003 350#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS) 351#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2)) 352#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3)) 353#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1))) 354#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) 355#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) 356#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) 357#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) 358#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) 359 360#define CCCR_RESERVED_BITS 0x38030FFF 361#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) 362#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000) 363#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13)) 364#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26)) 365#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) 366#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) 367#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) 368#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) 369#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) 370#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) 371#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) 372 373#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0) 374#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0) 375#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) 376 377 378/* this assigns a "stagger" to the current CPU, which is used throughout 379 the code in this module as an extra array offset, to select the "even" 380 or "odd" part of all the divided resources. */ 381static unsigned int get_stagger(void) 382{ 383#ifdef HT_SUPPORT 384 int cpu; 385 if (smp_num_siblings > 1) { 386 cpu = smp_processor_id(); 387 return (cpu_sibling_map[cpu] > cpu) ? 0 : 1; 388 } 389#endif 390 return 0; 391} 392 393 394/* finally, mediate access to a real hardware counter 395 by passing a "virtual" counter numer to this macro, 396 along with your stagger setting. */ 397#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger))) 398 399 400static void p4_fill_in_addresses(struct op_msrs * const msrs) 401{ 402 unsigned int i; 403 unsigned int addr, stag; 404 405 setup_num_counters(); 406 stag = get_stagger(); 407 408 /* the counter registers we pay attention to */ 409 for (i = 0; i < num_counters; ++i) { 410 msrs->counters.addrs[i] = 411 p4_counters[VIRT_CTR(stag, i)].counter_address; 412 } 413 414 /* FIXME: bad feeling, we don't save the 10 counters we don't use. */ 415 416 /* 18 CCCR registers */ 417 for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag; 418 addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) { 419 msrs->controls.addrs[i] = addr; 420 } 421 422 /* 43 ESCR registers in three discontiguous group */ 423 for (addr = MSR_P4_BSU_ESCR0 + stag; 424 addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) { 425 msrs->controls.addrs[i] = addr; 426 } 427 428 for (addr = MSR_P4_MS_ESCR0 + stag; 429 addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 430 msrs->controls.addrs[i] = addr; 431 } 432 433 for (addr = MSR_P4_IX_ESCR0 + stag; 434 addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 435 msrs->controls.addrs[i] = addr; 436 } 437 438 /* there are 2 remaining non-contiguously located ESCRs */ 439 440 if (num_counters == NUM_COUNTERS_NON_HT) { 441 /* standard non-HT CPUs handle both remaining ESCRs*/ 442 msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR5; 443 msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR4; 444 445 } else if (stag == 0) { 446 /* HT CPUs give the first remainder to the even thread, as 447 the 32nd control register */ 448 msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR4; 449 450 } else { 451 /* and two copies of the second to the odd thread, 452 for the 22st and 23nd control registers */ 453 msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR5; 454 msrs->controls.addrs[i++] = MSR_P4_CRU_ESCR5; 455 } 456} 457 458 459static void pmc_setup_one_p4_counter(unsigned int ctr) 460{ 461 int i; 462 int const maxbind = 2; 463 unsigned int cccr = 0; 464 unsigned int escr = 0; 465 unsigned int high = 0; 466 unsigned int counter_bit; 467 struct p4_event_binding * ev = 0; 468 unsigned int stag; 469 470 stag = get_stagger(); 471 472 /* convert from counter *number* to counter *bit* */ 473 counter_bit = 1 << VIRT_CTR(stag, ctr); 474 475 /* find our event binding structure. */ 476 if (sysctl.ctr[ctr].event <= 0 || sysctl.ctr[ctr].event > NUM_EVENTS) { 477 printk(KERN_ERR 478 "oprofile: P4 event code 0x%x out of range\n", 479 sysctl.ctr[ctr].event); 480 return; 481 } 482 483 ev = &(p4_events[sysctl.ctr[ctr].event - 1]); 484 485 for (i = 0; i < maxbind; i++) { 486 if (ev->bindings[i].virt_counter & counter_bit) { 487 488 /* modify ESCR */ 489 ESCR_READ(escr, high, ev, i); 490 ESCR_CLEAR(escr); 491 if (stag == 0) { 492 ESCR_SET_USR_0(escr, sysctl.ctr[ctr].user); 493 ESCR_SET_OS_0(escr, sysctl.ctr[ctr].kernel); 494 } else { 495 ESCR_SET_USR_1(escr, sysctl.ctr[ctr].user); 496 ESCR_SET_OS_1(escr, sysctl.ctr[ctr].kernel); 497 } 498 ESCR_SET_EVENT_SELECT(escr, ev->event_select); 499 ESCR_SET_EVENT_MASK(escr, sysctl.ctr[ctr].unit_mask); 500 ESCR_WRITE(escr, high, ev, i); 501 502 /* modify CCCR */ 503 CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); 504 CCCR_CLEAR(cccr); 505 CCCR_SET_REQUIRED_BITS(cccr); 506 CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); 507 if (stag == 0) { 508 CCCR_SET_PMI_OVF_0(cccr); 509 } else { 510 CCCR_SET_PMI_OVF_1(cccr); 511 } 512 CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); 513 return; 514 } 515 } 516 517 printk(KERN_ERR 518 "oprofile: P4 event code 0x%x no binding, ctr %d\n", 519 sysctl.ctr[ctr].event, ctr); 520} 521 522 523static void p4_setup_ctrs(struct op_msrs const * const msrs) 524{ 525 unsigned int i; 526 unsigned int low, high; 527 unsigned int addr; 528 unsigned int stag; 529 530 stag = get_stagger(); 531 532 rdmsr(MSR_IA32_MISC_ENABLE, low, high); 533 if (! MISC_PMC_ENABLED_P(low)) { 534 printk(KERN_ERR "oprofile: P4 PMC not available\n"); 535 return; 536 } 537 538 /* clear the cccrs we will use */ 539 for (i = 0 ; i < num_counters ; i++) { 540 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); 541 CCCR_CLEAR(low); 542 CCCR_SET_REQUIRED_BITS(low); 543 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); 544 } 545 546 /* clear cccrs outside our concern */ 547 for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) { 548 rdmsr(p4_unused_cccr[i], low, high); 549 CCCR_CLEAR(low); 550 CCCR_SET_REQUIRED_BITS(low); 551 wrmsr(p4_unused_cccr[i], low, high); 552 } 553 554 /* clear all escrs (including those outside our concern) */ 555 for (addr = MSR_P4_BSU_ESCR0 + stag; 556 addr <= MSR_P4_SSU_ESCR0; addr += addr_increment()) { 557 wrmsr(addr, 0, 0); 558 } 559 560 for (addr = MSR_P4_MS_ESCR0 + stag; 561 addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){ 562 wrmsr(addr, 0, 0); 563 } 564 565 for (addr = MSR_P4_IX_ESCR0 + stag; 566 addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){ 567 wrmsr(addr, 0, 0); 568 } 569 570 if (num_counters == NUM_COUNTERS_NON_HT) { 571 wrmsr(MSR_P4_CRU_ESCR4, 0, 0); 572 wrmsr(MSR_P4_CRU_ESCR5, 0, 0); 573 } else if (stag == 0) { 574 wrmsr(MSR_P4_CRU_ESCR4, 0, 0); 575 } else { 576 wrmsr(MSR_P4_CRU_ESCR5, 0, 0); 577 } 578 579 /* setup all counters */ 580 for (i = 0 ; i < num_counters ; ++i) { 581 if (sysctl.ctr[i].event) { 582 pmc_setup_one_p4_counter(i); 583 CTR_WRITE(sysctl.ctr[i].count, VIRT_CTR(stag, i)); 584 } 585 } 586} 587 588 589static void p4_check_ctrs(unsigned int const cpu, 590 struct op_msrs const * const msrs, 591 struct pt_regs * const regs) 592{ 593 unsigned long ctr, low, high, stag, real; 594 int i; 595 596 stag = get_stagger(); 597 598 for (i = 0; i < num_counters; ++i) { 599 600 if (!sysctl.ctr[i].enabled) 601 continue; 602 603 /* 604 * there is some eccentricity in the hardware which 605 * requires that we perform 2 extra corrections: 606 * 607 * - check both the CCCR:OVF flag for overflow and the 608 * counter high bit for un-flagged overflows. 609 * 610 * - write the counter back twice to ensure it gets 611 * updated properly. 612 * 613 * the former seems to be related to extra NMIs happening 614 * during the current NMI; the latter is reported as errata 615 * N15 in intel doc 249199-029, pentium 4 specification 616 * update, though their suggested work-around does not 617 * appear to solve the problem. 618 */ 619 620 real = VIRT_CTR(stag, i); 621 622 CCCR_READ(low, high, real); 623 CTR_READ(ctr, high, real); 624 if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { 625 op_do_profile(cpu, instruction_pointer(regs), IRQ_ENABLED(regs), i); 626 CTR_WRITE(oprof_data[cpu].ctr_count[i], real); 627 CCCR_CLEAR_OVF(low); 628 CCCR_WRITE(low, high, real); 629 CTR_WRITE(oprof_data[cpu].ctr_count[i], real); 630 } 631 } 632 633 /* P4 quirk: you have to re-unmask the apic vector */ 634 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); 635} 636 637 638static void p4_start(struct op_msrs const * const msrs) 639{ 640 unsigned int low, high, stag; 641 int i; 642 643 stag = get_stagger(); 644 645 for (i = 0; i < num_counters; ++i) { 646 if (!sysctl.ctr[i].enabled) 647 continue; 648 CCCR_READ(low, high, VIRT_CTR(stag, i)); 649 CCCR_SET_ENABLE(low); 650 CCCR_WRITE(low, high, VIRT_CTR(stag, i)); 651 } 652} 653 654 655static void p4_stop(struct op_msrs const * const msrs) 656{ 657 unsigned int low, high, stag; 658 int i; 659 660 stag = get_stagger(); 661 662 for (i = 0; i < num_counters; ++i) { 663 /* FIXME: 2.6 driver doesn't do that ? */ 664 if (!sysctl.ctr[i].enabled) 665 continue; 666 CCCR_READ(low, high, VIRT_CTR(stag, i)); 667 CCCR_SET_DISABLE(low); 668 CCCR_WRITE(low, high, VIRT_CTR(stag, i)); 669 } 670} 671 672#ifdef HT_SUPPORT 673 674struct op_x86_model_spec const op_p4_ht2_spec = { 675 .num_counters = NUM_COUNTERS_HT2, 676 .num_controls = NUM_CONTROLS_HT2, 677 .fill_in_addresses = &p4_fill_in_addresses, 678 .setup_ctrs = &p4_setup_ctrs, 679 .check_ctrs = &p4_check_ctrs, 680 .start = &p4_start, 681 .stop = &p4_stop 682}; 683#endif 684 685struct op_x86_model_spec const op_p4_spec = { 686 .num_counters = NUM_COUNTERS_NON_HT, 687 .num_controls = NUM_CONTROLS_NON_HT, 688 .fill_in_addresses = &p4_fill_in_addresses, 689 .setup_ctrs = &p4_setup_ctrs, 690 .check_ctrs = &p4_check_ctrs, 691 .start = &p4_start, 692 .stop = &p4_stop 693}; 694