/* op_pmu.c revision 8cfa702f803c5ef6a2b062a489a1b2cf66b45b5e */
1/** 2 * @file op_pmu.c 3 * Setup and handling of IA64 Performance Monitoring Unit (PMU) 4 * 5 * @remark Copyright 2002 OProfile authors 6 * @remark Read the file COPYING 7 * 8 * @author Bob Montgomery 9 * @author Will Cohen 10 * @author John Levon 11 * @author Philippe Elie 12 */ 13 14 15#include "oprofile.h" 16#include "op_util.h" 17#include <asm/perfmon.h> 18#include "op_ia64_model.h" 19 20/* number of counters physically present */ 21static uint op_nr_counters = 4; 22 23/* performance counters are in pairs: pmcN and pmdN. The pmc register acts 24 * as the event selection; the pmd register is the counter. */ 25#define perf_reg(c) ((c)+4) 26 27#define IA64_1_PMD_MASK_VAL ((1UL << 32) - 1) 28#define IA64_2_PMD_MASK_VAL ((1UL << 47) - 1) 29 30/* The appropriate value is selected in pmu_init() */ 31unsigned long pmd_mask = IA64_2_PMD_MASK_VAL; 32 33#define pmd_overflowed(r, c) ((r) & (1 << perf_reg(c))) 34#define set_pmd_neg(v, c) do { \ 35 ia64_set_pmd(perf_reg(c), -(ulong)(v) & pmd_mask); \ 36 ia64_srlz_d(); } while (0) 37#define set_pmd(v, c) do { \ 38 ia64_set_pmd(perf_reg(c), (v) & pmd_mask); \ 39 ia64_srlz_d(); } while (0) 40#define set_pmc(v, c) do { ia64_set_pmc(perf_reg(c), (v)); ia64_srlz_d(); } while (0) 41#define get_pmd(c) ia64_get_pmd(perf_reg(c)) 42#define get_pmc(c) ia64_get_pmc(perf_reg(c)) 43 44/* ---------------- IRQ handler ------------------ */ 45 46/* The args match the args for pfm_overflow_handler in perfmon.c. 47 * The task_struct is currently filled in with the perfmon "owner" of 48 * the PMU. This might change. I'm not sure it makes sense in perfmon 49 * either with system-wide profiling. 50 * pmc0 is a bit mask for overflowed counters (bits 4-7) 51 * This routine should return 0 to resume interrupts. 
52 */ 53inline static void 54op_do_pmu_interrupt(u64 pmc0, struct pt_regs * regs) 55{ 56 uint cpu = op_cpu_id(); 57 int ctr; 58 59 for (ctr = 0 ; ctr < op_nr_counters ; ++ctr) { 60 if (pmd_overflowed(pmc0, ctr)) { 61 op_do_profile(cpu, regs->cr_iip, 1, ctr); 62 set_pmd_neg(oprof_data[cpu].ctr_count[ctr], ctr); 63 } 64 } 65 return; 66} 67 68 69static void 70op_raw_pmu_interrupt(int irq, void * arg, struct pt_regs * regs) 71{ 72 u64 pmc0; 73 74 pmc0 = ia64_get_pmc(0); 75 76 if ((pmc0 & ~0x1UL) != 0UL) { 77 op_do_pmu_interrupt(pmc0, regs); 78 ia64_set_pmc(0, 0); 79 ia64_srlz_d(); 80 } 81} 82 83 84#define MY_OPROFILE_VECTOR (IA64_PERFMON_VECTOR - 2) 85 86static void 87op_set_pmv(void * dummy) 88{ 89 ia64_set_pmv(MY_OPROFILE_VECTOR); 90 ia64_srlz_d(); 91} 92 93 94static void 95op_restore_pmv(void* dummy) 96{ 97 ia64_set_pmv(IA64_PERFMON_VECTOR); 98 ia64_srlz_d(); 99} 100 101 102static int 103install_handler(void) 104{ 105 int err = 0; 106 107 /* Try it legally - confusion about vec vs irq */ 108 err = request_irq(MY_OPROFILE_VECTOR, op_raw_pmu_interrupt, 109 SA_INTERRUPT | SA_PERCPU_IRQ, "oprofile", NULL); 110 111 if (err) { 112 printk(KERN_ALERT "oprofile_IA64: request_irq fails, " 113 "returns %d\n", err); 114 return err; 115 } 116 117 if ((smp_call_function(op_set_pmv, NULL, 0, 1))) { 118 printk(KERN_ALERT "oprofile_IA64: unexpected failure " 119 "of smp_call_function(op_set_pmv)\n"); 120 } 121 122 op_set_pmv(NULL); 123 124 return err; 125} 126 127 128static int 129restore_handler(void) 130{ 131 int err = 0; 132 133 if ((smp_call_function(op_restore_pmv, NULL, 0, 1))) { 134 printk(KERN_ALERT "oprofile_IA64: unexpected failure " 135 "of smp_call_function(op_restore_pmv)\n"); 136 } 137 138 op_restore_pmv(NULL); 139 140 free_irq(MY_OPROFILE_VECTOR, NULL); 141 return err; 142} 143 144 145/* ---------------- PMU setup ------------------ */ 146 147/* This is kind of artificial. The proc interface might really want to 148 * accept register values directly. 
There are other features not exposed 149 * by this limited interface. Of course that might require all sorts of 150 * validity checking??? */ 151static void 152pmc_fill_in(ulong * val, u8 kernel, u8 user, u8 event, u8 um) 153{ 154 /* enable interrupt generation */ 155 *val |= (1 << 5); 156 157 /* setup as a privileged monitor */ 158 *val |= (1 << 6); 159 160 /* McKinley requires pmc4 to have bit 23 set (enable PMU). 161 * It is supposedly ignored in other pmc registers. 162 * Try assuming it's ignored in Itanium, too, and just 163 * set it for everyone. 164 */ 165 166 *val |= (1 << 23); 167 168 /* enable/disable chosen OS and USR counting */ 169 (user) ? (*val |= (1 << 3)) 170 : (*val &= ~(1 << 3)); 171 172 (kernel) ? (*val |= (1 << 0)) 173 : (*val &= ~(1 << 0)); 174 175 /* what are we counting ? */ 176 *val &= ~(0xff << 8); 177 *val |= ((event & 0xff) << 8); 178 *val &= ~(0xf << 16); 179 *val |= ((um & 0xf) << 16); 180} 181 182 183static void 184pmu_setup(void * dummy) 185{ 186 ulong pmc_val; 187 int ii; 188 189 /* setup each counter */ 190 for (ii = 0 ; ii < op_nr_counters ; ++ii) { 191 if (sysctl.ctr[ii].enabled) { 192 pmc_val = 0; 193 194 set_pmd_neg(sysctl.ctr[ii].count, ii); 195 pmc_fill_in(&pmc_val, sysctl.ctr[ii].kernel, 196 sysctl.ctr[ii].user, sysctl.ctr[ii].event, 197 sysctl.ctr[ii].unit_mask); 198 199 set_pmc(pmc_val, ii); 200 } 201 } 202} 203 204 205void 206disable_psr(void * dummy) 207{ 208 struct pt_regs * regs; 209 /* disable profiling for my saved state */ 210 regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET); 211 regs--; 212 ia64_psr(regs)->pp = 0; 213 /* shouldn't need to */ 214 ia64_psr(regs)->up = 0; 215 216 /* disable profiling for my current state */ 217 __asm__ __volatile__ ("rsm psr.pp;;"::: "memory"); 218 219#if defined(CONFIG_PERFMON) && defined(CONFIG_SMP) 220#if V_AT_LEAST(2, 4, 21) 221 local_cpu_data->pfm_syst_info |= PFM_CPUINFO_SYST_WIDE; 222 local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_DCR_PP; 223 /* FIXME: what 
todo with the 3rd flags PFM_CPUINFO_EXCL_IDLE 0x4 */ 224#else 225 /* disable profiling for everyone else */ 226 local_cpu_data->pfm_syst_wide = 1; 227 local_cpu_data->pfm_dcr_pp = 0; 228#endif 229#endif 230 ia64_set_pmc(0, 0); 231 ia64_srlz_d(); 232} 233 234 235static int 236pmu_setup_all(void) 237{ 238 239 /* This would be a great place to reserve all cpus with 240 * some sort of call to perfmonctl (something like the 241 * CREATE_CONTEXT command). The current interface to 242 * perfmonctl wants to be called from a different task id 243 * for each CPU to be set up (and doesn't allow calls from 244 * modules. 245 */ 246 247 /* disable profiling with the psr.pp bit */ 248 if ((smp_call_function(disable_psr, NULL, 0, 1))) 249 return -EFAULT; 250 251 disable_psr(NULL); 252 253 /* now I've reserved the PMUs and they should be quiet */ 254 255 if ((smp_call_function(pmu_setup, NULL, 0, 1))) 256 return -EFAULT; 257 258 pmu_setup(NULL); 259 return 0; 260} 261 262 263#ifndef CONFIG_SMP 264/* from linux/arch/ia64/kernel/perfmon.c */ 265/* 266 * Originaly Written by Ganesh Venkitachalam, IBM Corp. 267 * Copyright (C) 1999 Ganesh Venkitachalam <venkitac@us.ibm.com> 268 * 269 * Modifications by Stephane Eranian, Hewlett-Packard Co. 270 * Modifications by David Mosberger-Tang, Hewlett-Packard Co. 271 * 272 * Copyright (C) 1999-2002 Hewlett Packard Co 273 * Stephane Eranian <eranian@hpl.hp.com> 274 * David Mosberger-Tang <davidm@hpl.hp.com> 275 */ 276 277/* 278 * On UP kernels, we do not need to constantly set the psr.pp bit 279 * when a task is scheduled. The psr.pp bit can only be changed in 280 * the kernel because of a user request. Given we are on a UP non preeemptive 281 * kernel we know that no other task is running, so we cna simply update their 282 * psr.pp from their saved state. There is this no impact on the context switch 283 * code compared to the SMP case. 
284 */ 285static void 286op_tasklist_toggle_pp(unsigned int val) 287{ 288 struct task_struct * p; 289 struct pt_regs * regs; 290 291 read_lock(&tasklist_lock); 292 293 for_each_task(p) { 294 regs = (struct pt_regs *)((unsigned long) p + IA64_STK_OFFSET); 295 296 /* 297 * position on pt_regs saved on stack on 1st entry into the kernel 298 */ 299 regs--; 300 301 /* 302 * update psr.pp 303 */ 304 ia64_psr(regs)->pp = val; 305 } 306 read_unlock(&tasklist_lock); 307} 308#endif 309 310 311static void 312pmu_start(void * info) 313{ 314 struct pt_regs * regs; 315 316 if (info && (*((uint *)info) != op_cpu_id())) 317 return; 318 319 /* printk(KERN_ALERT "oprofile_IA64: pmu_start on cpu %d\n", 320 op_cpu_id()); */ 321 /* The default control register pp value is copied into psr.pp 322 * on an interrupt. This allows interrupt service routines to 323 * be monitored. 324 */ 325 ia64_set_dcr(ia64_get_dcr() | IA64_DCR_PP); 326 327#ifdef CONFIG_PERFMON 328#ifdef CONFIG_SMP 329#if V_AT_LEAST(2, 4, 21) 330 local_cpu_data->pfm_syst_info |= PFM_CPUINFO_SYST_WIDE; 331 local_cpu_data->pfm_syst_info |= PFM_CPUINFO_DCR_PP; 332 /* FIXME: what todo with the 3rd flags PFM_CPUINFO_EXCL_IDLE 0x4 */ 333#else 334 local_cpu_data->pfm_syst_wide = 1; 335 local_cpu_data->pfm_dcr_pp = 1; 336#endif 337#else 338 op_tasklist_toggle_pp(1); 339#endif 340#endif 341 /* set it in my saved state */ 342 regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET); 343 regs--; 344 ia64_psr(regs)->pp = 1; 345 346 /* set it in my current state */ 347 __asm__ __volatile__ ("ssm psr.pp;;"::: "memory"); 348 ia64_srlz_d(); 349} 350 351 352static void 353pmu_stop(void * info) 354{ 355 struct pt_regs * regs; 356 357 if (info && (*((uint *)info) != op_cpu_id())) 358 return; 359 360 /* stop in my current state */ 361 __asm__ __volatile__ ("rsm psr.pp;;"::: "memory"); 362 363 /* disable the dcr pp */ 364 ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP); 365 366#ifdef CONFIG_PERFMON 367#ifdef CONFIG_SMP 368#if 
V_AT_LEAST(2, 4, 21) 369 local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_SYST_WIDE; 370 local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_DCR_PP; 371 /* FIXME: what todo with the 3rd flags PFM_CPUINFO_EXCL_IDLE 0x4 */ 372#else 373 local_cpu_data->pfm_syst_wide = 0; 374 local_cpu_data->pfm_dcr_pp = 0; 375#endif 376#else 377 pfm_tasklist_toggle_pp(0); 378#endif 379#endif 380 381 /* disable in my saved state */ 382 regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET); 383 regs--; 384 ia64_psr(regs)->pp = 0; 385} 386 387 388static void 389pmu_select_start(uint cpu) 390{ 391 if (cpu == op_cpu_id()) 392 pmu_start(NULL); 393 else 394 smp_call_function(pmu_start, &cpu, 0, 1); 395} 396 397 398static void 399pmu_select_stop(uint cpu) 400{ 401 if (cpu == op_cpu_id()) 402 pmu_stop(NULL); 403 else 404 smp_call_function(pmu_stop, &cpu, 0, 1); 405} 406 407 408static void 409pmu_start_all(void) 410{ 411 int cpu, i; 412 413 for (cpu=0; cpu < smp_num_cpus; cpu++) { 414 struct _oprof_data * data = &oprof_data[cpu]; 415 416 for (i = 0 ; i < op_nr_counters ; ++i) { 417 if (sysctl.ctr[i].enabled) { 418 data->ctr_count[i] = sysctl.ctr[i].count; 419 } else { 420 data->ctr_count[i] = 0; 421 } 422 } 423 } 424 425 if (!install_handler()) { 426 smp_call_function(pmu_start, NULL, 0, 1); 427 pmu_start(NULL); 428 } 429 /* FIXME need some way to fail here */; 430} 431 432 433static void 434pmu_stop_all(void) 435{ 436 smp_call_function(pmu_stop, NULL, 0, 1); 437 pmu_stop(NULL); 438 restore_handler(); 439} 440 441 442static int 443pmu_check_params(void) 444{ 445 int i; 446 int enabled = 0; 447 448 for (i = 0; i < op_nr_counters ; i++) { 449 if (!sysctl.ctr[i].enabled) 450 continue; 451 452 enabled = 1; 453 454 if (!sysctl.ctr[i].user && !sysctl.ctr[i].kernel) { 455 printk(KERN_ERR "oprofile: neither kernel nor user " 456 "set for counter %d\n", i); 457 return -EINVAL; 458 } 459 460 if (check_range(sysctl.ctr[i].count, 1, OP_MAX_PERF_COUNT, 461 "ctr count value %d not in range (%d 
%ld)\n")) 462 return -EINVAL; 463 } 464 465 if (!enabled) { 466 printk(KERN_ERR "oprofile: no counters have been enabled.\n"); 467 return -EINVAL; 468 } 469 470 return 0; 471} 472 473 474static struct op_msrs cpu_msrs[NR_CPUS]; 475 476 477static void free_msr_group(struct op_msr_group * group) 478{ 479 if (group->addrs) 480 kfree(group->addrs); 481 if (group->saved) 482 kfree(group->saved); 483 group->addrs = NULL; 484 group->saved = NULL; 485} 486 487 488static void pmu_save_registers(void * dummy) 489{ 490 uint i; 491 uint const cpu = op_cpu_id(); 492 struct op_msr_group * counters = &cpu_msrs[cpu].counters; 493 struct op_msr_group * controls = &cpu_msrs[cpu].controls; 494 495 counters->addrs = NULL; 496 counters->saved = NULL; 497 controls->addrs = NULL; 498 controls->saved = NULL; 499 500 counters->saved = kmalloc( 501 op_nr_counters * sizeof(struct op_saved_msr), GFP_KERNEL); 502 if (!counters->saved) 503 goto fault; 504 505 controls->saved = kmalloc( 506 op_nr_counters * sizeof(struct op_saved_msr), GFP_KERNEL); 507 if (!controls->saved) 508 goto fault; 509 510 for (i = 0; i < op_nr_counters; ++i) { 511 controls->saved[i].low = get_pmc(i); 512 counters->saved[i].low = get_pmd(i); 513 } 514 return; 515 516fault: 517 free_msr_group(counters); 518 free_msr_group(controls); 519} 520 521 522static void pmu_restore_registers(void * dummy) 523{ 524 uint i; 525 uint const cpu = op_cpu_id(); 526 struct op_msr_group * counters = &cpu_msrs[cpu].counters; 527 struct op_msr_group * controls = &cpu_msrs[cpu].controls; 528 529 for (i = 0; i < op_nr_counters; ++i) { 530 set_pmc(controls->saved[i].low, i); 531 set_pmd(counters->saved[i].low, i); 532 } 533 534 free_msr_group(counters); 535 free_msr_group(controls); 536} 537 538 539 540static int 541pmu_init(void) 542{ 543 int err = 0; 544 545 /* figure out processor type configure number of bits in pmd 546 and number of counters */ 547 switch (get_cpu_type()) { 548 case CPU_IA64_1: 549 pmd_mask = IA64_1_PMD_MASK_VAL; break; 
550 case CPU_IA64_2: 551 case CPU_IA64: 552 pmd_mask = IA64_2_PMD_MASK_VAL; break; 553 default: 554 err = -EIO; break; 555 } 556 557 op_nr_counters = 4; 558 559 if ((err = smp_call_function(pmu_save_registers, NULL, 0, 1))) 560 goto out; 561 562 pmu_save_registers(NULL); 563 564out: 565 return err; 566} 567 568 569static void 570pmu_deinit(void) 571{ 572 smp_call_function(pmu_restore_registers, NULL, 0, 1); 573 pmu_restore_registers(NULL); 574} 575 576 577static char * names[] = { "0", "1", "2", "3", }; 578 579 580static int 581pmu_add_sysctls(ctl_table * next) 582{ 583 ctl_table * start = next; 584 ctl_table * tab; 585 int i, j; 586 587 for (i=0; i < op_nr_counters; i++) { 588 next->ctl_name = 1; 589 next->procname = names[i]; 590 next->mode = 0700; 591 592 if (!(tab = kmalloc(sizeof(ctl_table)*7, GFP_KERNEL))) 593 goto cleanup; 594 595 next->child = tab; 596 597 memset(tab, 0, sizeof(ctl_table)*7); 598 tab[0] = ((ctl_table) { 1, "enabled", &sysctl_parms.ctr[i].enabled, sizeof(int), 0600, NULL, lproc_dointvec, NULL, }); 599 tab[1] = ((ctl_table) { 1, "event", &sysctl_parms.ctr[i].event, sizeof(int), 0600, NULL, lproc_dointvec, NULL, }); 600 tab[2] = ((ctl_table) { 1, "count", &sysctl_parms.ctr[i].count, sizeof(int), 0600, NULL, lproc_dointvec, NULL, }); 601 tab[3] = ((ctl_table) { 1, "unit_mask", &sysctl_parms.ctr[i].unit_mask, sizeof(int), 0600, NULL, lproc_dointvec, NULL, }); 602 tab[4] = ((ctl_table) { 1, "kernel", &sysctl_parms.ctr[i].kernel, sizeof(int), 0600, NULL, lproc_dointvec, NULL, }); 603 tab[5] = ((ctl_table) { 1, "user", &sysctl_parms.ctr[i].user, sizeof(int), 0600, NULL, lproc_dointvec, NULL, }); 604 next++; 605 } 606 607 return 0; 608 609cleanup: 610 next = start; 611 for (j = 0; j < i; j++) { 612 kfree(next->child); 613 next++; 614 } 615 return -EFAULT; 616} 617 618 619static void pmu_remove_sysctls(ctl_table * next) 620{ 621 int ii; 622 623 for (ii=0; ii < op_nr_counters; ii++) { 624 kfree(next->child); 625 next++; 626 } 627} 628 629 630struct 
op_int_operations op_nmi_ops = { 631 init: pmu_init, 632 deinit: pmu_deinit, 633 add_sysctls: pmu_add_sysctls, 634 remove_sysctls: pmu_remove_sysctls, 635 check_params: pmu_check_params, 636 setup: pmu_setup_all, 637 start: pmu_start_all, 638 stop: pmu_stop_all, 639 start_cpu: pmu_select_start, 640 stop_cpu: pmu_select_stop, 641}; 642 643 644struct op_int_operations const * op_int_interface() 645{ 646 return &op_nmi_ops; 647} 648 649/* Need this dummy so module/oprofile.c links */ 650struct op_int_operations op_rtc_ops = { 651 init: NULL, 652 deinit: NULL, 653 add_sysctls: NULL, 654 remove_sysctls: NULL, 655 check_params: NULL, 656 setup: NULL, 657 start: NULL, 658 stop: NULL, 659 start_cpu: NULL, 660 stop_cpu: NULL, 661}; 662