perf_event.c revision 57ce9bb39b476accf8fba6e16aea67ed76ea523d
#undef DEBUG

/*
 * ARM performance counter support.
 *
 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
 * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
 *
 * This code is based on the sparc64 perf event code, which is in turn based
 * on the x86 code. Callchain code is based on the ARM OProfile backtrace
 * code.
 */
#define pr_fmt(fmt) "hw perfevents: " fmt

#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>

#include <asm/cputype.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/pmu.h>
#include <asm/stacktrace.h>

static struct platform_device *pmu_device;

/*
 * Hardware lock to serialize accesses to PMU registers. Needed for the
 * read/modify/write sequences.
 */
static DEFINE_RAW_SPINLOCK(pmu_lock);

/*
 * ARMv6 supports a maximum of 3 events, starting from index 1. If we add
 * another platform that supports more, we need to increase this to be the
 * largest of all platforms.
 *
 * ARMv7 supports up to 32 events:
 *  cycle counter CCNT + 31 event counters CNT0..30.
 *  Cortex-A8 has 1+4 counters, Cortex-A9 has 1+6 counters.
 */
#define ARMPMU_MAX_HWEVENTS		33

/* The events for a given CPU. */
struct cpu_hw_events {
	/*
	 * The events that are active on the CPU for the given index. Index 0
	 * is reserved.
	 */
	struct perf_event	*events[ARMPMU_MAX_HWEVENTS];

	/*
	 * A 1 bit for an index indicates that the counter is being used for
	 * an event. A 0 means that the counter can be used.
	 */
	unsigned long		used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];

	/*
	 * A 1 bit for an index indicates that the counter is actively being
	 * used.
	 */
	unsigned long		active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
};
static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
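
/*
 * Per-implementation PMU descriptor. Each supported core (xscale, ARMv6,
 * ARMv7 - see the #includes near the bottom of this file) provides an init
 * function returning one of these, with its IRQ handler, counter accessors
 * and event maps filled in; init_hw_perf_events() picks one at boot based
 * on the CPUID.
 */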
struct arm_pmu {
	enum arm_perf_pmu_ids id;
	const char	*name;
	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
	void		(*enable)(struct hw_perf_event *evt, int idx);
	void		(*disable)(struct hw_perf_event *evt, int idx);
	int		(*get_event_idx)(struct cpu_hw_events *cpuc,
					 struct hw_perf_event *hwc);
	u32		(*read_counter)(int idx);
	void		(*write_counter)(int idx, u32 val);
	void		(*start)(void);
	void		(*stop)(void);
	void		(*reset)(void *);
	const unsigned	(*cache_map)[PERF_COUNT_HW_CACHE_MAX]
				    [PERF_COUNT_HW_CACHE_OP_MAX]
				    [PERF_COUNT_HW_CACHE_RESULT_MAX];
	const unsigned	(*event_map)[PERF_COUNT_HW_MAX];
	u32		raw_event_mask;
	int		num_events;
	u64		max_period;
};

/* Set at runtime when we know what CPU type we are. */
static const struct arm_pmu *armpmu;

enum arm_perf_pmu_ids
armpmu_get_pmu_id(void)
{
	int id = -ENODEV;

	if (armpmu != NULL)
		id = armpmu->id;

	return id;
}
EXPORT_SYMBOL_GPL(armpmu_get_pmu_id);

int
armpmu_get_max_events(void)
{
	int max_events = 0;

	if (armpmu != NULL)
		max_events = armpmu->num_events;

	return max_events;
}
EXPORT_SYMBOL_GPL(armpmu_get_max_events);

int perf_num_counters(void)
{
	return armpmu_get_max_events();
}
EXPORT_SYMBOL_GPL(perf_num_counters);

#define HW_OP_UNSUPPORTED		0xFFFF

#define C(_x) \
	PERF_COUNT_HW_CACHE_##_x

#define CACHE_OP_UNSUPPORTED		0xFFFF

static int
armpmu_map_cache_event(u64 config)
{
	unsigned int cache_type, cache_op, cache_result, ret;

	cache_type = (config >> 0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;

	cache_op = (config >> 8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result];

	if (ret == CACHE_OP_UNSUPPORTED)
		return -ENOENT;

	return ret;
}

static int
armpmu_map_event(u64 config)
{
	int mapping = (*armpmu->event_map)[config];
	return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping;
}

static int
armpmu_map_raw_event(u64 config)
{
	return (int)(config & armpmu->raw_event_mask);
}

static int
armpmu_event_set_period(struct perf_event *event,
			struct hw_perf_event *hwc,
			int idx)
{
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (left > (s64)armpmu->max_period)
		left = armpmu->max_period;

	local64_set(&hwc->prev_count, (u64)-left);

	armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);

	perf_event_update_userpage(event);

	return ret;
}
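
/*
 * Note on the counting scheme: armpmu_event_set_period() above preloads a
 * counter with the two's complement of the events left in the period, so the
 * counter overflows (and raises its interrupt) after 'left' increments. For
 * example, with a 32-bit counter (max_period 0xffffffff) and left == 100,
 * the counter is written with 0xffffff9c. armpmu_event_update() below then
 * computes the delta between the previous and current raw values; the
 * cmpxchg() retry loop protects against racing with the overflow interrupt,
 * and the overflow case adds max_period + 1 to account for the wrap through
 * zero.
 */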
static u64
armpmu_event_update(struct perf_event *event,
		    struct hw_perf_event *hwc,
		    int idx, int overflow)
{
	u64 delta, prev_raw_count, new_raw_count;

again:
	prev_raw_count = local64_read(&hwc->prev_count);
	new_raw_count = armpmu->read_counter(idx);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			    new_raw_count) != prev_raw_count)
		goto again;

	new_raw_count &= armpmu->max_period;
	prev_raw_count &= armpmu->max_period;

	if (overflow)
		delta = armpmu->max_period - prev_raw_count + new_raw_count + 1;
	else
		delta = new_raw_count - prev_raw_count;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return new_raw_count;
}

static void
armpmu_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/* Don't read disabled counters! */
	if (hwc->idx < 0)
		return;

	armpmu_event_update(event, hwc, hwc->idx, 0);
}

static void
armpmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (!armpmu)
		return;

	/*
	 * ARM pmu always has to update the counter, so ignore
	 * PERF_EF_UPDATE, see comments in armpmu_start().
	 */
	if (!(hwc->state & PERF_HES_STOPPED)) {
		armpmu->disable(hwc, hwc->idx);
		barrier(); /* why? */
		armpmu_event_update(event, hwc, hwc->idx, 0);
		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
	}
}

static void
armpmu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (!armpmu)
		return;

	/*
	 * ARM pmu always has to reprogram the period, so ignore
	 * PERF_EF_RELOAD, see the comment below.
	 */
	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

	hwc->state = 0;
	/*
	 * Set the period again. Some counters can't be stopped, so when we
	 * were stopped we simply disabled the IRQ source and the counter
	 * may have been left counting. If we don't do this step then we may
	 * get an interrupt too soon or *way* too late if the overflow has
	 * happened since disabling.
	 */
	armpmu_event_set_period(event, hwc, hwc->idx);
	armpmu->enable(hwc, hwc->idx);
}

static void
armpmu_del(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	WARN_ON(idx < 0);

	clear_bit(idx, cpuc->active_mask);
	armpmu_stop(event, PERF_EF_UPDATE);
	cpuc->events[idx] = NULL;
	clear_bit(idx, cpuc->used_mask);

	perf_event_update_userpage(event);
}

static int
armpmu_add(struct perf_event *event, int flags)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx;
	int err = 0;

	perf_pmu_disable(event->pmu);

	/* If we don't have a space for the counter then finish early. */
	idx = armpmu->get_event_idx(cpuc, hwc);
	if (idx < 0) {
		err = idx;
		goto out;
	}

	/*
	 * If there is an event in the counter we are going to use then make
	 * sure it is disabled.
	 */
	event->hw.idx = idx;
	armpmu->disable(hwc, idx);
	cpuc->events[idx] = event;
	set_bit(idx, cpuc->active_mask);

	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	if (flags & PERF_EF_START)
		armpmu_start(event, PERF_EF_RELOAD);

	/* Propagate our changes to the userspace mapping. */
	perf_event_update_userpage(event);

out:
	perf_pmu_enable(event->pmu);
	return err;
}

static struct pmu pmu;
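
/*
 * Group validation: before accepting an event that is part of a group,
 * __hw_perf_event_init() schedules the whole group into a scratch
 * cpu_hw_events ("fake_pmu") using the same get_event_idx() callback that
 * armpmu_add() uses. If every member can be given a counter at the same
 * time the group is accepted, otherwise event_init fails with -EINVAL.
 */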
static int
validate_event(struct cpu_hw_events *cpuc,
	       struct perf_event *event)
{
	struct hw_perf_event fake_event = event->hw;

	if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF)
		return 1;

	return armpmu->get_event_idx(cpuc, &fake_event) >= 0;
}

static int
validate_group(struct perf_event *event)
{
	struct perf_event *sibling, *leader = event->group_leader;
	struct cpu_hw_events fake_pmu;

	memset(&fake_pmu, 0, sizeof(fake_pmu));

	if (!validate_event(&fake_pmu, leader))
		return -ENOSPC;

	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
		if (!validate_event(&fake_pmu, sibling))
			return -ENOSPC;
	}

	if (!validate_event(&fake_pmu, event))
		return -ENOSPC;

	return 0;
}

static irqreturn_t armpmu_platform_irq(int irq, void *dev)
{
	struct arm_pmu_platdata *plat = dev_get_platdata(&pmu_device->dev);

	return plat->handle_irq(irq, dev, armpmu->handle_irq);
}

static int
armpmu_reserve_hardware(void)
{
	struct arm_pmu_platdata *plat;
	irq_handler_t handle_irq;
	int i, err = -ENODEV, irq;

	pmu_device = reserve_pmu(ARM_PMU_DEVICE_CPU);
	if (IS_ERR(pmu_device)) {
		pr_warning("unable to reserve pmu\n");
		return PTR_ERR(pmu_device);
	}

	init_pmu(ARM_PMU_DEVICE_CPU);

	plat = dev_get_platdata(&pmu_device->dev);
	if (plat && plat->handle_irq)
		handle_irq = armpmu_platform_irq;
	else
		handle_irq = armpmu->handle_irq;

	if (pmu_device->num_resources < 1) {
		pr_err("no irqs for PMUs defined\n");
		return -ENODEV;
	}

	for (i = 0; i < pmu_device->num_resources; ++i) {
		irq = platform_get_irq(pmu_device, i);
		if (irq < 0)
			continue;

		err = request_irq(irq, handle_irq,
				  IRQF_DISABLED | IRQF_NOBALANCING,
				  "armpmu", NULL);
		if (err) {
			pr_warning("unable to request IRQ%d for ARM perf "
				   "counters\n", irq);
			break;
		}
	}

	if (err) {
		for (i = i - 1; i >= 0; --i) {
			irq = platform_get_irq(pmu_device, i);
			if (irq >= 0)
				free_irq(irq, NULL);
		}
		release_pmu(pmu_device);
		pmu_device = NULL;
	}

	return err;
}

static void
armpmu_release_hardware(void)
{
	int i, irq;

	for (i = pmu_device->num_resources - 1; i >= 0; --i) {
		irq = platform_get_irq(pmu_device, i);
		if (irq >= 0)
			free_irq(irq, NULL);
	}
	armpmu->stop();

	release_pmu(pmu_device);
	pmu_device = NULL;
}
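
/*
 * The PMU hardware (the platform device and its IRQs) is claimed lazily:
 * active_events counts the live events on this PMU, the first event to be
 * created reserves the hardware via armpmu_reserve_hardware() and the last
 * one to be destroyed releases it again. pmu_reserve_mutex serialises the
 * 0 <-> 1 transitions.
 */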
static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmu_reserve_mutex);

static void
hw_perf_event_destroy(struct perf_event *event)
{
	if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
		armpmu_release_hardware();
		mutex_unlock(&pmu_reserve_mutex);
	}
}

static int
__hw_perf_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int mapping, err;

	/* Decode the generic type into an ARM event identifier. */
	if (PERF_TYPE_HARDWARE == event->attr.type) {
		mapping = armpmu_map_event(event->attr.config);
	} else if (PERF_TYPE_HW_CACHE == event->attr.type) {
		mapping = armpmu_map_cache_event(event->attr.config);
	} else if (PERF_TYPE_RAW == event->attr.type) {
		mapping = armpmu_map_raw_event(event->attr.config);
	} else {
		pr_debug("event type %x not supported\n", event->attr.type);
		return -EOPNOTSUPP;
	}

	if (mapping < 0) {
		pr_debug("event %x:%llx not supported\n", event->attr.type,
			 event->attr.config);
		return mapping;
	}

	/*
	 * Check whether we need to exclude the counter from certain modes.
	 * The ARM performance counters are on all of the time so if someone
	 * has asked us for some excludes then we have to fail.
	 */
	if (event->attr.exclude_kernel || event->attr.exclude_user ||
	    event->attr.exclude_hv || event->attr.exclude_idle) {
		pr_debug("ARM performance counters do not support "
			 "mode exclusion\n");
		return -EPERM;
	}

	/*
	 * We don't assign an index until we actually place the event onto
	 * hardware. Use -1 to signify that we haven't decided where to put it
	 * yet. For SMP systems, each core has its own PMU so we can't do any
	 * clever allocation or constraints checking at this point.
	 */
	hwc->idx = -1;

	/*
	 * Store the event encoding into the config_base field. config and
	 * event_base are unused as the only 2 things we need to know are
	 * the event mapping and the counter to use. The counter to use is
	 * also the index and the config_base is the event type.
	 */
	hwc->config_base = (unsigned long)mapping;
	hwc->config = 0;
	hwc->event_base = 0;

	if (!hwc->sample_period) {
		hwc->sample_period = armpmu->max_period;
		hwc->last_period = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	}

	err = 0;
	if (event->group_leader != event) {
		err = validate_group(event);
		if (err)
			return -EINVAL;
	}

	return err;
}

static int armpmu_event_init(struct perf_event *event)
{
	int err = 0;

	switch (event->attr.type) {
	case PERF_TYPE_RAW:
	case PERF_TYPE_HARDWARE:
	case PERF_TYPE_HW_CACHE:
		break;

	default:
		return -ENOENT;
	}

	if (!armpmu)
		return -ENODEV;

	event->destroy = hw_perf_event_destroy;

	if (!atomic_inc_not_zero(&active_events)) {
		mutex_lock(&pmu_reserve_mutex);
		if (atomic_read(&active_events) == 0) {
			err = armpmu_reserve_hardware();
		}

		if (!err)
			atomic_inc(&active_events);
		mutex_unlock(&pmu_reserve_mutex);
	}

	if (err)
		return err;

	err = __hw_perf_event_init(event);
	if (err)
		hw_perf_event_destroy(event);

	return err;
}

static void armpmu_enable(struct pmu *pmu)
{
	/* Enable all of the perf events on hardware. */
	int idx;
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!armpmu)
		return;

	for (idx = 0; idx <= armpmu->num_events; ++idx) {
		struct perf_event *event = cpuc->events[idx];

		if (!event)
			continue;

		armpmu->enable(&event->hw, idx);
	}

	armpmu->start();
}

static void armpmu_disable(struct pmu *pmu)
{
	if (armpmu)
		armpmu->stop();
}

static struct pmu pmu = {
	.pmu_enable	= armpmu_enable,
	.pmu_disable	= armpmu_disable,
	.event_init	= armpmu_event_init,
	.add		= armpmu_add,
	.del		= armpmu_del,
	.start		= armpmu_start,
	.stop		= armpmu_stop,
	.read		= armpmu_read,
};

/* Include the PMU-specific implementations. */
#include "perf_event_xscale.c"
#include "perf_event_v6.c"
#include "perf_event_v7.c"

/*
 * Ensure the PMU has sane values out of reset.
 * This requires SMP to be available, so exists as a separate initcall.
 */
static int __init
armpmu_reset(void)
{
	if (armpmu && armpmu->reset)
		return on_each_cpu(armpmu->reset, NULL, 1);
	return 0;
}
arch_initcall(armpmu_reset);
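
/*
 * CPU probing: read_cpuid_id() returns the MIDR, whose bits [31:24] hold
 * the implementer code (0x41 = ARM Ltd, 0x69 = Intel) and bits [15:4] the
 * primary part number; init_hw_perf_events() uses these to pick the PMU
 * driver. The XScale path decodes bits [15:13] instead, which those cores
 * use to encode their generation (xscale1 vs. xscale2 below).
 */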
static int __init
init_hw_perf_events(void)
{
	unsigned long cpuid = read_cpuid_id();
	unsigned long implementor = (cpuid & 0xFF000000) >> 24;
	unsigned long part_number = (cpuid & 0xFFF0);

	/* ARM Ltd CPUs. */
	if (0x41 == implementor) {
		switch (part_number) {
		case 0xB360:	/* ARM1136 */
		case 0xB560:	/* ARM1156 */
		case 0xB760:	/* ARM1176 */
			armpmu = armv6pmu_init();
			break;
		case 0xB020:	/* ARM11mpcore */
			armpmu = armv6mpcore_pmu_init();
			break;
		case 0xC080:	/* Cortex-A8 */
			armpmu = armv7_a8_pmu_init();
			break;
		case 0xC090:	/* Cortex-A9 */
			armpmu = armv7_a9_pmu_init();
			break;
		}
	/* Intel CPUs [xscale]. */
	} else if (0x69 == implementor) {
		part_number = (cpuid >> 13) & 0x7;
		switch (part_number) {
		case 1:
			armpmu = xscale1pmu_init();
			break;
		case 2:
			armpmu = xscale2pmu_init();
			break;
		}
	}

	if (armpmu) {
		pr_info("enabled with %s PMU driver, %d counters available\n",
			armpmu->name, armpmu->num_events);
	} else {
		pr_info("no hardware support available\n");
	}

	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);

	return 0;
}
early_initcall(init_hw_perf_events);

/*
 * Callchain handling code.
 */

/*
 * The registers we're interested in are at the end of the variable
 * length saved register structure. The fp points at the end of this
 * structure so the address of this struct is:
 * (struct frame_tail *)(xxx->fp)-1
 *
 * This code has been adapted from the ARM OProfile support.
 */
struct frame_tail {
	struct frame_tail __user *fp;
	unsigned long sp;
	unsigned long lr;
} __attribute__((packed));

/*
 * Get the return address for a single stackframe and return a pointer to the
 * next frame tail.
 */
static struct frame_tail __user *
user_backtrace(struct frame_tail __user *tail,
	       struct perf_callchain_entry *entry)
{
	struct frame_tail buftail;

	/* Also check accessibility of one struct frame_tail beyond */
	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
		return NULL;
	if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
		return NULL;

	perf_callchain_store(entry, buftail.lr);

	/*
	 * Frame pointers should strictly progress back up the stack
	 * (towards higher addresses).
	 */
	if (tail + 1 >= buftail.fp)
		return NULL;

	return buftail.fp - 1;
}

void
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
	struct frame_tail __user *tail;

	tail = (struct frame_tail __user *)regs->ARM_fp - 1;

	while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
	       tail && !((unsigned long)tail & 0x3))
		tail = user_backtrace(tail, entry);
}

/*
 * Gets called by walk_stackframe() for every stackframe. This will be called
 * whilst unwinding the stackframe and is like a subroutine return so we use
 * the PC.
 */
static int
callchain_trace(struct stackframe *fr,
		void *data)
{
	struct perf_callchain_entry *entry = data;
	perf_callchain_store(entry, fr->pc);
	return 0;
}

void
perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
	struct stackframe fr;

	fr.fp = regs->ARM_fp;
	fr.sp = regs->ARM_sp;
	fr.lr = regs->ARM_lr;
	fr.pc = regs->ARM_pc;
	walk_stackframe(&fr, callchain_trace, entry);
}