perf_event.c revision cb2d8b342aa084d1f3ac29966245dec9163677fb
#undef DEBUG

/*
 * ARM performance counter support.
 *
 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
 * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
 *
 * This code is based on the sparc64 perf event code, which is in turn based
 * on the x86 code. Callchain code is based on the ARM OProfile backtrace
 * code.
 */
#define pr_fmt(fmt) "hw perfevents: " fmt

#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/uaccess.h>

#include <asm/irq_regs.h>
#include <asm/pmu.h>
#include <asm/stacktrace.h>

static int
armpmu_map_cache_event(const unsigned (*cache_map)
				      [PERF_COUNT_HW_CACHE_MAX]
				      [PERF_COUNT_HW_CACHE_OP_MAX]
				      [PERF_COUNT_HW_CACHE_RESULT_MAX],
		       u64 config)
{
	unsigned int cache_type, cache_op, cache_result, ret;

	cache_type = (config >> 0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;

	cache_op = (config >> 8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];

	if (ret == CACHE_OP_UNSUPPORTED)
		return -ENOENT;

	return ret;
}

static int
armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
{
	int mapping = (*event_map)[config];
	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
}

static int
armpmu_map_raw_event(u32 raw_event_mask, u64 config)
{
	return (int)(config & raw_event_mask);
}

int
armpmu_map_event(struct perf_event *event,
		 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
		 const unsigned (*cache_map)
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX],
		 u32 raw_event_mask)
{
	u64 config = event->attr.config;

	switch (event->attr.type) {
	case PERF_TYPE_HARDWARE:
		return armpmu_map_hw_event(event_map, config);
	case PERF_TYPE_HW_CACHE:
		return armpmu_map_cache_event(cache_map, config);
	case PERF_TYPE_RAW:
		return armpmu_map_raw_event(raw_event_mask, config);
	}

	return -ENOENT;
}
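
/*
 * For illustration: the perf ABI packs a PERF_TYPE_HW_CACHE config as
 * (type | op << 8 | result << 16), which is what armpmu_map_cache_event()
 * unpacks above. A caller requesting an L1 data-cache read-miss event would
 * therefore set up its attributes roughly as follows:
 *
 *	attr.type   = PERF_TYPE_HW_CACHE;
 *	attr.config = PERF_COUNT_HW_CACHE_L1D |
 *		      (PERF_COUNT_HW_CACHE_OP_READ << 8) |
 *		      (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
 *
 * and the cache map resolves that triple to the CPU-specific event number
 * (or CACHE_OP_UNSUPPORTED if the part cannot count it).
 */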

int armpmu_event_set_period(struct perf_event *event)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	s64 left = local64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	/* The period may have been changed by PERF_EVENT_IOC_PERIOD */
	if (unlikely(period != hwc->last_period))
		left = period - (hwc->last_period - left);

	if (unlikely(left <= -period)) {
		left = period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		local64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (left > (s64)armpmu->max_period)
		left = armpmu->max_period;

	local64_set(&hwc->prev_count, (u64)-left);

	armpmu->write_counter(event, (u64)(-left) & 0xffffffff);

	perf_event_update_userpage(event);

	return ret;
}

u64 armpmu_event_update(struct perf_event *event)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	u64 delta, prev_raw_count, new_raw_count;

again:
	prev_raw_count = local64_read(&hwc->prev_count);
	new_raw_count = armpmu->read_counter(event);

	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
			    new_raw_count) != prev_raw_count)
		goto again;

	delta = (new_raw_count - prev_raw_count) & armpmu->max_period;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return new_raw_count;
}

static void
armpmu_read(struct perf_event *event)
{
	armpmu_event_update(event);
}

static void
armpmu_stop(struct perf_event *event, int flags)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	/*
	 * ARM pmu always has to update the counter, so ignore
	 * PERF_EF_UPDATE, see comments in armpmu_start().
	 */
	if (!(hwc->state & PERF_HES_STOPPED)) {
		armpmu->disable(event);
		armpmu_event_update(event);
		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
	}
}

static void armpmu_start(struct perf_event *event, int flags)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;

	/*
	 * ARM pmu always has to reprogram the period, so ignore
	 * PERF_EF_RELOAD, see the comment below.
	 */
	if (flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

	hwc->state = 0;
	/*
	 * Set the period again. Some counters can't be stopped, so when we
	 * were stopped we simply disabled the IRQ source and the counter
	 * may have been left counting. If we don't do this step then we may
	 * get an interrupt too soon or *way* too late if the overflow has
	 * happened since disabling.
	 */
	armpmu_event_set_period(event);
	armpmu->enable(event);
}
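
/*
 * A worked example of the period arithmetic above (illustrative numbers
 * only): with a 32-bit counter (max_period == 0xffffffff) and a remaining
 * period of 1000 events, armpmu_event_set_period() programs the counter to
 * (u64)-1000 & 0xffffffff == 0xfffffc18, so the overflow interrupt fires
 * after exactly 1000 increments. armpmu_event_update() then recovers the
 * elapsed count as (new_raw_count - prev_raw_count) & max_period, which
 * stays correct even if the counter has wrapped in the meantime.
 */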

static void
armpmu_del(struct perf_event *event, int flags)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	armpmu_stop(event, PERF_EF_UPDATE);
	hw_events->events[idx] = NULL;
	clear_bit(idx, hw_events->used_mask);

	perf_event_update_userpage(event);
}

static int
armpmu_add(struct perf_event *event, int flags)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
	struct hw_perf_event *hwc = &event->hw;
	int idx;
	int err = 0;

	perf_pmu_disable(event->pmu);

	/* If we don't have space for the counter then finish early. */
	idx = armpmu->get_event_idx(hw_events, event);
	if (idx < 0) {
		err = idx;
		goto out;
	}

	/*
	 * If there is an event in the counter we are going to use then make
	 * sure it is disabled.
	 */
	event->hw.idx = idx;
	armpmu->disable(event);
	hw_events->events[idx] = event;

	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
	if (flags & PERF_EF_START)
		armpmu_start(event, PERF_EF_RELOAD);

	/* Propagate our changes to the userspace mapping. */
	perf_event_update_userpage(event);

out:
	perf_pmu_enable(event->pmu);
	return err;
}

static int
validate_event(struct pmu_hw_events *hw_events,
	       struct perf_event *event)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	struct pmu *leader_pmu = event->group_leader->pmu;

	if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF)
		return 1;

	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
		return 1;

	return armpmu->get_event_idx(hw_events, event) >= 0;
}

static int
validate_group(struct perf_event *event)
{
	struct perf_event *sibling, *leader = event->group_leader;
	struct pmu_hw_events fake_pmu;
	DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS);

	/*
	 * Initialise the fake PMU. We only need to populate the
	 * used_mask for the purposes of validation.
	 */
	memset(fake_used_mask, 0, sizeof(fake_used_mask));
	fake_pmu.used_mask = fake_used_mask;

	if (!validate_event(&fake_pmu, leader))
		return -EINVAL;

	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
		if (!validate_event(&fake_pmu, sibling))
			return -EINVAL;
	}

	if (!validate_event(&fake_pmu, event))
		return -EINVAL;

	return 0;
}
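
/*
 * Note on the checks above: validate_group() dry-runs get_event_idx()
 * against a scratch used_mask, so a group that needs more hardware counters
 * than the PMU provides is rejected with -EINVAL when the event is created,
 * instead of silently failing to schedule later. (Illustrative case: on a
 * part with four event counters, a single group of five or more counter
 * events cannot be placed and is refused here.)
 */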

static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
{
	struct arm_pmu *armpmu = (struct arm_pmu *) dev;
	struct platform_device *plat_device = armpmu->plat_device;
	struct arm_pmu_platdata *plat = dev_get_platdata(&plat_device->dev);

	if (plat && plat->handle_irq)
		return plat->handle_irq(irq, dev, armpmu->handle_irq);
	else
		return armpmu->handle_irq(irq, dev);
}

static void
armpmu_release_hardware(struct arm_pmu *armpmu)
{
	armpmu->free_irq(armpmu);
	pm_runtime_put_sync(&armpmu->plat_device->dev);
}

static int
armpmu_reserve_hardware(struct arm_pmu *armpmu)
{
	int err;
	struct platform_device *pmu_device = armpmu->plat_device;

	if (!pmu_device)
		return -ENODEV;

	pm_runtime_get_sync(&pmu_device->dev);
	err = armpmu->request_irq(armpmu, armpmu_dispatch_irq);
	if (err) {
		armpmu_release_hardware(armpmu);
		return err;
	}

	return 0;
}

static void
hw_perf_event_destroy(struct perf_event *event)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	atomic_t *active_events = &armpmu->active_events;
	struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;

	if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
		armpmu_release_hardware(armpmu);
		mutex_unlock(pmu_reserve_mutex);
	}
}

static int
event_requires_mode_exclusion(struct perf_event_attr *attr)
{
	return attr->exclude_idle || attr->exclude_user ||
	       attr->exclude_kernel || attr->exclude_hv;
}

static int
__hw_perf_event_init(struct perf_event *event)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	int mapping;

	mapping = armpmu->map_event(event);

	if (mapping < 0) {
		pr_debug("event %x:%llx not supported\n", event->attr.type,
			 event->attr.config);
		return mapping;
	}

	/*
	 * We don't assign an index until we actually place the event onto
	 * hardware. Use -1 to signify that we haven't decided where to put it
	 * yet. For SMP systems, each core has its own PMU so we can't do any
	 * clever allocation or constraints checking at this point.
	 */
	hwc->idx		= -1;
	hwc->config_base	= 0;
	hwc->config		= 0;
	hwc->event_base		= 0;

	/*
	 * Check whether we need to exclude the counter from certain modes.
	 */
	if ((!armpmu->set_event_filter ||
	     armpmu->set_event_filter(hwc, &event->attr)) &&
	     event_requires_mode_exclusion(&event->attr)) {
		pr_debug("ARM performance counters do not support "
			 "mode exclusion\n");
		return -EOPNOTSUPP;
	}

	/*
	 * Store the event encoding into the config_base field.
	 */
	hwc->config_base |= (unsigned long)mapping;

	if (!hwc->sample_period) {
		/*
		 * For non-sampling runs, limit the sample_period to half
		 * of the counter width. That way, the new counter value
		 * is far less likely to overtake the previous one unless
		 * you have some serious IRQ latency issues.
		 */
		hwc->sample_period  = armpmu->max_period >> 1;
		hwc->last_period    = hwc->sample_period;
		local64_set(&hwc->period_left, hwc->sample_period);
	}

	if (event->group_leader != event) {
		if (validate_group(event) != 0)
			return -EINVAL;
	}

	return 0;
}

static int armpmu_event_init(struct perf_event *event)
{
	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
	int err = 0;
	atomic_t *active_events = &armpmu->active_events;

	/* does not support taken branch sampling */
	if (has_branch_stack(event))
		return -EOPNOTSUPP;

	if (armpmu->map_event(event) == -ENOENT)
		return -ENOENT;

	event->destroy = hw_perf_event_destroy;

	if (!atomic_inc_not_zero(active_events)) {
		mutex_lock(&armpmu->reserve_mutex);
		if (atomic_read(active_events) == 0)
			err = armpmu_reserve_hardware(armpmu);

		if (!err)
			atomic_inc(active_events);
		mutex_unlock(&armpmu->reserve_mutex);
	}

	if (err)
		return err;

	err = __hw_perf_event_init(event);
	if (err)
		hw_perf_event_destroy(event);

	return err;
}

static void armpmu_enable(struct pmu *pmu)
{
	struct arm_pmu *armpmu = to_arm_pmu(pmu);
	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);

	if (enabled)
		armpmu->start(armpmu);
}

static void armpmu_disable(struct pmu *pmu)
{
	struct arm_pmu *armpmu = to_arm_pmu(pmu);
	armpmu->stop(armpmu);
}

#ifdef CONFIG_PM_RUNTIME
static int armpmu_runtime_resume(struct device *dev)
{
	struct arm_pmu_platdata *plat = dev_get_platdata(dev);

	if (plat && plat->runtime_resume)
		return plat->runtime_resume(dev);

	return 0;
}

static int armpmu_runtime_suspend(struct device *dev)
{
	struct arm_pmu_platdata *plat = dev_get_platdata(dev);

	if (plat && plat->runtime_suspend)
		return plat->runtime_suspend(dev);

	return 0;
}
#endif

const struct dev_pm_ops armpmu_dev_pm_ops = {
	SET_RUNTIME_PM_OPS(armpmu_runtime_suspend, armpmu_runtime_resume, NULL)
};

static void armpmu_init(struct arm_pmu *armpmu)
{
	atomic_set(&armpmu->active_events, 0);
	mutex_init(&armpmu->reserve_mutex);

	armpmu->pmu = (struct pmu) {
		.pmu_enable	= armpmu_enable,
		.pmu_disable	= armpmu_disable,
		.event_init	= armpmu_event_init,
		.add		= armpmu_add,
		.del		= armpmu_del,
		.start		= armpmu_start,
		.stop		= armpmu_stop,
		.read		= armpmu_read,
	};
}

int armpmu_register(struct arm_pmu *armpmu, int type)
{
	armpmu_init(armpmu);
	pm_runtime_enable(&armpmu->plat_device->dev);
	pr_info("enabled with %s PMU driver, %d counters available\n",
			armpmu->name, armpmu->num_events);
	return perf_pmu_register(&armpmu->pmu, armpmu->name, type);
}
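
/*
 * armpmu_register() is the hook for the CPU-specific backends: they fill in
 * a struct arm_pmu with their low-level callbacks and then register it. A
 * rough sketch of the shape of such a backend (the my_pmu_* helper names are
 * placeholders, not real functions; the CPU glue code also supplies
 * get_hw_events/request_irq/free_irq and sets plat_device before
 * registering, since armpmu_register() enables runtime PM on that device):
 *
 *	static struct arm_pmu my_pmu = {
 *		.name		= "my_pmu",
 *		.handle_irq	= my_pmu_handle_irq,
 *		.enable		= my_pmu_enable_event,
 *		.disable	= my_pmu_disable_event,
 *		.read_counter	= my_pmu_read_counter,
 *		.write_counter	= my_pmu_write_counter,
 *		.get_event_idx	= my_pmu_get_event_idx,
 *		.start		= my_pmu_start,
 *		.stop		= my_pmu_stop,
 *		.map_event	= my_pmu_map_event,
 *		.num_events	= 4,
 *		.max_period	= (1LLU << 32) - 1,
 *	};
 *
 *	armpmu_register(&my_pmu, PERF_TYPE_RAW);
 *
 * The generic pmu callbacks installed by armpmu_init() then drive these
 * hooks from the core perf code.
 */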

/*
 * Callchain handling code.
 */

/*
 * The registers we're interested in are at the end of the variable
 * length saved register structure. The fp points at the end of this
 * structure so the address of this struct is:
 * (struct frame_tail *)(xxx->fp)-1
 *
 * This code has been adapted from the ARM OProfile support.
 */
struct frame_tail {
	struct frame_tail __user *fp;
	unsigned long sp;
	unsigned long lr;
} __attribute__((packed));

/*
 * Get the return address for a single stackframe and return a pointer to the
 * next frame tail.
 */
static struct frame_tail __user *
user_backtrace(struct frame_tail __user *tail,
	       struct perf_callchain_entry *entry)
{
	struct frame_tail buftail;

	/* Also check accessibility of one struct frame_tail beyond */
	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
		return NULL;
	if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
		return NULL;

	perf_callchain_store(entry, buftail.lr);

	/*
	 * Frame pointers should strictly progress back up the stack
	 * (towards higher addresses).
	 */
	if (tail + 1 >= buftail.fp)
		return NULL;

	return buftail.fp - 1;
}

void
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
	struct frame_tail __user *tail;

	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
		/* We don't support guest os callchain now */
		return;
	}

	tail = (struct frame_tail __user *)regs->ARM_fp - 1;

	while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
	       tail && !((unsigned long)tail & 0x3))
		tail = user_backtrace(tail, entry);
}

/*
 * Gets called by walk_stackframe() for every stackframe. This will be called
 * whilst unwinding the stackframe and is like a subroutine return so we use
 * the PC.
 */
static int
callchain_trace(struct stackframe *fr,
		void *data)
{
	struct perf_callchain_entry *entry = data;
	perf_callchain_store(entry, fr->pc);
	return 0;
}

void
perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
	struct stackframe fr;

	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
		/* We don't support guest os callchain now */
		return;
	}

	fr.fp = regs->ARM_fp;
	fr.sp = regs->ARM_sp;
	fr.lr = regs->ARM_lr;
	fr.pc = regs->ARM_pc;
	walk_stackframe(&fr, callchain_trace, entry);
}

unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
		return perf_guest_cbs->get_guest_ip();

	return instruction_pointer(regs);
}

unsigned long perf_misc_flags(struct pt_regs *regs)
{
	int misc = 0;

	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
		if (perf_guest_cbs->is_user_mode())
			misc |= PERF_RECORD_MISC_GUEST_USER;
		else
			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
	} else {
		if (user_mode(regs))
			misc |= PERF_RECORD_MISC_USER;
		else
			misc |= PERF_RECORD_MISC_KERNEL;
	}

	return misc;
}