1/* 2 * This file implements the perfmon-2 subsystem which is used 3 * to program the IA-64 Performance Monitoring Unit (PMU). 4 * 5 * The initial version of perfmon.c was written by 6 * Ganesh Venkitachalam, IBM Corp. 7 * 8 * Then it was modified for perfmon-1.x by Stephane Eranian and 9 * David Mosberger, Hewlett Packard Co. 10 * 11 * Version Perfmon-2.x is a rewrite of perfmon-1.x 12 * by Stephane Eranian, Hewlett Packard Co. 13 * 14 * Copyright (C) 1999-2005 Hewlett Packard Co 15 * Stephane Eranian <eranian@hpl.hp.com> 16 * David Mosberger-Tang <davidm@hpl.hp.com> 17 * 18 * More information about perfmon available at: 19 * http://www.hpl.hp.com/research/linux/perfmon 20 */ 21 22#include <linux/module.h> 23#include <linux/kernel.h> 24#include <linux/sched.h> 25#include <linux/interrupt.h> 26#include <linux/proc_fs.h> 27#include <linux/seq_file.h> 28#include <linux/init.h> 29#include <linux/vmalloc.h> 30#include <linux/mm.h> 31#include <linux/sysctl.h> 32#include <linux/list.h> 33#include <linux/file.h> 34#include <linux/poll.h> 35#include <linux/vfs.h> 36#include <linux/smp.h> 37#include <linux/pagemap.h> 38#include <linux/mount.h> 39#include <linux/bitops.h> 40#include <linux/capability.h> 41#include <linux/rcupdate.h> 42#include <linux/completion.h> 43#include <linux/tracehook.h> 44#include <linux/slab.h> 45 46#include <asm/errno.h> 47#include <asm/intrinsics.h> 48#include <asm/page.h> 49#include <asm/perfmon.h> 50#include <asm/processor.h> 51#include <asm/signal.h> 52#include <asm/uaccess.h> 53#include <asm/delay.h> 54 55#ifdef CONFIG_PERFMON 56/* 57 * perfmon context state 58 */ 59#define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */ 60#define PFM_CTX_LOADED 2 /* context is loaded onto a task */ 61#define PFM_CTX_MASKED 3 /* context is loaded but monitoring is masked due to overflow */ 62#define PFM_CTX_ZOMBIE 4 /* owner of the context is closing it */ 63 64#define PFM_INVALID_ACTIVATION (~0UL) 65 66#define PFM_NUM_PMC_REGS 64 /* PMC save area for ctxsw */ 67#define PFM_NUM_PMD_REGS 64 /* PMD save area for ctxsw */ 68 69/* 70 * depth of message queue 71 */ 72#define PFM_MAX_MSGS 32 73#define PFM_CTXQ_EMPTY(g) ((g)->ctx_msgq_head == (g)->ctx_msgq_tail) 74 75/* 76 * type of a PMU register (bitmask). 
77 * bitmask structure: 78 * bit0 : register implemented 79 * bit1 : end marker 80 * bit2-3 : reserved 81 * bit4 : pmc has pmc.pm 82 * bit5 : pmc controls a counter (has pmc.oi), pmd is used as counter 83 * bit6-7 : register type 84 * bit8-31: reserved 85 */ 86#define PFM_REG_NOTIMPL 0x0 /* not implemented at all */ 87#define PFM_REG_IMPL 0x1 /* register implemented */ 88#define PFM_REG_END 0x2 /* end marker */ 89#define PFM_REG_MONITOR (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */ 90#define PFM_REG_COUNTING (0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */ 91#define PFM_REG_CONTROL (0x4<<4|PFM_REG_IMPL) /* PMU control register */ 92#define PFM_REG_CONFIG (0x8<<4|PFM_REG_IMPL) /* configuration register */ 93#define PFM_REG_BUFFER (0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */ 94 95#define PMC_IS_LAST(i) (pmu_conf->pmc_desc[i].type & PFM_REG_END) 96#define PMD_IS_LAST(i) (pmu_conf->pmd_desc[i].type & PFM_REG_END) 97 98#define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY) 99 100/* i assumed unsigned */ 101#define PMC_IS_IMPL(i) (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL)) 102#define PMD_IS_IMPL(i) (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL)) 103 104/* XXX: these assume that register i is implemented */ 105#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) 106#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) 107#define PMC_IS_MONITOR(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR) 108#define PMC_IS_CONTROL(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL) 109 110#define PMC_DFL_VAL(i) pmu_conf->pmc_desc[i].default_value 111#define PMC_RSVD_MASK(i) pmu_conf->pmc_desc[i].reserved_mask 112#define PMD_PMD_DEP(i) pmu_conf->pmd_desc[i].dep_pmd[0] 113#define PMC_PMD_DEP(i) pmu_conf->pmc_desc[i].dep_pmd[0] 114 115#define PFM_NUM_IBRS IA64_NUM_DBG_REGS 116#define PFM_NUM_DBRS IA64_NUM_DBG_REGS 117 118#define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0) 119#define CTX_HAS_SMPL(c) ((c)->ctx_fl_is_sampling) 120#define PFM_CTX_TASK(h) (h)->ctx_task 121 122#define PMU_PMC_OI 5 /* position of pmc.oi bit */ 123 124/* XXX: does not support more than 64 PMDs */ 125#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask) 126#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL) 127 128#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask) 129 130#define CTX_USED_IBR(ctx,n) (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64) 131#define CTX_USED_DBR(ctx,n) (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64) 132#define CTX_USES_DBREGS(ctx) (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1) 133#define PFM_CODE_RR 0 /* requesting code range restriction */ 134#define PFM_DATA_RR 1 /* requestion data range restriction */ 135 136#define PFM_CPUINFO_CLEAR(v) pfm_get_cpu_var(pfm_syst_info) &= ~(v) 137#define PFM_CPUINFO_SET(v) pfm_get_cpu_var(pfm_syst_info) |= (v) 138#define PFM_CPUINFO_GET() pfm_get_cpu_var(pfm_syst_info) 139 140#define RDEP(x) (1UL<<(x)) 141 142/* 143 * context protection macros 144 * in SMP: 145 * - we need to protect against CPU concurrency (spin_lock) 146 * - we need to protect against PMU overflow interrupts (local_irq_disable) 147 * in UP: 148 * - we need to protect against PMU overflow interrupts (local_irq_disable) 149 * 150 * spin_lock_irqsave()/spin_unlock_irqrestore(): 151 * in SMP: local_irq_disable + 
spin_lock 152 * in UP : local_irq_disable 153 * 154 * spin_lock()/spin_lock(): 155 * in UP : removed automatically 156 * in SMP: protect against context accesses from other CPU. interrupts 157 * are not masked. This is useful for the PMU interrupt handler 158 * because we know we will not get PMU concurrency in that code. 159 */ 160#define PROTECT_CTX(c, f) \ 161 do { \ 162 DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, task_pid_nr(current))); \ 163 spin_lock_irqsave(&(c)->ctx_lock, f); \ 164 DPRINT(("spinlocked ctx %p by [%d]\n", c, task_pid_nr(current))); \ 165 } while(0) 166 167#define UNPROTECT_CTX(c, f) \ 168 do { \ 169 DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, task_pid_nr(current))); \ 170 spin_unlock_irqrestore(&(c)->ctx_lock, f); \ 171 } while(0) 172 173#define PROTECT_CTX_NOPRINT(c, f) \ 174 do { \ 175 spin_lock_irqsave(&(c)->ctx_lock, f); \ 176 } while(0) 177 178 179#define UNPROTECT_CTX_NOPRINT(c, f) \ 180 do { \ 181 spin_unlock_irqrestore(&(c)->ctx_lock, f); \ 182 } while(0) 183 184 185#define PROTECT_CTX_NOIRQ(c) \ 186 do { \ 187 spin_lock(&(c)->ctx_lock); \ 188 } while(0) 189 190#define UNPROTECT_CTX_NOIRQ(c) \ 191 do { \ 192 spin_unlock(&(c)->ctx_lock); \ 193 } while(0) 194 195 196#ifdef CONFIG_SMP 197 198#define GET_ACTIVATION() pfm_get_cpu_var(pmu_activation_number) 199#define INC_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)++ 200#define SET_ACTIVATION(c) (c)->ctx_last_activation = GET_ACTIVATION() 201 202#else /* !CONFIG_SMP */ 203#define SET_ACTIVATION(t) do {} while(0) 204#define GET_ACTIVATION(t) do {} while(0) 205#define INC_ACTIVATION(t) do {} while(0) 206#endif /* CONFIG_SMP */ 207 208#define SET_PMU_OWNER(t, c) do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0) 209#define GET_PMU_OWNER() pfm_get_cpu_var(pmu_owner) 210#define GET_PMU_CTX() pfm_get_cpu_var(pmu_ctx) 211 212#define LOCK_PFS(g) spin_lock_irqsave(&pfm_sessions.pfs_lock, g) 213#define UNLOCK_PFS(g) spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g) 214 215#define PFM_REG_RETFLAG_SET(flags, val) do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0) 216 217/* 218 * cmp0 must be the value of pmc0 219 */ 220#define PMC0_HAS_OVFL(cmp0) (cmp0 & ~0x1UL) 221 222#define PFMFS_MAGIC 0xa0b4d889 223 224/* 225 * debugging 226 */ 227#define PFM_DEBUGGING 1 228#ifdef PFM_DEBUGGING 229#define DPRINT(a) \ 230 do { \ 231 if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \ 232 } while (0) 233 234#define DPRINT_ovfl(a) \ 235 do { \ 236 if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \ 237 } while (0) 238#endif 239 240/* 241 * 64-bit software counter structure 242 * 243 * the next_reset_type is applied to the next call to pfm_reset_regs() 244 */ 245typedef struct { 246 unsigned long val; /* virtual 64bit counter value */ 247 unsigned long lval; /* last reset value */ 248 unsigned long long_reset; /* reset value on sampling overflow */ 249 unsigned long short_reset; /* reset value on overflow */ 250 unsigned long reset_pmds[4]; /* which other pmds to reset when this counter overflows */ 251 unsigned long smpl_pmds[4]; /* which pmds are accessed when counter overflow */ 252 unsigned long seed; /* seed for random-number generator */ 253 unsigned long mask; /* mask for random-number generator */ 254 unsigned int flags; /* notify/do not notify */ 255 unsigned long 
eventid; /* overflow event identifier */ 256} pfm_counter_t; 257 258/* 259 * context flags 260 */ 261typedef struct { 262 unsigned int block:1; /* when 1, task will blocked on user notifications */ 263 unsigned int system:1; /* do system wide monitoring */ 264 unsigned int using_dbreg:1; /* using range restrictions (debug registers) */ 265 unsigned int is_sampling:1; /* true if using a custom format */ 266 unsigned int excl_idle:1; /* exclude idle task in system wide session */ 267 unsigned int going_zombie:1; /* context is zombie (MASKED+blocking) */ 268 unsigned int trap_reason:2; /* reason for going into pfm_handle_work() */ 269 unsigned int no_msg:1; /* no message sent on overflow */ 270 unsigned int can_restart:1; /* allowed to issue a PFM_RESTART */ 271 unsigned int reserved:22; 272} pfm_context_flags_t; 273 274#define PFM_TRAP_REASON_NONE 0x0 /* default value */ 275#define PFM_TRAP_REASON_BLOCK 0x1 /* we need to block on overflow */ 276#define PFM_TRAP_REASON_RESET 0x2 /* we need to reset PMDs */ 277 278 279/* 280 * perfmon context: encapsulates all the state of a monitoring session 281 */ 282 283typedef struct pfm_context { 284 spinlock_t ctx_lock; /* context protection */ 285 286 pfm_context_flags_t ctx_flags; /* bitmask of flags (block reason incl.) */ 287 unsigned int ctx_state; /* state: active/inactive (no bitfield) */ 288 289 struct task_struct *ctx_task; /* task to which context is attached */ 290 291 unsigned long ctx_ovfl_regs[4]; /* which registers overflowed (notification) */ 292 293 struct completion ctx_restart_done; /* use for blocking notification mode */ 294 295 unsigned long ctx_used_pmds[4]; /* bitmask of PMD used */ 296 unsigned long ctx_all_pmds[4]; /* bitmask of all accessible PMDs */ 297 unsigned long ctx_reload_pmds[4]; /* bitmask of force reload PMD on ctxsw in */ 298 299 unsigned long ctx_all_pmcs[4]; /* bitmask of all accessible PMCs */ 300 unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */ 301 unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */ 302 303 unsigned long ctx_pmcs[PFM_NUM_PMC_REGS]; /* saved copies of PMC values */ 304 305 unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */ 306 unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */ 307 unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */ 308 unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */ 309 310 pfm_counter_t ctx_pmds[PFM_NUM_PMD_REGS]; /* software state for PMDS */ 311 312 unsigned long th_pmcs[PFM_NUM_PMC_REGS]; /* PMC thread save state */ 313 unsigned long th_pmds[PFM_NUM_PMD_REGS]; /* PMD thread save state */ 314 315 unsigned long ctx_saved_psr_up; /* only contains psr.up value */ 316 317 unsigned long ctx_last_activation; /* context last activation number for last_cpu */ 318 unsigned int ctx_last_cpu; /* CPU id of current or last CPU used (SMP only) */ 319 unsigned int ctx_cpu; /* cpu to which perfmon is applied (system wide) */ 320 321 int ctx_fd; /* file descriptor used my this context */ 322 pfm_ovfl_arg_t ctx_ovfl_arg; /* argument to custom buffer format handler */ 323 324 pfm_buffer_fmt_t *ctx_buf_fmt; /* buffer format callbacks */ 325 void *ctx_smpl_hdr; /* points to sampling buffer header kernel vaddr */ 326 unsigned long ctx_smpl_size; /* size of sampling buffer */ 327 void *ctx_smpl_vaddr; /* user level virtual address of smpl buffer */ 328 329 wait_queue_head_t ctx_msgq_wait; 330 pfm_msg_t ctx_msgq[PFM_MAX_MSGS]; 331 
int ctx_msgq_head; 332 int ctx_msgq_tail; 333 struct fasync_struct *ctx_async_queue; 334 335 wait_queue_head_t ctx_zombieq; /* termination cleanup wait queue */ 336} pfm_context_t; 337 338/* 339 * magic number used to verify that structure is really 340 * a perfmon context 341 */ 342#define PFM_IS_FILE(f) ((f)->f_op == &pfm_file_ops) 343 344#define PFM_GET_CTX(t) ((pfm_context_t *)(t)->thread.pfm_context) 345 346#ifdef CONFIG_SMP 347#define SET_LAST_CPU(ctx, v) (ctx)->ctx_last_cpu = (v) 348#define GET_LAST_CPU(ctx) (ctx)->ctx_last_cpu 349#else 350#define SET_LAST_CPU(ctx, v) do {} while(0) 351#define GET_LAST_CPU(ctx) do {} while(0) 352#endif 353 354 355#define ctx_fl_block ctx_flags.block 356#define ctx_fl_system ctx_flags.system 357#define ctx_fl_using_dbreg ctx_flags.using_dbreg 358#define ctx_fl_is_sampling ctx_flags.is_sampling 359#define ctx_fl_excl_idle ctx_flags.excl_idle 360#define ctx_fl_going_zombie ctx_flags.going_zombie 361#define ctx_fl_trap_reason ctx_flags.trap_reason 362#define ctx_fl_no_msg ctx_flags.no_msg 363#define ctx_fl_can_restart ctx_flags.can_restart 364 365#define PFM_SET_WORK_PENDING(t, v) do { (t)->thread.pfm_needs_checking = v; } while(0); 366#define PFM_GET_WORK_PENDING(t) (t)->thread.pfm_needs_checking 367 368/* 369 * global information about all sessions 370 * mostly used to synchronize between system wide and per-process 371 */ 372typedef struct { 373 spinlock_t pfs_lock; /* lock the structure */ 374 375 unsigned int pfs_task_sessions; /* number of per task sessions */ 376 unsigned int pfs_sys_sessions; /* number of per system wide sessions */ 377 unsigned int pfs_sys_use_dbregs; /* incremented when a system wide session uses debug regs */ 378 unsigned int pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */ 379 struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */ 380} pfm_session_t; 381 382/* 383 * information about a PMC or PMD. 384 * dep_pmd[]: a bitmask of dependent PMD registers 385 * dep_pmc[]: a bitmask of dependent PMC registers 386 */ 387typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); 388typedef struct { 389 unsigned int type; 390 int pm_pos; 391 unsigned long default_value; /* power-on default value */ 392 unsigned long reserved_mask; /* bitmask of reserved bits */ 393 pfm_reg_check_t read_check; 394 pfm_reg_check_t write_check; 395 unsigned long dep_pmd[4]; 396 unsigned long dep_pmc[4]; 397} pfm_reg_desc_t; 398 399/* assume cnum is a valid monitor */ 400#define PMC_PM(cnum, val) (((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1) 401 402/* 403 * This structure is initialized at boot time and contains 404 * a description of the PMU main characteristics. 
405 * 406 * If the probe function is defined, detection is based 407 * on its return value: 408 * - 0 means recognized PMU 409 * - anything else means not supported 410 * When the probe function is not defined, then the pmu_family field 411 * is used and it must match the host CPU family such that: 412 * - cpu->family & config->pmu_family != 0 413 */ 414typedef struct { 415 unsigned long ovfl_val; /* overflow value for counters */ 416 417 pfm_reg_desc_t *pmc_desc; /* detailed PMC register dependencies descriptions */ 418 pfm_reg_desc_t *pmd_desc; /* detailed PMD register dependencies descriptions */ 419 420 unsigned int num_pmcs; /* number of PMCS: computed at init time */ 421 unsigned int num_pmds; /* number of PMDS: computed at init time */ 422 unsigned long impl_pmcs[4]; /* bitmask of implemented PMCS */ 423 unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */ 424 425 char *pmu_name; /* PMU family name */ 426 unsigned int pmu_family; /* cpuid family pattern used to identify pmu */ 427 unsigned int flags; /* pmu specific flags */ 428 unsigned int num_ibrs; /* number of IBRS: computed at init time */ 429 unsigned int num_dbrs; /* number of DBRS: computed at init time */ 430 unsigned int num_counters; /* PMC/PMD counting pairs : computed at init time */ 431 int (*probe)(void); /* customized probe routine */ 432 unsigned int use_rr_dbregs:1; /* set if debug registers used for range restriction */ 433} pmu_config_t; 434/* 435 * PMU specific flags 436 */ 437#define PFM_PMU_IRQ_RESEND 1 /* PMU needs explicit IRQ resend */ 438 439/* 440 * debug register related type definitions 441 */ 442typedef struct { 443 unsigned long ibr_mask:56; 444 unsigned long ibr_plm:4; 445 unsigned long ibr_ig:3; 446 unsigned long ibr_x:1; 447} ibr_mask_reg_t; 448 449typedef struct { 450 unsigned long dbr_mask:56; 451 unsigned long dbr_plm:4; 452 unsigned long dbr_ig:2; 453 unsigned long dbr_w:1; 454 unsigned long dbr_r:1; 455} dbr_mask_reg_t; 456 457typedef union { 458 unsigned long val; 459 ibr_mask_reg_t ibr; 460 dbr_mask_reg_t dbr; 461} dbreg_t; 462 463 464/* 465 * perfmon command descriptions 466 */ 467typedef struct { 468 int (*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); 469 char *cmd_name; 470 int cmd_flags; 471 unsigned int cmd_narg; 472 size_t cmd_argsize; 473 int (*cmd_getsize)(void *arg, size_t *sz); 474} pfm_cmd_desc_t; 475 476#define PFM_CMD_FD 0x01 /* command requires a file descriptor */ 477#define PFM_CMD_ARG_READ 0x02 /* command must read argument(s) */ 478#define PFM_CMD_ARG_RW 0x04 /* command must read/write argument(s) */ 479#define PFM_CMD_STOP 0x08 /* command does not work on zombie context */ 480 481 482#define PFM_CMD_NAME(cmd) pfm_cmd_tab[(cmd)].cmd_name 483#define PFM_CMD_READ_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ) 484#define PFM_CMD_RW_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW) 485#define PFM_CMD_USE_FD(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD) 486#define PFM_CMD_STOPPED(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP) 487 488#define PFM_CMD_ARG_MANY -1 /* cannot be zero */ 489 490typedef struct { 491 unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */ 492 unsigned long pfm_replay_ovfl_intr_count; /* keep track of replayed ovfl interrupts */ 493 unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */ 494 unsigned long pfm_ovfl_intr_cycles; /* cycles spent processing ovfl interrupts */ 495 unsigned long pfm_ovfl_intr_cycles_min; /* min cycles spent processing 
ovfl interrupts */ 496 unsigned long pfm_ovfl_intr_cycles_max; /* max cycles spent processing ovfl interrupts */ 497 unsigned long pfm_smpl_handler_calls; 498 unsigned long pfm_smpl_handler_cycles; 499 char pad[SMP_CACHE_BYTES] ____cacheline_aligned; 500} pfm_stats_t; 501 502/* 503 * perfmon internal variables 504 */ 505static pfm_stats_t pfm_stats[NR_CPUS]; 506static pfm_session_t pfm_sessions; /* global sessions information */ 507 508static DEFINE_SPINLOCK(pfm_alt_install_check); 509static pfm_intr_handler_desc_t *pfm_alt_intr_handler; 510 511static struct proc_dir_entry *perfmon_dir; 512static pfm_uuid_t pfm_null_uuid = {0,}; 513 514static spinlock_t pfm_buffer_fmt_lock; 515static LIST_HEAD(pfm_buffer_fmt_list); 516 517static pmu_config_t *pmu_conf; 518 519/* sysctl() controls */ 520pfm_sysctl_t pfm_sysctl; 521EXPORT_SYMBOL(pfm_sysctl); 522 523static ctl_table pfm_ctl_table[]={ 524 { 525 .procname = "debug", 526 .data = &pfm_sysctl.debug, 527 .maxlen = sizeof(int), 528 .mode = 0666, 529 .proc_handler = proc_dointvec, 530 }, 531 { 532 .procname = "debug_ovfl", 533 .data = &pfm_sysctl.debug_ovfl, 534 .maxlen = sizeof(int), 535 .mode = 0666, 536 .proc_handler = proc_dointvec, 537 }, 538 { 539 .procname = "fastctxsw", 540 .data = &pfm_sysctl.fastctxsw, 541 .maxlen = sizeof(int), 542 .mode = 0600, 543 .proc_handler = proc_dointvec, 544 }, 545 { 546 .procname = "expert_mode", 547 .data = &pfm_sysctl.expert_mode, 548 .maxlen = sizeof(int), 549 .mode = 0600, 550 .proc_handler = proc_dointvec, 551 }, 552 {} 553}; 554static ctl_table pfm_sysctl_dir[] = { 555 { 556 .procname = "perfmon", 557 .mode = 0555, 558 .child = pfm_ctl_table, 559 }, 560 {} 561}; 562static ctl_table pfm_sysctl_root[] = { 563 { 564 .procname = "kernel", 565 .mode = 0555, 566 .child = pfm_sysctl_dir, 567 }, 568 {} 569}; 570static struct ctl_table_header *pfm_sysctl_header; 571 572static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); 573 574#define pfm_get_cpu_var(v) __ia64_per_cpu_var(v) 575#define pfm_get_cpu_data(a,b) per_cpu(a, b) 576 577static inline void 578pfm_put_task(struct task_struct *task) 579{ 580 if (task != current) put_task_struct(task); 581} 582 583static inline void 584pfm_reserve_page(unsigned long a) 585{ 586 SetPageReserved(vmalloc_to_page((void *)a)); 587} 588static inline void 589pfm_unreserve_page(unsigned long a) 590{ 591 ClearPageReserved(vmalloc_to_page((void*)a)); 592} 593 594static inline unsigned long 595pfm_protect_ctx_ctxsw(pfm_context_t *x) 596{ 597 spin_lock(&(x)->ctx_lock); 598 return 0UL; 599} 600 601static inline void 602pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f) 603{ 604 spin_unlock(&(x)->ctx_lock); 605} 606 607static inline unsigned long 608pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec) 609{ 610 return get_unmapped_area(file, addr, len, pgoff, flags); 611} 612 613/* forward declaration */ 614static const struct dentry_operations pfmfs_dentry_operations; 615 616static struct dentry * 617pfmfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) 618{ 619 return mount_pseudo(fs_type, "pfm:", NULL, &pfmfs_dentry_operations, 620 PFMFS_MAGIC); 621} 622 623static struct file_system_type pfm_fs_type = { 624 .name = "pfmfs", 625 .mount = pfmfs_mount, 626 .kill_sb = kill_anon_super, 627}; 628 629DEFINE_PER_CPU(unsigned long, pfm_syst_info); 630DEFINE_PER_CPU(struct task_struct *, pmu_owner); 
631DEFINE_PER_CPU(pfm_context_t *, pmu_ctx); 632DEFINE_PER_CPU(unsigned long, pmu_activation_number); 633EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info); 634 635 636/* forward declaration */ 637static const struct file_operations pfm_file_ops; 638 639/* 640 * forward declarations 641 */ 642#ifndef CONFIG_SMP 643static void pfm_lazy_save_regs (struct task_struct *ta); 644#endif 645 646void dump_pmu_state(const char *); 647static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); 648 649#include "perfmon_itanium.h" 650#include "perfmon_mckinley.h" 651#include "perfmon_montecito.h" 652#include "perfmon_generic.h" 653 654static pmu_config_t *pmu_confs[]={ 655 &pmu_conf_mont, 656 &pmu_conf_mck, 657 &pmu_conf_ita, 658 &pmu_conf_gen, /* must be last */ 659 NULL 660}; 661 662 663static int pfm_end_notify_user(pfm_context_t *ctx); 664 665static inline void 666pfm_clear_psr_pp(void) 667{ 668 ia64_rsm(IA64_PSR_PP); 669 ia64_srlz_i(); 670} 671 672static inline void 673pfm_set_psr_pp(void) 674{ 675 ia64_ssm(IA64_PSR_PP); 676 ia64_srlz_i(); 677} 678 679static inline void 680pfm_clear_psr_up(void) 681{ 682 ia64_rsm(IA64_PSR_UP); 683 ia64_srlz_i(); 684} 685 686static inline void 687pfm_set_psr_up(void) 688{ 689 ia64_ssm(IA64_PSR_UP); 690 ia64_srlz_i(); 691} 692 693static inline unsigned long 694pfm_get_psr(void) 695{ 696 unsigned long tmp; 697 tmp = ia64_getreg(_IA64_REG_PSR); 698 ia64_srlz_i(); 699 return tmp; 700} 701 702static inline void 703pfm_set_psr_l(unsigned long val) 704{ 705 ia64_setreg(_IA64_REG_PSR_L, val); 706 ia64_srlz_i(); 707} 708 709static inline void 710pfm_freeze_pmu(void) 711{ 712 ia64_set_pmc(0,1UL); 713 ia64_srlz_d(); 714} 715 716static inline void 717pfm_unfreeze_pmu(void) 718{ 719 ia64_set_pmc(0,0UL); 720 ia64_srlz_d(); 721} 722 723static inline void 724pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs) 725{ 726 int i; 727 728 for (i=0; i < nibrs; i++) { 729 ia64_set_ibr(i, ibrs[i]); 730 ia64_dv_serialize_instruction(); 731 } 732 ia64_srlz_i(); 733} 734 735static inline void 736pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs) 737{ 738 int i; 739 740 for (i=0; i < ndbrs; i++) { 741 ia64_set_dbr(i, dbrs[i]); 742 ia64_dv_serialize_data(); 743 } 744 ia64_srlz_d(); 745} 746 747/* 748 * PMD[i] must be a counter. no check is made 749 */ 750static inline unsigned long 751pfm_read_soft_counter(pfm_context_t *ctx, int i) 752{ 753 return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val); 754} 755 756/* 757 * PMD[i] must be a counter. 
no check is made 758 */ 759static inline void 760pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val) 761{ 762 unsigned long ovfl_val = pmu_conf->ovfl_val; 763 764 ctx->ctx_pmds[i].val = val & ~ovfl_val; 765 /* 766 * writing to unimplemented part is ignore, so we do not need to 767 * mask off top part 768 */ 769 ia64_set_pmd(i, val & ovfl_val); 770} 771 772static pfm_msg_t * 773pfm_get_new_msg(pfm_context_t *ctx) 774{ 775 int idx, next; 776 777 next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS; 778 779 DPRINT(("ctx_fd=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); 780 if (next == ctx->ctx_msgq_head) return NULL; 781 782 idx = ctx->ctx_msgq_tail; 783 ctx->ctx_msgq_tail = next; 784 785 DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx)); 786 787 return ctx->ctx_msgq+idx; 788} 789 790static pfm_msg_t * 791pfm_get_next_msg(pfm_context_t *ctx) 792{ 793 pfm_msg_t *msg; 794 795 DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); 796 797 if (PFM_CTXQ_EMPTY(ctx)) return NULL; 798 799 /* 800 * get oldest message 801 */ 802 msg = ctx->ctx_msgq+ctx->ctx_msgq_head; 803 804 /* 805 * and move forward 806 */ 807 ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS; 808 809 DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type)); 810 811 return msg; 812} 813 814static void 815pfm_reset_msgq(pfm_context_t *ctx) 816{ 817 ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; 818 DPRINT(("ctx=%p msgq reset\n", ctx)); 819} 820 821static void * 822pfm_rvmalloc(unsigned long size) 823{ 824 void *mem; 825 unsigned long addr; 826 827 size = PAGE_ALIGN(size); 828 mem = vzalloc(size); 829 if (mem) { 830 //printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem); 831 addr = (unsigned long)mem; 832 while (size > 0) { 833 pfm_reserve_page(addr); 834 addr+=PAGE_SIZE; 835 size-=PAGE_SIZE; 836 } 837 } 838 return mem; 839} 840 841static void 842pfm_rvfree(void *mem, unsigned long size) 843{ 844 unsigned long addr; 845 846 if (mem) { 847 DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size)); 848 addr = (unsigned long) mem; 849 while ((long) size > 0) { 850 pfm_unreserve_page(addr); 851 addr+=PAGE_SIZE; 852 size-=PAGE_SIZE; 853 } 854 vfree(mem); 855 } 856 return; 857} 858 859static pfm_context_t * 860pfm_context_alloc(int ctx_flags) 861{ 862 pfm_context_t *ctx; 863 864 /* 865 * allocate context descriptor 866 * must be able to free with interrupts disabled 867 */ 868 ctx = kzalloc(sizeof(pfm_context_t), GFP_KERNEL); 869 if (ctx) { 870 DPRINT(("alloc ctx @%p\n", ctx)); 871 872 /* 873 * init context protection lock 874 */ 875 spin_lock_init(&ctx->ctx_lock); 876 877 /* 878 * context is unloaded 879 */ 880 ctx->ctx_state = PFM_CTX_UNLOADED; 881 882 /* 883 * initialization of context's flags 884 */ 885 ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0; 886 ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0; 887 ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0; 888 /* 889 * will move to set properties 890 * ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 
1: 0; 891 */ 892 893 /* 894 * init restart semaphore to locked 895 */ 896 init_completion(&ctx->ctx_restart_done); 897 898 /* 899 * activation is used in SMP only 900 */ 901 ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; 902 SET_LAST_CPU(ctx, -1); 903 904 /* 905 * initialize notification message queue 906 */ 907 ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; 908 init_waitqueue_head(&ctx->ctx_msgq_wait); 909 init_waitqueue_head(&ctx->ctx_zombieq); 910 911 } 912 return ctx; 913} 914 915static void 916pfm_context_free(pfm_context_t *ctx) 917{ 918 if (ctx) { 919 DPRINT(("free ctx @%p\n", ctx)); 920 kfree(ctx); 921 } 922} 923 924static void 925pfm_mask_monitoring(struct task_struct *task) 926{ 927 pfm_context_t *ctx = PFM_GET_CTX(task); 928 unsigned long mask, val, ovfl_mask; 929 int i; 930 931 DPRINT_ovfl(("masking monitoring for [%d]\n", task_pid_nr(task))); 932 933 ovfl_mask = pmu_conf->ovfl_val; 934 /* 935 * monitoring can only be masked as a result of a valid 936 * counter overflow. In UP, it means that the PMU still 937 * has an owner. Note that the owner can be different 938 * from the current task. However the PMU state belongs 939 * to the owner. 940 * In SMP, a valid overflow only happens when task is 941 * current. Therefore if we come here, we know that 942 * the PMU state belongs to the current task, therefore 943 * we can access the live registers. 944 * 945 * So in both cases, the live register contains the owner's 946 * state. We can ONLY touch the PMU registers and NOT the PSR. 947 * 948 * As a consequence to this call, the ctx->th_pmds[] array 949 * contains stale information which must be ignored 950 * when context is reloaded AND monitoring is active (see 951 * pfm_restart). 952 */ 953 mask = ctx->ctx_used_pmds[0]; 954 for (i = 0; mask; i++, mask>>=1) { 955 /* skip non used pmds */ 956 if ((mask & 0x1) == 0) continue; 957 val = ia64_get_pmd(i); 958 959 if (PMD_IS_COUNTING(i)) { 960 /* 961 * we rebuild the full 64 bit value of the counter 962 */ 963 ctx->ctx_pmds[i].val += (val & ovfl_mask); 964 } else { 965 ctx->ctx_pmds[i].val = val; 966 } 967 DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n", 968 i, 969 ctx->ctx_pmds[i].val, 970 val & ovfl_mask)); 971 } 972 /* 973 * mask monitoring by setting the privilege level to 0 974 * we cannot use psr.pp/psr.up for this, it is controlled by 975 * the user 976 * 977 * if task is current, modify actual registers, otherwise modify 978 * thread save state, i.e., what will be restored in pfm_load_regs() 979 */ 980 mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; 981 for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { 982 if ((mask & 0x1) == 0UL) continue; 983 ia64_set_pmc(i, ctx->th_pmcs[i] & ~0xfUL); 984 ctx->th_pmcs[i] &= ~0xfUL; 985 DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i])); 986 } 987 /* 988 * make all of this visible 989 */ 990 ia64_srlz_d(); 991} 992 993/* 994 * must always be done with task == current 995 * 996 * context must be in MASKED state when calling 997 */ 998static void 999pfm_restore_monitoring(struct task_struct *task) 1000{ 1001 pfm_context_t *ctx = PFM_GET_CTX(task); 1002 unsigned long mask, ovfl_mask; 1003 unsigned long psr, val; 1004 int i, is_system; 1005 1006 is_system = ctx->ctx_fl_system; 1007 ovfl_mask = pmu_conf->ovfl_val; 1008 1009 if (task != current) { 1010 printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task_pid_nr(task), task_pid_nr(current)); 1011 return; 1012 } 1013 if (ctx->ctx_state != PFM_CTX_MASKED) { 1014 printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid 
state=%d\n", __LINE__, 1015 task_pid_nr(task), task_pid_nr(current), ctx->ctx_state); 1016 return; 1017 } 1018 psr = pfm_get_psr(); 1019 /* 1020 * monitoring is masked via the PMC. 1021 * As we restore their value, we do not want each counter to 1022 * restart right away. We stop monitoring using the PSR, 1023 * restore the PMC (and PMD) and then re-establish the psr 1024 * as it was. Note that there can be no pending overflow at 1025 * this point, because monitoring was MASKED. 1026 * 1027 * system-wide session are pinned and self-monitoring 1028 */ 1029 if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) { 1030 /* disable dcr pp */ 1031 ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); 1032 pfm_clear_psr_pp(); 1033 } else { 1034 pfm_clear_psr_up(); 1035 } 1036 /* 1037 * first, we restore the PMD 1038 */ 1039 mask = ctx->ctx_used_pmds[0]; 1040 for (i = 0; mask; i++, mask>>=1) { 1041 /* skip non used pmds */ 1042 if ((mask & 0x1) == 0) continue; 1043 1044 if (PMD_IS_COUNTING(i)) { 1045 /* 1046 * we split the 64bit value according to 1047 * counter width 1048 */ 1049 val = ctx->ctx_pmds[i].val & ovfl_mask; 1050 ctx->ctx_pmds[i].val &= ~ovfl_mask; 1051 } else { 1052 val = ctx->ctx_pmds[i].val; 1053 } 1054 ia64_set_pmd(i, val); 1055 1056 DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n", 1057 i, 1058 ctx->ctx_pmds[i].val, 1059 val)); 1060 } 1061 /* 1062 * restore the PMCs 1063 */ 1064 mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; 1065 for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { 1066 if ((mask & 0x1) == 0UL) continue; 1067 ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; 1068 ia64_set_pmc(i, ctx->th_pmcs[i]); 1069 DPRINT(("[%d] pmc[%d]=0x%lx\n", 1070 task_pid_nr(task), i, ctx->th_pmcs[i])); 1071 } 1072 ia64_srlz_d(); 1073 1074 /* 1075 * must restore DBR/IBR because could be modified while masked 1076 * XXX: need to optimize 1077 */ 1078 if (ctx->ctx_fl_using_dbreg) { 1079 pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); 1080 pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); 1081 } 1082 1083 /* 1084 * now restore PSR 1085 */ 1086 if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) { 1087 /* enable dcr pp */ 1088 ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); 1089 ia64_srlz_i(); 1090 } 1091 pfm_set_psr_l(psr); 1092} 1093 1094static inline void 1095pfm_save_pmds(unsigned long *pmds, unsigned long mask) 1096{ 1097 int i; 1098 1099 ia64_srlz_d(); 1100 1101 for (i=0; mask; i++, mask>>=1) { 1102 if (mask & 0x1) pmds[i] = ia64_get_pmd(i); 1103 } 1104} 1105 1106/* 1107 * reload from thread state (used for ctxw only) 1108 */ 1109static inline void 1110pfm_restore_pmds(unsigned long *pmds, unsigned long mask) 1111{ 1112 int i; 1113 unsigned long val, ovfl_val = pmu_conf->ovfl_val; 1114 1115 for (i=0; mask; i++, mask>>=1) { 1116 if ((mask & 0x1) == 0) continue; 1117 val = PMD_IS_COUNTING(i) ? 
pmds[i] & ovfl_val : pmds[i]; 1118 ia64_set_pmd(i, val); 1119 } 1120 ia64_srlz_d(); 1121} 1122 1123/* 1124 * propagate PMD from context to thread-state 1125 */ 1126static inline void 1127pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) 1128{ 1129 unsigned long ovfl_val = pmu_conf->ovfl_val; 1130 unsigned long mask = ctx->ctx_all_pmds[0]; 1131 unsigned long val; 1132 int i; 1133 1134 DPRINT(("mask=0x%lx\n", mask)); 1135 1136 for (i=0; mask; i++, mask>>=1) { 1137 1138 val = ctx->ctx_pmds[i].val; 1139 1140 /* 1141 * We break up the 64 bit value into 2 pieces 1142 * the lower bits go to the machine state in the 1143 * thread (will be reloaded on ctxsw in). 1144 * The upper part stays in the soft-counter. 1145 */ 1146 if (PMD_IS_COUNTING(i)) { 1147 ctx->ctx_pmds[i].val = val & ~ovfl_val; 1148 val &= ovfl_val; 1149 } 1150 ctx->th_pmds[i] = val; 1151 1152 DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n", 1153 i, 1154 ctx->th_pmds[i], 1155 ctx->ctx_pmds[i].val)); 1156 } 1157} 1158 1159/* 1160 * propagate PMC from context to thread-state 1161 */ 1162static inline void 1163pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx) 1164{ 1165 unsigned long mask = ctx->ctx_all_pmcs[0]; 1166 int i; 1167 1168 DPRINT(("mask=0x%lx\n", mask)); 1169 1170 for (i=0; mask; i++, mask>>=1) { 1171 /* masking 0 with ovfl_val yields 0 */ 1172 ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; 1173 DPRINT(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i])); 1174 } 1175} 1176 1177 1178 1179static inline void 1180pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask) 1181{ 1182 int i; 1183 1184 for (i=0; mask; i++, mask>>=1) { 1185 if ((mask & 0x1) == 0) continue; 1186 ia64_set_pmc(i, pmcs[i]); 1187 } 1188 ia64_srlz_d(); 1189} 1190 1191static inline int 1192pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b) 1193{ 1194 return memcmp(a, b, sizeof(pfm_uuid_t)); 1195} 1196 1197static inline int 1198pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs) 1199{ 1200 int ret = 0; 1201 if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs); 1202 return ret; 1203} 1204 1205static inline int 1206pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size) 1207{ 1208 int ret = 0; 1209 if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size); 1210 return ret; 1211} 1212 1213 1214static inline int 1215pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, 1216 int cpu, void *arg) 1217{ 1218 int ret = 0; 1219 if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, cpu, arg); 1220 return ret; 1221} 1222 1223static inline int 1224pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags, 1225 int cpu, void *arg) 1226{ 1227 int ret = 0; 1228 if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg); 1229 return ret; 1230} 1231 1232static inline int 1233pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) 1234{ 1235 int ret = 0; 1236 if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs); 1237 return ret; 1238} 1239 1240static inline int 1241pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) 1242{ 1243 int ret = 0; 1244 if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs); 1245 return ret; 1246} 1247 1248static pfm_buffer_fmt_t * 
1249__pfm_find_buffer_fmt(pfm_uuid_t uuid) 1250{ 1251 struct list_head * pos; 1252 pfm_buffer_fmt_t * entry; 1253 1254 list_for_each(pos, &pfm_buffer_fmt_list) { 1255 entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); 1256 if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0) 1257 return entry; 1258 } 1259 return NULL; 1260} 1261 1262/* 1263 * find a buffer format based on its uuid 1264 */ 1265static pfm_buffer_fmt_t * 1266pfm_find_buffer_fmt(pfm_uuid_t uuid) 1267{ 1268 pfm_buffer_fmt_t * fmt; 1269 spin_lock(&pfm_buffer_fmt_lock); 1270 fmt = __pfm_find_buffer_fmt(uuid); 1271 spin_unlock(&pfm_buffer_fmt_lock); 1272 return fmt; 1273} 1274 1275int 1276pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt) 1277{ 1278 int ret = 0; 1279 1280 /* some sanity checks */ 1281 if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL; 1282 1283 /* we need at least a handler */ 1284 if (fmt->fmt_handler == NULL) return -EINVAL; 1285 1286 /* 1287 * XXX: need check validity of fmt_arg_size 1288 */ 1289 1290 spin_lock(&pfm_buffer_fmt_lock); 1291 1292 if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) { 1293 printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name); 1294 ret = -EBUSY; 1295 goto out; 1296 } 1297 list_add(&fmt->fmt_list, &pfm_buffer_fmt_list); 1298 printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name); 1299 1300out: 1301 spin_unlock(&pfm_buffer_fmt_lock); 1302 return ret; 1303} 1304EXPORT_SYMBOL(pfm_register_buffer_fmt); 1305 1306int 1307pfm_unregister_buffer_fmt(pfm_uuid_t uuid) 1308{ 1309 pfm_buffer_fmt_t *fmt; 1310 int ret = 0; 1311 1312 spin_lock(&pfm_buffer_fmt_lock); 1313 1314 fmt = __pfm_find_buffer_fmt(uuid); 1315 if (!fmt) { 1316 printk(KERN_ERR "perfmon: cannot unregister format, not found\n"); 1317 ret = -EINVAL; 1318 goto out; 1319 } 1320 list_del_init(&fmt->fmt_list); 1321 printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name); 1322 1323out: 1324 spin_unlock(&pfm_buffer_fmt_lock); 1325 return ret; 1326 1327} 1328EXPORT_SYMBOL(pfm_unregister_buffer_fmt); 1329 1330extern void update_pal_halt_status(int); 1331 1332static int 1333pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) 1334{ 1335 unsigned long flags; 1336 /* 1337 * validity checks on cpu_mask have been done upstream 1338 */ 1339 LOCK_PFS(flags); 1340 1341 DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", 1342 pfm_sessions.pfs_sys_sessions, 1343 pfm_sessions.pfs_task_sessions, 1344 pfm_sessions.pfs_sys_use_dbregs, 1345 is_syswide, 1346 cpu)); 1347 1348 if (is_syswide) { 1349 /* 1350 * cannot mix system wide and per-task sessions 1351 */ 1352 if (pfm_sessions.pfs_task_sessions > 0UL) { 1353 DPRINT(("system wide not possible, %u conflicting task_sessions\n", 1354 pfm_sessions.pfs_task_sessions)); 1355 goto abort; 1356 } 1357 1358 if (pfm_sessions.pfs_sys_session[cpu]) goto error_conflict; 1359 1360 DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id())); 1361 1362 pfm_sessions.pfs_sys_session[cpu] = task; 1363 1364 pfm_sessions.pfs_sys_sessions++ ; 1365 1366 } else { 1367 if (pfm_sessions.pfs_sys_sessions) goto abort; 1368 pfm_sessions.pfs_task_sessions++; 1369 } 1370 1371 DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", 1372 pfm_sessions.pfs_sys_sessions, 1373 pfm_sessions.pfs_task_sessions, 1374 pfm_sessions.pfs_sys_use_dbregs, 1375 is_syswide, 1376 cpu)); 1377 1378 /* 1379 * disable default_idle() to go to PAL_HALT 1380 */ 1381 update_pal_halt_status(0); 1382 1383 
UNLOCK_PFS(flags); 1384 1385 return 0; 1386 1387error_conflict: 1388 DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n", 1389 task_pid_nr(pfm_sessions.pfs_sys_session[cpu]), 1390 cpu)); 1391abort: 1392 UNLOCK_PFS(flags); 1393 1394 return -EBUSY; 1395 1396} 1397 1398static int 1399pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu) 1400{ 1401 unsigned long flags; 1402 /* 1403 * validity checks on cpu_mask have been done upstream 1404 */ 1405 LOCK_PFS(flags); 1406 1407 DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", 1408 pfm_sessions.pfs_sys_sessions, 1409 pfm_sessions.pfs_task_sessions, 1410 pfm_sessions.pfs_sys_use_dbregs, 1411 is_syswide, 1412 cpu)); 1413 1414 1415 if (is_syswide) { 1416 pfm_sessions.pfs_sys_session[cpu] = NULL; 1417 /* 1418 * would not work with perfmon+more than one bit in cpu_mask 1419 */ 1420 if (ctx && ctx->ctx_fl_using_dbreg) { 1421 if (pfm_sessions.pfs_sys_use_dbregs == 0) { 1422 printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx); 1423 } else { 1424 pfm_sessions.pfs_sys_use_dbregs--; 1425 } 1426 } 1427 pfm_sessions.pfs_sys_sessions--; 1428 } else { 1429 pfm_sessions.pfs_task_sessions--; 1430 } 1431 DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", 1432 pfm_sessions.pfs_sys_sessions, 1433 pfm_sessions.pfs_task_sessions, 1434 pfm_sessions.pfs_sys_use_dbregs, 1435 is_syswide, 1436 cpu)); 1437 1438 /* 1439 * if possible, enable default_idle() to go into PAL_HALT 1440 */ 1441 if (pfm_sessions.pfs_task_sessions == 0 && pfm_sessions.pfs_sys_sessions == 0) 1442 update_pal_halt_status(1); 1443 1444 UNLOCK_PFS(flags); 1445 1446 return 0; 1447} 1448 1449/* 1450 * removes virtual mapping of the sampling buffer. 1451 * IMPORTANT: cannot be called with interrupts disable, e.g. inside 1452 * a PROTECT_CTX() section. 
1453 */ 1454static int 1455pfm_remove_smpl_mapping(void *vaddr, unsigned long size) 1456{ 1457 struct task_struct *task = current; 1458 int r; 1459 1460 /* sanity checks */ 1461 if (task->mm == NULL || size == 0UL || vaddr == NULL) { 1462 printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task_pid_nr(task), task->mm); 1463 return -EINVAL; 1464 } 1465 1466 DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size)); 1467 1468 /* 1469 * does the actual unmapping 1470 */ 1471 r = vm_munmap((unsigned long)vaddr, size); 1472 1473 if (r !=0) { 1474 printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task_pid_nr(task), vaddr, size); 1475 } 1476 1477 DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r)); 1478 1479 return 0; 1480} 1481 1482/* 1483 * free actual physical storage used by sampling buffer 1484 */ 1485#if 0 1486static int 1487pfm_free_smpl_buffer(pfm_context_t *ctx) 1488{ 1489 pfm_buffer_fmt_t *fmt; 1490 1491 if (ctx->ctx_smpl_hdr == NULL) goto invalid_free; 1492 1493 /* 1494 * we won't use the buffer format anymore 1495 */ 1496 fmt = ctx->ctx_buf_fmt; 1497 1498 DPRINT(("sampling buffer @%p size %lu vaddr=%p\n", 1499 ctx->ctx_smpl_hdr, 1500 ctx->ctx_smpl_size, 1501 ctx->ctx_smpl_vaddr)); 1502 1503 pfm_buf_fmt_exit(fmt, current, NULL, NULL); 1504 1505 /* 1506 * free the buffer 1507 */ 1508 pfm_rvfree(ctx->ctx_smpl_hdr, ctx->ctx_smpl_size); 1509 1510 ctx->ctx_smpl_hdr = NULL; 1511 ctx->ctx_smpl_size = 0UL; 1512 1513 return 0; 1514 1515invalid_free: 1516 printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", task_pid_nr(current)); 1517 return -EINVAL; 1518} 1519#endif 1520 1521static inline void 1522pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt) 1523{ 1524 if (fmt == NULL) return; 1525 1526 pfm_buf_fmt_exit(fmt, current, NULL, NULL); 1527 1528} 1529 1530/* 1531 * pfmfs should _never_ be mounted by userland - too much of security hassle, 1532 * no real gain from having the whole whorehouse mounted. So we don't need 1533 * any operations on the root directory. However, we need a non-trivial 1534 * d_name - pfm: will go nicely and kill the special-casing in procfs. 
1535 */ 1536static struct vfsmount *pfmfs_mnt __read_mostly; 1537 1538static int __init 1539init_pfm_fs(void) 1540{ 1541 int err = register_filesystem(&pfm_fs_type); 1542 if (!err) { 1543 pfmfs_mnt = kern_mount(&pfm_fs_type); 1544 err = PTR_ERR(pfmfs_mnt); 1545 if (IS_ERR(pfmfs_mnt)) 1546 unregister_filesystem(&pfm_fs_type); 1547 else 1548 err = 0; 1549 } 1550 return err; 1551} 1552 1553static ssize_t 1554pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos) 1555{ 1556 pfm_context_t *ctx; 1557 pfm_msg_t *msg; 1558 ssize_t ret; 1559 unsigned long flags; 1560 DECLARE_WAITQUEUE(wait, current); 1561 if (PFM_IS_FILE(filp) == 0) { 1562 printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current)); 1563 return -EINVAL; 1564 } 1565 1566 ctx = filp->private_data; 1567 if (ctx == NULL) { 1568 printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", task_pid_nr(current)); 1569 return -EINVAL; 1570 } 1571 1572 /* 1573 * check even when there is no message 1574 */ 1575 if (size < sizeof(pfm_msg_t)) { 1576 DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t))); 1577 return -EINVAL; 1578 } 1579 1580 PROTECT_CTX(ctx, flags); 1581 1582 /* 1583 * put ourselves on the wait queue 1584 */ 1585 add_wait_queue(&ctx->ctx_msgq_wait, &wait); 1586 1587 1588 for(;;) { 1589 /* 1590 * check wait queue 1591 */ 1592 1593 set_current_state(TASK_INTERRUPTIBLE); 1594 1595 DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); 1596 1597 ret = 0; 1598 if(PFM_CTXQ_EMPTY(ctx) == 0) break; 1599 1600 UNPROTECT_CTX(ctx, flags); 1601 1602 /* 1603 * check non-blocking read 1604 */ 1605 ret = -EAGAIN; 1606 if(filp->f_flags & O_NONBLOCK) break; 1607 1608 /* 1609 * check pending signals 1610 */ 1611 if(signal_pending(current)) { 1612 ret = -EINTR; 1613 break; 1614 } 1615 /* 1616 * no message, so wait 1617 */ 1618 schedule(); 1619 1620 PROTECT_CTX(ctx, flags); 1621 } 1622 DPRINT(("[%d] back to running ret=%ld\n", task_pid_nr(current), ret)); 1623 set_current_state(TASK_RUNNING); 1624 remove_wait_queue(&ctx->ctx_msgq_wait, &wait); 1625 1626 if (ret < 0) goto abort; 1627 1628 ret = -EINVAL; 1629 msg = pfm_get_next_msg(ctx); 1630 if (msg == NULL) { 1631 printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, task_pid_nr(current)); 1632 goto abort_locked; 1633 } 1634 1635 DPRINT(("fd=%d type=%d\n", msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type)); 1636 1637 ret = -EFAULT; 1638 if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t); 1639 1640abort_locked: 1641 UNPROTECT_CTX(ctx, flags); 1642abort: 1643 return ret; 1644} 1645 1646static ssize_t 1647pfm_write(struct file *file, const char __user *ubuf, 1648 size_t size, loff_t *ppos) 1649{ 1650 DPRINT(("pfm_write called\n")); 1651 return -EINVAL; 1652} 1653 1654static unsigned int 1655pfm_poll(struct file *filp, poll_table * wait) 1656{ 1657 pfm_context_t *ctx; 1658 unsigned long flags; 1659 unsigned int mask = 0; 1660 1661 if (PFM_IS_FILE(filp) == 0) { 1662 printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current)); 1663 return 0; 1664 } 1665 1666 ctx = filp->private_data; 1667 if (ctx == NULL) { 1668 printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", task_pid_nr(current)); 1669 return 0; 1670 } 1671 1672 1673 DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd)); 1674 1675 poll_wait(filp, &ctx->ctx_msgq_wait, wait); 1676 1677 PROTECT_CTX(ctx, flags); 1678 1679 if (PFM_CTXQ_EMPTY(ctx) == 0) 1680 mask = POLLIN | POLLRDNORM; 1681 1682 UNPROTECT_CTX(ctx, flags); 
1683 1684 DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, mask)); 1685 1686 return mask; 1687} 1688 1689static long 1690pfm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 1691{ 1692 DPRINT(("pfm_ioctl called\n")); 1693 return -EINVAL; 1694} 1695 1696/* 1697 * interrupt cannot be masked when coming here 1698 */ 1699static inline int 1700pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on) 1701{ 1702 int ret; 1703 1704 ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue); 1705 1706 DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n", 1707 task_pid_nr(current), 1708 fd, 1709 on, 1710 ctx->ctx_async_queue, ret)); 1711 1712 return ret; 1713} 1714 1715static int 1716pfm_fasync(int fd, struct file *filp, int on) 1717{ 1718 pfm_context_t *ctx; 1719 int ret; 1720 1721 if (PFM_IS_FILE(filp) == 0) { 1722 printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", task_pid_nr(current)); 1723 return -EBADF; 1724 } 1725 1726 ctx = filp->private_data; 1727 if (ctx == NULL) { 1728 printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", task_pid_nr(current)); 1729 return -EBADF; 1730 } 1731 /* 1732 * we cannot mask interrupts during this call because this may 1733 * may go to sleep if memory is not readily avalaible. 1734 * 1735 * We are protected from the conetxt disappearing by the get_fd()/put_fd() 1736 * done in caller. Serialization of this function is ensured by caller. 1737 */ 1738 ret = pfm_do_fasync(fd, filp, ctx, on); 1739 1740 1741 DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n", 1742 fd, 1743 on, 1744 ctx->ctx_async_queue, ret)); 1745 1746 return ret; 1747} 1748 1749#ifdef CONFIG_SMP 1750/* 1751 * this function is exclusively called from pfm_close(). 1752 * The context is not protected at that time, nor are interrupts 1753 * on the remote CPU. That's necessary to avoid deadlocks. 
1754 */ 1755static void 1756pfm_syswide_force_stop(void *info) 1757{ 1758 pfm_context_t *ctx = (pfm_context_t *)info; 1759 struct pt_regs *regs = task_pt_regs(current); 1760 struct task_struct *owner; 1761 unsigned long flags; 1762 int ret; 1763 1764 if (ctx->ctx_cpu != smp_processor_id()) { 1765 printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d but on CPU%d\n", 1766 ctx->ctx_cpu, 1767 smp_processor_id()); 1768 return; 1769 } 1770 owner = GET_PMU_OWNER(); 1771 if (owner != ctx->ctx_task) { 1772 printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n", 1773 smp_processor_id(), 1774 task_pid_nr(owner), task_pid_nr(ctx->ctx_task)); 1775 return; 1776 } 1777 if (GET_PMU_CTX() != ctx) { 1778 printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n", 1779 smp_processor_id(), 1780 GET_PMU_CTX(), ctx); 1781 return; 1782 } 1783 1784 DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), task_pid_nr(ctx->ctx_task))); 1785 /* 1786 * the context is already protected in pfm_close(), we simply 1787 * need to mask interrupts to avoid a PMU interrupt race on 1788 * this CPU 1789 */ 1790 local_irq_save(flags); 1791 1792 ret = pfm_context_unload(ctx, NULL, 0, regs); 1793 if (ret) { 1794 DPRINT(("context_unload returned %d\n", ret)); 1795 } 1796 1797 /* 1798 * unmask interrupts, PMU interrupts are now spurious here 1799 */ 1800 local_irq_restore(flags); 1801} 1802 1803static void 1804pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx) 1805{ 1806 int ret; 1807 1808 DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu)); 1809 ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 1); 1810 DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret)); 1811} 1812#endif /* CONFIG_SMP */ 1813 1814/* 1815 * called for each close(). Partially free resources. 1816 * When caller is self-monitoring, the context is unloaded. 1817 */ 1818static int 1819pfm_flush(struct file *filp, fl_owner_t id) 1820{ 1821 pfm_context_t *ctx; 1822 struct task_struct *task; 1823 struct pt_regs *regs; 1824 unsigned long flags; 1825 unsigned long smpl_buf_size = 0UL; 1826 void *smpl_buf_vaddr = NULL; 1827 int state, is_system; 1828 1829 if (PFM_IS_FILE(filp) == 0) { 1830 DPRINT(("bad magic for\n")); 1831 return -EBADF; 1832 } 1833 1834 ctx = filp->private_data; 1835 if (ctx == NULL) { 1836 printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", task_pid_nr(current)); 1837 return -EBADF; 1838 } 1839 1840 /* 1841 * remove our file from the async queue, if we use this mode. 1842 * This can be done without the context being protected. We come 1843 * here when the context has become unreachable by other tasks. 1844 * 1845 * We may still have active monitoring at this point and we may 1846 * end up in pfm_overflow_handler(). However, fasync_helper() 1847 * operates with interrupts disabled and it cleans up the 1848 * queue. If the PMU handler is called prior to entering 1849 * fasync_helper() then it will send a signal. If it is 1850 * invoked after, it will find an empty queue and no 1851 * signal will be sent. In both case, we are safe 1852 */ 1853 PROTECT_CTX(ctx, flags); 1854 1855 state = ctx->ctx_state; 1856 is_system = ctx->ctx_fl_system; 1857 1858 task = PFM_CTX_TASK(ctx); 1859 regs = task_pt_regs(task); 1860 1861 DPRINT(("ctx_state=%d is_current=%d\n", 1862 state, 1863 task == current ? 
1 : 0)); 1864 1865 /* 1866 * if state == UNLOADED, then task is NULL 1867 */ 1868 1869 /* 1870 * we must stop and unload because we are losing access to the context. 1871 */ 1872 if (task == current) { 1873#ifdef CONFIG_SMP 1874 /* 1875 * the task IS the owner but it migrated to another CPU: that's bad 1876 * but we must handle this cleanly. Unfortunately, the kernel does 1877 * not provide a mechanism to block migration (while the context is loaded). 1878 * 1879 * We need to release the resource on the ORIGINAL cpu. 1880 */ 1881 if (is_system && ctx->ctx_cpu != smp_processor_id()) { 1882 1883 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 1884 /* 1885 * keep context protected but unmask interrupt for IPI 1886 */ 1887 local_irq_restore(flags); 1888 1889 pfm_syswide_cleanup_other_cpu(ctx); 1890 1891 /* 1892 * restore interrupt masking 1893 */ 1894 local_irq_save(flags); 1895 1896 /* 1897 * context is unloaded at this point 1898 */ 1899 } else 1900#endif /* CONFIG_SMP */ 1901 { 1902 1903 DPRINT(("forcing unload\n")); 1904 /* 1905 * stop and unload, returning with state UNLOADED 1906 * and session unreserved. 1907 */ 1908 pfm_context_unload(ctx, NULL, 0, regs); 1909 1910 DPRINT(("ctx_state=%d\n", ctx->ctx_state)); 1911 } 1912 } 1913 1914 /* 1915 * remove virtual mapping, if any, for the calling task. 1916 * cannot reset ctx field until last user is calling close(). 1917 * 1918 * ctx_smpl_vaddr must never be cleared because it is needed 1919 * by every task with access to the context 1920 * 1921 * When called from do_exit(), the mm context is gone already, therefore 1922 * mm is NULL, i.e., the VMA is already gone and we do not have to 1923 * do anything here 1924 */ 1925 if (ctx->ctx_smpl_vaddr && current->mm) { 1926 smpl_buf_vaddr = ctx->ctx_smpl_vaddr; 1927 smpl_buf_size = ctx->ctx_smpl_size; 1928 } 1929 1930 UNPROTECT_CTX(ctx, flags); 1931 1932 /* 1933 * if there was a mapping, then we systematically remove it 1934 * at this point. Cannot be done inside critical section 1935 * because some VM function reenables interrupts. 1936 * 1937 */ 1938 if (smpl_buf_vaddr) pfm_remove_smpl_mapping(smpl_buf_vaddr, smpl_buf_size); 1939 1940 return 0; 1941} 1942/* 1943 * called either on explicit close() or from exit_files(). 1944 * Only the LAST user of the file gets to this point, i.e., it is 1945 * called only ONCE. 1946 * 1947 * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero 1948 * (fput()),i.e, last task to access the file. Nobody else can access the 1949 * file at this point. 1950 * 1951 * When called from exit_files(), the VMA has been freed because exit_mm() 1952 * is executed before exit_files(). 1953 * 1954 * When called from exit_files(), the current task is not yet ZOMBIE but we 1955 * flush the PMU state to the context. 
1956 */ 1957static int 1958pfm_close(struct inode *inode, struct file *filp) 1959{ 1960 pfm_context_t *ctx; 1961 struct task_struct *task; 1962 struct pt_regs *regs; 1963 DECLARE_WAITQUEUE(wait, current); 1964 unsigned long flags; 1965 unsigned long smpl_buf_size = 0UL; 1966 void *smpl_buf_addr = NULL; 1967 int free_possible = 1; 1968 int state, is_system; 1969 1970 DPRINT(("pfm_close called private=%p\n", filp->private_data)); 1971 1972 if (PFM_IS_FILE(filp) == 0) { 1973 DPRINT(("bad magic\n")); 1974 return -EBADF; 1975 } 1976 1977 ctx = filp->private_data; 1978 if (ctx == NULL) { 1979 printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", task_pid_nr(current)); 1980 return -EBADF; 1981 } 1982 1983 PROTECT_CTX(ctx, flags); 1984 1985 state = ctx->ctx_state; 1986 is_system = ctx->ctx_fl_system; 1987 1988 task = PFM_CTX_TASK(ctx); 1989 regs = task_pt_regs(task); 1990 1991 DPRINT(("ctx_state=%d is_current=%d\n", 1992 state, 1993 task == current ? 1 : 0)); 1994 1995 /* 1996 * if task == current, then pfm_flush() unloaded the context 1997 */ 1998 if (state == PFM_CTX_UNLOADED) goto doit; 1999 2000 /* 2001 * context is loaded/masked and task != current, we need to 2002 * either force an unload or go zombie 2003 */ 2004 2005 /* 2006 * The task is currently blocked or will block after an overflow. 2007 * we must force it to wakeup to get out of the 2008 * MASKED state and transition to the unloaded state by itself. 2009 * 2010 * This situation is only possible for per-task mode 2011 */ 2012 if (state == PFM_CTX_MASKED && CTX_OVFL_NOBLOCK(ctx) == 0) { 2013 2014 /* 2015 * set a "partial" zombie state to be checked 2016 * upon return from down() in pfm_handle_work(). 2017 * 2018 * We cannot use the ZOMBIE state, because it is checked 2019 * by pfm_load_regs() which is called upon wakeup from down(). 2020 * In such case, it would free the context and then we would 2021 * return to pfm_handle_work() which would access the 2022 * stale context. Instead, we set a flag invisible to pfm_load_regs() 2023 * but visible to pfm_handle_work(). 2024 * 2025 * For some window of time, we have a zombie context with 2026 * ctx_state = MASKED and not ZOMBIE 2027 */ 2028 ctx->ctx_fl_going_zombie = 1; 2029 2030 /* 2031 * force task to wake up from MASKED state 2032 */ 2033 complete(&ctx->ctx_restart_done); 2034 2035 DPRINT(("waking up ctx_state=%d\n", state)); 2036 2037 /* 2038 * put ourself to sleep waiting for the other 2039 * task to report completion 2040 * 2041 * the context is protected by mutex, therefore there 2042 * is no risk of being notified of completion before 2043 * begin actually on the waitq. 2044 */ 2045 set_current_state(TASK_INTERRUPTIBLE); 2046 add_wait_queue(&ctx->ctx_zombieq, &wait); 2047 2048 UNPROTECT_CTX(ctx, flags); 2049 2050 /* 2051 * XXX: check for signals : 2052 * - ok for explicit close 2053 * - not ok when coming from exit_files() 2054 */ 2055 schedule(); 2056 2057 2058 PROTECT_CTX(ctx, flags); 2059 2060 2061 remove_wait_queue(&ctx->ctx_zombieq, &wait); 2062 set_current_state(TASK_RUNNING); 2063 2064 /* 2065 * context is unloaded at this point 2066 */ 2067 DPRINT(("after zombie wakeup ctx_state=%d for\n", state)); 2068 } 2069 else if (task != current) { 2070#ifdef CONFIG_SMP 2071 /* 2072 * switch context to zombie state 2073 */ 2074 ctx->ctx_state = PFM_CTX_ZOMBIE; 2075 2076 DPRINT(("zombie ctx for [%d]\n", task_pid_nr(task))); 2077 /* 2078 * cannot free the context on the spot. 
Freeing is deferred until 2079 * the task notices the ZOMBIE state 2080 */ 2081 free_possible = 0; 2082#else 2083 pfm_context_unload(ctx, NULL, 0, regs); 2084#endif 2085 } 2086 2087doit: 2088 /* reload the state, it may have changed while the critical section was open */ 2089 state = ctx->ctx_state; 2090 2091 /* 2092 * the context is still attached to a task (possibly current) 2093 * we cannot destroy it right now 2094 */ 2095 2096 /* 2097 * we must free the sampling buffer right here because 2098 * we cannot rely on it being cleaned up later by the 2099 * monitored task. It is not possible to free vmalloc'ed 2100 * memory in pfm_load_regs(). Instead, we remove the buffer 2101 * now. Should there be a subsequent PMU overflow originally 2102 * meant for sampling, it will be treated as spurious 2103 * and that's fine because the monitoring tool is gone anyway. 2104 */ 2105 if (ctx->ctx_smpl_hdr) { 2106 smpl_buf_addr = ctx->ctx_smpl_hdr; 2107 smpl_buf_size = ctx->ctx_smpl_size; 2108 /* no more sampling */ 2109 ctx->ctx_smpl_hdr = NULL; 2110 ctx->ctx_fl_is_sampling = 0; 2111 } 2112 2113 DPRINT(("ctx_state=%d free_possible=%d addr=%p size=%lu\n", 2114 state, 2115 free_possible, 2116 smpl_buf_addr, 2117 smpl_buf_size)); 2118 2119 if (smpl_buf_addr) pfm_exit_smpl_buffer(ctx->ctx_buf_fmt); 2120 2121 /* 2122 * UNLOADED means that the session has already been unreserved. 2123 */ 2124 if (state == PFM_CTX_ZOMBIE) { 2125 pfm_unreserve_session(ctx, ctx->ctx_fl_system, ctx->ctx_cpu); 2126 } 2127 2128 /* 2129 * disconnecting the file descriptor from the context must be done 2130 * before we unlock. 2131 */ 2132 filp->private_data = NULL; 2133 2134 /* 2135 * if we free on the spot, the context is now completely unreachable 2136 * from the caller's side. The monitored task side is also cut, so we 2137 * can free it safely. 2138 * 2139 * If we have a deferred free, only the caller side is disconnected. 2140 */ 2141 UNPROTECT_CTX(ctx, flags); 2142 2143 /* 2144 * All memory free operations (especially for vmalloc'ed memory) 2145 * MUST be done with interrupts ENABLED.
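 * (pfm_rvfree() presumably ends up in vfree(), which may need to issue
 *  cross-CPU TLB shootdowns and must therefore never run with interrupts
 *  disabled or under the context spinlock -- hence the unlock-then-free
 *  ordering below.)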
2146 */ 2147 if (smpl_buf_addr) pfm_rvfree(smpl_buf_addr, smpl_buf_size); 2148 2149 /* 2150 * return the memory used by the context 2151 */ 2152 if (free_possible) pfm_context_free(ctx); 2153 2154 return 0; 2155} 2156 2157static int 2158pfm_no_open(struct inode *irrelevant, struct file *dontcare) 2159{ 2160 DPRINT(("pfm_no_open called\n")); 2161 return -ENXIO; 2162} 2163 2164 2165 2166static const struct file_operations pfm_file_ops = { 2167 .llseek = no_llseek, 2168 .read = pfm_read, 2169 .write = pfm_write, 2170 .poll = pfm_poll, 2171 .unlocked_ioctl = pfm_ioctl, 2172 .open = pfm_no_open, /* special open code to disallow open via /proc */ 2173 .fasync = pfm_fasync, 2174 .release = pfm_close, 2175 .flush = pfm_flush 2176}; 2177 2178static int 2179pfmfs_delete_dentry(const struct dentry *dentry) 2180{ 2181 return 1; 2182} 2183 2184static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen) 2185{ 2186 return dynamic_dname(dentry, buffer, buflen, "pfm:[%lu]", 2187 dentry->d_inode->i_ino); 2188} 2189 2190static const struct dentry_operations pfmfs_dentry_operations = { 2191 .d_delete = pfmfs_delete_dentry, 2192 .d_dname = pfmfs_dname, 2193}; 2194 2195 2196static struct file * 2197pfm_alloc_file(pfm_context_t *ctx) 2198{ 2199 struct file *file; 2200 struct inode *inode; 2201 struct path path; 2202 struct qstr this = { .name = "" }; 2203 2204 /* 2205 * allocate a new inode 2206 */ 2207 inode = new_inode(pfmfs_mnt->mnt_sb); 2208 if (!inode) 2209 return ERR_PTR(-ENOMEM); 2210 2211 DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode)); 2212 2213 inode->i_mode = S_IFCHR|S_IRUGO; 2214 inode->i_uid = current_fsuid(); 2215 inode->i_gid = current_fsgid(); 2216 2217 /* 2218 * allocate a new dcache entry 2219 */ 2220 path.dentry = d_alloc(pfmfs_mnt->mnt_root, &this); 2221 if (!path.dentry) { 2222 iput(inode); 2223 return ERR_PTR(-ENOMEM); 2224 } 2225 path.mnt = mntget(pfmfs_mnt); 2226 2227 d_add(path.dentry, inode); 2228 2229 file = alloc_file(&path, FMODE_READ, &pfm_file_ops); 2230 if (!file) { 2231 path_put(&path); 2232 return ERR_PTR(-ENFILE); 2233 } 2234 2235 file->f_flags = O_RDONLY; 2236 file->private_data = ctx; 2237 2238 return file; 2239} 2240 2241static int 2242pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size) 2243{ 2244 DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size)); 2245 2246 while (size > 0) { 2247 unsigned long pfn = ia64_tpa(buf) >> PAGE_SHIFT; 2248 2249 2250 if (remap_pfn_range(vma, addr, pfn, PAGE_SIZE, PAGE_READONLY)) 2251 return -ENOMEM; 2252 2253 addr += PAGE_SIZE; 2254 buf += PAGE_SIZE; 2255 size -= PAGE_SIZE; 2256 } 2257 return 0; 2258} 2259 2260/* 2261 * allocate a sampling buffer and remaps it into the user address space of the task 2262 */ 2263static int 2264pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr) 2265{ 2266 struct mm_struct *mm = task->mm; 2267 struct vm_area_struct *vma = NULL; 2268 unsigned long size; 2269 void *smpl_buf; 2270 2271 2272 /* 2273 * the fixed header + requested size and align to page boundary 2274 */ 2275 size = PAGE_ALIGN(rsize); 2276 2277 DPRINT(("sampling buffer rsize=%lu size=%lu bytes\n", rsize, size)); 2278 2279 /* 2280 * check requested size to avoid Denial-of-service attacks 2281 * XXX: may have to refine this test 2282 * Check against address space limit. 
2283 * 2284 * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur) 2285 * return -ENOMEM; 2286 */ 2287 if (size > task_rlimit(task, RLIMIT_MEMLOCK)) 2288 return -ENOMEM; 2289 2290 /* 2291 * We do the easy to undo allocations first. 2292 * 2293 * pfm_rvmalloc(), clears the buffer, so there is no leak 2294 */ 2295 smpl_buf = pfm_rvmalloc(size); 2296 if (smpl_buf == NULL) { 2297 DPRINT(("Can't allocate sampling buffer\n")); 2298 return -ENOMEM; 2299 } 2300 2301 DPRINT(("smpl_buf @%p\n", smpl_buf)); 2302 2303 /* allocate vma */ 2304 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); 2305 if (!vma) { 2306 DPRINT(("Cannot allocate vma\n")); 2307 goto error_kmem; 2308 } 2309 INIT_LIST_HEAD(&vma->anon_vma_chain); 2310 2311 /* 2312 * partially initialize the vma for the sampling buffer 2313 */ 2314 vma->vm_mm = mm; 2315 vma->vm_file = filp; 2316 vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED; 2317 vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ 2318 2319 /* 2320 * Now we have everything we need and we can initialize 2321 * and connect all the data structures 2322 */ 2323 2324 ctx->ctx_smpl_hdr = smpl_buf; 2325 ctx->ctx_smpl_size = size; /* aligned size */ 2326 2327 /* 2328 * Let's do the difficult operations next. 2329 * 2330 * now we atomically find some area in the address space and 2331 * remap the buffer in it. 2332 */ 2333 down_write(&task->mm->mmap_sem); 2334 2335 /* find some free area in address space, must have mmap sem held */ 2336 vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0); 2337 if (vma->vm_start == 0UL) { 2338 DPRINT(("Cannot find unmapped area for size %ld\n", size)); 2339 up_write(&task->mm->mmap_sem); 2340 goto error; 2341 } 2342 vma->vm_end = vma->vm_start + size; 2343 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT; 2344 2345 DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start)); 2346 2347 /* can only be applied to current task, need to have the mm semaphore held when called */ 2348 if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) { 2349 DPRINT(("Can't remap buffer\n")); 2350 up_write(&task->mm->mmap_sem); 2351 goto error; 2352 } 2353 2354 get_file(filp); 2355 2356 /* 2357 * now insert the vma in the vm list for the process, must be 2358 * done with mmap lock held 2359 */ 2360 insert_vm_struct(mm, vma); 2361 2362 mm->total_vm += size >> PAGE_SHIFT; 2363 vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, 2364 vma_pages(vma)); 2365 up_write(&task->mm->mmap_sem); 2366 2367 /* 2368 * keep track of user level virtual address 2369 */ 2370 ctx->ctx_smpl_vaddr = (void *)vma->vm_start; 2371 *(unsigned long *)user_vaddr = vma->vm_start; 2372 2373 return 0; 2374 2375error: 2376 kmem_cache_free(vm_area_cachep, vma); 2377error_kmem: 2378 pfm_rvfree(smpl_buf, size); 2379 2380 return -ENOMEM; 2381} 2382 2383/* 2384 * XXX: do something better here 2385 */ 2386static int 2387pfm_bad_permissions(struct task_struct *task) 2388{ 2389 const struct cred *tcred; 2390 uid_t uid = current_uid(); 2391 gid_t gid = current_gid(); 2392 int ret; 2393 2394 rcu_read_lock(); 2395 tcred = __task_cred(task); 2396 2397 /* inspired by ptrace_attach() */ 2398 DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n", 2399 uid, 2400 gid, 2401 tcred->euid, 2402 tcred->suid, 2403 tcred->uid, 2404 tcred->egid, 2405 tcred->sgid)); 2406 2407 ret = ((uid != tcred->euid) 2408 || (uid != tcred->suid) 2409 || (uid != tcred->uid) 2410 || (gid != tcred->egid) 2411 || 
(gid != tcred->sgid) 2412 || (gid != tcred->gid)) && !capable(CAP_SYS_PTRACE); 2413 2414 rcu_read_unlock(); 2415 return ret; 2416} 2417 2418static int 2419pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx) 2420{ 2421 int ctx_flags; 2422 2423 /* valid signal */ 2424 2425 ctx_flags = pfx->ctx_flags; 2426 2427 if (ctx_flags & PFM_FL_SYSTEM_WIDE) { 2428 2429 /* 2430 * cannot block in this mode 2431 */ 2432 if (ctx_flags & PFM_FL_NOTIFY_BLOCK) { 2433 DPRINT(("cannot use blocking mode when in system wide monitoring\n")); 2434 return -EINVAL; 2435 } 2436 } else { 2437 } 2438 /* probably more to add here */ 2439 2440 return 0; 2441} 2442 2443static int 2444pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned int ctx_flags, 2445 unsigned int cpu, pfarg_context_t *arg) 2446{ 2447 pfm_buffer_fmt_t *fmt = NULL; 2448 unsigned long size = 0UL; 2449 void *uaddr = NULL; 2450 void *fmt_arg = NULL; 2451 int ret = 0; 2452#define PFM_CTXARG_BUF_ARG(a) (pfm_buffer_fmt_t *)(a+1) 2453 2454 /* invoke and lock buffer format, if found */ 2455 fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id); 2456 if (fmt == NULL) { 2457 DPRINT(("[%d] cannot find buffer format\n", task_pid_nr(task))); 2458 return -EINVAL; 2459 } 2460 2461 /* 2462 * buffer argument MUST be contiguous to pfarg_context_t 2463 */ 2464 if (fmt->fmt_arg_size) fmt_arg = PFM_CTXARG_BUF_ARG(arg); 2465 2466 ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg); 2467 2468 DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task_pid_nr(task), ctx_flags, cpu, fmt_arg, ret)); 2469 2470 if (ret) goto error; 2471 2472 /* link buffer format and context */ 2473 ctx->ctx_buf_fmt = fmt; 2474 ctx->ctx_fl_is_sampling = 1; /* assume record() is defined */ 2475 2476 /* 2477 * check if buffer format wants to use perfmon buffer allocation/mapping service 2478 */ 2479 ret = pfm_buf_fmt_getsize(fmt, task, ctx_flags, cpu, fmt_arg, &size); 2480 if (ret) goto error; 2481 2482 if (size) { 2483 /* 2484 * buffer is always remapped into the caller's address space 2485 */ 2486 ret = pfm_smpl_buffer_alloc(current, filp, ctx, size, &uaddr); 2487 if (ret) goto error; 2488 2489 /* keep track of user address of buffer */ 2490 arg->ctx_smpl_vaddr = uaddr; 2491 } 2492 ret = pfm_buf_fmt_init(fmt, task, ctx->ctx_smpl_hdr, ctx_flags, cpu, fmt_arg); 2493 2494error: 2495 return ret; 2496} 2497 2498static void 2499pfm_reset_pmu_state(pfm_context_t *ctx) 2500{ 2501 int i; 2502 2503 /* 2504 * install reset values for PMC. 2505 */ 2506 for (i=1; PMC_IS_LAST(i) == 0; i++) { 2507 if (PMC_IS_IMPL(i) == 0) continue; 2508 ctx->ctx_pmcs[i] = PMC_DFL_VAL(i); 2509 DPRINT(("pmc[%d]=0x%lx\n", i, ctx->ctx_pmcs[i])); 2510 } 2511 /* 2512 * PMD registers are set to 0UL when the context in memset() 2513 */ 2514 2515 /* 2516 * On context switched restore, we must restore ALL pmc and ALL pmd even 2517 * when they are not actively used by the task. In UP, the incoming process 2518 * may otherwise pick up left over PMC, PMD state from the previous process. 2519 * As opposed to PMD, stale PMC can cause harm to the incoming 2520 * process because they may change what is being measured. 2521 * Therefore, we must systematically reinstall the entire 2522 * PMC state. In SMP, the same thing is possible on the 2523 * same CPU but also on between 2 CPUs. 2524 * 2525 * The problem with PMD is information leaking especially 2526 * to user level when psr.sp=0 2527 * 2528 * There is unfortunately no easy way to avoid this problem 2529 * on either UP or SMP. 
This definitively slows down the 2530 * pfm_load_regs() function. 2531 */ 2532 2533 /* 2534 * bitmask of all PMCs accessible to this context 2535 * 2536 * PMC0 is treated differently. 2537 */ 2538 ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1; 2539 2540 /* 2541 * bitmask of all PMDs that are accessible to this context 2542 */ 2543 ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0]; 2544 2545 DPRINT(("<%d> all_pmcs=0x%lx all_pmds=0x%lx\n", ctx->ctx_fd, ctx->ctx_all_pmcs[0],ctx->ctx_all_pmds[0])); 2546 2547 /* 2548 * useful in case of re-enable after disable 2549 */ 2550 ctx->ctx_used_ibrs[0] = 0UL; 2551 ctx->ctx_used_dbrs[0] = 0UL; 2552} 2553 2554static int 2555pfm_ctx_getsize(void *arg, size_t *sz) 2556{ 2557 pfarg_context_t *req = (pfarg_context_t *)arg; 2558 pfm_buffer_fmt_t *fmt; 2559 2560 *sz = 0; 2561 2562 if (!pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) return 0; 2563 2564 fmt = pfm_find_buffer_fmt(req->ctx_smpl_buf_id); 2565 if (fmt == NULL) { 2566 DPRINT(("cannot find buffer format\n")); 2567 return -EINVAL; 2568 } 2569 /* get just enough to copy in user parameters */ 2570 *sz = fmt->fmt_arg_size; 2571 DPRINT(("arg_size=%lu\n", *sz)); 2572 2573 return 0; 2574} 2575 2576 2577 2578/* 2579 * cannot attach if : 2580 * - kernel task 2581 * - task not owned by caller 2582 * - task incompatible with context mode 2583 */ 2584static int 2585pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task) 2586{ 2587 /* 2588 * no kernel task or task not owner by caller 2589 */ 2590 if (task->mm == NULL) { 2591 DPRINT(("task [%d] has not memory context (kernel thread)\n", task_pid_nr(task))); 2592 return -EPERM; 2593 } 2594 if (pfm_bad_permissions(task)) { 2595 DPRINT(("no permission to attach to [%d]\n", task_pid_nr(task))); 2596 return -EPERM; 2597 } 2598 /* 2599 * cannot block in self-monitoring mode 2600 */ 2601 if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) { 2602 DPRINT(("cannot load a blocking context on self for [%d]\n", task_pid_nr(task))); 2603 return -EINVAL; 2604 } 2605 2606 if (task->exit_state == EXIT_ZOMBIE) { 2607 DPRINT(("cannot attach to zombie task [%d]\n", task_pid_nr(task))); 2608 return -EBUSY; 2609 } 2610 2611 /* 2612 * always ok for self 2613 */ 2614 if (task == current) return 0; 2615 2616 if (!task_is_stopped_or_traced(task)) { 2617 DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task_pid_nr(task), task->state)); 2618 return -EBUSY; 2619 } 2620 /* 2621 * make sure the task is off any CPU 2622 */ 2623 wait_task_inactive(task, 0); 2624 2625 /* more to come... 
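 *
 * Illustrative sketch of what a monitoring tool is expected to do before
 * it can attach a context to another task (an assumption based on the
 * checks above, not code taken from this file):
 *
 *	ptrace(PTRACE_ATTACH, pid, NULL, NULL);    stop the target
 *	waitpid(pid, &status, 0);                  wait until it is traced
 *	load.load_pid = pid;                       pfarg_load_t argument
 *	perfmonctl(ctx_fd, PFM_LOAD_CONTEXT, &load, 1);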
*/ 2626 2627 return 0; 2628} 2629 2630static int 2631pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task) 2632{ 2633 struct task_struct *p = current; 2634 int ret; 2635 2636 /* XXX: need to add more checks here */ 2637 if (pid < 2) return -EPERM; 2638 2639 if (pid != task_pid_vnr(current)) { 2640 2641 read_lock(&tasklist_lock); 2642 2643 p = find_task_by_vpid(pid); 2644 2645 /* make sure task cannot go away while we operate on it */ 2646 if (p) get_task_struct(p); 2647 2648 read_unlock(&tasklist_lock); 2649 2650 if (p == NULL) return -ESRCH; 2651 } 2652 2653 ret = pfm_task_incompatible(ctx, p); 2654 if (ret == 0) { 2655 *task = p; 2656 } else if (p != current) { 2657 pfm_put_task(p); 2658 } 2659 return ret; 2660} 2661 2662 2663 2664static int 2665pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 2666{ 2667 pfarg_context_t *req = (pfarg_context_t *)arg; 2668 struct file *filp; 2669 struct path path; 2670 int ctx_flags; 2671 int fd; 2672 int ret; 2673 2674 /* let's check the arguments first */ 2675 ret = pfarg_is_sane(current, req); 2676 if (ret < 0) 2677 return ret; 2678 2679 ctx_flags = req->ctx_flags; 2680 2681 ret = -ENOMEM; 2682 2683 fd = get_unused_fd(); 2684 if (fd < 0) 2685 return fd; 2686 2687 ctx = pfm_context_alloc(ctx_flags); 2688 if (!ctx) 2689 goto error; 2690 2691 filp = pfm_alloc_file(ctx); 2692 if (IS_ERR(filp)) { 2693 ret = PTR_ERR(filp); 2694 goto error_file; 2695 } 2696 2697 req->ctx_fd = ctx->ctx_fd = fd; 2698 2699 /* 2700 * does the user want to sample? 2701 */ 2702 if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) { 2703 ret = pfm_setup_buffer_fmt(current, filp, ctx, ctx_flags, 0, req); 2704 if (ret) 2705 goto buffer_error; 2706 } 2707 2708 DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d\n", 2709 ctx, 2710 ctx_flags, 2711 ctx->ctx_fl_system, 2712 ctx->ctx_fl_block, 2713 ctx->ctx_fl_excl_idle, 2714 ctx->ctx_fl_no_msg, 2715 ctx->ctx_fd)); 2716 2717 /* 2718 * initialize soft PMU state 2719 */ 2720 pfm_reset_pmu_state(ctx); 2721 2722 fd_install(fd, filp); 2723 2724 return 0; 2725 2726buffer_error: 2727 path = filp->f_path; 2728 put_filp(filp); 2729 path_put(&path); 2730 2731 if (ctx->ctx_buf_fmt) { 2732 pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs); 2733 } 2734error_file: 2735 pfm_context_free(ctx); 2736 2737error: 2738 put_unused_fd(fd); 2739 return ret; 2740} 2741 2742static inline unsigned long 2743pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset) 2744{ 2745 unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset; 2746 unsigned long new_seed, old_seed = reg->seed, mask = reg->mask; 2747 extern unsigned long carta_random32 (unsigned long seed); 2748 2749 if (reg->flags & PFM_REGFL_RANDOM) { 2750 new_seed = carta_random32(old_seed); 2751 val -= (old_seed & mask); /* counter values are negative numbers! 
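 *
 * (a counter is normally programmed with the two's complement of the
 *  desired sampling period: to take a sample every N events the reset
 *  value is -N, so the PMD overflows after N increments. Subtracting a
 *  masked random offset r therefore stretches the period to N + r and
 *  de-correlates sampling from the monitored code. For example,
 *  long_reset = -100000 with mask = 0xff yields a period between
 *  100000 and 100255 events.)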
*/ 2752 if ((mask >> 32) != 0) 2753 /* construct a full 64-bit random value: */ 2754 new_seed |= carta_random32(old_seed >> 32) << 32; 2755 reg->seed = new_seed; 2756 } 2757 reg->lval = val; 2758 return val; 2759} 2760 2761static void 2762pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) 2763{ 2764 unsigned long mask = ovfl_regs[0]; 2765 unsigned long reset_others = 0UL; 2766 unsigned long val; 2767 int i; 2768 2769 /* 2770 * now restore reset value on sampling overflowed counters 2771 */ 2772 mask >>= PMU_FIRST_COUNTER; 2773 for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { 2774 2775 if ((mask & 0x1UL) == 0UL) continue; 2776 2777 ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); 2778 reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; 2779 2780 DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); 2781 } 2782 2783 /* 2784 * Now take care of resetting the other registers 2785 */ 2786 for(i = 0; reset_others; i++, reset_others >>= 1) { 2787 2788 if ((reset_others & 0x1) == 0) continue; 2789 2790 ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); 2791 2792 DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", 2793 is_long_reset ? "long" : "short", i, val)); 2794 } 2795} 2796 2797static void 2798pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) 2799{ 2800 unsigned long mask = ovfl_regs[0]; 2801 unsigned long reset_others = 0UL; 2802 unsigned long val; 2803 int i; 2804 2805 DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset)); 2806 2807 if (ctx->ctx_state == PFM_CTX_MASKED) { 2808 pfm_reset_regs_masked(ctx, ovfl_regs, is_long_reset); 2809 return; 2810 } 2811 2812 /* 2813 * now restore reset value on sampling overflowed counters 2814 */ 2815 mask >>= PMU_FIRST_COUNTER; 2816 for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { 2817 2818 if ((mask & 0x1UL) == 0UL) continue; 2819 2820 val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); 2821 reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; 2822 2823 DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); 2824 2825 pfm_write_soft_counter(ctx, i, val); 2826 } 2827 2828 /* 2829 * Now take care of resetting the other registers 2830 */ 2831 for(i = 0; reset_others; i++, reset_others >>= 1) { 2832 2833 if ((reset_others & 0x1) == 0) continue; 2834 2835 val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); 2836 2837 if (PMD_IS_COUNTING(i)) { 2838 pfm_write_soft_counter(ctx, i, val); 2839 } else { 2840 ia64_set_pmd(i, val); 2841 } 2842 DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", 2843 is_long_reset ? "long" : "short", i, val)); 2844 } 2845 ia64_srlz_d(); 2846} 2847 2848static int 2849pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 2850{ 2851 struct task_struct *task; 2852 pfarg_reg_t *req = (pfarg_reg_t *)arg; 2853 unsigned long value, pmc_pm; 2854 unsigned long smpl_pmds, reset_pmds, impl_pmds; 2855 unsigned int cnum, reg_flags, flags, pmc_type; 2856 int i, can_access_pmu = 0, is_loaded, is_system, expert_mode; 2857 int is_monitor, is_counting, state; 2858 int ret = -EINVAL; 2859 pfm_reg_check_t wr_func; 2860#define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z)) 2861 2862 state = ctx->ctx_state; 2863 is_loaded = state == PFM_CTX_LOADED ? 
1 : 0; 2864 is_system = ctx->ctx_fl_system; 2865 task = ctx->ctx_task; 2866 impl_pmds = pmu_conf->impl_pmds[0]; 2867 2868 if (state == PFM_CTX_ZOMBIE) return -EINVAL; 2869 2870 if (is_loaded) { 2871 /* 2872 * In system wide and when the context is loaded, access can only happen 2873 * when the caller is running on the CPU being monitored by the session. 2874 * It does not have to be the owner (ctx_task) of the context per se. 2875 */ 2876 if (is_system && ctx->ctx_cpu != smp_processor_id()) { 2877 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 2878 return -EBUSY; 2879 } 2880 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; 2881 } 2882 expert_mode = pfm_sysctl.expert_mode; 2883 2884 for (i = 0; i < count; i++, req++) { 2885 2886 cnum = req->reg_num; 2887 reg_flags = req->reg_flags; 2888 value = req->reg_value; 2889 smpl_pmds = req->reg_smpl_pmds[0]; 2890 reset_pmds = req->reg_reset_pmds[0]; 2891 flags = 0; 2892 2893 2894 if (cnum >= PMU_MAX_PMCS) { 2895 DPRINT(("pmc%u is invalid\n", cnum)); 2896 goto error; 2897 } 2898 2899 pmc_type = pmu_conf->pmc_desc[cnum].type; 2900 pmc_pm = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1; 2901 is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0; 2902 is_monitor = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0; 2903 2904 /* 2905 * we reject all non implemented PMC as well 2906 * as attempts to modify PMC[0-3] which are used 2907 * as status registers by the PMU 2908 */ 2909 if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) { 2910 DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type)); 2911 goto error; 2912 } 2913 wr_func = pmu_conf->pmc_desc[cnum].write_check; 2914 /* 2915 * If the PMC is a monitor, then if the value is not the default: 2916 * - system-wide session: PMCx.pm=1 (privileged monitor) 2917 * - per-task : PMCx.pm=0 (user monitor) 2918 */ 2919 if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) { 2920 DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n", 2921 cnum, 2922 pmc_pm, 2923 is_system)); 2924 goto error; 2925 } 2926 2927 if (is_counting) { 2928 /* 2929 * enforce generation of overflow interrupt. Necessary on all 2930 * CPUs. 
2931 */ 2932 value |= 1 << PMU_PMC_OI; 2933 2934 if (reg_flags & PFM_REGFL_OVFL_NOTIFY) { 2935 flags |= PFM_REGFL_OVFL_NOTIFY; 2936 } 2937 2938 if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM; 2939 2940 /* verify validity of smpl_pmds */ 2941 if ((smpl_pmds & impl_pmds) != smpl_pmds) { 2942 DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum)); 2943 goto error; 2944 } 2945 2946 /* verify validity of reset_pmds */ 2947 if ((reset_pmds & impl_pmds) != reset_pmds) { 2948 DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum)); 2949 goto error; 2950 } 2951 } else { 2952 if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) { 2953 DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum)); 2954 goto error; 2955 } 2956 /* eventid on non-counting monitors are ignored */ 2957 } 2958 2959 /* 2960 * execute write checker, if any 2961 */ 2962 if (likely(expert_mode == 0 && wr_func)) { 2963 ret = (*wr_func)(task, ctx, cnum, &value, regs); 2964 if (ret) goto error; 2965 ret = -EINVAL; 2966 } 2967 2968 /* 2969 * no error on this register 2970 */ 2971 PFM_REG_RETFLAG_SET(req->reg_flags, 0); 2972 2973 /* 2974 * Now we commit the changes to the software state 2975 */ 2976 2977 /* 2978 * update overflow information 2979 */ 2980 if (is_counting) { 2981 /* 2982 * full flag update each time a register is programmed 2983 */ 2984 ctx->ctx_pmds[cnum].flags = flags; 2985 2986 ctx->ctx_pmds[cnum].reset_pmds[0] = reset_pmds; 2987 ctx->ctx_pmds[cnum].smpl_pmds[0] = smpl_pmds; 2988 ctx->ctx_pmds[cnum].eventid = req->reg_smpl_eventid; 2989 2990 /* 2991 * Mark all PMDS to be accessed as used. 2992 * 2993 * We do not keep track of PMC because we have to 2994 * systematically restore ALL of them. 2995 * 2996 * We do not update the used_monitors mask, because 2997 * if we have not programmed them, then will be in 2998 * a quiescent state, therefore we will not need to 2999 * mask/restore then when context is MASKED. 3000 */ 3001 CTX_USED_PMD(ctx, reset_pmds); 3002 CTX_USED_PMD(ctx, smpl_pmds); 3003 /* 3004 * make sure we do not try to reset on 3005 * restart because we have established new values 3006 */ 3007 if (state == PFM_CTX_MASKED) ctx->ctx_ovfl_regs[0] &= ~1UL << cnum; 3008 } 3009 /* 3010 * Needed in case the user does not initialize the equivalent 3011 * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no 3012 * possible leak here. 3013 */ 3014 CTX_USED_PMD(ctx, pmu_conf->pmc_desc[cnum].dep_pmd[0]); 3015 3016 /* 3017 * keep track of the monitor PMC that we are using. 3018 * we save the value of the pmc in ctx_pmcs[] and if 3019 * the monitoring is not stopped for the context we also 3020 * place it in the saved state area so that it will be 3021 * picked up later by the context switch code. 3022 * 3023 * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs(). 3024 * 3025 * The value in th_pmcs[] may be modified on overflow, i.e., when 3026 * monitoring needs to be stopped. 
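 *
 * (in other words: ctx_pmcs[] always holds the canonical value the user
 *  programmed, while th_pmcs[] holds whatever must actually be reloaded
 *  into the hardware at the next context switch, possibly a masked
 *  variant of it.)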
3027 */ 3028 if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum); 3029 3030 /* 3031 * update context state 3032 */ 3033 ctx->ctx_pmcs[cnum] = value; 3034 3035 if (is_loaded) { 3036 /* 3037 * write thread state 3038 */ 3039 if (is_system == 0) ctx->th_pmcs[cnum] = value; 3040 3041 /* 3042 * write hardware register if we can 3043 */ 3044 if (can_access_pmu) { 3045 ia64_set_pmc(cnum, value); 3046 } 3047#ifdef CONFIG_SMP 3048 else { 3049 /* 3050 * per-task SMP only here 3051 * 3052 * we are guaranteed that the task is not running on the other CPU, 3053 * we indicate that this PMD will need to be reloaded if the task 3054 * is rescheduled on the CPU it ran last on. 3055 */ 3056 ctx->ctx_reload_pmcs[0] |= 1UL << cnum; 3057 } 3058#endif 3059 } 3060 3061 DPRINT(("pmc[%u]=0x%lx ld=%d apmu=%d flags=0x%x all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n", 3062 cnum, 3063 value, 3064 is_loaded, 3065 can_access_pmu, 3066 flags, 3067 ctx->ctx_all_pmcs[0], 3068 ctx->ctx_used_pmds[0], 3069 ctx->ctx_pmds[cnum].eventid, 3070 smpl_pmds, 3071 reset_pmds, 3072 ctx->ctx_reload_pmcs[0], 3073 ctx->ctx_used_monitors[0], 3074 ctx->ctx_ovfl_regs[0])); 3075 } 3076 3077 /* 3078 * make sure the changes are visible 3079 */ 3080 if (can_access_pmu) ia64_srlz_d(); 3081 3082 return 0; 3083error: 3084 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 3085 return ret; 3086} 3087 3088static int 3089pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3090{ 3091 struct task_struct *task; 3092 pfarg_reg_t *req = (pfarg_reg_t *)arg; 3093 unsigned long value, hw_value, ovfl_mask; 3094 unsigned int cnum; 3095 int i, can_access_pmu = 0, state; 3096 int is_counting, is_loaded, is_system, expert_mode; 3097 int ret = -EINVAL; 3098 pfm_reg_check_t wr_func; 3099 3100 3101 state = ctx->ctx_state; 3102 is_loaded = state == PFM_CTX_LOADED ? 1 : 0; 3103 is_system = ctx->ctx_fl_system; 3104 ovfl_mask = pmu_conf->ovfl_val; 3105 task = ctx->ctx_task; 3106 3107 if (unlikely(state == PFM_CTX_ZOMBIE)) return -EINVAL; 3108 3109 /* 3110 * on both UP and SMP, we can only write to the PMC when the task is 3111 * the owner of the local PMU. 3112 */ 3113 if (likely(is_loaded)) { 3114 /* 3115 * In system wide and when the context is loaded, access can only happen 3116 * when the caller is running on the CPU being monitored by the session. 3117 * It does not have to be the owner (ctx_task) of the context per se. 3118 */ 3119 if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { 3120 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3121 return -EBUSY; 3122 } 3123 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 
1 : 0; 3124 } 3125 expert_mode = pfm_sysctl.expert_mode; 3126 3127 for (i = 0; i < count; i++, req++) { 3128 3129 cnum = req->reg_num; 3130 value = req->reg_value; 3131 3132 if (!PMD_IS_IMPL(cnum)) { 3133 DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum)); 3134 goto abort_mission; 3135 } 3136 is_counting = PMD_IS_COUNTING(cnum); 3137 wr_func = pmu_conf->pmd_desc[cnum].write_check; 3138 3139 /* 3140 * execute write checker, if any 3141 */ 3142 if (unlikely(expert_mode == 0 && wr_func)) { 3143 unsigned long v = value; 3144 3145 ret = (*wr_func)(task, ctx, cnum, &v, regs); 3146 if (ret) goto abort_mission; 3147 3148 value = v; 3149 ret = -EINVAL; 3150 } 3151 3152 /* 3153 * no error on this register 3154 */ 3155 PFM_REG_RETFLAG_SET(req->reg_flags, 0); 3156 3157 /* 3158 * now commit changes to software state 3159 */ 3160 hw_value = value; 3161 3162 /* 3163 * update virtualized (64bits) counter 3164 */ 3165 if (is_counting) { 3166 /* 3167 * write context state 3168 */ 3169 ctx->ctx_pmds[cnum].lval = value; 3170 3171 /* 3172 * when context is load we use the split value 3173 */ 3174 if (is_loaded) { 3175 hw_value = value & ovfl_mask; 3176 value = value & ~ovfl_mask; 3177 } 3178 } 3179 /* 3180 * update reset values (not just for counters) 3181 */ 3182 ctx->ctx_pmds[cnum].long_reset = req->reg_long_reset; 3183 ctx->ctx_pmds[cnum].short_reset = req->reg_short_reset; 3184 3185 /* 3186 * update randomization parameters (not just for counters) 3187 */ 3188 ctx->ctx_pmds[cnum].seed = req->reg_random_seed; 3189 ctx->ctx_pmds[cnum].mask = req->reg_random_mask; 3190 3191 /* 3192 * update context value 3193 */ 3194 ctx->ctx_pmds[cnum].val = value; 3195 3196 /* 3197 * Keep track of what we use 3198 * 3199 * We do not keep track of PMC because we have to 3200 * systematically restore ALL of them. 3201 */ 3202 CTX_USED_PMD(ctx, PMD_PMD_DEP(cnum)); 3203 3204 /* 3205 * mark this PMD register used as well 3206 */ 3207 CTX_USED_PMD(ctx, RDEP(cnum)); 3208 3209 /* 3210 * make sure we do not try to reset on 3211 * restart because we have established new values 3212 */ 3213 if (is_counting && state == PFM_CTX_MASKED) { 3214 ctx->ctx_ovfl_regs[0] &= ~1UL << cnum; 3215 } 3216 3217 if (is_loaded) { 3218 /* 3219 * write thread state 3220 */ 3221 if (is_system == 0) ctx->th_pmds[cnum] = hw_value; 3222 3223 /* 3224 * write hardware register if we can 3225 */ 3226 if (can_access_pmu) { 3227 ia64_set_pmd(cnum, hw_value); 3228 } else { 3229#ifdef CONFIG_SMP 3230 /* 3231 * we are guaranteed that the task is not running on the other CPU, 3232 * we indicate that this PMD will need to be reloaded if the task 3233 * is rescheduled on the CPU it ran last on. 3234 */ 3235 ctx->ctx_reload_pmds[0] |= 1UL << cnum; 3236#endif 3237 } 3238 } 3239 3240 DPRINT(("pmd[%u]=0x%lx ld=%d apmu=%d, hw_value=0x%lx ctx_pmd=0x%lx short_reset=0x%lx " 3241 "long_reset=0x%lx notify=%c seed=0x%lx mask=0x%lx used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n", 3242 cnum, 3243 value, 3244 is_loaded, 3245 can_access_pmu, 3246 hw_value, 3247 ctx->ctx_pmds[cnum].val, 3248 ctx->ctx_pmds[cnum].short_reset, 3249 ctx->ctx_pmds[cnum].long_reset, 3250 PMC_OVFL_NOTIFY(ctx, cnum) ? 
'Y':'N', 3251 ctx->ctx_pmds[cnum].seed, 3252 ctx->ctx_pmds[cnum].mask, 3253 ctx->ctx_used_pmds[0], 3254 ctx->ctx_pmds[cnum].reset_pmds[0], 3255 ctx->ctx_reload_pmds[0], 3256 ctx->ctx_all_pmds[0], 3257 ctx->ctx_ovfl_regs[0])); 3258 } 3259 3260 /* 3261 * make changes visible 3262 */ 3263 if (can_access_pmu) ia64_srlz_d(); 3264 3265 return 0; 3266 3267abort_mission: 3268 /* 3269 * for now, we have only one possibility for error 3270 */ 3271 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 3272 return ret; 3273} 3274 3275/* 3276 * By the way of PROTECT_CONTEXT(), interrupts are masked while we are in this function. 3277 * Therefore we know, we do not have to worry about the PMU overflow interrupt. If an 3278 * interrupt is delivered during the call, it will be kept pending until we leave, making 3279 * it appears as if it had been generated at the UNPROTECT_CONTEXT(). At least we are 3280 * guaranteed to return consistent data to the user, it may simply be old. It is not 3281 * trivial to treat the overflow while inside the call because you may end up in 3282 * some module sampling buffer code causing deadlocks. 3283 */ 3284static int 3285pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3286{ 3287 struct task_struct *task; 3288 unsigned long val = 0UL, lval, ovfl_mask, sval; 3289 pfarg_reg_t *req = (pfarg_reg_t *)arg; 3290 unsigned int cnum, reg_flags = 0; 3291 int i, can_access_pmu = 0, state; 3292 int is_loaded, is_system, is_counting, expert_mode; 3293 int ret = -EINVAL; 3294 pfm_reg_check_t rd_func; 3295 3296 /* 3297 * access is possible when loaded only for 3298 * self-monitoring tasks or in UP mode 3299 */ 3300 3301 state = ctx->ctx_state; 3302 is_loaded = state == PFM_CTX_LOADED ? 1 : 0; 3303 is_system = ctx->ctx_fl_system; 3304 ovfl_mask = pmu_conf->ovfl_val; 3305 task = ctx->ctx_task; 3306 3307 if (state == PFM_CTX_ZOMBIE) return -EINVAL; 3308 3309 if (likely(is_loaded)) { 3310 /* 3311 * In system wide and when the context is loaded, access can only happen 3312 * when the caller is running on the CPU being monitored by the session. 3313 * It does not have to be the owner (ctx_task) of the context per se. 3314 */ 3315 if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { 3316 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3317 return -EBUSY; 3318 } 3319 /* 3320 * this can be true when not self-monitoring only in UP 3321 */ 3322 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; 3323 3324 if (can_access_pmu) ia64_srlz_d(); 3325 } 3326 expert_mode = pfm_sysctl.expert_mode; 3327 3328 DPRINT(("ld=%d apmu=%d ctx_state=%d\n", 3329 is_loaded, 3330 can_access_pmu, 3331 state)); 3332 3333 /* 3334 * on both UP and SMP, we can only read the PMD from the hardware register when 3335 * the task is the owner of the local PMU. 3336 */ 3337 3338 for (i = 0; i < count; i++, req++) { 3339 3340 cnum = req->reg_num; 3341 reg_flags = req->reg_flags; 3342 3343 if (unlikely(!PMD_IS_IMPL(cnum))) goto error; 3344 /* 3345 * we can only read the register that we use. That includes 3346 * the one we explicitly initialize AND the one we want included 3347 * in the sampling buffer (smpl_regs). 
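 *
 * Illustrative user-level read-back (a sketch assuming the usual
 * perfmonctl(2) calling convention; PMD4 is a hypothetical choice and
 * must have been declared as used beforehand):
 *
 *	pfarg_reg_t rd;
 *	memset(&rd, 0, sizeof(rd));
 *	rd.reg_num = 4;
 *	perfmonctl(ctx_fd, PFM_READ_PMDS, &rd, 1);
 *	(rd.reg_value then holds the full 64-bit virtualized count)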
3348 * 3349 * Having this restriction allows optimization in the ctxsw routine 3350 * without compromising security (leaks) 3351 */ 3352 if (unlikely(!CTX_IS_USED_PMD(ctx, cnum))) goto error; 3353 3354 sval = ctx->ctx_pmds[cnum].val; 3355 lval = ctx->ctx_pmds[cnum].lval; 3356 is_counting = PMD_IS_COUNTING(cnum); 3357 3358 /* 3359 * If the task is not the current one, then we check if the 3360 * PMU state is still in the local live register due to lazy ctxsw. 3361 * If true, then we read directly from the registers. 3362 */ 3363 if (can_access_pmu){ 3364 val = ia64_get_pmd(cnum); 3365 } else { 3366 /* 3367 * context has been saved 3368 * if context is zombie, then task does not exist anymore. 3369 * In this case, we use the full value saved in the context (pfm_flush_regs()). 3370 */ 3371 val = is_loaded ? ctx->th_pmds[cnum] : 0UL; 3372 } 3373 rd_func = pmu_conf->pmd_desc[cnum].read_check; 3374 3375 if (is_counting) { 3376 /* 3377 * XXX: need to check for overflow when loaded 3378 */ 3379 val &= ovfl_mask; 3380 val += sval; 3381 } 3382 3383 /* 3384 * execute read checker, if any 3385 */ 3386 if (unlikely(expert_mode == 0 && rd_func)) { 3387 unsigned long v = val; 3388 ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs); 3389 if (ret) goto error; 3390 val = v; 3391 ret = -EINVAL; 3392 } 3393 3394 PFM_REG_RETFLAG_SET(reg_flags, 0); 3395 3396 DPRINT(("pmd[%u]=0x%lx\n", cnum, val)); 3397 3398 /* 3399 * update register return value, abort all if problem during copy. 3400 * we only modify the reg_flags field. no check mode is fine because 3401 * access has been verified upfront in sys_perfmonctl(). 3402 */ 3403 req->reg_value = val; 3404 req->reg_flags = reg_flags; 3405 req->reg_last_reset_val = lval; 3406 } 3407 3408 return 0; 3409 3410error: 3411 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 3412 return ret; 3413} 3414 3415int 3416pfm_mod_write_pmcs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3417{ 3418 pfm_context_t *ctx; 3419 3420 if (req == NULL) return -EINVAL; 3421 3422 ctx = GET_PMU_CTX(); 3423 3424 if (ctx == NULL) return -EINVAL; 3425 3426 /* 3427 * for now limit to current task, which is enough when calling 3428 * from overflow handler 3429 */ 3430 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3431 3432 return pfm_write_pmcs(ctx, req, nreq, regs); 3433} 3434EXPORT_SYMBOL(pfm_mod_write_pmcs); 3435 3436int 3437pfm_mod_read_pmds(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3438{ 3439 pfm_context_t *ctx; 3440 3441 if (req == NULL) return -EINVAL; 3442 3443 ctx = GET_PMU_CTX(); 3444 3445 if (ctx == NULL) return -EINVAL; 3446 3447 /* 3448 * for now limit to current task, which is enough when calling 3449 * from overflow handler 3450 */ 3451 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3452 3453 return pfm_read_pmds(ctx, req, nreq, regs); 3454} 3455EXPORT_SYMBOL(pfm_mod_read_pmds); 3456 3457/* 3458 * Only call this function when a process it trying to 3459 * write the debug registers (reading is always allowed) 3460 */ 3461int 3462pfm_use_debug_registers(struct task_struct *task) 3463{ 3464 pfm_context_t *ctx = task->thread.pfm_context; 3465 unsigned long flags; 3466 int ret = 0; 3467 3468 if (pmu_conf->use_rr_dbregs == 0) return 0; 3469 3470 DPRINT(("called for [%d]\n", task_pid_nr(task))); 3471 3472 /* 3473 * do it only once 3474 */ 3475 if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0; 3476 3477 /* 3478 * Even on SMP, we do not need to use an atomic here because 3479 * 
the only way in is via ptrace() and this is possible only when the 3480 * process is stopped. Even in the case where the ctxsw out is not totally 3481 * completed by the time we come here, there is no way the 'stopped' process 3482 * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine. 3483 * So this is always safe. 3484 */ 3485 if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1; 3486 3487 LOCK_PFS(flags); 3488 3489 /* 3490 * We cannot allow setting breakpoints when system wide monitoring 3491 * sessions are using the debug registers. 3492 */ 3493 if (pfm_sessions.pfs_sys_use_dbregs> 0) 3494 ret = -1; 3495 else 3496 pfm_sessions.pfs_ptrace_use_dbregs++; 3497 3498 DPRINT(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n", 3499 pfm_sessions.pfs_ptrace_use_dbregs, 3500 pfm_sessions.pfs_sys_use_dbregs, 3501 task_pid_nr(task), ret)); 3502 3503 UNLOCK_PFS(flags); 3504 3505 return ret; 3506} 3507 3508/* 3509 * This function is called for every task that exits with the 3510 * IA64_THREAD_DBG_VALID set. This indicates a task which was 3511 * able to use the debug registers for debugging purposes via 3512 * ptrace(). Therefore we know it was not using them for 3513 * performance monitoring, so we only decrement the number 3514 * of "ptraced" debug register users to keep the count up to date 3515 */ 3516int 3517pfm_release_debug_registers(struct task_struct *task) 3518{ 3519 unsigned long flags; 3520 int ret; 3521 3522 if (pmu_conf->use_rr_dbregs == 0) return 0; 3523 3524 LOCK_PFS(flags); 3525 if (pfm_sessions.pfs_ptrace_use_dbregs == 0) { 3526 printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task_pid_nr(task)); 3527 ret = -1; 3528 } else { 3529 pfm_sessions.pfs_ptrace_use_dbregs--; 3530 ret = 0; 3531 } 3532 UNLOCK_PFS(flags); 3533 3534 return ret; 3535} 3536 3537static int 3538pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3539{ 3540 struct task_struct *task; 3541 pfm_buffer_fmt_t *fmt; 3542 pfm_ovfl_ctrl_t rst_ctrl; 3543 int state, is_system; 3544 int ret = 0; 3545 3546 state = ctx->ctx_state; 3547 fmt = ctx->ctx_buf_fmt; 3548 is_system = ctx->ctx_fl_system; 3549 task = PFM_CTX_TASK(ctx); 3550 3551 switch(state) { 3552 case PFM_CTX_MASKED: 3553 break; 3554 case PFM_CTX_LOADED: 3555 if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break; 3556 /* fall through */ 3557 case PFM_CTX_UNLOADED: 3558 case PFM_CTX_ZOMBIE: 3559 DPRINT(("invalid state=%d\n", state)); 3560 return -EBUSY; 3561 default: 3562 DPRINT(("state=%d, cannot operate (no active_restart handler)\n", state)); 3563 return -EINVAL; 3564 } 3565 3566 /* 3567 * In system wide and when the context is loaded, access can only happen 3568 * when the caller is running on the CPU being monitored by the session. 3569 * It does not have to be the owner (ctx_task) of the context per se. 
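 *
 * A monitoring tool typically reaches this point after consuming an
 * overflow notification (illustrative sketch assuming the perfmon2
 * message protocol on the context fd, not code from this file):
 *
 *	pfm_msg_t msg;
 *	read(ctx_fd, &msg, sizeof(msg));    (overflow or buffer-full
 *	                                     notification message)
 *	... process the sampling buffer ...
 *	perfmonctl(ctx_fd, PFM_RESTART, NULL, 0);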
3570 */ 3571 if (is_system && ctx->ctx_cpu != smp_processor_id()) { 3572 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3573 return -EBUSY; 3574 } 3575 3576 /* sanity check */ 3577 if (unlikely(task == NULL)) { 3578 printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", task_pid_nr(current)); 3579 return -EINVAL; 3580 } 3581 3582 if (task == current || is_system) { 3583 3584 fmt = ctx->ctx_buf_fmt; 3585 3586 DPRINT(("restarting self %d ovfl=0x%lx\n", 3587 task_pid_nr(task), 3588 ctx->ctx_ovfl_regs[0])); 3589 3590 if (CTX_HAS_SMPL(ctx)) { 3591 3592 prefetch(ctx->ctx_smpl_hdr); 3593 3594 rst_ctrl.bits.mask_monitoring = 0; 3595 rst_ctrl.bits.reset_ovfl_pmds = 0; 3596 3597 if (state == PFM_CTX_LOADED) 3598 ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 3599 else 3600 ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 3601 } else { 3602 rst_ctrl.bits.mask_monitoring = 0; 3603 rst_ctrl.bits.reset_ovfl_pmds = 1; 3604 } 3605 3606 if (ret == 0) { 3607 if (rst_ctrl.bits.reset_ovfl_pmds) 3608 pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET); 3609 3610 if (rst_ctrl.bits.mask_monitoring == 0) { 3611 DPRINT(("resuming monitoring for [%d]\n", task_pid_nr(task))); 3612 3613 if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task); 3614 } else { 3615 DPRINT(("keeping monitoring stopped for [%d]\n", task_pid_nr(task))); 3616 3617 // cannot use pfm_stop_monitoring(task, regs); 3618 } 3619 } 3620 /* 3621 * clear overflowed PMD mask to remove any stale information 3622 */ 3623 ctx->ctx_ovfl_regs[0] = 0UL; 3624 3625 /* 3626 * back to LOADED state 3627 */ 3628 ctx->ctx_state = PFM_CTX_LOADED; 3629 3630 /* 3631 * XXX: not really useful for self monitoring 3632 */ 3633 ctx->ctx_fl_can_restart = 0; 3634 3635 return 0; 3636 } 3637 3638 /* 3639 * restart another task 3640 */ 3641 3642 /* 3643 * When PFM_CTX_MASKED, we cannot issue a restart before the previous 3644 * one is seen by the task. 3645 */ 3646 if (state == PFM_CTX_MASKED) { 3647 if (ctx->ctx_fl_can_restart == 0) return -EINVAL; 3648 /* 3649 * will prevent subsequent restart before this one is 3650 * seen by other task 3651 */ 3652 ctx->ctx_fl_can_restart = 0; 3653 } 3654 3655 /* 3656 * if blocking, then post the semaphore is PFM_CTX_MASKED, i.e. 3657 * the task is blocked or on its way to block. That's the normal 3658 * restart path. If the monitoring is not masked, then the task 3659 * can be actively monitoring and we cannot directly intervene. 3660 * Therefore we use the trap mechanism to catch the task and 3661 * force it to reset the buffer/reset PMDs. 3662 * 3663 * if non-blocking, then we ensure that the task will go into 3664 * pfm_handle_work() before returning to user mode. 3665 * 3666 * We cannot explicitly reset another task, it MUST always 3667 * be done by the task itself. This works for system wide because 3668 * the tool that is controlling the session is logically doing 3669 * "self-monitoring". 
3670 */ 3671 if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) { 3672 DPRINT(("unblocking [%d]\n", task_pid_nr(task))); 3673 complete(&ctx->ctx_restart_done); 3674 } else { 3675 DPRINT(("[%d] armed exit trap\n", task_pid_nr(task))); 3676 3677 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET; 3678 3679 PFM_SET_WORK_PENDING(task, 1); 3680 3681 set_notify_resume(task); 3682 3683 /* 3684 * XXX: send reschedule if task runs on another CPU 3685 */ 3686 } 3687 return 0; 3688} 3689 3690static int 3691pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3692{ 3693 unsigned int m = *(unsigned int *)arg; 3694 3695 pfm_sysctl.debug = m == 0 ? 0 : 1; 3696 3697 printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off"); 3698 3699 if (m == 0) { 3700 memset(pfm_stats, 0, sizeof(pfm_stats)); 3701 for(m=0; m < NR_CPUS; m++) pfm_stats[m].pfm_ovfl_intr_cycles_min = ~0UL; 3702 } 3703 return 0; 3704} 3705 3706/* 3707 * arg can be NULL and count can be zero for this function 3708 */ 3709static int 3710pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3711{ 3712 struct thread_struct *thread = NULL; 3713 struct task_struct *task; 3714 pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg; 3715 unsigned long flags; 3716 dbreg_t dbreg; 3717 unsigned int rnum; 3718 int first_time; 3719 int ret = 0, state; 3720 int i, can_access_pmu = 0; 3721 int is_system, is_loaded; 3722 3723 if (pmu_conf->use_rr_dbregs == 0) return -EINVAL; 3724 3725 state = ctx->ctx_state; 3726 is_loaded = state == PFM_CTX_LOADED ? 1 : 0; 3727 is_system = ctx->ctx_fl_system; 3728 task = ctx->ctx_task; 3729 3730 if (state == PFM_CTX_ZOMBIE) return -EINVAL; 3731 3732 /* 3733 * on both UP and SMP, we can only write to the PMC when the task is 3734 * the owner of the local PMU. 3735 */ 3736 if (is_loaded) { 3737 thread = &task->thread; 3738 /* 3739 * In system wide and when the context is loaded, access can only happen 3740 * when the caller is running on the CPU being monitored by the session. 3741 * It does not have to be the owner (ctx_task) of the context per se. 3742 */ 3743 if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { 3744 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3745 return -EBUSY; 3746 } 3747 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; 3748 } 3749 3750 /* 3751 * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w 3752 * ensuring that no real breakpoint can be installed via this call. 3753 * 3754 * IMPORTANT: regs can be NULL in this function 3755 */ 3756 3757 first_time = ctx->ctx_fl_using_dbreg == 0; 3758 3759 /* 3760 * don't bother if we are loaded and task is being debugged 3761 */ 3762 if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) { 3763 DPRINT(("debug registers already in use for [%d]\n", task_pid_nr(task))); 3764 return -EBUSY; 3765 } 3766 3767 /* 3768 * check for debug registers in system wide mode 3769 * 3770 * If though a check is done in pfm_context_load(), 3771 * we must repeat it here, in case the registers are 3772 * written after the context is loaded 3773 */ 3774 if (is_loaded) { 3775 LOCK_PFS(flags); 3776 3777 if (first_time && is_system) { 3778 if (pfm_sessions.pfs_ptrace_use_dbregs) 3779 ret = -EBUSY; 3780 else 3781 pfm_sessions.pfs_sys_use_dbregs++; 3782 } 3783 UNLOCK_PFS(flags); 3784 } 3785 3786 if (ret != 0) return ret; 3787 3788 /* 3789 * mark ourself as user of the debug registers for 3790 * perfmon purposes. 
3791 */ 3792 ctx->ctx_fl_using_dbreg = 1; 3793 3794 /* 3795 * clear hardware registers to make sure we don't 3796 * pick up stale state. 3797 * 3798 * for a system wide session, we do not use 3799 * thread.dbr, thread.ibr because this process 3800 * never leaves the current CPU and the state 3801 * is shared by all processes running on it 3802 */ 3803 if (first_time && can_access_pmu) { 3804 DPRINT(("[%d] clearing ibrs, dbrs\n", task_pid_nr(task))); 3805 for (i=0; i < pmu_conf->num_ibrs; i++) { 3806 ia64_set_ibr(i, 0UL); 3807 ia64_dv_serialize_instruction(); 3808 } 3809 ia64_srlz_i(); 3810 for (i=0; i < pmu_conf->num_dbrs; i++) { 3811 ia64_set_dbr(i, 0UL); 3812 ia64_dv_serialize_data(); 3813 } 3814 ia64_srlz_d(); 3815 } 3816 3817 /* 3818 * Now install the values into the registers 3819 */ 3820 for (i = 0; i < count; i++, req++) { 3821 3822 rnum = req->dbreg_num; 3823 dbreg.val = req->dbreg_value; 3824 3825 ret = -EINVAL; 3826 3827 if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) { 3828 DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n", 3829 rnum, dbreg.val, mode, i, count)); 3830 3831 goto abort_mission; 3832 } 3833 3834 /* 3835 * make sure we do not install enabled breakpoint 3836 */ 3837 if (rnum & 0x1) { 3838 if (mode == PFM_CODE_RR) 3839 dbreg.ibr.ibr_x = 0; 3840 else 3841 dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0; 3842 } 3843 3844 PFM_REG_RETFLAG_SET(req->dbreg_flags, 0); 3845 3846 /* 3847 * Debug registers, just like PMC, can only be modified 3848 * by a kernel call. Moreover, perfmon() access to those 3849 * registers are centralized in this routine. The hardware 3850 * does not modify the value of these registers, therefore, 3851 * if we save them as they are written, we can avoid having 3852 * to save them on context switch out. This is made possible 3853 * by the fact that when perfmon uses debug registers, ptrace() 3854 * won't be able to modify them concurrently. 
3855 */ 3856 if (mode == PFM_CODE_RR) { 3857 CTX_USED_IBR(ctx, rnum); 3858 3859 if (can_access_pmu) { 3860 ia64_set_ibr(rnum, dbreg.val); 3861 ia64_dv_serialize_instruction(); 3862 } 3863 3864 ctx->ctx_ibrs[rnum] = dbreg.val; 3865 3866 DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x ld=%d apmu=%d\n", 3867 rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu)); 3868 } else { 3869 CTX_USED_DBR(ctx, rnum); 3870 3871 if (can_access_pmu) { 3872 ia64_set_dbr(rnum, dbreg.val); 3873 ia64_dv_serialize_data(); 3874 } 3875 ctx->ctx_dbrs[rnum] = dbreg.val; 3876 3877 DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x ld=%d apmu=%d\n", 3878 rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu)); 3879 } 3880 } 3881 3882 return 0; 3883 3884abort_mission: 3885 /* 3886 * in case it was our first attempt, we undo the global modifications 3887 */ 3888 if (first_time) { 3889 LOCK_PFS(flags); 3890 if (ctx->ctx_fl_system) { 3891 pfm_sessions.pfs_sys_use_dbregs--; 3892 } 3893 UNLOCK_PFS(flags); 3894 ctx->ctx_fl_using_dbreg = 0; 3895 } 3896 /* 3897 * install error return flag 3898 */ 3899 PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL); 3900 3901 return ret; 3902} 3903 3904static int 3905pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3906{ 3907 return pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs); 3908} 3909 3910static int 3911pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3912{ 3913 return pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, count, regs); 3914} 3915 3916int 3917pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3918{ 3919 pfm_context_t *ctx; 3920 3921 if (req == NULL) return -EINVAL; 3922 3923 ctx = GET_PMU_CTX(); 3924 3925 if (ctx == NULL) return -EINVAL; 3926 3927 /* 3928 * for now limit to current task, which is enough when calling 3929 * from overflow handler 3930 */ 3931 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3932 3933 return pfm_write_ibrs(ctx, req, nreq, regs); 3934} 3935EXPORT_SYMBOL(pfm_mod_write_ibrs); 3936 3937int 3938pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3939{ 3940 pfm_context_t *ctx; 3941 3942 if (req == NULL) return -EINVAL; 3943 3944 ctx = GET_PMU_CTX(); 3945 3946 if (ctx == NULL) return -EINVAL; 3947 3948 /* 3949 * for now limit to current task, which is enough when calling 3950 * from overflow handler 3951 */ 3952 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3953 3954 return pfm_write_dbrs(ctx, req, nreq, regs); 3955} 3956EXPORT_SYMBOL(pfm_mod_write_dbrs); 3957 3958 3959static int 3960pfm_get_features(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3961{ 3962 pfarg_features_t *req = (pfarg_features_t *)arg; 3963 3964 req->ft_version = PFM_VERSION; 3965 return 0; 3966} 3967 3968static int 3969pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3970{ 3971 struct pt_regs *tregs; 3972 struct task_struct *task = PFM_CTX_TASK(ctx); 3973 int state, is_system; 3974 3975 state = ctx->ctx_state; 3976 is_system = ctx->ctx_fl_system; 3977 3978 /* 3979 * context must be attached to issue the stop command (includes LOADED,MASKED,ZOMBIE) 3980 */ 3981 if (state == PFM_CTX_UNLOADED) return -EINVAL; 3982 3983 /* 3984 * In system wide and when the context is loaded, access can only happen 3985 * when the caller is running on the CPU being monitored by the session. 3986 * It does not have to be the owner (ctx_task) of the context per se. 
3987 */ 3988 if (is_system && ctx->ctx_cpu != smp_processor_id()) { 3989 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3990 return -EBUSY; 3991 } 3992 DPRINT(("task [%d] ctx_state=%d is_system=%d\n", 3993 task_pid_nr(PFM_CTX_TASK(ctx)), 3994 state, 3995 is_system)); 3996 /* 3997 * in system mode, we need to update the PMU directly 3998 * and the user level state of the caller, which may not 3999 * necessarily be the creator of the context. 4000 */ 4001 if (is_system) { 4002 /* 4003 * Update local PMU first 4004 * 4005 * disable dcr pp 4006 */ 4007 ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); 4008 ia64_srlz_i(); 4009 4010 /* 4011 * update local cpuinfo 4012 */ 4013 PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); 4014 4015 /* 4016 * stop monitoring, does srlz.i 4017 */ 4018 pfm_clear_psr_pp(); 4019 4020 /* 4021 * stop monitoring in the caller 4022 */ 4023 ia64_psr(regs)->pp = 0; 4024 4025 return 0; 4026 } 4027 /* 4028 * per-task mode 4029 */ 4030 4031 if (task == current) { 4032 /* stop monitoring at kernel level */ 4033 pfm_clear_psr_up(); 4034 4035 /* 4036 * stop monitoring at the user level 4037 */ 4038 ia64_psr(regs)->up = 0; 4039 } else { 4040 tregs = task_pt_regs(task); 4041 4042 /* 4043 * stop monitoring at the user level 4044 */ 4045 ia64_psr(tregs)->up = 0; 4046 4047 /* 4048 * monitoring disabled in kernel at next reschedule 4049 */ 4050 ctx->ctx_saved_psr_up = 0; 4051 DPRINT(("task=[%d]\n", task_pid_nr(task))); 4052 } 4053 return 0; 4054} 4055 4056 4057static int 4058pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 4059{ 4060 struct pt_regs *tregs; 4061 int state, is_system; 4062 4063 state = ctx->ctx_state; 4064 is_system = ctx->ctx_fl_system; 4065 4066 if (state != PFM_CTX_LOADED) return -EINVAL; 4067 4068 /* 4069 * In system wide and when the context is loaded, access can only happen 4070 * when the caller is running on the CPU being monitored by the session. 4071 * It does not have to be the owner (ctx_task) of the context per se. 4072 */ 4073 if (is_system && ctx->ctx_cpu != smp_processor_id()) { 4074 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 4075 return -EBUSY; 4076 } 4077 4078 /* 4079 * in system mode, we need to update the PMU directly 4080 * and the user level state of the caller, which may not 4081 * necessarily be the creator of the context. 
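 *
 * Concretely, the system-wide branch below touches three pieces of
 * state: the caller's saved user-level psr.pp (so monitoring resumes
 * when it returns to user mode), the per-CPU pfm_syst_info DCR_PP
 * flag, and the hardware bits themselves via pfm_set_psr_pp() and the
 * dcr.pp update.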
4082 */ 4083 if (is_system) { 4084 4085 /* 4086 * set user level psr.pp for the caller 4087 */ 4088 ia64_psr(regs)->pp = 1; 4089 4090 /* 4091 * now update the local PMU and cpuinfo 4092 */ 4093 PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP); 4094 4095 /* 4096 * start monitoring at kernel level 4097 */ 4098 pfm_set_psr_pp(); 4099 4100 /* enable dcr pp */ 4101 ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); 4102 ia64_srlz_i(); 4103 4104 return 0; 4105 } 4106 4107 /* 4108 * per-process mode 4109 */ 4110 4111 if (ctx->ctx_task == current) { 4112 4113 /* start monitoring at kernel level */ 4114 pfm_set_psr_up(); 4115 4116 /* 4117 * activate monitoring at user level 4118 */ 4119 ia64_psr(regs)->up = 1; 4120 4121 } else { 4122 tregs = task_pt_regs(ctx->ctx_task); 4123 4124 /* 4125 * start monitoring at the kernel level the next 4126 * time the task is scheduled 4127 */ 4128 ctx->ctx_saved_psr_up = IA64_PSR_UP; 4129 4130 /* 4131 * activate monitoring at user level 4132 */ 4133 ia64_psr(tregs)->up = 1; 4134 } 4135 return 0; 4136} 4137 4138static int 4139pfm_get_pmc_reset(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 4140{ 4141 pfarg_reg_t *req = (pfarg_reg_t *)arg; 4142 unsigned int cnum; 4143 int i; 4144 int ret = -EINVAL; 4145 4146 for (i = 0; i < count; i++, req++) { 4147 4148 cnum = req->reg_num; 4149 4150 if (!PMC_IS_IMPL(cnum)) goto abort_mission; 4151 4152 req->reg_value = PMC_DFL_VAL(cnum); 4153 4154 PFM_REG_RETFLAG_SET(req->reg_flags, 0); 4155 4156 DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, req->reg_value)); 4157 } 4158 return 0; 4159 4160abort_mission: 4161 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 4162 return ret; 4163} 4164 4165static int 4166pfm_check_task_exist(pfm_context_t *ctx) 4167{ 4168 struct task_struct *g, *t; 4169 int ret = -ESRCH; 4170 4171 read_lock(&tasklist_lock); 4172 4173 do_each_thread (g, t) { 4174 if (t->thread.pfm_context == ctx) { 4175 ret = 0; 4176 goto out; 4177 } 4178 } while_each_thread (g, t); 4179out: 4180 read_unlock(&tasklist_lock); 4181 4182 DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx)); 4183 4184 return ret; 4185} 4186 4187static int 4188pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 4189{ 4190 struct task_struct *task; 4191 struct thread_struct *thread; 4192 struct pfm_context_t *old; 4193 unsigned long flags; 4194#ifndef CONFIG_SMP 4195 struct task_struct *owner_task = NULL; 4196#endif 4197 pfarg_load_t *req = (pfarg_load_t *)arg; 4198 unsigned long *pmcs_source, *pmds_source; 4199 int the_cpu; 4200 int ret = 0; 4201 int state, is_system, set_dbregs = 0; 4202 4203 state = ctx->ctx_state; 4204 is_system = ctx->ctx_fl_system; 4205 /* 4206 * can only load from unloaded or terminated state 4207 */ 4208 if (state != PFM_CTX_UNLOADED) { 4209 DPRINT(("cannot load to [%d], invalid ctx_state=%d\n", 4210 req->load_pid, 4211 ctx->ctx_state)); 4212 return -EBUSY; 4213 } 4214 4215 DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg)); 4216 4217 if (CTX_OVFL_NOBLOCK(ctx) == 0 && req->load_pid == current->pid) { 4218 DPRINT(("cannot use blocking mode on self\n")); 4219 return -EINVAL; 4220 } 4221 4222 ret = pfm_get_task(ctx, req->load_pid, &task); 4223 if (ret) { 4224 DPRINT(("load_pid [%d] get_task=%d\n", req->load_pid, ret)); 4225 return ret; 4226 } 4227 4228 ret = -EINVAL; 4229 4230 /* 4231 * system wide is self monitoring only 4232 */ 4233 if (is_system && task != current) { 4234 DPRINT(("system wide is self monitoring only 
load_pid=%d\n",
4235 			req->load_pid));
4236 		goto error;
4237 	}
4238 
4239 	thread = &task->thread;
4240 
4241 	ret = 0;
4242 	/*
4243 	 * cannot load a context which is using range restrictions,
4244 	 * into a task that is being debugged.
4245 	 */
4246 	if (ctx->ctx_fl_using_dbreg) {
4247 		if (thread->flags & IA64_THREAD_DBG_VALID) {
4248 			ret = -EBUSY;
4249 			DPRINT(("load_pid [%d] task is debugged, cannot load range restrictions\n", req->load_pid));
4250 			goto error;
4251 		}
4252 		LOCK_PFS(flags);
4253 
4254 		if (is_system) {
4255 			if (pfm_sessions.pfs_ptrace_use_dbregs) {
4256 				DPRINT(("cannot load [%d] dbregs in use\n",
4257 							task_pid_nr(task)));
4258 				ret = -EBUSY;
4259 			} else {
4260 				pfm_sessions.pfs_sys_use_dbregs++;
4261 				DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task_pid_nr(task), pfm_sessions.pfs_sys_use_dbregs));
4262 				set_dbregs = 1;
4263 			}
4264 		}
4265 
4266 		UNLOCK_PFS(flags);
4267 
4268 		if (ret) goto error;
4269 	}
4270 
4271 	/*
4272 	 * SMP system-wide monitoring implies self-monitoring.
4273 	 *
4274 	 * The programming model expects the task to
4275 	 * be pinned on a CPU throughout the session.
4276 	 * Here we take note of the current CPU at the
4277 	 * time the context is loaded. No call from
4278 	 * another CPU will be allowed.
4279 	 *
4280 	 * The pinning via sched_setaffinity()
4281 	 * must be done by the calling task prior
4282 	 * to this call.
4283 	 *
4284 	 * systemwide: keep track of CPU this session is supposed to run on
4285 	 */
4286 	the_cpu = ctx->ctx_cpu = smp_processor_id();
4287 
4288 	ret = -EBUSY;
4289 	/*
4290 	 * now reserve the session
4291 	 */
4292 	ret = pfm_reserve_session(current, is_system, the_cpu);
4293 	if (ret) goto error;
4294 
4295 	/*
4296 	 * task is necessarily stopped at this point.
4297 	 *
4298 	 * If the previous context was zombie, then it got removed in
4299 	 * pfm_save_regs(). Therefore we should not see it here.
4300 * If we see a context, then this is an active context 4301 * 4302 * XXX: needs to be atomic 4303 */ 4304 DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n", 4305 thread->pfm_context, ctx)); 4306 4307 ret = -EBUSY; 4308 old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *)); 4309 if (old != NULL) { 4310 DPRINT(("load_pid [%d] already has a context\n", req->load_pid)); 4311 goto error_unres; 4312 } 4313 4314 pfm_reset_msgq(ctx); 4315 4316 ctx->ctx_state = PFM_CTX_LOADED; 4317 4318 /* 4319 * link context to task 4320 */ 4321 ctx->ctx_task = task; 4322 4323 if (is_system) { 4324 /* 4325 * we load as stopped 4326 */ 4327 PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE); 4328 PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); 4329 4330 if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE); 4331 } else { 4332 thread->flags |= IA64_THREAD_PM_VALID; 4333 } 4334 4335 /* 4336 * propagate into thread-state 4337 */ 4338 pfm_copy_pmds(task, ctx); 4339 pfm_copy_pmcs(task, ctx); 4340 4341 pmcs_source = ctx->th_pmcs; 4342 pmds_source = ctx->th_pmds; 4343 4344 /* 4345 * always the case for system-wide 4346 */ 4347 if (task == current) { 4348 4349 if (is_system == 0) { 4350 4351 /* allow user level control */ 4352 ia64_psr(regs)->sp = 0; 4353 DPRINT(("clearing psr.sp for [%d]\n", task_pid_nr(task))); 4354 4355 SET_LAST_CPU(ctx, smp_processor_id()); 4356 INC_ACTIVATION(); 4357 SET_ACTIVATION(ctx); 4358#ifndef CONFIG_SMP 4359 /* 4360 * push the other task out, if any 4361 */ 4362 owner_task = GET_PMU_OWNER(); 4363 if (owner_task) pfm_lazy_save_regs(owner_task); 4364#endif 4365 } 4366 /* 4367 * load all PMD from ctx to PMU (as opposed to thread state) 4368 * restore all PMC from ctx to PMU 4369 */ 4370 pfm_restore_pmds(pmds_source, ctx->ctx_all_pmds[0]); 4371 pfm_restore_pmcs(pmcs_source, ctx->ctx_all_pmcs[0]); 4372 4373 ctx->ctx_reload_pmcs[0] = 0UL; 4374 ctx->ctx_reload_pmds[0] = 0UL; 4375 4376 /* 4377 * guaranteed safe by earlier check against DBG_VALID 4378 */ 4379 if (ctx->ctx_fl_using_dbreg) { 4380 pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); 4381 pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); 4382 } 4383 /* 4384 * set new ownership 4385 */ 4386 SET_PMU_OWNER(task, ctx); 4387 4388 DPRINT(("context loaded on PMU for [%d]\n", task_pid_nr(task))); 4389 } else { 4390 /* 4391 * when not current, task MUST be stopped, so this is safe 4392 */ 4393 regs = task_pt_regs(task); 4394 4395 /* force a full reload */ 4396 ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; 4397 SET_LAST_CPU(ctx, -1); 4398 4399 /* initial saved psr (stopped) */ 4400 ctx->ctx_saved_psr_up = 0UL; 4401 ia64_psr(regs)->up = ia64_psr(regs)->pp = 0; 4402 } 4403 4404 ret = 0; 4405 4406error_unres: 4407 if (ret) pfm_unreserve_session(ctx, ctx->ctx_fl_system, the_cpu); 4408error: 4409 /* 4410 * we must undo the dbregs setting (for system-wide) 4411 */ 4412 if (ret && set_dbregs) { 4413 LOCK_PFS(flags); 4414 pfm_sessions.pfs_sys_use_dbregs--; 4415 UNLOCK_PFS(flags); 4416 } 4417 /* 4418 * release task, there is now a link with the context 4419 */ 4420 if (is_system == 0 && task != current) { 4421 pfm_put_task(task); 4422 4423 if (ret == 0) { 4424 ret = pfm_check_task_exist(ctx); 4425 if (ret) { 4426 ctx->ctx_state = PFM_CTX_UNLOADED; 4427 ctx->ctx_task = NULL; 4428 } 4429 } 4430 } 4431 return ret; 4432} 4433 4434/* 4435 * in this function, we do not need to increase the use count 4436 * for the task via get_task_struct(), because we hold the 4437 * context lock. 
If the task were to disappear while having 4438 * a context attached, it would go through pfm_exit_thread() 4439 * which also grabs the context lock and would therefore be blocked 4440 * until we are here. 4441 */ 4442static void pfm_flush_pmds(struct task_struct *, pfm_context_t *ctx); 4443 4444static int 4445pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 4446{ 4447 struct task_struct *task = PFM_CTX_TASK(ctx); 4448 struct pt_regs *tregs; 4449 int prev_state, is_system; 4450 int ret; 4451 4452 DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task_pid_nr(task) : -1)); 4453 4454 prev_state = ctx->ctx_state; 4455 is_system = ctx->ctx_fl_system; 4456 4457 /* 4458 * unload only when necessary 4459 */ 4460 if (prev_state == PFM_CTX_UNLOADED) { 4461 DPRINT(("ctx_state=%d, nothing to do\n", prev_state)); 4462 return 0; 4463 } 4464 4465 /* 4466 * clear psr and dcr bits 4467 */ 4468 ret = pfm_stop(ctx, NULL, 0, regs); 4469 if (ret) return ret; 4470 4471 ctx->ctx_state = PFM_CTX_UNLOADED; 4472 4473 /* 4474 * in system mode, we need to update the PMU directly 4475 * and the user level state of the caller, which may not 4476 * necessarily be the creator of the context. 4477 */ 4478 if (is_system) { 4479 4480 /* 4481 * Update cpuinfo 4482 * 4483 * local PMU is taken care of in pfm_stop() 4484 */ 4485 PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE); 4486 PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE); 4487 4488 /* 4489 * save PMDs in context 4490 * release ownership 4491 */ 4492 pfm_flush_pmds(current, ctx); 4493 4494 /* 4495 * at this point we are done with the PMU 4496 * so we can unreserve the resource. 4497 */ 4498 if (prev_state != PFM_CTX_ZOMBIE) 4499 pfm_unreserve_session(ctx, 1 , ctx->ctx_cpu); 4500 4501 /* 4502 * disconnect context from task 4503 */ 4504 task->thread.pfm_context = NULL; 4505 /* 4506 * disconnect task from context 4507 */ 4508 ctx->ctx_task = NULL; 4509 4510 /* 4511 * There is nothing more to cleanup here. 4512 */ 4513 return 0; 4514 } 4515 4516 /* 4517 * per-task mode 4518 */ 4519 tregs = task == current ? regs : task_pt_regs(task); 4520 4521 if (task == current) { 4522 /* 4523 * cancel user level control 4524 */ 4525 ia64_psr(regs)->sp = 1; 4526 4527 DPRINT(("setting psr.sp for [%d]\n", task_pid_nr(task))); 4528 } 4529 /* 4530 * save PMDs to context 4531 * release ownership 4532 */ 4533 pfm_flush_pmds(task, ctx); 4534 4535 /* 4536 * at this point we are done with the PMU 4537 * so we can unreserve the resource. 4538 * 4539 * when state was ZOMBIE, we have already unreserved. 
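 * (the second argument to pfm_unreserve_session() below is 0 because
 * this is the per-task path; the system-wide path above passed 1)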
4540 */ 4541 if (prev_state != PFM_CTX_ZOMBIE) 4542 pfm_unreserve_session(ctx, 0 , ctx->ctx_cpu); 4543 4544 /* 4545 * reset activation counter and psr 4546 */ 4547 ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; 4548 SET_LAST_CPU(ctx, -1); 4549 4550 /* 4551 * PMU state will not be restored 4552 */ 4553 task->thread.flags &= ~IA64_THREAD_PM_VALID; 4554 4555 /* 4556 * break links between context and task 4557 */ 4558 task->thread.pfm_context = NULL; 4559 ctx->ctx_task = NULL; 4560 4561 PFM_SET_WORK_PENDING(task, 0); 4562 4563 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE; 4564 ctx->ctx_fl_can_restart = 0; 4565 ctx->ctx_fl_going_zombie = 0; 4566 4567 DPRINT(("disconnected [%d] from context\n", task_pid_nr(task))); 4568 4569 return 0; 4570} 4571 4572 4573/* 4574 * called only from exit_thread(): task == current 4575 * we come here only if current has a context attached (loaded or masked) 4576 */ 4577void 4578pfm_exit_thread(struct task_struct *task) 4579{ 4580 pfm_context_t *ctx; 4581 unsigned long flags; 4582 struct pt_regs *regs = task_pt_regs(task); 4583 int ret, state; 4584 int free_ok = 0; 4585 4586 ctx = PFM_GET_CTX(task); 4587 4588 PROTECT_CTX(ctx, flags); 4589 4590 DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task_pid_nr(task))); 4591 4592 state = ctx->ctx_state; 4593 switch(state) { 4594 case PFM_CTX_UNLOADED: 4595 /* 4596 * only comes to this function if pfm_context is not NULL, i.e., cannot 4597 * be in unloaded state 4598 */ 4599 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task_pid_nr(task)); 4600 break; 4601 case PFM_CTX_LOADED: 4602 case PFM_CTX_MASKED: 4603 ret = pfm_context_unload(ctx, NULL, 0, regs); 4604 if (ret) { 4605 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret); 4606 } 4607 DPRINT(("ctx unloaded for current state was %d\n", state)); 4608 4609 pfm_end_notify_user(ctx); 4610 break; 4611 case PFM_CTX_ZOMBIE: 4612 ret = pfm_context_unload(ctx, NULL, 0, regs); 4613 if (ret) { 4614 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret); 4615 } 4616 free_ok = 1; 4617 break; 4618 default: 4619 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task_pid_nr(task), state); 4620 break; 4621 } 4622 UNPROTECT_CTX(ctx, flags); 4623 4624 { u64 psr = pfm_get_psr(); 4625 BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); 4626 BUG_ON(GET_PMU_OWNER()); 4627 BUG_ON(ia64_psr(regs)->up); 4628 BUG_ON(ia64_psr(regs)->pp); 4629 } 4630 4631 /* 4632 * All memory free operations (especially for vmalloc'ed memory) 4633 * MUST be done with interrupts ENABLED. 
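 * UNPROTECT_CTX() above dropped the context lock and restored the
 * interrupt state saved on entry, which is why pfm_context_free()
 * is only called at this point.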
4634  */
4635 	if (free_ok) pfm_context_free(ctx);
4636 }
4637 
4638 /*
4639  * functions MUST be listed in the increasing order of their index (see perfmon.h)
4640  */
4641 #define PFM_CMD(name, flags, arg_count, arg_type, getsz) { name, #name, flags, arg_count, sizeof(arg_type), getsz }
4642 #define PFM_CMD_S(name, flags) { name, #name, flags, 0, 0, NULL }
4643 #define PFM_CMD_PCLRWS	(PFM_CMD_FD|PFM_CMD_ARG_RW|PFM_CMD_STOP)
4644 #define PFM_CMD_PCLRW	(PFM_CMD_FD|PFM_CMD_ARG_RW)
4645 #define PFM_CMD_NONE	{ NULL, "no-cmd", 0, 0, 0, NULL}
4646 
4647 static pfm_cmd_desc_t pfm_cmd_tab[]={
4648 /* 0  */PFM_CMD_NONE,
4649 /* 1  */PFM_CMD(pfm_write_pmcs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
4650 /* 2  */PFM_CMD(pfm_write_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
4651 /* 3  */PFM_CMD(pfm_read_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
4652 /* 4  */PFM_CMD_S(pfm_stop, PFM_CMD_PCLRWS),
4653 /* 5  */PFM_CMD_S(pfm_start, PFM_CMD_PCLRWS),
4654 /* 6  */PFM_CMD_NONE,
4655 /* 7  */PFM_CMD_NONE,
4656 /* 8  */PFM_CMD(pfm_context_create, PFM_CMD_ARG_RW, 1, pfarg_context_t, pfm_ctx_getsize),
4657 /* 9  */PFM_CMD_NONE,
4658 /* 10 */PFM_CMD_S(pfm_restart, PFM_CMD_PCLRW),
4659 /* 11 */PFM_CMD_NONE,
4660 /* 12 */PFM_CMD(pfm_get_features, PFM_CMD_ARG_RW, 1, pfarg_features_t, NULL),
4661 /* 13 */PFM_CMD(pfm_debug, 0, 1, unsigned int, NULL),
4662 /* 14 */PFM_CMD_NONE,
4663 /* 15 */PFM_CMD(pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
4664 /* 16 */PFM_CMD(pfm_context_load, PFM_CMD_PCLRWS, 1, pfarg_load_t, NULL),
4665 /* 17 */PFM_CMD_S(pfm_context_unload, PFM_CMD_PCLRWS),
4666 /* 18 */PFM_CMD_NONE,
4667 /* 19 */PFM_CMD_NONE,
4668 /* 20 */PFM_CMD_NONE,
4669 /* 21 */PFM_CMD_NONE,
4670 /* 22 */PFM_CMD_NONE,
4671 /* 23 */PFM_CMD_NONE,
4672 /* 24 */PFM_CMD_NONE,
4673 /* 25 */PFM_CMD_NONE,
4674 /* 26 */PFM_CMD_NONE,
4675 /* 27 */PFM_CMD_NONE,
4676 /* 28 */PFM_CMD_NONE,
4677 /* 29 */PFM_CMD_NONE,
4678 /* 30 */PFM_CMD_NONE,
4679 /* 31 */PFM_CMD_NONE,
4680 /* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL),
4681 /* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL)
4682 };
4683 #define PFM_CMD_COUNT	(sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
4684 
4685 static int
4686 pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags)
4687 {
4688 	struct task_struct *task;
4689 	int state, old_state;
4690 
4691 recheck:
4692 	state = ctx->ctx_state;
4693 	task = ctx->ctx_task;
4694 
4695 	if (task == NULL) {
4696 		DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state));
4697 		return 0;
4698 	}
4699 
4700 	DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n",
4701 		ctx->ctx_fd,
4702 		state,
4703 		task_pid_nr(task),
4704 		task->state, PFM_CMD_STOPPED(cmd)));
4705 
4706 	/*
4707 	 * self-monitoring always ok.
4708 	 *
4709 	 * for system-wide, the caller can either be the creator of the
4710 	 * context (the one to which the context is attached) OR
4711 	 * a task running on the same CPU as the session.
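 *
 * In other words, the check below lets through:
 *	task == current      (self-monitoring)
 *	ctx->ctx_fl_system   (the running-on-ctx_cpu restriction is
 *	                      enforced by the individual command
 *	                      handlers, see pfm_stop()/pfm_start())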
4712 */ 4713 if (task == current || ctx->ctx_fl_system) return 0; 4714 4715 /* 4716 * we are monitoring another thread 4717 */ 4718 switch(state) { 4719 case PFM_CTX_UNLOADED: 4720 /* 4721 * if context is UNLOADED we are safe to go 4722 */ 4723 return 0; 4724 case PFM_CTX_ZOMBIE: 4725 /* 4726 * no command can operate on a zombie context 4727 */ 4728 DPRINT(("cmd %d state zombie cannot operate on context\n", cmd)); 4729 return -EINVAL; 4730 case PFM_CTX_MASKED: 4731 /* 4732 * PMU state has been saved to software even though 4733 * the thread may still be running. 4734 */ 4735 if (cmd != PFM_UNLOAD_CONTEXT) return 0; 4736 } 4737 4738 /* 4739 * context is LOADED or MASKED. Some commands may need to have 4740 * the task stopped. 4741 * 4742 * We could lift this restriction for UP but it would mean that 4743 * the user has no guarantee the task would not run between 4744 * two successive calls to perfmonctl(). That's probably OK. 4745 * If this user wants to ensure the task does not run, then 4746 * the task must be stopped. 4747 */ 4748 if (PFM_CMD_STOPPED(cmd)) { 4749 if (!task_is_stopped_or_traced(task)) { 4750 DPRINT(("[%d] task not in stopped state\n", task_pid_nr(task))); 4751 return -EBUSY; 4752 } 4753 /* 4754 * task is now stopped, wait for ctxsw out 4755 * 4756 * This is an interesting point in the code. 4757 * We need to unprotect the context because 4758 * the pfm_save_regs() routines needs to grab 4759 * the same lock. There are danger in doing 4760 * this because it leaves a window open for 4761 * another task to get access to the context 4762 * and possibly change its state. The one thing 4763 * that is not possible is for the context to disappear 4764 * because we are protected by the VFS layer, i.e., 4765 * get_fd()/put_fd(). 4766 */ 4767 old_state = state; 4768 4769 UNPROTECT_CTX(ctx, flags); 4770 4771 wait_task_inactive(task, 0); 4772 4773 PROTECT_CTX(ctx, flags); 4774 4775 /* 4776 * we must recheck to verify if state has changed 4777 */ 4778 if (ctx->ctx_state != old_state) { 4779 DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state)); 4780 goto recheck; 4781 } 4782 } 4783 return 0; 4784} 4785 4786/* 4787 * system-call entry point (must return long) 4788 */ 4789asmlinkage long 4790sys_perfmonctl (int fd, int cmd, void __user *arg, int count) 4791{ 4792 struct file *file = NULL; 4793 pfm_context_t *ctx = NULL; 4794 unsigned long flags = 0UL; 4795 void *args_k = NULL; 4796 long ret; /* will expand int return types */ 4797 size_t base_sz, sz, xtra_sz = 0; 4798 int narg, completed_args = 0, call_made = 0, cmd_flags; 4799 int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); 4800 int (*getsize)(void *arg, size_t *sz); 4801#define PFM_MAX_ARGSIZE 4096 4802 4803 /* 4804 * reject any call if perfmon was disabled at initialization 4805 */ 4806 if (unlikely(pmu_conf == NULL)) return -ENOSYS; 4807 4808 if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) { 4809 DPRINT(("invalid cmd=%d\n", cmd)); 4810 return -EINVAL; 4811 } 4812 4813 func = pfm_cmd_tab[cmd].cmd_func; 4814 narg = pfm_cmd_tab[cmd].cmd_narg; 4815 base_sz = pfm_cmd_tab[cmd].cmd_argsize; 4816 getsize = pfm_cmd_tab[cmd].cmd_getsize; 4817 cmd_flags = pfm_cmd_tab[cmd].cmd_flags; 4818 4819 if (unlikely(func == NULL)) { 4820 DPRINT(("invalid cmd=%d\n", cmd)); 4821 return -EINVAL; 4822 } 4823 4824 DPRINT(("cmd=%s idx=%d narg=0x%x argsz=%lu count=%d\n", 4825 PFM_CMD_NAME(cmd), 4826 cmd, 4827 narg, 4828 base_sz, 4829 count)); 4830 4831 /* 4832 * check if number of arguments matches what the command 
expects 4833 */ 4834 if (unlikely((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count))) 4835 return -EINVAL; 4836 4837restart_args: 4838 sz = xtra_sz + base_sz*count; 4839 /* 4840 * limit abuse to min page size 4841 */ 4842 if (unlikely(sz > PFM_MAX_ARGSIZE)) { 4843 printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", task_pid_nr(current), sz); 4844 return -E2BIG; 4845 } 4846 4847 /* 4848 * allocate default-sized argument buffer 4849 */ 4850 if (likely(count && args_k == NULL)) { 4851 args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL); 4852 if (args_k == NULL) return -ENOMEM; 4853 } 4854 4855 ret = -EFAULT; 4856 4857 /* 4858 * copy arguments 4859 * 4860 * assume sz = 0 for command without parameters 4861 */ 4862 if (sz && copy_from_user(args_k, arg, sz)) { 4863 DPRINT(("cannot copy_from_user %lu bytes @%p\n", sz, arg)); 4864 goto error_args; 4865 } 4866 4867 /* 4868 * check if command supports extra parameters 4869 */ 4870 if (completed_args == 0 && getsize) { 4871 /* 4872 * get extra parameters size (based on main argument) 4873 */ 4874 ret = (*getsize)(args_k, &xtra_sz); 4875 if (ret) goto error_args; 4876 4877 completed_args = 1; 4878 4879 DPRINT(("restart_args sz=%lu xtra_sz=%lu\n", sz, xtra_sz)); 4880 4881 /* retry if necessary */ 4882 if (likely(xtra_sz)) goto restart_args; 4883 } 4884 4885 if (unlikely((cmd_flags & PFM_CMD_FD) == 0)) goto skip_fd; 4886 4887 ret = -EBADF; 4888 4889 file = fget(fd); 4890 if (unlikely(file == NULL)) { 4891 DPRINT(("invalid fd %d\n", fd)); 4892 goto error_args; 4893 } 4894 if (unlikely(PFM_IS_FILE(file) == 0)) { 4895 DPRINT(("fd %d not related to perfmon\n", fd)); 4896 goto error_args; 4897 } 4898 4899 ctx = file->private_data; 4900 if (unlikely(ctx == NULL)) { 4901 DPRINT(("no context for fd %d\n", fd)); 4902 goto error_args; 4903 } 4904 prefetch(&ctx->ctx_state); 4905 4906 PROTECT_CTX(ctx, flags); 4907 4908 /* 4909 * check task is stopped 4910 */ 4911 ret = pfm_check_task_state(ctx, cmd, flags); 4912 if (unlikely(ret)) goto abort_locked; 4913 4914skip_fd: 4915 ret = (*func)(ctx, args_k, count, task_pt_regs(current)); 4916 4917 call_made = 1; 4918 4919abort_locked: 4920 if (likely(ctx)) { 4921 DPRINT(("context unlocked\n")); 4922 UNPROTECT_CTX(ctx, flags); 4923 } 4924 4925 /* copy argument back to user, if needed */ 4926 if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT; 4927 4928error_args: 4929 if (file) 4930 fput(file); 4931 4932 kfree(args_k); 4933 4934 DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret)); 4935 4936 return ret; 4937} 4938 4939static void 4940pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_regs *regs) 4941{ 4942 pfm_buffer_fmt_t *fmt = ctx->ctx_buf_fmt; 4943 pfm_ovfl_ctrl_t rst_ctrl; 4944 int state; 4945 int ret = 0; 4946 4947 state = ctx->ctx_state; 4948 /* 4949 * Unlock sampling buffer and reset index atomically 4950 * XXX: not really needed when blocking 4951 */ 4952 if (CTX_HAS_SMPL(ctx)) { 4953 4954 rst_ctrl.bits.mask_monitoring = 0; 4955 rst_ctrl.bits.reset_ovfl_pmds = 0; 4956 4957 if (state == PFM_CTX_LOADED) 4958 ret = pfm_buf_fmt_restart_active(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 4959 else 4960 ret = pfm_buf_fmt_restart(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 4961 } else { 4962 rst_ctrl.bits.mask_monitoring = 0; 4963 rst_ctrl.bits.reset_ovfl_pmds = 1; 4964 } 4965 4966 if (ret == 0) { 4967 if (rst_ctrl.bits.reset_ovfl_pmds) { 4968 pfm_reset_regs(ctx, &ovfl_regs, PFM_PMD_LONG_RESET); 4969 } 4970 if 
(rst_ctrl.bits.mask_monitoring == 0) { 4971 DPRINT(("resuming monitoring\n")); 4972 if (ctx->ctx_state == PFM_CTX_MASKED) pfm_restore_monitoring(current); 4973 } else { 4974 DPRINT(("stopping monitoring\n")); 4975 //pfm_stop_monitoring(current, regs); 4976 } 4977 ctx->ctx_state = PFM_CTX_LOADED; 4978 } 4979} 4980 4981/* 4982 * context MUST BE LOCKED when calling 4983 * can only be called for current 4984 */ 4985static void 4986pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs) 4987{ 4988 int ret; 4989 4990 DPRINT(("entering for [%d]\n", task_pid_nr(current))); 4991 4992 ret = pfm_context_unload(ctx, NULL, 0, regs); 4993 if (ret) { 4994 printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", task_pid_nr(current), ret); 4995 } 4996 4997 /* 4998 * and wakeup controlling task, indicating we are now disconnected 4999 */ 5000 wake_up_interruptible(&ctx->ctx_zombieq); 5001 5002 /* 5003 * given that context is still locked, the controlling 5004 * task will only get access when we return from 5005 * pfm_handle_work(). 5006 */ 5007} 5008 5009static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds); 5010 5011 /* 5012 * pfm_handle_work() can be called with interrupts enabled 5013 * (TIF_NEED_RESCHED) or disabled. The down_interruptible 5014 * call may sleep, therefore we must re-enable interrupts 5015 * to avoid deadlocks. It is safe to do so because this function 5016 * is called ONLY when returning to user level (pUStk=1), in which case 5017 * there is no risk of kernel stack overflow due to deep 5018 * interrupt nesting. 5019 */ 5020void 5021pfm_handle_work(void) 5022{ 5023 pfm_context_t *ctx; 5024 struct pt_regs *regs; 5025 unsigned long flags, dummy_flags; 5026 unsigned long ovfl_regs; 5027 unsigned int reason; 5028 int ret; 5029 5030 ctx = PFM_GET_CTX(current); 5031 if (ctx == NULL) { 5032 printk(KERN_ERR "perfmon: [%d] has no PFM context\n", 5033 task_pid_nr(current)); 5034 return; 5035 } 5036 5037 PROTECT_CTX(ctx, flags); 5038 5039 PFM_SET_WORK_PENDING(current, 0); 5040 5041 regs = task_pt_regs(current); 5042 5043 /* 5044 * extract reason for being here and clear 5045 */ 5046 reason = ctx->ctx_fl_trap_reason; 5047 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE; 5048 ovfl_regs = ctx->ctx_ovfl_regs[0]; 5049 5050 DPRINT(("reason=%d state=%d\n", reason, ctx->ctx_state)); 5051 5052 /* 5053 * must be done before we check for simple-reset mode 5054 */ 5055 if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) 5056 goto do_zombie; 5057 5058 //if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking; 5059 if (reason == PFM_TRAP_REASON_RESET) 5060 goto skip_blocking; 5061 5062 /* 5063 * restore interrupt mask to what it was on entry. 5064 * Could be enabled/diasbled. 5065 */ 5066 UNPROTECT_CTX(ctx, flags); 5067 5068 /* 5069 * force interrupt enable because of down_interruptible() 5070 */ 5071 local_irq_enable(); 5072 5073 DPRINT(("before block sleeping\n")); 5074 5075 /* 5076 * may go through without blocking on SMP systems 5077 * if restart has been received already by the time we call down() 5078 */ 5079 ret = wait_for_completion_interruptible(&ctx->ctx_restart_done); 5080 5081 DPRINT(("after block sleeping ret=%d\n", ret)); 5082 5083 /* 5084 * lock context and mask interrupts again 5085 * We save flags into a dummy because we may have 5086 * altered interrupts mask compared to entry in this 5087 * function. 
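 * The flags value captured by the first PROTECT_CTX() at the top of
 * this function is the one used for the final UNPROTECT_CTX(), so the
 * interrupt state seen by our caller is the original one; dummy_flags
 * is simply discarded.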
5088 */ 5089 PROTECT_CTX(ctx, dummy_flags); 5090 5091 /* 5092 * we need to read the ovfl_regs only after wake-up 5093 * because we may have had pfm_write_pmds() in between 5094 * and that can changed PMD values and therefore 5095 * ovfl_regs is reset for these new PMD values. 5096 */ 5097 ovfl_regs = ctx->ctx_ovfl_regs[0]; 5098 5099 if (ctx->ctx_fl_going_zombie) { 5100do_zombie: 5101 DPRINT(("context is zombie, bailing out\n")); 5102 pfm_context_force_terminate(ctx, regs); 5103 goto nothing_to_do; 5104 } 5105 /* 5106 * in case of interruption of down() we don't restart anything 5107 */ 5108 if (ret < 0) 5109 goto nothing_to_do; 5110 5111skip_blocking: 5112 pfm_resume_after_ovfl(ctx, ovfl_regs, regs); 5113 ctx->ctx_ovfl_regs[0] = 0UL; 5114 5115nothing_to_do: 5116 /* 5117 * restore flags as they were upon entry 5118 */ 5119 UNPROTECT_CTX(ctx, flags); 5120} 5121 5122static int 5123pfm_notify_user(pfm_context_t *ctx, pfm_msg_t *msg) 5124{ 5125 if (ctx->ctx_state == PFM_CTX_ZOMBIE) { 5126 DPRINT(("ignoring overflow notification, owner is zombie\n")); 5127 return 0; 5128 } 5129 5130 DPRINT(("waking up somebody\n")); 5131 5132 if (msg) wake_up_interruptible(&ctx->ctx_msgq_wait); 5133 5134 /* 5135 * safe, we are not in intr handler, nor in ctxsw when 5136 * we come here 5137 */ 5138 kill_fasync (&ctx->ctx_async_queue, SIGIO, POLL_IN); 5139 5140 return 0; 5141} 5142 5143static int 5144pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds) 5145{ 5146 pfm_msg_t *msg = NULL; 5147 5148 if (ctx->ctx_fl_no_msg == 0) { 5149 msg = pfm_get_new_msg(ctx); 5150 if (msg == NULL) { 5151 printk(KERN_ERR "perfmon: pfm_ovfl_notify_user no more notification msgs\n"); 5152 return -1; 5153 } 5154 5155 msg->pfm_ovfl_msg.msg_type = PFM_MSG_OVFL; 5156 msg->pfm_ovfl_msg.msg_ctx_fd = ctx->ctx_fd; 5157 msg->pfm_ovfl_msg.msg_active_set = 0; 5158 msg->pfm_ovfl_msg.msg_ovfl_pmds[0] = ovfl_pmds; 5159 msg->pfm_ovfl_msg.msg_ovfl_pmds[1] = 0UL; 5160 msg->pfm_ovfl_msg.msg_ovfl_pmds[2] = 0UL; 5161 msg->pfm_ovfl_msg.msg_ovfl_pmds[3] = 0UL; 5162 msg->pfm_ovfl_msg.msg_tstamp = 0UL; 5163 } 5164 5165 DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d ovfl_pmds=0x%lx\n", 5166 msg, 5167 ctx->ctx_fl_no_msg, 5168 ctx->ctx_fd, 5169 ovfl_pmds)); 5170 5171 return pfm_notify_user(ctx, msg); 5172} 5173 5174static int 5175pfm_end_notify_user(pfm_context_t *ctx) 5176{ 5177 pfm_msg_t *msg; 5178 5179 msg = pfm_get_new_msg(ctx); 5180 if (msg == NULL) { 5181 printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n"); 5182 return -1; 5183 } 5184 /* no leak */ 5185 memset(msg, 0, sizeof(*msg)); 5186 5187 msg->pfm_end_msg.msg_type = PFM_MSG_END; 5188 msg->pfm_end_msg.msg_ctx_fd = ctx->ctx_fd; 5189 msg->pfm_ovfl_msg.msg_tstamp = 0UL; 5190 5191 DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d\n", 5192 msg, 5193 ctx->ctx_fl_no_msg, 5194 ctx->ctx_fd)); 5195 5196 return pfm_notify_user(ctx, msg); 5197} 5198 5199/* 5200 * main overflow processing routine. 
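 * the PMDs flagged in pmc0 have their 64-bit software value advanced
 * by (ovfl_val + 1), i.e. by the full span of the hardware counter;
 * a PMD counts as overflowed only when that 64-bit software value
 * itself wraps (old_val > new_val below). Overflowed PMDs are then
 * handed to the sampling format handler (if any), reset using their
 * short reset values, and may cause monitoring to be masked and a
 * notification message to be queued for the user.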
5201 * it can be called from the interrupt path or explicitly during the context switch code 5202 */ 5203static void pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, 5204 unsigned long pmc0, struct pt_regs *regs) 5205{ 5206 pfm_ovfl_arg_t *ovfl_arg; 5207 unsigned long mask; 5208 unsigned long old_val, ovfl_val, new_val; 5209 unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds; 5210 unsigned long tstamp; 5211 pfm_ovfl_ctrl_t ovfl_ctrl; 5212 unsigned int i, has_smpl; 5213 int must_notify = 0; 5214 5215 if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) goto stop_monitoring; 5216 5217 /* 5218 * sanity test. Should never happen 5219 */ 5220 if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check; 5221 5222 tstamp = ia64_get_itc(); 5223 mask = pmc0 >> PMU_FIRST_COUNTER; 5224 ovfl_val = pmu_conf->ovfl_val; 5225 has_smpl = CTX_HAS_SMPL(ctx); 5226 5227 DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s " 5228 "used_pmds=0x%lx\n", 5229 pmc0, 5230 task ? task_pid_nr(task): -1, 5231 (regs ? regs->cr_iip : 0), 5232 CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking", 5233 ctx->ctx_used_pmds[0])); 5234 5235 5236 /* 5237 * first we update the virtual counters 5238 * assume there was a prior ia64_srlz_d() issued 5239 */ 5240 for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) { 5241 5242 /* skip pmd which did not overflow */ 5243 if ((mask & 0x1) == 0) continue; 5244 5245 /* 5246 * Note that the pmd is not necessarily 0 at this point as qualified events 5247 * may have happened before the PMU was frozen. The residual count is not 5248 * taken into consideration here but will be with any read of the pmd via 5249 * pfm_read_pmds(). 5250 */ 5251 old_val = new_val = ctx->ctx_pmds[i].val; 5252 new_val += 1 + ovfl_val; 5253 ctx->ctx_pmds[i].val = new_val; 5254 5255 /* 5256 * check for overflow condition 5257 */ 5258 if (likely(old_val > new_val)) { 5259 ovfl_pmds |= 1UL << i; 5260 if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i; 5261 } 5262 5263 DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n", 5264 i, 5265 new_val, 5266 old_val, 5267 ia64_get_pmd(i) & ovfl_val, 5268 ovfl_pmds, 5269 ovfl_notify)); 5270 } 5271 5272 /* 5273 * there was no 64-bit overflow, nothing else to do 5274 */ 5275 if (ovfl_pmds == 0UL) return; 5276 5277 /* 5278 * reset all control bits 5279 */ 5280 ovfl_ctrl.val = 0; 5281 reset_pmds = 0UL; 5282 5283 /* 5284 * if a sampling format module exists, then we "cache" the overflow by 5285 * calling the module's handler() routine. 5286 */ 5287 if (has_smpl) { 5288 unsigned long start_cycles, end_cycles; 5289 unsigned long pmd_mask; 5290 int j, k, ret = 0; 5291 int this_cpu = smp_processor_id(); 5292 5293 pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER; 5294 ovfl_arg = &ctx->ctx_ovfl_arg; 5295 5296 prefetch(ctx->ctx_smpl_hdr); 5297 5298 for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>=1) { 5299 5300 mask = 1UL << i; 5301 5302 if ((pmd_mask & 0x1) == 0) continue; 5303 5304 ovfl_arg->ovfl_pmd = (unsigned char )i; 5305 ovfl_arg->ovfl_notify = ovfl_notify & mask ? 1 : 0; 5306 ovfl_arg->active_set = 0; 5307 ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */ 5308 ovfl_arg->smpl_pmds[0] = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0]; 5309 5310 ovfl_arg->pmd_value = ctx->ctx_pmds[i].val; 5311 ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval; 5312 ovfl_arg->pmd_eventid = ctx->ctx_pmds[i].eventid; 5313 5314 /* 5315 * copy values of pmds of interest. Sampling format may copy them 5316 * into sampling buffer. 
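 * (smpl_pmds_values[] is packed: the k index advances only for PMDs
 * present in the smpl_pmds[0] bitmask; counting PMDs are read via
 * their 64-bit software value, the others straight from hardware)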
5317 */ 5318 if (smpl_pmds) { 5319 for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) { 5320 if ((smpl_pmds & 0x1) == 0) continue; 5321 ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ? pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j); 5322 DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1])); 5323 } 5324 } 5325 5326 pfm_stats[this_cpu].pfm_smpl_handler_calls++; 5327 5328 start_cycles = ia64_get_itc(); 5329 5330 /* 5331 * call custom buffer format record (handler) routine 5332 */ 5333 ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp); 5334 5335 end_cycles = ia64_get_itc(); 5336 5337 /* 5338 * For those controls, we take the union because they have 5339 * an all or nothing behavior. 5340 */ 5341 ovfl_ctrl.bits.notify_user |= ovfl_arg->ovfl_ctrl.bits.notify_user; 5342 ovfl_ctrl.bits.block_task |= ovfl_arg->ovfl_ctrl.bits.block_task; 5343 ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring; 5344 /* 5345 * build the bitmask of pmds to reset now 5346 */ 5347 if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask; 5348 5349 pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles; 5350 } 5351 /* 5352 * when the module cannot handle the rest of the overflows, we abort right here 5353 */ 5354 if (ret && pmd_mask) { 5355 DPRINT(("handler aborts leftover ovfl_pmds=0x%lx\n", 5356 pmd_mask<<PMU_FIRST_COUNTER)); 5357 } 5358 /* 5359 * remove the pmds we reset now from the set of pmds to reset in pfm_restart() 5360 */ 5361 ovfl_pmds &= ~reset_pmds; 5362 } else { 5363 /* 5364 * when no sampling module is used, then the default 5365 * is to notify on overflow if requested by user 5366 */ 5367 ovfl_ctrl.bits.notify_user = ovfl_notify ? 1 : 0; 5368 ovfl_ctrl.bits.block_task = ovfl_notify ? 1 : 0; 5369 ovfl_ctrl.bits.mask_monitoring = ovfl_notify ? 1 : 0; /* XXX: change for saturation */ 5370 ovfl_ctrl.bits.reset_ovfl_pmds = ovfl_notify ? 0 : 1; 5371 /* 5372 * if needed, we reset all overflowed pmds 5373 */ 5374 if (ovfl_notify == 0) reset_pmds = ovfl_pmds; 5375 } 5376 5377 DPRINT_ovfl(("ovfl_pmds=0x%lx reset_pmds=0x%lx\n", ovfl_pmds, reset_pmds)); 5378 5379 /* 5380 * reset the requested PMD registers using the short reset values 5381 */ 5382 if (reset_pmds) { 5383 unsigned long bm = reset_pmds; 5384 pfm_reset_regs(ctx, &bm, PFM_PMD_SHORT_RESET); 5385 } 5386 5387 if (ovfl_notify && ovfl_ctrl.bits.notify_user) { 5388 /* 5389 * keep track of what to reset when unblocking 5390 */ 5391 ctx->ctx_ovfl_regs[0] = ovfl_pmds; 5392 5393 /* 5394 * check for blocking context 5395 */ 5396 if (CTX_OVFL_NOBLOCK(ctx) == 0 && ovfl_ctrl.bits.block_task) { 5397 5398 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCK; 5399 5400 /* 5401 * set the perfmon specific checking pending work for the task 5402 */ 5403 PFM_SET_WORK_PENDING(task, 1); 5404 5405 /* 5406 * when coming from ctxsw, current still points to the 5407 * previous task, therefore we must work with task and not current. 5408 */ 5409 set_notify_resume(task); 5410 } 5411 /* 5412 * defer until state is changed (shorten spin window). the context is locked 5413 * anyway, so the signal receiver would come spin for nothing. 5414 */ 5415 must_notify = 1; 5416 } 5417 5418 DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n", 5419 GET_PMU_OWNER() ? task_pid_nr(GET_PMU_OWNER()) : -1, 5420 PFM_GET_WORK_PENDING(task), 5421 ctx->ctx_fl_trap_reason, 5422 ovfl_pmds, 5423 ovfl_notify, 5424 ovfl_ctrl.bits.mask_monitoring ? 
1 : 0));
5425 	/*
5426 	 * in case monitoring must be stopped, we toggle the psr bits
5427 	 */
5428 	if (ovfl_ctrl.bits.mask_monitoring) {
5429 		pfm_mask_monitoring(task);
5430 		ctx->ctx_state = PFM_CTX_MASKED;
5431 		ctx->ctx_fl_can_restart = 1;
5432 	}
5433 
5434 	/*
5435 	 * send notification now
5436 	 */
5437 	if (must_notify) pfm_ovfl_notify_user(ctx, ovfl_notify);
5438 
5439 	return;
5440 
5441 sanity_check:
5442 	printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n",
5443 		smp_processor_id(),
5444 		task ? task_pid_nr(task) : -1,
5445 		pmc0);
5446 	return;
5447 
5448 stop_monitoring:
5449 	/*
5450 	 * in SMP, zombie context is never restored but reclaimed in pfm_load_regs().
5451 	 * Moreover, zombies are also reclaimed in pfm_save_regs(). Therefore we can
5452 	 * come here as zombie only if the task is the current task, in which case
5453 	 * we can access the PMU hardware directly.
5454 	 *
5455 	 * Note that zombies do have PM_VALID set. So here we do the minimum.
5456 	 *
5457 	 * In case the context was zombified it could not be reclaimed at the time
5458 	 * the monitoring program exited. At this point, the PMU reservation has been
5459 	 * returned and the sampling buffer has been freed. We must convert this call
5460 	 * into a spurious interrupt. However, we must also avoid infinite overflows
5461 	 * by stopping monitoring for this task. We can only come here for a per-task
5462 	 * context. All we need to do is to stop monitoring using the psr bits which
5463 	 * are always task private. By re-enabling secure monitoring, we ensure that
5464 	 * the monitored task will not be able to re-activate monitoring.
5465 	 * The task will eventually be context switched out, at which point the context
5466 	 * will be reclaimed (that includes releasing ownership of the PMU).
5467 	 *
5468 	 * So there might be a window of time where the number of per-task sessions is zero
5469 	 * yet one PMU might have an owner and get at most one overflow interrupt for a zombie
5470 	 * context. This is safe because if a per-task session comes in, it will push this one
5471 	 * out and, by virtue of pfm_save_regs(), this one will disappear. If a system-wide
5472 	 * session is forced on that CPU, given that we use task pinning, pfm_save_regs() will
5473 	 * also push our zombie context out.
5474 	 *
5475 	 * Overall pretty hairy stuff....
5476 	 */
5477 	DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ?
task_pid_nr(task): -1)); 5478 pfm_clear_psr_up(); 5479 ia64_psr(regs)->up = 0; 5480 ia64_psr(regs)->sp = 1; 5481 return; 5482} 5483 5484static int 5485pfm_do_interrupt_handler(void *arg, struct pt_regs *regs) 5486{ 5487 struct task_struct *task; 5488 pfm_context_t *ctx; 5489 unsigned long flags; 5490 u64 pmc0; 5491 int this_cpu = smp_processor_id(); 5492 int retval = 0; 5493 5494 pfm_stats[this_cpu].pfm_ovfl_intr_count++; 5495 5496 /* 5497 * srlz.d done before arriving here 5498 */ 5499 pmc0 = ia64_get_pmc(0); 5500 5501 task = GET_PMU_OWNER(); 5502 ctx = GET_PMU_CTX(); 5503 5504 /* 5505 * if we have some pending bits set 5506 * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1 5507 */ 5508 if (PMC0_HAS_OVFL(pmc0) && task) { 5509 /* 5510 * we assume that pmc0.fr is always set here 5511 */ 5512 5513 /* sanity check */ 5514 if (!ctx) goto report_spurious1; 5515 5516 if (ctx->ctx_fl_system == 0 && (task->thread.flags & IA64_THREAD_PM_VALID) == 0) 5517 goto report_spurious2; 5518 5519 PROTECT_CTX_NOPRINT(ctx, flags); 5520 5521 pfm_overflow_handler(task, ctx, pmc0, regs); 5522 5523 UNPROTECT_CTX_NOPRINT(ctx, flags); 5524 5525 } else { 5526 pfm_stats[this_cpu].pfm_spurious_ovfl_intr_count++; 5527 retval = -1; 5528 } 5529 /* 5530 * keep it unfrozen at all times 5531 */ 5532 pfm_unfreeze_pmu(); 5533 5534 return retval; 5535 5536report_spurious1: 5537 printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n", 5538 this_cpu, task_pid_nr(task)); 5539 pfm_unfreeze_pmu(); 5540 return -1; 5541report_spurious2: 5542 printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n", 5543 this_cpu, 5544 task_pid_nr(task)); 5545 pfm_unfreeze_pmu(); 5546 return -1; 5547} 5548 5549static irqreturn_t 5550pfm_interrupt_handler(int irq, void *arg) 5551{ 5552 unsigned long start_cycles, total_cycles; 5553 unsigned long min, max; 5554 int this_cpu; 5555 int ret; 5556 struct pt_regs *regs = get_irq_regs(); 5557 5558 this_cpu = get_cpu(); 5559 if (likely(!pfm_alt_intr_handler)) { 5560 min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min; 5561 max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max; 5562 5563 start_cycles = ia64_get_itc(); 5564 5565 ret = pfm_do_interrupt_handler(arg, regs); 5566 5567 total_cycles = ia64_get_itc(); 5568 5569 /* 5570 * don't measure spurious interrupts 5571 */ 5572 if (likely(ret == 0)) { 5573 total_cycles -= start_cycles; 5574 5575 if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles; 5576 if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles; 5577 5578 pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles; 5579 } 5580 } 5581 else { 5582 (*pfm_alt_intr_handler->handler)(irq, arg, regs); 5583 } 5584 5585 put_cpu(); 5586 return IRQ_HANDLED; 5587} 5588 5589/* 5590 * /proc/perfmon interface, for debug only 5591 */ 5592 5593#define PFM_PROC_SHOW_HEADER ((void *)(long)nr_cpu_ids+1) 5594 5595static void * 5596pfm_proc_start(struct seq_file *m, loff_t *pos) 5597{ 5598 if (*pos == 0) { 5599 return PFM_PROC_SHOW_HEADER; 5600 } 5601 5602 while (*pos <= nr_cpu_ids) { 5603 if (cpu_online(*pos - 1)) { 5604 return (void *)*pos; 5605 } 5606 ++*pos; 5607 } 5608 return NULL; 5609} 5610 5611static void * 5612pfm_proc_next(struct seq_file *m, void *v, loff_t *pos) 5613{ 5614 ++*pos; 5615 return pfm_proc_start(m, pos); 5616} 5617 5618static void 5619pfm_proc_stop(struct seq_file *m, void *v) 5620{ 5621} 5622 5623static void 5624pfm_proc_show_header(struct seq_file *m) 5625{ 
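	/*
	 * emitted once per read of /proc/perfmon, for the
	 * PFM_PROC_SHOW_HEADER pseudo-record, ahead of the per-CPU
	 * statistics produced by pfm_proc_show()
	 */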
5626 struct list_head * pos; 5627 pfm_buffer_fmt_t * entry; 5628 unsigned long flags; 5629 5630 seq_printf(m, 5631 "perfmon version : %u.%u\n" 5632 "model : %s\n" 5633 "fastctxsw : %s\n" 5634 "expert mode : %s\n" 5635 "ovfl_mask : 0x%lx\n" 5636 "PMU flags : 0x%x\n", 5637 PFM_VERSION_MAJ, PFM_VERSION_MIN, 5638 pmu_conf->pmu_name, 5639 pfm_sysctl.fastctxsw > 0 ? "Yes": "No", 5640 pfm_sysctl.expert_mode > 0 ? "Yes": "No", 5641 pmu_conf->ovfl_val, 5642 pmu_conf->flags); 5643 5644 LOCK_PFS(flags); 5645 5646 seq_printf(m, 5647 "proc_sessions : %u\n" 5648 "sys_sessions : %u\n" 5649 "sys_use_dbregs : %u\n" 5650 "ptrace_use_dbregs : %u\n", 5651 pfm_sessions.pfs_task_sessions, 5652 pfm_sessions.pfs_sys_sessions, 5653 pfm_sessions.pfs_sys_use_dbregs, 5654 pfm_sessions.pfs_ptrace_use_dbregs); 5655 5656 UNLOCK_PFS(flags); 5657 5658 spin_lock(&pfm_buffer_fmt_lock); 5659 5660 list_for_each(pos, &pfm_buffer_fmt_list) { 5661 entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); 5662 seq_printf(m, "format : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n", 5663 entry->fmt_uuid[0], 5664 entry->fmt_uuid[1], 5665 entry->fmt_uuid[2], 5666 entry->fmt_uuid[3], 5667 entry->fmt_uuid[4], 5668 entry->fmt_uuid[5], 5669 entry->fmt_uuid[6], 5670 entry->fmt_uuid[7], 5671 entry->fmt_uuid[8], 5672 entry->fmt_uuid[9], 5673 entry->fmt_uuid[10], 5674 entry->fmt_uuid[11], 5675 entry->fmt_uuid[12], 5676 entry->fmt_uuid[13], 5677 entry->fmt_uuid[14], 5678 entry->fmt_uuid[15], 5679 entry->fmt_name); 5680 } 5681 spin_unlock(&pfm_buffer_fmt_lock); 5682 5683} 5684 5685static int 5686pfm_proc_show(struct seq_file *m, void *v) 5687{ 5688 unsigned long psr; 5689 unsigned int i; 5690 int cpu; 5691 5692 if (v == PFM_PROC_SHOW_HEADER) { 5693 pfm_proc_show_header(m); 5694 return 0; 5695 } 5696 5697 /* show info for CPU (v - 1) */ 5698 5699 cpu = (long)v - 1; 5700 seq_printf(m, 5701 "CPU%-2d overflow intrs : %lu\n" 5702 "CPU%-2d overflow cycles : %lu\n" 5703 "CPU%-2d overflow min : %lu\n" 5704 "CPU%-2d overflow max : %lu\n" 5705 "CPU%-2d smpl handler calls : %lu\n" 5706 "CPU%-2d smpl handler cycles : %lu\n" 5707 "CPU%-2d spurious intrs : %lu\n" 5708 "CPU%-2d replay intrs : %lu\n" 5709 "CPU%-2d syst_wide : %d\n" 5710 "CPU%-2d dcr_pp : %d\n" 5711 "CPU%-2d exclude idle : %d\n" 5712 "CPU%-2d owner : %d\n" 5713 "CPU%-2d context : %p\n" 5714 "CPU%-2d activations : %lu\n", 5715 cpu, pfm_stats[cpu].pfm_ovfl_intr_count, 5716 cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles, 5717 cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_min, 5718 cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_max, 5719 cpu, pfm_stats[cpu].pfm_smpl_handler_calls, 5720 cpu, pfm_stats[cpu].pfm_smpl_handler_cycles, 5721 cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count, 5722 cpu, pfm_stats[cpu].pfm_replay_ovfl_intr_count, 5723 cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_SYST_WIDE ? 1 : 0, 5724 cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_DCR_PP ? 1 : 0, 5725 cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0, 5726 cpu, pfm_get_cpu_data(pmu_owner, cpu) ? 
pfm_get_cpu_data(pmu_owner, cpu)->pid: -1, 5727 cpu, pfm_get_cpu_data(pmu_ctx, cpu), 5728 cpu, pfm_get_cpu_data(pmu_activation_number, cpu)); 5729 5730 if (num_online_cpus() == 1 && pfm_sysctl.debug > 0) { 5731 5732 psr = pfm_get_psr(); 5733 5734 ia64_srlz_d(); 5735 5736 seq_printf(m, 5737 "CPU%-2d psr : 0x%lx\n" 5738 "CPU%-2d pmc0 : 0x%lx\n", 5739 cpu, psr, 5740 cpu, ia64_get_pmc(0)); 5741 5742 for (i=0; PMC_IS_LAST(i) == 0; i++) { 5743 if (PMC_IS_COUNTING(i) == 0) continue; 5744 seq_printf(m, 5745 "CPU%-2d pmc%u : 0x%lx\n" 5746 "CPU%-2d pmd%u : 0x%lx\n", 5747 cpu, i, ia64_get_pmc(i), 5748 cpu, i, ia64_get_pmd(i)); 5749 } 5750 } 5751 return 0; 5752} 5753 5754const struct seq_operations pfm_seq_ops = { 5755 .start = pfm_proc_start, 5756 .next = pfm_proc_next, 5757 .stop = pfm_proc_stop, 5758 .show = pfm_proc_show 5759}; 5760 5761static int 5762pfm_proc_open(struct inode *inode, struct file *file) 5763{ 5764 return seq_open(file, &pfm_seq_ops); 5765} 5766 5767 5768/* 5769 * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens 5770 * during pfm_enable() hence before pfm_start(). We cannot assume monitoring 5771 * is active or inactive based on mode. We must rely on the value in 5772 * local_cpu_data->pfm_syst_info 5773 */ 5774void 5775pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin) 5776{ 5777 struct pt_regs *regs; 5778 unsigned long dcr; 5779 unsigned long dcr_pp; 5780 5781 dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0; 5782 5783 /* 5784 * pid 0 is guaranteed to be the idle task. There is one such task with pid 0 5785 * on every CPU, so we can rely on the pid to identify the idle task. 5786 */ 5787 if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) { 5788 regs = task_pt_regs(task); 5789 ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0; 5790 return; 5791 } 5792 /* 5793 * if monitoring has started 5794 */ 5795 if (dcr_pp) { 5796 dcr = ia64_getreg(_IA64_REG_CR_DCR); 5797 /* 5798 * context switching in? 5799 */ 5800 if (is_ctxswin) { 5801 /* mask monitoring for the idle task */ 5802 ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP); 5803 pfm_clear_psr_pp(); 5804 ia64_srlz_i(); 5805 return; 5806 } 5807 /* 5808 * context switching out 5809 * restore monitoring for next task 5810 * 5811 * Due to inlining this odd if-then-else construction generates 5812 * better code. 5813 */ 5814 ia64_setreg(_IA64_REG_CR_DCR, dcr |IA64_DCR_PP); 5815 pfm_set_psr_pp(); 5816 ia64_srlz_i(); 5817 } 5818} 5819 5820#ifdef CONFIG_SMP 5821 5822static void 5823pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs) 5824{ 5825 struct task_struct *task = ctx->ctx_task; 5826 5827 ia64_psr(regs)->up = 0; 5828 ia64_psr(regs)->sp = 1; 5829 5830 if (GET_PMU_OWNER() == task) { 5831 DPRINT(("cleared ownership for [%d]\n", 5832 task_pid_nr(ctx->ctx_task))); 5833 SET_PMU_OWNER(NULL, NULL); 5834 } 5835 5836 /* 5837 * disconnect the task from the context and vice-versa 5838 */ 5839 PFM_SET_WORK_PENDING(task, 0); 5840 5841 task->thread.pfm_context = NULL; 5842 task->thread.flags &= ~IA64_THREAD_PM_VALID; 5843 5844 DPRINT(("force cleanup for [%d]\n", task_pid_nr(task))); 5845} 5846 5847 5848/* 5849 * in 2.6, interrupts are masked when we come here and the runqueue lock is held 5850 */ 5851void 5852pfm_save_regs(struct task_struct *task) 5853{ 5854 pfm_context_t *ctx; 5855 unsigned long flags; 5856 u64 psr; 5857 5858 5859 ctx = PFM_GET_CTX(task); 5860 if (ctx == NULL) return; 5861 5862 /* 5863 * we always come here with interrupts ALREADY disabled by 5864 * the scheduler. 
So we simply need to protect against concurrent 5865 * access, not CPU concurrency. 5866 */ 5867 flags = pfm_protect_ctx_ctxsw(ctx); 5868 5869 if (ctx->ctx_state == PFM_CTX_ZOMBIE) { 5870 struct pt_regs *regs = task_pt_regs(task); 5871 5872 pfm_clear_psr_up(); 5873 5874 pfm_force_cleanup(ctx, regs); 5875 5876 BUG_ON(ctx->ctx_smpl_hdr); 5877 5878 pfm_unprotect_ctx_ctxsw(ctx, flags); 5879 5880 pfm_context_free(ctx); 5881 return; 5882 } 5883 5884 /* 5885 * save current PSR: needed because we modify it 5886 */ 5887 ia64_srlz_d(); 5888 psr = pfm_get_psr(); 5889 5890 BUG_ON(psr & (IA64_PSR_I)); 5891 5892 /* 5893 * stop monitoring: 5894 * This is the last instruction which may generate an overflow 5895 * 5896 * We do not need to set psr.sp because, it is irrelevant in kernel. 5897 * It will be restored from ipsr when going back to user level 5898 */ 5899 pfm_clear_psr_up(); 5900 5901 /* 5902 * keep a copy of psr.up (for reload) 5903 */ 5904 ctx->ctx_saved_psr_up = psr & IA64_PSR_UP; 5905 5906 /* 5907 * release ownership of this PMU. 5908 * PM interrupts are masked, so nothing 5909 * can happen. 5910 */ 5911 SET_PMU_OWNER(NULL, NULL); 5912 5913 /* 5914 * we systematically save the PMD as we have no 5915 * guarantee we will be schedule at that same 5916 * CPU again. 5917 */ 5918 pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); 5919 5920 /* 5921 * save pmc0 ia64_srlz_d() done in pfm_save_pmds() 5922 * we will need it on the restore path to check 5923 * for pending overflow. 5924 */ 5925 ctx->th_pmcs[0] = ia64_get_pmc(0); 5926 5927 /* 5928 * unfreeze PMU if had pending overflows 5929 */ 5930 if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); 5931 5932 /* 5933 * finally, allow context access. 5934 * interrupts will still be masked after this call. 5935 */ 5936 pfm_unprotect_ctx_ctxsw(ctx, flags); 5937} 5938 5939#else /* !CONFIG_SMP */ 5940void 5941pfm_save_regs(struct task_struct *task) 5942{ 5943 pfm_context_t *ctx; 5944 u64 psr; 5945 5946 ctx = PFM_GET_CTX(task); 5947 if (ctx == NULL) return; 5948 5949 /* 5950 * save current PSR: needed because we modify it 5951 */ 5952 psr = pfm_get_psr(); 5953 5954 BUG_ON(psr & (IA64_PSR_I)); 5955 5956 /* 5957 * stop monitoring: 5958 * This is the last instruction which may generate an overflow 5959 * 5960 * We do not need to set psr.sp because, it is irrelevant in kernel. 5961 * It will be restored from ipsr when going back to user level 5962 */ 5963 pfm_clear_psr_up(); 5964 5965 /* 5966 * keep a copy of psr.up (for reload) 5967 */ 5968 ctx->ctx_saved_psr_up = psr & IA64_PSR_UP; 5969} 5970 5971static void 5972pfm_lazy_save_regs (struct task_struct *task) 5973{ 5974 pfm_context_t *ctx; 5975 unsigned long flags; 5976 5977 { u64 psr = pfm_get_psr(); 5978 BUG_ON(psr & IA64_PSR_UP); 5979 } 5980 5981 ctx = PFM_GET_CTX(task); 5982 5983 /* 5984 * we need to mask PMU overflow here to 5985 * make sure that we maintain pmc0 until 5986 * we save it. overflow interrupts are 5987 * treated as spurious if there is no 5988 * owner. 5989 * 5990 * XXX: I don't think this is necessary 5991 */ 5992 PROTECT_CTX(ctx,flags); 5993 5994 /* 5995 * release ownership of this PMU. 5996 * must be done before we save the registers. 5997 * 5998 * after this call any PMU interrupt is treated 5999 * as spurious. 
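 * (a late overflow interrupt taken after this point finds no owner,
 * is accounted in pfm_spurious_ovfl_intr_count and simply unfreezes
 * the PMU, see pfm_do_interrupt_handler())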
6000 */ 6001 SET_PMU_OWNER(NULL, NULL); 6002 6003 /* 6004 * save all the pmds we use 6005 */ 6006 pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); 6007 6008 /* 6009 * save pmc0 ia64_srlz_d() done in pfm_save_pmds() 6010 * it is needed to check for pended overflow 6011 * on the restore path 6012 */ 6013 ctx->th_pmcs[0] = ia64_get_pmc(0); 6014 6015 /* 6016 * unfreeze PMU if had pending overflows 6017 */ 6018 if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); 6019 6020 /* 6021 * now get can unmask PMU interrupts, they will 6022 * be treated as purely spurious and we will not 6023 * lose any information 6024 */ 6025 UNPROTECT_CTX(ctx,flags); 6026} 6027#endif /* CONFIG_SMP */ 6028 6029#ifdef CONFIG_SMP 6030/* 6031 * in 2.6, interrupts are masked when we come here and the runqueue lock is held 6032 */ 6033void 6034pfm_load_regs (struct task_struct *task) 6035{ 6036 pfm_context_t *ctx; 6037 unsigned long pmc_mask = 0UL, pmd_mask = 0UL; 6038 unsigned long flags; 6039 u64 psr, psr_up; 6040 int need_irq_resend; 6041 6042 ctx = PFM_GET_CTX(task); 6043 if (unlikely(ctx == NULL)) return; 6044 6045 BUG_ON(GET_PMU_OWNER()); 6046 6047 /* 6048 * possible on unload 6049 */ 6050 if (unlikely((task->thread.flags & IA64_THREAD_PM_VALID) == 0)) return; 6051 6052 /* 6053 * we always come here with interrupts ALREADY disabled by 6054 * the scheduler. So we simply need to protect against concurrent 6055 * access, not CPU concurrency. 6056 */ 6057 flags = pfm_protect_ctx_ctxsw(ctx); 6058 psr = pfm_get_psr(); 6059 6060 need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; 6061 6062 BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); 6063 BUG_ON(psr & IA64_PSR_I); 6064 6065 if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) { 6066 struct pt_regs *regs = task_pt_regs(task); 6067 6068 BUG_ON(ctx->ctx_smpl_hdr); 6069 6070 pfm_force_cleanup(ctx, regs); 6071 6072 pfm_unprotect_ctx_ctxsw(ctx, flags); 6073 6074 /* 6075 * this one (kmalloc'ed) is fine with interrupts disabled 6076 */ 6077 pfm_context_free(ctx); 6078 6079 return; 6080 } 6081 6082 /* 6083 * we restore ALL the debug registers to avoid picking up 6084 * stale state. 6085 */ 6086 if (ctx->ctx_fl_using_dbreg) { 6087 pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); 6088 pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); 6089 } 6090 /* 6091 * retrieve saved psr.up 6092 */ 6093 psr_up = ctx->ctx_saved_psr_up; 6094 6095 /* 6096 * if we were the last user of the PMU on that CPU, 6097 * then nothing to do except restore psr 6098 */ 6099 if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) { 6100 6101 /* 6102 * retrieve partial reload masks (due to user modifications) 6103 */ 6104 pmc_mask = ctx->ctx_reload_pmcs[0]; 6105 pmd_mask = ctx->ctx_reload_pmds[0]; 6106 6107 } else { 6108 /* 6109 * To avoid leaking information to the user level when psr.sp=0, 6110 * we must reload ALL implemented pmds (even the ones we don't use). 6111 * In the kernel we only allow PFM_READ_PMDS on registers which 6112 * we initialized or requested (sampling) so there is no risk there. 6113 */ 6114 pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0]; 6115 6116 /* 6117 * ALL accessible PMCs are systematically reloaded, unused registers 6118 * get their default (from pfm_reset_pmu_state()) values to avoid picking 6119 * up stale configuration. 6120 * 6121 * PMC0 is never in the mask. It is always restored separately. 
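 * (pmc0 is written back further down, and only when the saved
 * th_pmcs[0] shows an overflow was pending at save time, so that the
 * interrupt can be replayed)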
6122 */ 6123 pmc_mask = ctx->ctx_all_pmcs[0]; 6124 } 6125 /* 6126 * when context is MASKED, we will restore PMC with plm=0 6127 * and PMD with stale information, but that's ok, nothing 6128 * will be captured. 6129 * 6130 * XXX: optimize here 6131 */ 6132 if (pmd_mask) pfm_restore_pmds(ctx->th_pmds, pmd_mask); 6133 if (pmc_mask) pfm_restore_pmcs(ctx->th_pmcs, pmc_mask); 6134 6135 /* 6136 * check for pending overflow at the time the state 6137 * was saved. 6138 */ 6139 if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) { 6140 /* 6141 * reload pmc0 with the overflow information 6142 * On McKinley PMU, this will trigger a PMU interrupt 6143 */ 6144 ia64_set_pmc(0, ctx->th_pmcs[0]); 6145 ia64_srlz_d(); 6146 ctx->th_pmcs[0] = 0UL; 6147 6148 /* 6149 * will replay the PMU interrupt 6150 */ 6151 if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR); 6152 6153 pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; 6154 } 6155 6156 /* 6157 * we just did a reload, so we reset the partial reload fields 6158 */ 6159 ctx->ctx_reload_pmcs[0] = 0UL; 6160 ctx->ctx_reload_pmds[0] = 0UL; 6161 6162 SET_LAST_CPU(ctx, smp_processor_id()); 6163 6164 /* 6165 * dump activation value for this PMU 6166 */ 6167 INC_ACTIVATION(); 6168 /* 6169 * record current activation for this context 6170 */ 6171 SET_ACTIVATION(ctx); 6172 6173 /* 6174 * establish new ownership. 6175 */ 6176 SET_PMU_OWNER(task, ctx); 6177 6178 /* 6179 * restore the psr.up bit. measurement 6180 * is active again. 6181 * no PMU interrupt can happen at this point 6182 * because we still have interrupts disabled. 6183 */ 6184 if (likely(psr_up)) pfm_set_psr_up(); 6185 6186 /* 6187 * allow concurrent access to context 6188 */ 6189 pfm_unprotect_ctx_ctxsw(ctx, flags); 6190} 6191#else /* !CONFIG_SMP */ 6192/* 6193 * reload PMU state for UP kernels 6194 * in 2.5 we come here with interrupts disabled 6195 */ 6196void 6197pfm_load_regs (struct task_struct *task) 6198{ 6199 pfm_context_t *ctx; 6200 struct task_struct *owner; 6201 unsigned long pmd_mask, pmc_mask; 6202 u64 psr, psr_up; 6203 int need_irq_resend; 6204 6205 owner = GET_PMU_OWNER(); 6206 ctx = PFM_GET_CTX(task); 6207 psr = pfm_get_psr(); 6208 6209 BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); 6210 BUG_ON(psr & IA64_PSR_I); 6211 6212 /* 6213 * we restore ALL the debug registers to avoid picking up 6214 * stale state. 6215 * 6216 * This must be done even when the task is still the owner 6217 * as the registers may have been modified via ptrace() 6218 * (not perfmon) by the previous task. 6219 */ 6220 if (ctx->ctx_fl_using_dbreg) { 6221 pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); 6222 pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); 6223 } 6224 6225 /* 6226 * retrieved saved psr.up 6227 */ 6228 psr_up = ctx->ctx_saved_psr_up; 6229 need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; 6230 6231 /* 6232 * short path, our state is still there, just 6233 * need to restore psr and we go 6234 * 6235 * we do not touch either PMC nor PMD. the psr is not touched 6236 * by the overflow_handler. So we are safe w.r.t. to interrupt 6237 * concurrency even without interrupt masking. 6238 */ 6239 if (likely(owner == task)) { 6240 if (likely(psr_up)) pfm_set_psr_up(); 6241 return; 6242 } 6243 6244 /* 6245 * someone else is still using the PMU, first push it out and 6246 * then we'll be able to install our stuff ! 
6247 * 6248 * Upon return, there will be no owner for the current PMU 6249 */ 6250 if (owner) pfm_lazy_save_regs(owner); 6251 6252 /* 6253 * To avoid leaking information to the user level when psr.sp=0, 6254 * we must reload ALL implemented pmds (even the ones we don't use). 6255 * In the kernel we only allow PFM_READ_PMDS on registers which 6256 * we initialized or requested (sampling) so there is no risk there. 6257 */ 6258 pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0]; 6259 6260 /* 6261 * ALL accessible PMCs are systematically reloaded, unused registers 6262 * get their default (from pfm_reset_pmu_state()) values to avoid picking 6263 * up stale configuration. 6264 * 6265 * PMC0 is never in the mask. It is always restored separately 6266 */ 6267 pmc_mask = ctx->ctx_all_pmcs[0]; 6268 6269 pfm_restore_pmds(ctx->th_pmds, pmd_mask); 6270 pfm_restore_pmcs(ctx->th_pmcs, pmc_mask); 6271 6272 /* 6273 * check for pending overflow at the time the state 6274 * was saved. 6275 */ 6276 if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) { 6277 /* 6278 * reload pmc0 with the overflow information 6279 * On McKinley PMU, this will trigger a PMU interrupt 6280 */ 6281 ia64_set_pmc(0, ctx->th_pmcs[0]); 6282 ia64_srlz_d(); 6283 6284 ctx->th_pmcs[0] = 0UL; 6285 6286 /* 6287 * will replay the PMU interrupt 6288 */ 6289 if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR); 6290 6291 pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; 6292 } 6293 6294 /* 6295 * establish new ownership. 6296 */ 6297 SET_PMU_OWNER(task, ctx); 6298 6299 /* 6300 * restore the psr.up bit. measurement 6301 * is active again. 6302 * no PMU interrupt can happen at this point 6303 * because we still have interrupts disabled. 6304 */ 6305 if (likely(psr_up)) pfm_set_psr_up(); 6306} 6307#endif /* CONFIG_SMP */ 6308 6309/* 6310 * this function assumes monitoring is stopped 6311 */ 6312static void 6313pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) 6314{ 6315 u64 pmc0; 6316 unsigned long mask2, val, pmd_val, ovfl_val; 6317 int i, can_access_pmu = 0; 6318 int is_self; 6319 6320 /* 6321 * is the caller the task being monitored (or which initiated the 6322 * session for system wide measurements) 6323 */ 6324 is_self = ctx->ctx_task == task ? 1 : 0; 6325 6326 /* 6327 * can access PMU is task is the owner of the PMU state on the current CPU 6328 * or if we are running on the CPU bound to the context in system-wide mode 6329 * (that is not necessarily the task the context is attached to in this mode). 6330 * In system-wide we always have can_access_pmu true because a task running on an 6331 * invalid processor is flagged earlier in the call stack (see pfm_stop). 6332 */ 6333 can_access_pmu = (GET_PMU_OWNER() == task) || (ctx->ctx_fl_system && ctx->ctx_cpu == smp_processor_id()); 6334 if (can_access_pmu) { 6335 /* 6336 * Mark the PMU as not owned 6337 * This will cause the interrupt handler to do nothing in case an overflow 6338 * interrupt was in-flight 6339 * This also guarantees that pmc0 will contain the final state 6340 * It virtually gives us full control on overflow processing from that point 6341 * on. 
6342 */ 6343 SET_PMU_OWNER(NULL, NULL); 6344 DPRINT(("releasing ownership\n")); 6345 6346 /* 6347 * read current overflow status: 6348 * 6349 * we are guaranteed to read the final stable state 6350 */ 6351 ia64_srlz_d(); 6352 pmc0 = ia64_get_pmc(0); /* slow */ 6353 6354 /* 6355 * reset freeze bit, overflow status information destroyed 6356 */ 6357 pfm_unfreeze_pmu(); 6358 } else { 6359 pmc0 = ctx->th_pmcs[0]; 6360 /* 6361 * clear whatever overflow status bits there were 6362 */ 6363 ctx->th_pmcs[0] = 0; 6364 } 6365 ovfl_val = pmu_conf->ovfl_val; 6366 /* 6367 * we save all the used pmds 6368 * we take care of overflows for counting PMDs 6369 * 6370 * XXX: sampling situation is not taken into account here 6371 */ 6372 mask2 = ctx->ctx_used_pmds[0]; 6373 6374 DPRINT(("is_self=%d ovfl_val=0x%lx mask2=0x%lx\n", is_self, ovfl_val, mask2)); 6375 6376 for (i = 0; mask2; i++, mask2>>=1) { 6377 6378 /* skip non used pmds */ 6379 if ((mask2 & 0x1) == 0) continue; 6380 6381 /* 6382 * can access PMU always true in system wide mode 6383 */ 6384 val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : ctx->th_pmds[i]; 6385 6386 if (PMD_IS_COUNTING(i)) { 6387 DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n", 6388 task_pid_nr(task), 6389 i, 6390 ctx->ctx_pmds[i].val, 6391 val & ovfl_val)); 6392 6393 /* 6394 * we rebuild the full 64 bit value of the counter 6395 */ 6396 val = ctx->ctx_pmds[i].val + (val & ovfl_val); 6397 6398 /* 6399 * now everything is in ctx_pmds[] and we need 6400 * to clear the saved context from save_regs() such that 6401 * pfm_read_pmds() gets the correct value 6402 */ 6403 pmd_val = 0UL; 6404 6405 /* 6406 * take care of overflow inline 6407 */ 6408 if (pmc0 & (1UL << i)) { 6409 val += 1 + ovfl_val; 6410 DPRINT(("[%d] pmd[%d] overflowed\n", task_pid_nr(task), i)); 6411 } 6412 } 6413 6414 DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task_pid_nr(task), i, val, pmd_val)); 6415 6416 if (is_self) ctx->th_pmds[i] = pmd_val; 6417 6418 ctx->ctx_pmds[i].val = val; 6419 } 6420} 6421 6422static struct irqaction perfmon_irqaction = { 6423 .handler = pfm_interrupt_handler, 6424 .flags = IRQF_DISABLED, 6425 .name = "perfmon" 6426}; 6427 6428static void 6429pfm_alt_save_pmu_state(void *data) 6430{ 6431 struct pt_regs *regs; 6432 6433 regs = task_pt_regs(current); 6434 6435 DPRINT(("called\n")); 6436 6437 /* 6438 * should not be necessary but 6439 * let's take not risk 6440 */ 6441 pfm_clear_psr_up(); 6442 pfm_clear_psr_pp(); 6443 ia64_psr(regs)->pp = 0; 6444 6445 /* 6446 * This call is required 6447 * May cause a spurious interrupt on some processors 6448 */ 6449 pfm_freeze_pmu(); 6450 6451 ia64_srlz_d(); 6452} 6453 6454void 6455pfm_alt_restore_pmu_state(void *data) 6456{ 6457 struct pt_regs *regs; 6458 6459 regs = task_pt_regs(current); 6460 6461 DPRINT(("called\n")); 6462 6463 /* 6464 * put PMU back in state expected 6465 * by perfmon 6466 */ 6467 pfm_clear_psr_up(); 6468 pfm_clear_psr_pp(); 6469 ia64_psr(regs)->pp = 0; 6470 6471 /* 6472 * perfmon runs with PMU unfrozen at all times 6473 */ 6474 pfm_unfreeze_pmu(); 6475 6476 ia64_srlz_d(); 6477} 6478 6479int 6480pfm_install_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl) 6481{ 6482 int ret, i; 6483 int reserve_cpu; 6484 6485 /* some sanity checks */ 6486 if (hdl == NULL || hdl->handler == NULL) return -EINVAL; 6487 6488 /* do the easy test first */ 6489 if (pfm_alt_intr_handler) return -EBUSY; 6490 6491 /* one at a time in the install or remove, just fail the others */ 6492 if (!spin_trylock(&pfm_alt_install_check)) { 6493 return -EBUSY; 6494 } 
int
pfm_remove_alt_pmu_interrupt(pfm_intr_handler_desc_t *hdl)
{
	int i;
	int ret;

	if (hdl == NULL) return -EINVAL;

	/* cannot remove someone else's handler! */
	if (pfm_alt_intr_handler != hdl) return -EINVAL;

	/* only one install or remove at a time, just fail the others */
	if (!spin_trylock(&pfm_alt_install_check)) {
		return -EBUSY;
	}

	pfm_alt_intr_handler = NULL;

	ret = on_each_cpu(pfm_alt_restore_pmu_state, NULL, 1);
	if (ret) {
		DPRINT(("on_each_cpu() failed: %d\n", ret));
	}

	for_each_online_cpu(i) {
		pfm_unreserve_session(NULL, 1, i);
	}

	spin_unlock(&pfm_alt_install_check);

	return 0;
}
EXPORT_SYMBOL_GPL(pfm_remove_alt_pmu_interrupt);

/*
 * perfmon initialization routine, called from the initcall() table
 */
static int init_pfm_fs(void);

static int __init
pfm_probe_pmu(void)
{
	pmu_config_t **p;
	int family;

	family = local_cpu_data->family;
	p      = pmu_confs;

	while(*p) {
		if ((*p)->probe) {
			if ((*p)->probe() == 0) goto found;
		} else if ((*p)->pmu_family == family || (*p)->pmu_family == 0xff) {
			goto found;
		}
		p++;
	}
	return -1;
found:
	pmu_conf = *p;
	return 0;
}

static const struct file_operations pfm_proc_fops = {
	.open		= pfm_proc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
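
/*
 * How a PMU model gets selected by pfm_probe_pmu(): the pmu_confs[] table
 * is scanned in order; an entry with a probe() callback wins if the
 * callback returns 0, otherwise the CPU family is matched directly, with
 * 0xff acting as a catch-all "generic" entry.  A hypothetical table entry,
 * restricted to the pmu_config_t fields actually referenced in this file
 * (the descriptor tables and values below are made up for illustration):
 *
 *	static pmu_config_t pmu_conf_example = {
 *		.pmu_name      = "Example",
 *		.pmu_family    = 0x1f,              // or set .probe instead
 *		.flags         = PFM_PMU_IRQ_RESEND,
 *		.ovfl_val      = (1UL << 47) - 1,   // 47-bit counters, say
 *		.pmc_desc      = example_pmc_desc,
 *		.pmd_desc      = example_pmd_desc,
 *		.num_ibrs      = IA64_NUM_DBG_REGS,
 *		.num_dbrs      = IA64_NUM_DBG_REGS,
 *		.use_rr_dbregs = 1,
 *	};
 */
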
int __init
pfm_init(void)
{
	unsigned int n, n_counters, i;

	printk("perfmon: version %u.%u IRQ %u\n",
		PFM_VERSION_MAJ,
		PFM_VERSION_MIN,
		IA64_PERFMON_VECTOR);

	if (pfm_probe_pmu()) {
		printk(KERN_INFO "perfmon: disabled, there is no support for processor family %d\n",
				local_cpu_data->family);
		return -ENODEV;
	}

	/*
	 * compute the number of implemented PMD/PMC from the
	 * description tables
	 */
	n = 0;
	for (i=0; PMC_IS_LAST(i) == 0;  i++) {
		if (PMC_IS_IMPL(i) == 0) continue;
		pmu_conf->impl_pmcs[i>>6] |= 1UL << (i&63);
		n++;
	}
	pmu_conf->num_pmcs = n;

	n = 0; n_counters = 0;
	for (i=0; PMD_IS_LAST(i) == 0;  i++) {
		if (PMD_IS_IMPL(i) == 0) continue;
		pmu_conf->impl_pmds[i>>6] |= 1UL << (i&63);
		n++;
		if (PMD_IS_COUNTING(i)) n_counters++;
	}
	pmu_conf->num_pmds     = n;
	pmu_conf->num_counters = n_counters;

	/*
	 * sanity checks on the number of debug registers
	 */
	if (pmu_conf->use_rr_dbregs) {
		if (pmu_conf->num_ibrs > IA64_NUM_DBG_REGS) {
			printk(KERN_INFO "perfmon: unsupported number of code debug registers (%u)\n", pmu_conf->num_ibrs);
			pmu_conf = NULL;
			return -1;
		}
		if (pmu_conf->num_dbrs > IA64_NUM_DBG_REGS) {
			printk(KERN_INFO "perfmon: unsupported number of data debug registers (%u)\n", pmu_conf->num_dbrs);
			pmu_conf = NULL;
			return -1;
		}
	}

	printk("perfmon: %s PMU detected, %u PMCs, %u PMDs, %u counters (%lu bits)\n",
	       pmu_conf->pmu_name,
	       pmu_conf->num_pmcs,
	       pmu_conf->num_pmds,
	       pmu_conf->num_counters,
	       ffz(pmu_conf->ovfl_val));

	/* sanity check */
	if (pmu_conf->num_pmds >= PFM_NUM_PMD_REGS || pmu_conf->num_pmcs >= PFM_NUM_PMC_REGS) {
		printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n");
		pmu_conf = NULL;
		return -1;
	}

	/*
	 * create /proc/perfmon (mostly for debugging purposes)
	 */
	perfmon_dir = proc_create("perfmon", S_IRUGO, NULL, &pfm_proc_fops);
	if (perfmon_dir == NULL) {
		printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
		pmu_conf = NULL;
		return -1;
	}

	/*
	 * create /proc/sys/kernel/perfmon (for debugging purposes)
	 */
	pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root);

	/*
	 * initialize all our spinlocks
	 */
	spin_lock_init(&pfm_sessions.pfs_lock);
	spin_lock_init(&pfm_buffer_fmt_lock);

	init_pfm_fs();

	for(i=0; i < NR_CPUS; i++) pfm_stats[i].pfm_ovfl_intr_cycles_min = ~0UL;

	return 0;
}

__initcall(pfm_init);
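
/*
 * Two small arithmetic notes on pfm_init() above:
 *
 * - impl_pmcs/impl_pmds are arrays of 64-bit words, so register i lands in
 *   word i>>6 at bit i&63.  For example, an implemented PMC 70 sets bit 6
 *   of impl_pmcs[1] (70/64 == 1, 70%64 == 6).
 *
 * - the "(%lu bits)" figure printed at boot is ffz(ovfl_val), the first
 *   zero bit in the overflow mask: with ovfl_val == 0x0000ffffffffffff
 *   (low 48 bits set), ffz() returns 48, i.e. 48-bit wide counters.
 */
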
/*
 * this function is called before pfm_init()
 */
void
pfm_init_percpu (void)
{
	static int first_time=1;
	/*
	 * make sure no measurement is active
	 * (may inherit programmed PMCs from EFI).
	 */
	pfm_clear_psr_pp();
	pfm_clear_psr_up();

	/*
	 * we run with the PMU not frozen at all times
	 */
	pfm_unfreeze_pmu();

	if (first_time) {
		register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
		first_time=0;
	}

	ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
	ia64_srlz_d();
}

/*
 * used for debug purposes only
 */
void
dump_pmu_state(const char *from)
{
	struct task_struct *task;
	struct pt_regs *regs;
	pfm_context_t *ctx;
	unsigned long psr, dcr, info, flags;
	int i, this_cpu;

	local_irq_save(flags);

	this_cpu = smp_processor_id();
	regs     = task_pt_regs(current);
	info     = PFM_CPUINFO_GET();
	dcr      = ia64_getreg(_IA64_REG_CR_DCR);

	if (info == 0 && ia64_psr(regs)->pp == 0 && (dcr & IA64_DCR_PP) == 0) {
		local_irq_restore(flags);
		return;
	}

	printk("CPU%d from %s() current [%d] iip=0x%lx %s\n",
		this_cpu,
		from,
		task_pid_nr(current),
		regs->cr_iip,
		current->comm);

	task = GET_PMU_OWNER();
	ctx  = GET_PMU_CTX();

	printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task_pid_nr(task) : -1, ctx);

	psr = pfm_get_psr();

	printk("->CPU%d pmc0=0x%lx psr.pp=%d psr.up=%d dcr.pp=%d syst_info=0x%lx user_psr.up=%d user_psr.pp=%d\n",
		this_cpu,
		ia64_get_pmc(0),
		psr & IA64_PSR_PP ? 1 : 0,
		psr & IA64_PSR_UP ? 1 : 0,
		dcr & IA64_DCR_PP ? 1 : 0,
		info,
		ia64_psr(regs)->up,
		ia64_psr(regs)->pp);

	ia64_psr(regs)->up = 0;
	ia64_psr(regs)->pp = 0;

	for (i=1; PMC_IS_LAST(i) == 0; i++) {
		if (PMC_IS_IMPL(i) == 0) continue;
		printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, ctx->th_pmcs[i]);
	}

	for (i=1; PMD_IS_LAST(i) == 0; i++) {
		if (PMD_IS_IMPL(i) == 0) continue;
		printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, ctx->th_pmds[i]);
	}

	if (ctx) {
		printk("->CPU%d ctx_state=%d vaddr=%p addr=%p msgq_head=%d msgq_tail=%d saved_psr_up=0x%lx\n",
				this_cpu,
				ctx->ctx_state,
				ctx->ctx_smpl_vaddr,
				ctx->ctx_smpl_hdr,
				ctx->ctx_msgq_head,
				ctx->ctx_msgq_tail,
				ctx->ctx_saved_psr_up);
	}
	local_irq_restore(flags);
}

/*
 * called from process.c:copy_thread(). task is new child.
 */
void
pfm_inherit(struct task_struct *task, struct pt_regs *regs)
{
	struct thread_struct *thread;

	DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task_pid_nr(task)));

	thread = &task->thread;

	/*
	 * cut links inherited from parent (current)
	 */
	thread->pfm_context = NULL;

	PFM_SET_WORK_PENDING(task, 0);

	/*
	 * the psr bits are already set properly in copy_thread()
	 */
}
#else  /* !CONFIG_PERFMON */
asmlinkage long
sys_perfmonctl (int fd, int cmd, void *arg, int count)
{
	return -ENOSYS;
}
#endif /* CONFIG_PERFMON */