/* op_pmu.c revision 8cfa702f803c5ef6a2b062a489a1b2cf66b45b5e */
1/**
2 * @file op_pmu.c
3 * Setup and handling of IA64 Performance Monitoring Unit (PMU)
4 *
5 * @remark Copyright 2002 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author Bob Montgomery
9 * @author Will Cohen
10 * @author John Levon
11 * @author Philippe Elie
12 */
13
14
15#include "oprofile.h"
16#include "op_util.h"
17#include <asm/perfmon.h>
18#include "op_ia64_model.h"
19
/* number of counters physically present */
static uint op_nr_counters = 4;

/* performance counters are in pairs: pmcN and pmdN.  The pmc register acts
 * as the event selection; the pmd register is the counter. */
#define perf_reg(c)	((c)+4)

/* PMD counting width differs by processor: Itanium counts in 32 bits,
 * Itanium 2 in 47 bits.  The masks keep writes inside the valid range. */
#define IA64_1_PMD_MASK_VAL	((1UL << 32) - 1)
#define IA64_2_PMD_MASK_VAL	((1UL << 47) - 1)

/* The appropriate value is selected in pmu_init() */
unsigned long pmd_mask = IA64_2_PMD_MASK_VAL;

/* pmc0 is the overflow status register: bit perf_reg(c) set means
 * counter c overflowed */
#define pmd_overflowed(r, c) ((r) & (1 << perf_reg(c)))
/* load pmd with the negated count so it overflows after v more events;
 * every pmd/pmc write is followed by a data serialize (srlz.d) */
#define set_pmd_neg(v, c) do { \
	ia64_set_pmd(perf_reg(c), -(ulong)(v) & pmd_mask); \
	ia64_srlz_d(); } while (0)
#define set_pmd(v, c) do { \
	ia64_set_pmd(perf_reg(c), (v) & pmd_mask); \
	ia64_srlz_d(); } while (0)
#define set_pmc(v, c) do { ia64_set_pmc(perf_reg(c), (v)); ia64_srlz_d(); } while (0)
#define get_pmd(c) ia64_get_pmd(perf_reg(c))
#define get_pmc(c) ia64_get_pmc(perf_reg(c))
43
44/* ---------------- IRQ handler ------------------ */
45
46/* The args match the args for pfm_overflow_handler in perfmon.c.
47 * The task_struct is currently filled in with the perfmon "owner" of
48 * the PMU.  This might change.  I'm not sure it makes sense in perfmon
49 * either with system-wide profiling.
50 * pmc0 is a bit mask for overflowed counters (bits 4-7)
 * This routine returns nothing; the caller clears pmc0 to resume interrupts.
52 */
53inline static void
54op_do_pmu_interrupt(u64 pmc0, struct pt_regs * regs)
55{
56	uint cpu = op_cpu_id();
57	int ctr;
58
59	for (ctr = 0 ; ctr < op_nr_counters ; ++ctr) {
60		if (pmd_overflowed(pmc0, ctr)) {
61			op_do_profile(cpu, regs->cr_iip, 1, ctr);
62			set_pmd_neg(oprof_data[cpu].ctr_count[ctr], ctr);
63		}
64	}
65	return;
66}
67
68
69static void
70op_raw_pmu_interrupt(int irq, void * arg, struct pt_regs * regs)
71{
72	u64 pmc0;
73
74	pmc0 = ia64_get_pmc(0);
75
76	if ((pmc0 & ~0x1UL) != 0UL) {
77		op_do_pmu_interrupt(pmc0, regs);
78		ia64_set_pmc(0, 0);
79		ia64_srlz_d();
80	}
81}
82
83
/* a vector of our own, distinct from perfmon's IA64_PERFMON_VECTOR */
#define MY_OPROFILE_VECTOR (IA64_PERFMON_VECTOR - 2)

/* Per-CPU: route the PMU overflow interrupt to our private vector. */
static void
op_set_pmv(void * dummy)
{
	ia64_set_pmv(MY_OPROFILE_VECTOR);
	ia64_srlz_d();
}
92
93
/* Per-CPU: hand the PMU overflow interrupt back to perfmon's vector. */
static void
op_restore_pmv(void* dummy)
{
	ia64_set_pmv(IA64_PERFMON_VECTOR);
	ia64_srlz_d();
}
100
101
102static int
103install_handler(void)
104{
105	int err = 0;
106
107	/* Try it legally - confusion about vec vs irq */
108	err = request_irq(MY_OPROFILE_VECTOR, op_raw_pmu_interrupt,
109			SA_INTERRUPT | SA_PERCPU_IRQ, "oprofile", NULL);
110
111	if (err) {
112		printk(KERN_ALERT "oprofile_IA64: request_irq fails, "
113				"returns %d\n", err);
114		return err;
115	}
116
117	if ((smp_call_function(op_set_pmv, NULL, 0, 1))) {
118		printk(KERN_ALERT "oprofile_IA64: unexpected failure "
119				"of smp_call_function(op_set_pmv)\n");
120	}
121
122	op_set_pmv(NULL);
123
124	return err;
125}
126
127
128static int
129restore_handler(void)
130{
131	int err = 0;
132
133	if ((smp_call_function(op_restore_pmv, NULL, 0, 1))) {
134		printk(KERN_ALERT "oprofile_IA64: unexpected failure "
135				"of smp_call_function(op_restore_pmv)\n");
136	}
137
138	op_restore_pmv(NULL);
139
140	free_irq(MY_OPROFILE_VECTOR, NULL);
141	return err;
142}
143
144
145/* ---------------- PMU setup ------------------ */
146
147/* This is kind of artificial.  The proc interface might really want to
148 * accept register values directly.  There are other features not exposed
149 * by this limited interface.  Of course that might require all sorts of
150 * validity checking??? */
151static void
152pmc_fill_in(ulong * val, u8 kernel, u8 user, u8 event, u8 um)
153{
154	/* enable interrupt generation */
155	*val |= (1 << 5);
156
157	/* setup as a privileged monitor */
158	*val |= (1 << 6);
159
160	/* McKinley requires pmc4 to have bit 23 set (enable PMU).
161	 * It is supposedly ignored in other pmc registers.
162	 * Try assuming it's ignored in Itanium, too, and just
163	 * set it for everyone.
164	 */
165
166	*val |= (1 << 23);
167
168	/* enable/disable chosen OS and USR counting */
169	(user)   ? (*val |= (1 << 3))
170		 : (*val &= ~(1 << 3));
171
172	(kernel) ? (*val |= (1 << 0))
173		 : (*val &= ~(1 << 0));
174
175	/* what are we counting ? */
176	*val &= ~(0xff << 8);
177	*val |= ((event & 0xff) << 8);
178	*val &= ~(0xf << 16);
179	*val |= ((um & 0xf) << 16);
180}
181
182
183static void
184pmu_setup(void * dummy)
185{
186	ulong pmc_val;
187	int ii;
188
189	/* setup each counter */
190	for (ii = 0 ; ii < op_nr_counters ; ++ii) {
191		if (sysctl.ctr[ii].enabled) {
192			pmc_val = 0;
193
194			set_pmd_neg(sysctl.ctr[ii].count, ii);
195			pmc_fill_in(&pmc_val, sysctl.ctr[ii].kernel,
196				sysctl.ctr[ii].user, sysctl.ctr[ii].event,
197				sysctl.ctr[ii].unit_mask);
198
199			set_pmc(pmc_val, ii);
200		}
201	}
202}
203
204
/* Per-CPU: quiesce monitoring before the counters are (re)programmed.
 * Clears psr.pp in both the task's saved state and the live processor
 * state, tells perfmon this CPU is in system-wide mode, and clears
 * pmc0 (overflow status / freeze bits). */
void
disable_psr(void * dummy)
{
	struct pt_regs * regs;
	/* disable profiling for my saved state */
	regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET);
	regs--;
	ia64_psr(regs)->pp = 0;
	/* shouldn't need to */
	ia64_psr(regs)->up = 0;

	/* disable profiling for my current state */
	__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");

#if defined(CONFIG_PERFMON) && defined(CONFIG_SMP)
#if V_AT_LEAST(2, 4, 21)
	/* 2.4.21+ merged the perfmon per-CPU flags into pfm_syst_info */
	local_cpu_data->pfm_syst_info |=  PFM_CPUINFO_SYST_WIDE;
	local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_DCR_PP;
	/* FIXME: what todo with the 3rd flags PFM_CPUINFO_EXCL_IDLE 0x4 */
#else
	/* disable profiling for everyone else */
	local_cpu_data->pfm_syst_wide = 1;
	local_cpu_data->pfm_dcr_pp = 0;
#endif
#endif
	/* clear pmc0 overflow/freeze state */
	ia64_set_pmc(0, 0);
	ia64_srlz_d();
}
233
234
235static int
236pmu_setup_all(void)
237{
238
239	/* This would be a great place to reserve all cpus with
240	 * some sort of call to perfmonctl (something like the
241	 * CREATE_CONTEXT command).  The current interface to
242	 * perfmonctl wants to be called from a different task id
243	 * for each CPU to be set up (and doesn't allow calls from
244	 * modules.
245	 */
246
247	/* disable profiling with the psr.pp bit */
248	if ((smp_call_function(disable_psr, NULL, 0, 1)))
249		return -EFAULT;
250
251	disable_psr(NULL);
252
253	/* now I've reserved the PMUs and they should be quiet */
254
255	if ((smp_call_function(pmu_setup, NULL, 0, 1)))
256		return -EFAULT;
257
258	pmu_setup(NULL);
259	return 0;
260}
261
262
263#ifndef CONFIG_SMP
264/* from linux/arch/ia64/kernel/perfmon.c */
265/*
 * Originally written by Ganesh Venkitachalam, IBM Corp.
267 * Copyright (C) 1999 Ganesh Venkitachalam <venkitac@us.ibm.com>
268 *
269 * Modifications by Stephane Eranian, Hewlett-Packard Co.
270 * Modifications by David Mosberger-Tang, Hewlett-Packard Co.
271 *
272 * Copyright (C) 1999-2002  Hewlett Packard Co
273 *               Stephane Eranian <eranian@hpl.hp.com>
274 *               David Mosberger-Tang <davidm@hpl.hp.com>
275 */
276
277/*
278 * On UP kernels, we do not need to constantly set the psr.pp bit
279 * when a task is scheduled. The psr.pp bit can only be changed in
 * the kernel because of a user request. Given we are on a UP non-preemptive
 * kernel we know that no other task is running, so we can simply update their
 * psr.pp from their saved state. There is thus no impact on the context switch
283 * code compared to the SMP case.
284 */
285static void
286op_tasklist_toggle_pp(unsigned int val)
287{
288	struct task_struct * p;
289	struct pt_regs * regs;
290
291	read_lock(&tasklist_lock);
292
293	for_each_task(p) {
294		regs = (struct pt_regs *)((unsigned long) p + IA64_STK_OFFSET);
295
296		/*
297		 * position on pt_regs saved on stack on 1st entry into the kernel
298		 */
299		regs--;
300
301		/*
302		 * update psr.pp
303		 */
304		ia64_psr(regs)->pp = val;
305	}
306	read_unlock(&tasklist_lock);
307}
308#endif
309
310
/* Per-CPU: begin counting.  When info is non-NULL it points to the id
 * of the only CPU that should act (used by pmu_select_start); with
 * info == NULL every CPU acts.  Raises dcr.pp, flags perfmon that this
 * CPU does system-wide profiling, and sets psr.pp in both the saved
 * and the current processor state. */
static void
pmu_start(void * info)
{
	struct pt_regs * regs;

	if (info && (*((uint *)info) != op_cpu_id()))
		return;

	/* printk(KERN_ALERT "oprofile_IA64: pmu_start on cpu %d\n",
	  	op_cpu_id()); */
	/* The default control register pp value is copied into psr.pp
	 * on an interrupt.  This allows interrupt service routines to
	 * be monitored.
	 */
	ia64_set_dcr(ia64_get_dcr() | IA64_DCR_PP);

#ifdef CONFIG_PERFMON
#ifdef CONFIG_SMP
#if V_AT_LEAST(2, 4, 21)
	/* 2.4.21+ merged the perfmon per-CPU flags into pfm_syst_info */
	local_cpu_data->pfm_syst_info |= PFM_CPUINFO_SYST_WIDE;
	local_cpu_data->pfm_syst_info |= PFM_CPUINFO_DCR_PP;
	/* FIXME: what todo with the 3rd flags PFM_CPUINFO_EXCL_IDLE 0x4 */
#else
	local_cpu_data->pfm_syst_wide = 1;
	local_cpu_data->pfm_dcr_pp = 1;
#endif
#else
	/* UP: propagate psr.pp into every task's saved state by hand */
	op_tasklist_toggle_pp(1);
#endif
#endif
	/* set it in my saved state */
	regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET);
	regs--;
	ia64_psr(regs)->pp = 1;

	/* set it in my current state */
	__asm__ __volatile__ ("ssm psr.pp;;"::: "memory");
	ia64_srlz_d();
}
350
351
352static void
353pmu_stop(void * info)
354{
355	struct pt_regs * regs;
356
357	if (info && (*((uint *)info) != op_cpu_id()))
358		return;
359
360	/* stop in my current state */
361	__asm__ __volatile__ ("rsm psr.pp;;"::: "memory");
362
363	/* disable the dcr pp */
364	ia64_set_dcr(ia64_get_dcr() & ~IA64_DCR_PP);
365
366#ifdef CONFIG_PERFMON
367#ifdef CONFIG_SMP
368#if V_AT_LEAST(2, 4, 21)
369	local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_SYST_WIDE;
370	local_cpu_data->pfm_syst_info &= ~PFM_CPUINFO_DCR_PP;
371	/* FIXME: what todo with the 3rd flags PFM_CPUINFO_EXCL_IDLE 0x4 */
372#else
373	local_cpu_data->pfm_syst_wide = 0;
374	local_cpu_data->pfm_dcr_pp = 0;
375#endif
376#else
377	pfm_tasklist_toggle_pp(0);
378#endif
379#endif
380
381	/* disable in my saved state */
382	regs = (struct pt_regs *)((unsigned long) current + IA64_STK_OFFSET);
383	regs--;
384	ia64_psr(regs)->pp = 0;
385}
386
387
/* Start counting on one CPU: call pmu_start directly when it is ours,
 * otherwise broadcast it — pmu_start itself ignores every CPU whose id
 * differs from *info. */
static void
pmu_select_start(uint cpu)
{
	if (cpu == op_cpu_id())
		pmu_start(NULL);
	else
		smp_call_function(pmu_start, &cpu, 0, 1);
}
396
397
/* Stop counting on one CPU; same dispatch scheme as pmu_select_start. */
static void
pmu_select_stop(uint cpu)
{
	if (cpu == op_cpu_id())
		pmu_stop(NULL);
	else
		smp_call_function(pmu_stop, &cpu, 0, 1);
}
406
407
408static void
409pmu_start_all(void)
410{
411	int cpu, i;
412
413	for (cpu=0; cpu < smp_num_cpus; cpu++) {
414		struct _oprof_data * data = &oprof_data[cpu];
415
416		for (i = 0 ; i < op_nr_counters ; ++i) {
417			if (sysctl.ctr[i].enabled) {
418				data->ctr_count[i] = sysctl.ctr[i].count;
419			} else {
420				data->ctr_count[i] = 0;
421			}
422		}
423	}
424
425	if (!install_handler()) {
426		smp_call_function(pmu_start, NULL, 0, 1);
427		pmu_start(NULL);
428	}
429		/* FIXME need some way to fail here */;
430}
431
432
433static void
434pmu_stop_all(void)
435{
436	smp_call_function(pmu_stop, NULL, 0, 1);
437	pmu_stop(NULL);
438	restore_handler();
439}
440
441
/* Validate the sysctl counter settings: at least one counter enabled,
 * each enabled counter traces at least one of kernel/user mode and has
 * a count within [1, OP_MAX_PERF_COUNT].  Returns 0 or -EINVAL. */
static int
pmu_check_params(void)
{
	int i;
	int enabled = 0;

	for (i = 0; i < op_nr_counters ; i++) {
		if (!sysctl.ctr[i].enabled)
			continue;

		enabled = 1;

		/* a counter that traces neither ring is useless */
		if (!sysctl.ctr[i].user && !sysctl.ctr[i].kernel) {
			printk(KERN_ERR "oprofile: neither kernel nor user "
			       "set for counter %d\n", i);
			return -EINVAL;
		}

		if (check_range(sysctl.ctr[i].count, 1, OP_MAX_PERF_COUNT,
			"ctr count value %d not in range (%d %ld)\n"))
			return -EINVAL;
	}

	if (!enabled) {
		printk(KERN_ERR "oprofile: no counters have been enabled.\n");
		return -EINVAL;
	}

	return 0;
}
472
473
474static struct op_msrs cpu_msrs[NR_CPUS];
475
476
477static void free_msr_group(struct op_msr_group * group)
478{
479	if (group->addrs)
480		kfree(group->addrs);
481	if (group->saved)
482		kfree(group->saved);
483	group->addrs = NULL;
484	group->saved = NULL;
485}
486
487
488static void pmu_save_registers(void * dummy)
489{
490	uint i;
491	uint const cpu = op_cpu_id();
492	struct op_msr_group * counters = &cpu_msrs[cpu].counters;
493	struct op_msr_group * controls = &cpu_msrs[cpu].controls;
494
495	counters->addrs = NULL;
496	counters->saved = NULL;
497	controls->addrs = NULL;
498	controls->saved = NULL;
499
500	counters->saved = kmalloc(
501		op_nr_counters * sizeof(struct op_saved_msr), GFP_KERNEL);
502	if (!counters->saved)
503		goto fault;
504
505	controls->saved = kmalloc(
506		op_nr_counters * sizeof(struct op_saved_msr), GFP_KERNEL);
507	if (!controls->saved)
508		goto fault;
509
510	for (i = 0; i < op_nr_counters; ++i) {
511		controls->saved[i].low = get_pmc(i);
512		counters->saved[i].low = get_pmd(i);
513	}
514	return;
515
516fault:
517	free_msr_group(counters);
518	free_msr_group(controls);
519}
520
521
522static void pmu_restore_registers(void * dummy)
523{
524	uint i;
525	uint const cpu = op_cpu_id();
526	struct op_msr_group * counters = &cpu_msrs[cpu].counters;
527	struct op_msr_group * controls = &cpu_msrs[cpu].controls;
528
529	for (i = 0; i < op_nr_counters; ++i) {
530		set_pmc(controls->saved[i].low, i);
531		set_pmd(counters->saved[i].low, i);
532	}
533
534	free_msr_group(counters);
535	free_msr_group(controls);
536}
537
538
539
540static int
541pmu_init(void)
542{
543	int err = 0;
544
545	/* figure out processor type configure number of bits in pmd
546	   and number of counters */
547	switch (get_cpu_type()) {
548	case CPU_IA64_1:
549		pmd_mask = IA64_1_PMD_MASK_VAL; break;
550	case CPU_IA64_2:
551	case CPU_IA64:
552		pmd_mask = IA64_2_PMD_MASK_VAL; break;
553	default:
554		err = -EIO; break;
555	}
556
557	op_nr_counters = 4;
558
559	if ((err = smp_call_function(pmu_save_registers, NULL, 0, 1)))
560		goto out;
561
562	pmu_save_registers(NULL);
563
564out:
565	return err;
566}
567
568
569static void
570pmu_deinit(void)
571{
572	smp_call_function(pmu_restore_registers, NULL, 0, 1);
573	pmu_restore_registers(NULL);
574}
575
576
/* /proc directory names, one per counter; sized to op_nr_counters */
static char * names[] = { "0", "1", "2", "3", };


/* Create one sysctl directory per counter ("0".."3"), each holding the
 * enabled/event/count/unit_mask/kernel/user knobs backed by
 * sysctl_parms.  Returns 0, or -EFAULT after releasing every child
 * table allocated so far.  The ctl_table initializers are positional:
 * { ctl_name, procname, data, maxlen, mode, child, proc_handler,
 *   strategy }. */
static int
pmu_add_sysctls(ctl_table * next)
{
	ctl_table * start = next;
	ctl_table * tab;
	int i, j;

	for (i=0; i < op_nr_counters; i++) {
		next->ctl_name = 1;
		next->procname = names[i];
		next->mode = 0700;

		/* 6 live entries + zeroed sentinel */
		if (!(tab = kmalloc(sizeof(ctl_table)*7, GFP_KERNEL)))
			goto cleanup;

		next->child = tab;

		memset(tab, 0, sizeof(ctl_table)*7);
		tab[0] = ((ctl_table) { 1, "enabled", &sysctl_parms.ctr[i].enabled, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[1] = ((ctl_table) { 1, "event", &sysctl_parms.ctr[i].event, sizeof(int), 0600, NULL, lproc_dointvec, NULL,  });
		tab[2] = ((ctl_table) { 1, "count", &sysctl_parms.ctr[i].count, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[3] = ((ctl_table) { 1, "unit_mask", &sysctl_parms.ctr[i].unit_mask, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[4] = ((ctl_table) { 1, "kernel", &sysctl_parms.ctr[i].kernel, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		tab[5] = ((ctl_table) { 1, "user", &sysctl_parms.ctr[i].user, sizeof(int), 0600, NULL, lproc_dointvec, NULL, });
		next++;
	}

	return 0;

cleanup:
	/* free the child tables of the i entries set up before failure */
	next = start;
	for (j = 0; j < i; j++) {
		kfree(next->child);
		next++;
	}
	return -EFAULT;
}
617
618
619static void pmu_remove_sysctls(ctl_table * next)
620{
621	int ii;
622
623	for (ii=0; ii < op_nr_counters; ii++) {
624		kfree(next->child);
625		next++;
626	}
627}
628
629
/* PMU-interrupt driven profiling operations handed to the oprofile
 * core (named to parallel the x86 NMI interface). */
struct op_int_operations op_nmi_ops = {
	init: pmu_init,
	deinit: pmu_deinit,
	add_sysctls: pmu_add_sysctls,
	remove_sysctls: pmu_remove_sysctls,
	check_params: pmu_check_params,
	setup: pmu_setup_all,
	start: pmu_start_all,
	stop: pmu_stop_all,
	start_cpu: pmu_select_start,
	stop_cpu: pmu_select_stop,
};
642
643
644struct op_int_operations const * op_int_interface()
645{
646	return &op_nmi_ops;
647}
648
/* Need this dummy so module/oprofile.c links; the RTC fallback is
 * never used on IA64, so every operation is NULL. */
struct op_int_operations op_rtc_ops = {
	init: NULL,
	deinit: NULL,
	add_sysctls: NULL,
	remove_sysctls: NULL,
	check_params: NULL,
	setup: NULL,
	start: NULL,
	stop: NULL,
	start_cpu: NULL,
	stop_cpu: NULL,
};
662