opd_perfmon.c revision cc2ee177dbb3befca43e36cfc56778b006c3d050
/**
 * @file opd_perfmon.c
 * perfmonctl() handling
 *
 * @remark Copyright 2003 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon
 */
10
11#ifdef __ia64__
12
13/* need this for sched_setaffinity() in <sched.h> */
14#define _GNU_SOURCE
15
16#include "oprofiled.h"
17#include "opd_perfmon.h"
18#include "opd_events.h"
19
20#include "op_cpu_type.h"
21#include "op_libiberty.h"
22#include "op_hw_config.h"
23
24#include <sys/syscall.h>
25#include <sys/wait.h>
26#include <unistd.h>
27#include <limits.h>
28#include <signal.h>
29#include <stdio.h>
30#include <stdlib.h>
31#include <string.h>
32#include <errno.h>
33#ifdef HAVE_SCHED_SETAFFINITY
34#include <sched.h>
35#endif
36
37extern op_cpu cpu_type;
38
39#ifndef HAVE_SCHED_SETAFFINITY
40
41/* many glibc's are not yet up to date */
42#ifndef __NR_sched_setaffinity
43#define __NR_sched_setaffinity 1231
44#endif
45
/*
 * Stand-ins for the CPU set definitions from glibc's <sched.h> and
 * <bits/sched.h> (copied and munged), used only when the C library
 * does not supply sched_setaffinity() itself.
 */
#define CPU_SETSIZE	1024
#define __NCPUBITS	(8 * sizeof (unsigned long))

/* Bitmap holding one bit per CPU, CPU_SETSIZE bits in total. */
typedef struct
{
	unsigned long __bits[CPU_SETSIZE / __NCPUBITS];
} cpu_set_t;

/* Set the bit for @cpu in the set pointed to by @cpusetp. */
#define CPU_SET(cpu, cpusetp) \
	((cpusetp)->__bits[(cpu) / __NCPUBITS] |= \
		(1UL << ((cpu) % __NCPUBITS)))

/* Clear every bit of the set pointed to by @cpusetp. */
#define CPU_ZERO(cpusetp) \
	memset((cpusetp), 0, sizeof(cpu_set_t))
58
59static int
60sched_setaffinity(pid_t pid, size_t len, cpu_set_t const * cpusetp)
61{
62	return syscall(__NR_sched_setaffinity, pid, len, cpusetp);
63}
64#endif
65
66
67#ifndef HAVE_PERFMONCTL
68#ifndef __NR_perfmonctl
69#define __NR_perfmonctl 1175
70#endif
71
72static int perfmonctl(int fd, int cmd, void * arg, int narg)
73{
74	return syscall(__NR_perfmonctl, fd, cmd, arg, narg);
75}
76#endif
77
78
/*
 * 16-byte identifier copied into ctx_smpl_buf_id by create_context().
 * NOTE(review): presumably selects the kernel-side sampling buffer
 * format used for oprofile - confirm against the perfmon headers.
 */
static unsigned char uuid[16] = {
	0x77, 0x7a, 0x6e, 0x61,
	0x20, 0x65, 0x73, 0x69,
	0x74, 0x6e, 0x72, 0x20,
	0x61, 0x65, 0x0a, 0x6c
};
83
84
/** number of entries in the children array (one per online CPU) */
static size_t nr_cpus;

/**
 * Per-CPU helper process state.
 *
 * The sigusr1/sigusr2 flags are set from signal handlers and polled by
 * the child's main loop, so they must be volatile sig_atomic_t for the
 * accesses to be well defined.
 */
struct child {
	/** process id of the child handling this CPU */
	pid_t pid;
	/** pipe the child uses to tell the parent it is up */
	int up_pipe[2];
	/** perfmon context descriptor, filled in by create_context() */
	int ctx_fd;
	/** set by the SIGUSR1 handler: a PFM_START is pending */
	volatile sig_atomic_t sigusr1;
	/** set by the SIGUSR2 handler: a PFM_STOP is pending */
	volatile sig_atomic_t sigusr2;
	/** cleared in run_child(); not otherwise used in this file */
	volatile sig_atomic_t sigterm;
};

/** array of nr_cpus children, indexed by CPU number */
static struct child * children;
97
98static void perfmon_start_child(int ctx_fd)
99{
100	if (perfmonctl(ctx_fd, PFM_START, 0, 0) == -1) {
101		perror("Couldn't start perfmon: ");
102		exit(EXIT_FAILURE);
103	}
104}
105
106
107static void perfmon_stop_child(int ctx_fd)
108{
109	if (perfmonctl(ctx_fd, PFM_STOP, 0, 0) == -1) {
110		perror("Couldn't stop perfmon: ");
111		exit(EXIT_FAILURE);
112	}
113}
114
115
116static void child_sigusr1(int val __attribute__((unused)))
117{
118	size_t i;
119
120	for (i = 0; i < nr_cpus; ++i) {
121		if (children[i].pid == getpid()) {
122			children[i].sigusr1 = 1;
123			return;
124		}
125	}
126}
127
128
129static void child_sigusr2(int val __attribute__((unused)))
130{
131	size_t i;
132
133	for (i = 0; i < nr_cpus; ++i) {
134		if (children[i].pid == getpid()) {
135			children[i].sigusr2 = 1;
136			return;
137		}
138	}
139}
140
141
/**
 * SIGTERM handler for a perfmon child: report the event on stdout and
 * pass SIGTERM on to the parent process.
 *
 * Only async-signal-safe calls are used here. stdio (printf) must not
 * be called from a signal handler since it may be interrupted while
 * holding its internal state; write(2) and kill(2) are safe.
 */
static void child_sigterm(int val __attribute__((unused)))
{
	static char const msg[] = "Child received SIGTERM, killing parent.\n";
	/* keep errno intact for whatever code was interrupted */
	int const saved_errno = errno;
	ssize_t rc;

	/* best effort: there is nothing useful to do if this fails */
	rc = write(STDOUT_FILENO, msg, sizeof(msg) - 1);
	(void)rc;

	kill(getppid(), SIGTERM);

	errno = saved_errno;
}
147
148
149static void set_affinity(size_t cpu)
150{
151	cpu_set_t set;
152
153	CPU_ZERO(&set);
154	CPU_SET(cpu, &set);
155
156	int err = sched_setaffinity(getpid(), sizeof(set), &set);
157
158	if (err == -1) {
159		fprintf(stderr, "Failed to set affinity: %s\n",
160			    strerror(errno));
161		exit(EXIT_FAILURE);
162	}
163}
164
165
166static void setup_signals(void)
167{
168	struct sigaction act;
169	sigset_t mask;
170
171	sigemptyset(&mask);
172	sigaddset(&mask, SIGUSR1);
173	sigaddset(&mask, SIGUSR2);
174	sigprocmask(SIG_BLOCK, &mask, NULL);
175
176	act.sa_handler = child_sigusr1;
177	act.sa_flags = 0;
178	sigemptyset(&act.sa_mask);
179
180	if (sigaction(SIGUSR1, &act, NULL)) {
181		perror("oprofiled: install of SIGUSR1 handler failed: ");
182		exit(EXIT_FAILURE);
183	}
184
185	act.sa_handler = child_sigusr2;
186	act.sa_flags = 0;
187	sigemptyset(&act.sa_mask);
188
189	if (sigaction(SIGUSR2, &act, NULL)) {
190		perror("oprofiled: install of SIGUSR2 handler failed: ");
191		exit(EXIT_FAILURE);
192	}
193
194	act.sa_handler = child_sigterm;
195	act.sa_flags = 0;
196	sigemptyset(&act.sa_mask);
197
198	if (sigaction(SIGTERM, &act, NULL)) {
199		perror("oprofiled: install of SIGTERM handler failed: ");
200		exit(EXIT_FAILURE);
201	}
202}
203
204
205/** create the per-cpu context */
206static void create_context(struct child * self)
207{
208	pfarg_context_t ctx;
209	int err;
210
211	memset(&ctx, 0, sizeof(pfarg_context_t));
212	memcpy(&ctx.ctx_smpl_buf_id, &uuid, 16);
213	ctx.ctx_flags = PFM_FL_SYSTEM_WIDE;
214
215	err = perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1);
216	if (err == -1) {
217		fprintf(stderr, "CREATE_CONTEXT failed: %s\n",
218		        strerror(errno));
219		exit(EXIT_FAILURE);
220	}
221
222	self->ctx_fd = ctx.ctx_fd;
223}
224
225
226/** program the perfmon counters */
227static void write_pmu(struct child * self)
228{
229	pfarg_reg_t pc[OP_MAX_COUNTERS];
230	pfarg_reg_t pd[OP_MAX_COUNTERS];
231	int err;
232	size_t i;
233
234	memset(pc, 0, sizeof(pc));
235	memset(pd, 0, sizeof(pd));
236
237#define PMC_GEN_INTERRUPT (1UL << 5)
238#define PMC_PRIV_MONITOR (1UL << 6)
239/* McKinley requires pmc4 to have bit 23 set (enable PMU).
240 * It is supposedly ignored in other pmc registers.
241 */
242#define PMC_MANDATORY (1UL << 23)
243#define PMC_USER (1UL << 3)
244#define PMC_KERNEL (1UL << 0)
245	for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
246		struct opd_event * event = &opd_events[i];
247		pc[i].reg_num = event->counter + 4;
248		pc[i].reg_value = PMC_GEN_INTERRUPT;
249		pc[i].reg_value |= PMC_PRIV_MONITOR;
250		pc[i].reg_value |= PMC_MANDATORY;
251		(event->user) ? (pc[i].reg_value |= PMC_USER)
252		              : (pc[i].reg_value &= ~PMC_USER);
253		(event->kernel) ? (pc[i].reg_value |= PMC_KERNEL)
254		                : (pc[i].reg_value &= ~PMC_KERNEL);
255		pc[i].reg_value &= ~(0xff << 8);
256		pc[i].reg_value |= ((event->value & 0xff) << 8);
257		pc[i].reg_value &= ~(0xf << 16);
258		pc[i].reg_value |= ((event->um & 0xf) << 16);
259		pc[i].reg_smpl_eventid = event->counter;
260	}
261
262	for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
263		struct opd_event * event = &opd_events[i];
264		pd[i].reg_value = ~0UL - event->count + 1;
265		pd[i].reg_short_reset = ~0UL - event->count + 1;
266		pd[i].reg_num = event->counter + 4;
267	}
268
269	err = perfmonctl(self->ctx_fd, PFM_WRITE_PMCS, pc, i);
270	if (err == -1) {
271		perror("Couldn't write PMCs: ");
272		exit(EXIT_FAILURE);
273	}
274
275	err = perfmonctl(self->ctx_fd, PFM_WRITE_PMDS, pd, i);
276	if (err == -1) {
277		perror("Couldn't write PMDs: ");
278		exit(EXIT_FAILURE);
279	}
280}
281
282
283static void load_context(struct child * self)
284{
285	pfarg_load_t load_args;
286	int err;
287
288	memset(&load_args, 0, sizeof(load_args));
289	load_args.load_pid = self->pid;
290
291	err = perfmonctl(self->ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1);
292	if (err == -1) {
293		perror("Couldn't load context: ");
294		exit(EXIT_FAILURE);
295	}
296}
297
298
299static void notify_parent(struct child * self, size_t cpu)
300{
301	for (;;) {
302		ssize_t ret;
303		ret = write(self->up_pipe[1], &cpu, sizeof(size_t));
304		if (ret == sizeof(size_t))
305			break;
306		if (ret < 0 && errno != EINTR) {
307			fprintf(stderr, "Failed to write child pipe with %s\n",
308			        strerror(errno));
309			exit(EXIT_FAILURE);
310		}
311	}
312}
313
314
315static void run_child(size_t cpu)
316{
317	struct child * self = &children[cpu];
318
319	self->pid = getpid();
320	self->sigusr1 = 0;
321	self->sigusr2 = 0;
322	self->sigterm = 0;
323
324	setup_signals();
325
326	set_affinity(cpu);
327
328	create_context(self);
329
330	write_pmu(self);
331
332	load_context(self);
333
334	notify_parent(self, cpu);
335
336	for (;;) {
337		sigset_t sigmask;
338		sigfillset(&sigmask);
339		sigdelset(&sigmask, SIGUSR1);
340		sigdelset(&sigmask, SIGUSR2);
341		sigdelset(&sigmask, SIGTERM);
342
343		if (self->sigusr1) {
344			printf("PFM_START on CPU%d\n", (int)cpu);
345			fflush(stdout);
346			perfmon_start_child(self->ctx_fd);
347			self->sigusr1 = 0;
348		}
349
350		if (self->sigusr2) {
351			printf("PFM_STOP on CPU%d\n", (int)cpu);
352			fflush(stdout);
353			perfmon_stop_child(self->ctx_fd);
354			self->sigusr2 = 0;
355		}
356
357		sigsuspend(&sigmask);
358	}
359}
360
361
362static void wait_for_child(struct child * child)
363{
364	size_t tmp;
365	for (;;) {
366		ssize_t ret;
367		ret = read(child->up_pipe[0], &tmp, sizeof(size_t));
368		if (ret == sizeof(size_t))
369			break;
370		if (ret < 0 && errno != EINTR) {
371			fprintf(stderr, "Failed to read child pipe with %s\n",
372			        strerror(errno));
373			exit(EXIT_FAILURE);
374		}
375	}
376	printf("Perfmon child up on CPU%d\n", (int)tmp);
377	fflush(stdout);
378
379	close(child->up_pipe[0]);
380	close(child->up_pipe[1]);
381}
382
383
384void perfmon_init(void)
385{
386	size_t i;
387	long nr;
388
389	if (cpu_type == CPU_TIMER_INT)
390		return;
391
392	nr = sysconf(_SC_NPROCESSORS_ONLN);
393	if (nr == -1) {
394		fprintf(stderr, "Couldn't determine number of CPUs.\n");
395		exit(EXIT_FAILURE);
396	}
397
398	nr_cpus = nr;
399
400	children = xmalloc(sizeof(struct child) * nr_cpus);
401
402	for (i = 0; i < nr_cpus; ++i) {
403		int ret;
404
405		if (pipe(children[i].up_pipe)) {
406			perror("Couldn't create child pipe.\n");
407			exit(EXIT_FAILURE);
408		}
409
410		ret = fork();
411		if (ret == -1) {
412			fprintf(stderr, "Couldn't fork perfmon child.\n");
413			exit(EXIT_FAILURE);
414		} else if (ret == 0) {
415			printf("Running perfmon child on CPU%d.\n", (int)i);
416			fflush(stdout);
417			run_child(i);
418		} else {
419			children[i].pid = ret;
420			printf("Waiting on CPU%d\n", (int)i);
421			wait_for_child(&children[i]);
422		}
423	}
424}
425
426
427void perfmon_exit(void)
428{
429	size_t i;
430
431	if (cpu_type == CPU_TIMER_INT)
432		return;
433
434	for (i = 0; i < nr_cpus; ++i) {
435		kill(children[i].pid, SIGKILL);
436		waitpid(children[i].pid, NULL, 0);
437	}
438}
439
440
441void perfmon_start(void)
442{
443	size_t i;
444
445	if (cpu_type == CPU_TIMER_INT)
446		return;
447
448	for (i = 0; i < nr_cpus; ++i)
449		kill(children[i].pid, SIGUSR1);
450}
451
452
453void perfmon_stop(void)
454{
455	size_t i;
456
457	if (cpu_type == CPU_TIMER_INT)
458		return;
459
460	for (i = 0; i < nr_cpus; ++i)
461		kill(children[i].pid, SIGUSR2);
462}
463
464#endif /* __ia64__ */
465