1/**
2 * @file opd_perfmon.c
3 * perfmonctl() handling
4 *
5 * @remark Copyright 2003 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author John Levon
9 */
10
11#ifdef __ia64__
12
13/* need this for sched_setaffinity() in <sched.h> */
14#define _GNU_SOURCE
15
16#include "oprofiled.h"
17#include "opd_perfmon.h"
18#include "opd_events.h"
19
20#include "op_cpu_type.h"
21#include "op_libiberty.h"
22#include "op_hw_config.h"
23
24#include <sys/syscall.h>
25#include <sys/wait.h>
26#include <unistd.h>
27#include <limits.h>
28#include <signal.h>
29#include <stdio.h>
30#include <stdlib.h>
31#include <string.h>
32#include <errno.h>
33#include <sys/types.h>
34#include <sys/stat.h>
35#ifdef HAVE_SCHED_SETAFFINITY
36#include <sched.h>
37#endif
38
39extern op_cpu cpu_type;
40
41#ifndef HAVE_SCHED_SETAFFINITY
42
/* many glibc versions are not yet up to date */
44#ifndef __NR_sched_setaffinity
45#define __NR_sched_setaffinity 1231
46#endif
47
48/* Copied from glibc's <sched.h> and <bits/sched.h> and munged */
49#define CPU_SETSIZE	1024
50#define __NCPUBITS	(8 * sizeof (unsigned long))
51typedef struct
52{
53	unsigned long __bits[CPU_SETSIZE / __NCPUBITS];
54} cpu_set_t;
55
56#define CPU_SET(cpu, cpusetp) \
57	((cpusetp)->__bits[(cpu)/__NCPUBITS] |= (1UL << ((cpu) % __NCPUBITS)))
58#define CPU_ZERO(cpusetp) \
59	memset((cpusetp), 0, sizeof(cpu_set_t))
60
/* Fallback for glibc versions that do not yet export a
 * sched_setaffinity() wrapper: invoke the raw syscall directly.
 * Returns 0 on success, -1 with errno set on failure (syscall(2)). */
static int
sched_setaffinity(pid_t pid, size_t len, cpu_set_t const * cpusetp)
{
	return syscall(__NR_sched_setaffinity, pid, len, cpusetp);
}
66#endif
67
68
69#ifndef HAVE_PERFMONCTL
70#ifndef __NR_perfmonctl
71#define __NR_perfmonctl 1175
72#endif
73
/* Fallback when libc does not provide perfmonctl(): issue the raw
 * ia64 perfmonctl syscall.  fd is the context descriptor, cmd a
 * PFM_* command, arg/narg the command argument vector. */
static int perfmonctl(int fd, int cmd, void * arg, int narg)
{
	return syscall(__NR_perfmonctl, fd, cmd, arg, narg);
}
78#endif
79
80
81static unsigned char uuid[16] = {
82	0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69,
83	0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c
84};
85
86
87static size_t nr_cpus;
88
89struct child {
90	pid_t pid;
91	int up_pipe[2];
92	int ctx_fd;
93	sig_atomic_t sigusr1;
94	sig_atomic_t sigusr2;
95	sig_atomic_t sigterm;
96};
97
98static struct child * children;
99
100static void perfmon_start_child(int ctx_fd)
101{
102	if (perfmonctl(ctx_fd, PFM_START, 0, 0) == -1) {
103		exit(EXIT_FAILURE);
104	}
105}
106
107
108static void perfmon_stop_child(int ctx_fd)
109{
110	if (perfmonctl(ctx_fd, PFM_STOP, 0, 0) == -1) {
111		exit(EXIT_FAILURE);
112	}
113}
114
115
116static void child_sigusr1(int val __attribute__((unused)))
117{
118	size_t i;
119
120	for (i = 0; i < nr_cpus; ++i) {
121		if (children[i].pid == getpid()) {
122			children[i].sigusr1 = 1;
123			return;
124		}
125	}
126}
127
128
129static void child_sigusr2(int val __attribute__((unused)))
130{
131	size_t i;
132
133	for (i = 0; i < nr_cpus; ++i) {
134		if (children[i].pid == getpid()) {
135			children[i].sigusr2 = 1;
136			return;
137		}
138	}
139}
140
141
/* SIGTERM handler in a child: forward the signal to the parent
 * daemon so the whole profiler shuts down, not just this child. */
static void child_sigterm(int val __attribute__((unused)))
{
	kill(getppid(), SIGTERM);
}
146
147
148static void set_affinity(size_t cpu)
149{
150	cpu_set_t set;
151	int err;
152
153	CPU_ZERO(&set);
154	CPU_SET(cpu, &set);
155
156	err = sched_setaffinity(getpid(), sizeof(set), &set);
157
158	if (err == -1) {
159		perror("Failed to set affinity");
160		exit(EXIT_FAILURE);
161	}
162}
163
164
165static void setup_signals(void)
166{
167	struct sigaction act;
168	sigset_t mask;
169
170	sigemptyset(&mask);
171	sigaddset(&mask, SIGUSR1);
172	sigaddset(&mask, SIGUSR2);
173	sigprocmask(SIG_BLOCK, &mask, NULL);
174
175	act.sa_handler = child_sigusr1;
176	act.sa_flags = 0;
177	sigemptyset(&act.sa_mask);
178
179	if (sigaction(SIGUSR1, &act, NULL)) {
180		perror("oprofiled: install of SIGUSR1 handler failed");
181		exit(EXIT_FAILURE);
182	}
183
184	act.sa_handler = child_sigusr2;
185	act.sa_flags = 0;
186	sigemptyset(&act.sa_mask);
187
188	if (sigaction(SIGUSR2, &act, NULL)) {
189		perror("oprofiled: install of SIGUSR2 handler failed");
190		exit(EXIT_FAILURE);
191	}
192
193	act.sa_handler = child_sigterm;
194	act.sa_flags = 0;
195	sigemptyset(&act.sa_mask);
196
197	if (sigaction(SIGTERM, &act, NULL)) {
198		perror("oprofiled: install of SIGTERM handler failed");
199		exit(EXIT_FAILURE);
200	}
201}
202
203
204/** create the per-cpu context */
205static void create_context(struct child * self)
206{
207	pfarg_context_t ctx;
208	int err;
209
210	memset(&ctx, 0, sizeof(pfarg_context_t));
211	memcpy(&ctx.ctx_smpl_buf_id, &uuid, 16);
212	ctx.ctx_flags = PFM_FL_SYSTEM_WIDE;
213
214	err = perfmonctl(0, PFM_CREATE_CONTEXT, &ctx, 1);
215	if (err == -1) {
216		perror("CREATE_CONTEXT failed");
217		exit(EXIT_FAILURE);
218	}
219
220	self->ctx_fd = ctx.ctx_fd;
221}
222
223
224/** program the perfmon counters */
225static void write_pmu(struct child * self)
226{
227	pfarg_reg_t pc[OP_MAX_COUNTERS];
228	pfarg_reg_t pd[OP_MAX_COUNTERS];
229	int err;
230	size_t i;
231
232	memset(pc, 0, sizeof(pc));
233	memset(pd, 0, sizeof(pd));
234
235#define PMC_GEN_INTERRUPT (1UL << 5)
236#define PMC_PRIV_MONITOR (1UL << 6)
237/* McKinley requires pmc4 to have bit 23 set (enable PMU).
238 * It is supposedly ignored in other pmc registers.
239 */
240#define PMC_MANDATORY (1UL << 23)
241#define PMC_USER (1UL << 3)
242#define PMC_KERNEL (1UL << 0)
243	for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
244		struct opd_event * event = &opd_events[i];
245		pc[i].reg_num = event->counter + 4;
246		pc[i].reg_value = PMC_GEN_INTERRUPT;
247		pc[i].reg_value |= PMC_PRIV_MONITOR;
248		pc[i].reg_value |= PMC_MANDATORY;
249		(event->user) ? (pc[i].reg_value |= PMC_USER)
250		              : (pc[i].reg_value &= ~PMC_USER);
251		(event->kernel) ? (pc[i].reg_value |= PMC_KERNEL)
252		                : (pc[i].reg_value &= ~PMC_KERNEL);
253		pc[i].reg_value &= ~(0xff << 8);
254		pc[i].reg_value |= ((event->value & 0xff) << 8);
255		pc[i].reg_value &= ~(0xf << 16);
256		pc[i].reg_value |= ((event->um & 0xf) << 16);
257		pc[i].reg_smpl_eventid = event->counter;
258	}
259
260	for (i = 0; i < op_nr_counters && opd_events[i].name; ++i) {
261		struct opd_event * event = &opd_events[i];
262		pd[i].reg_value = ~0UL - event->count + 1;
263		pd[i].reg_short_reset = ~0UL - event->count + 1;
264		pd[i].reg_num = event->counter + 4;
265	}
266
267	err = perfmonctl(self->ctx_fd, PFM_WRITE_PMCS, pc, i);
268	if (err == -1) {
269		perror("Couldn't write PMCs");
270		exit(EXIT_FAILURE);
271	}
272
273	err = perfmonctl(self->ctx_fd, PFM_WRITE_PMDS, pd, i);
274	if (err == -1) {
275		perror("Couldn't write PMDs");
276		exit(EXIT_FAILURE);
277	}
278}
279
280
281static void load_context(struct child * self)
282{
283	pfarg_load_t load_args;
284	int err;
285
286	memset(&load_args, 0, sizeof(load_args));
287	load_args.load_pid = self->pid;
288
289	err = perfmonctl(self->ctx_fd, PFM_LOAD_CONTEXT, &load_args, 1);
290	if (err == -1) {
291		perror("Couldn't load context");
292		exit(EXIT_FAILURE);
293	}
294}
295
296
297static void notify_parent(struct child * self, size_t cpu)
298{
299	for (;;) {
300		ssize_t ret;
301		ret = write(self->up_pipe[1], &cpu, sizeof(size_t));
302		if (ret == sizeof(size_t))
303			break;
304		if (ret < 0 && errno != EINTR) {
305			perror("Failed to write child pipe:");
306			exit(EXIT_FAILURE);
307		}
308	}
309}
310
311static struct child * inner_child;
/* atexit() hook registered in run_child(): close this child's write
 * end of the parent-notification pipe so the parent's read sees EOF
 * if the child dies before (or after) reporting in. */
void close_pipe(void)
{
	close(inner_child->up_pipe[1]);
}
316
/**
 * Body of one per-CPU perfmon child: record our pid, pin to @cpu,
 * create and program a system-wide perfmon context, notify the
 * parent we are up, then loop forever servicing start/stop signals.
 * Never returns; exits on any setup failure.
 */
static void run_child(size_t cpu)
{
	struct child * self = &children[cpu];

	self->pid = getpid();
	self->sigusr1 = 0;
	self->sigusr2 = 0;
	self->sigterm = 0;

	/* ensure the pipe write end is closed on every exit path */
	inner_child = self;
	if (atexit(close_pipe)){
		close_pipe();
		exit(EXIT_FAILURE);
	}

	umask(0);
	/* Change directory to allow directory to be removed */
	if (chdir("/") < 0) {
		perror("Unable to chdir to \"/\"");
		exit(EXIT_FAILURE);
	}

	setup_signals();

	set_affinity(cpu);

	create_context(self);

	write_pmu(self);

	load_context(self);

	notify_parent(self, cpu);

	/* Redirect standard files to /dev/null */
	freopen( "/dev/null", "r", stdin);
	freopen( "/dev/null", "w", stdout);
	freopen( "/dev/null", "w", stderr);

	for (;;) {
		/* SIGUSR1/SIGUSR2 were blocked in setup_signals(), so they
		 * are only delivered inside sigsuspend(); the flag checks
		 * below therefore cannot race with the signal handlers. */
		sigset_t sigmask;
		sigfillset(&sigmask);
		sigdelset(&sigmask, SIGUSR1);
		sigdelset(&sigmask, SIGUSR2);
		sigdelset(&sigmask, SIGTERM);

		if (self->sigusr1) {
			perfmon_start_child(self->ctx_fd);
			self->sigusr1 = 0;
		}

		if (self->sigusr2) {
			perfmon_stop_child(self->ctx_fd);
			self->sigusr2 = 0;
		}

		sigsuspend(&sigmask);
	}
}
376
377
378static void wait_for_child(struct child * child)
379{
380	size_t tmp;
381	for (;;) {
382		ssize_t ret;
383		ret = read(child->up_pipe[0], &tmp, sizeof(size_t));
384		if (ret == sizeof(size_t))
385			break;
386		if ((ret < 0 && errno != EINTR) || ret == 0 ) {
387			perror("Failed to read child pipe");
388			exit(EXIT_FAILURE);
389		}
390	}
391	printf("Perfmon child up on CPU%d\n", (int)tmp);
392	fflush(stdout);
393
394	close(child->up_pipe[0]);
395}
396
397static struct child* xen_ctx;
398
399void perfmon_init(void)
400{
401	size_t i;
402	long nr;
403
404	if (cpu_type == CPU_TIMER_INT)
405		return;
406
407	if (!no_xen) {
408		xen_ctx = xmalloc(sizeof(struct child));
409		xen_ctx->pid = getpid();
410		xen_ctx->up_pipe[0] = -1;
411		xen_ctx->up_pipe[1] = -1;
412		xen_ctx->sigusr1 = 0;
413		xen_ctx->sigusr2 = 0;
414		xen_ctx->sigterm = 0;
415
416		create_context(xen_ctx);
417
418		write_pmu(xen_ctx);
419
420		load_context(xen_ctx);
421		return;
422	}
423
424
425	nr = sysconf(_SC_NPROCESSORS_ONLN);
426	if (nr == -1) {
427		fprintf(stderr, "Couldn't determine number of CPUs.\n");
428		exit(EXIT_FAILURE);
429	}
430
431	nr_cpus = nr;
432
433	children = xmalloc(sizeof(struct child) * nr_cpus);
434	bzero(children, sizeof(struct child) * nr_cpus);
435
436	for (i = 0; i < nr_cpus; ++i) {
437		int ret;
438
439		if (pipe(children[i].up_pipe)) {
440			perror("Couldn't create child pipe");
441			exit(EXIT_FAILURE);
442		}
443
444		ret = fork();
445		if (ret == -1) {
446			perror("Couldn't fork perfmon child");
447			exit(EXIT_FAILURE);
448		} else if (ret == 0) {
449			close(children[i].up_pipe[0]);
450			run_child(i);
451		} else {
452			children[i].pid = ret;
453			close(children[i].up_pipe[1]);
454			printf("Waiting on CPU%d\n", (int)i);
455			wait_for_child(&children[i]);
456		}
457	}
458}
459
460
461void perfmon_exit(void)
462{
463	size_t i;
464
465	if (cpu_type == CPU_TIMER_INT)
466		return;
467
468	if (!no_xen)
469		return;
470
471	for (i = 0; i < nr_cpus; ++i) {
472		if (children[i].pid) {
473			int c_pid = children[i].pid;
474			children[i].pid = 0;
475			if (kill(c_pid, SIGKILL)==0)
476				waitpid(c_pid, NULL, 0);
477		}
478	}
479}
480
481
482void perfmon_start(void)
483{
484	size_t i;
485
486	if (cpu_type == CPU_TIMER_INT)
487		return;
488
489	if (!no_xen) {
490		perfmon_start_child(xen_ctx->ctx_fd);
491		return;
492	}
493
494	for (i = 0; i < nr_cpus; ++i) {
495		if (kill(children[i].pid, SIGUSR1)) {
496			perror("Unable to start perfmon");
497			exit(EXIT_FAILURE);
498		}
499	}
500}
501
502
503void perfmon_stop(void)
504{
505	size_t i;
506
507	if (cpu_type == CPU_TIMER_INT)
508		return;
509
510	if (!no_xen) {
511		perfmon_stop_child(xen_ctx->ctx_fd);
512		return;
513	}
514
515	for (i = 0; i < nr_cpus; ++i)
516		if (kill(children[i].pid, SIGUSR2)) {
517			perror("Unable to stop perfmon");
518			exit(EXIT_FAILURE);
519		}
520}
521
522#endif /* __ia64__ */
523