1/*
2 * Context switch microbenchmark.
3 *
4 * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#define _GNU_SOURCE
13#include <errno.h>
14#include <sched.h>
15#include <string.h>
16#include <stdio.h>
17#include <unistd.h>
18#include <stdlib.h>
19#include <getopt.h>
20#include <signal.h>
21#include <assert.h>
22#include <pthread.h>
23#include <limits.h>
24#include <sys/time.h>
25#include <sys/syscall.h>
26#include <sys/types.h>
27#include <sys/shm.h>
28#include <linux/futex.h>
29#ifdef __powerpc__
30#include <altivec.h>
31#endif
32#include "utils.h"
33
34static unsigned int timeout = 30;
35
36static int touch_vdso;
37struct timeval tv;
38
39static int touch_fp = 1;
40double fp;
41
42static int touch_vector = 1;
43vector int a, b, c;
44
#ifdef __powerpc__
/* Cleared by --no-altivec: touch() otherwise calls altivec_touch_fn(). */
static int touch_altivec = 1;

/*
 * Vector add built with the "no-vsx" target attribute.
 * NOTE(review): presumably so the add is done with Altivec rather than VSX
 * instructions -- confirm against the compiler documentation.
 *
 * Note: LTO (Link Time Optimisation) doesn't play well with this function
 * attribute. Be very careful enabling LTO for this test.
 */
static void
__attribute__((__target__("no-vsx")))
altivec_touch_fn(void)
{
	c = a + b;
}
#endif
57
58static void touch(void)
59{
60	if (touch_vdso)
61		gettimeofday(&tv, NULL);
62
63	if (touch_fp)
64		fp += 0.1;
65
66#ifdef __powerpc__
67	if (touch_altivec)
68		altivec_touch_fn();
69#endif
70
71	if (touch_vector)
72		c = a + b;
73
74	asm volatile("# %0 %1 %2": : "r"(&tv), "r"(&fp), "r"(&c));
75}
76
/*
 * Start fn(arg) in a new thread whose affinity is restricted to @cpu.
 *
 * The thread is created and left running; it is never joined. Any pthread
 * failure is reported with perror() and terminates the program with
 * status 1.
 */
static void start_thread_on(void *(*fn)(void *), void *arg, unsigned long cpu)
{
	int rc;
	pthread_t tid;
	cpu_set_t cpuset;
	pthread_attr_t attr;

	CPU_ZERO(&cpuset);
	CPU_SET(cpu, &cpuset);

	/* pthread functions return the error number instead of setting errno. */
	rc = pthread_attr_init(&attr);
	if (rc) {
		errno = rc;
		perror("pthread_attr_init");
		exit(1);
	}

	rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
	if (rc)	{
		errno = rc;
		perror("pthread_attr_setaffinity_np");
		exit(1);
	}

	rc = pthread_create(&tid, &attr, fn, arg);
	if (rc) {
		errno = rc;
		perror("pthread_create");
		exit(1);
	}

	/*
	 * The attribute object is only consulted by pthread_create(); release
	 * it so whatever it holds (e.g. the affinity mask copy) is not leaked.
	 */
	pthread_attr_destroy(&attr);
}
108
/*
 * Fork a child, pin it to @cpu and run fn(arg) in it.
 *
 * The parent returns right after a successful fork(). The child never
 * returns to the caller: it exits with status 0 if fn() ever returns, or
 * with status 1 if the affinity cannot be set. A fork() failure terminates
 * the program with status 1.
 */
static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu)
{
	pid_t pid;
	cpu_set_t cpuset;

	/*
	 * Anything still sitting in a stdio buffer would be duplicated into
	 * the child and written a second time when the child exits, so push
	 * it out before forking.
	 */
	fflush(NULL);

	pid = fork();
	if (pid == -1) {
		perror("fork");
		exit(1);
	}

	/* Parent: nothing more to do here. */
	if (pid)
		return;

	CPU_ZERO(&cpuset);
	CPU_SET(cpu, &cpuset);

	/* pid 0 means "the calling process", i.e. the child itself. */
	if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) {
		perror("sched_setaffinity");
		exit(1);
	}

	fn(arg);

	exit(0);
}
135
136static unsigned long iterations;
137static unsigned long iterations_prev;
138
139static void sigalrm_handler(int junk)
140{
141	unsigned long i = iterations;
142
143	printf("%ld\n", i - iterations_prev);
144	iterations_prev = i;
145
146	if (--timeout == 0)
147		kill(0, SIGUSR1);
148
149	alarm(1);
150}
151
/*
 * SIGUSR1 handler: end the receiving process with a success status.
 * The signal number itself is not looked at.
 */
static void sigusr1_handler(int junk)
{
	(void)junk;
	exit(EXIT_SUCCESS);
}
156
/*
 * Description of one benchmark variant.
 *
 * setup   - called once by main() with both CPU numbers before anything
 *           is started
 * thread1 - entry point started on the first CPU
 * thread2 - entry point started on the second CPU
 */
struct actions {
	void (*setup)(int cpu1, int cpu2);
	void *(*thread1)(void *arg);
	void *(*thread2)(void *arg);
};
162
/* Indices into the descriptor pair filled in by pipe(). */
#define READ 0
#define WRITE 1

/*
 * pipe_fd1 is written by pipe_thread2 and read by pipe_thread1;
 * pipe_fd2 carries the byte back in the other direction.
 */
static int pipe_fd1[2];
static int pipe_fd2[2];

/*
 * Create the two pipes used by the pipe test. The CPU arguments are not
 * used. If either pipe() call fails the error is reported and the program
 * exits with status 1.
 */
static void pipe_setup(int cpu1, int cpu2)
{
	if (pipe(pipe_fd1) || pipe(pipe_fd2)) {
		/* Report the reason, like every other error path in this file. */
		perror("pipe");
		exit(1);
	}
}
174
175static void *pipe_thread1(void *arg)
176{
177	signal(SIGALRM, sigalrm_handler);
178	alarm(1);
179
180	while (1) {
181		assert(read(pipe_fd1[READ], &c, 1) == 1);
182		touch();
183
184		assert(write(pipe_fd2[WRITE], &c, 1) == 1);
185		touch();
186
187		iterations += 2;
188	}
189
190	return NULL;
191}
192
193static void *pipe_thread2(void *arg)
194{
195	while (1) {
196		assert(write(pipe_fd1[WRITE], &c, 1) == 1);
197		touch();
198
199		assert(read(pipe_fd2[READ], &c, 1) == 1);
200		touch();
201	}
202
203	return NULL;
204}
205
206static struct actions pipe_actions = {
207	.setup = pipe_setup,
208	.thread1 = pipe_thread1,
209	.thread2 = pipe_thread2,
210};
211
/*
 * Setup hook of the yield test: it only checks that both tasks were asked
 * to run on the same CPU, and otherwise prints a message to stderr and
 * exits with status 1.
 */
static void yield_setup(int cpu1, int cpu2)
{
	if (cpu1 == cpu2)
		return;

	fprintf(stderr, "Both threads must be on the same CPU for yield test\n");
	exit(1);
}
219
220static void *yield_thread1(void *arg)
221{
222	signal(SIGALRM, sigalrm_handler);
223	alarm(1);
224
225	while (1) {
226		sched_yield();
227		touch();
228
229		iterations += 2;
230	}
231
232	return NULL;
233}
234
/*
 * Partner side of the yield test: yield the CPU and call touch() in an
 * endless loop. @arg is unused.
 */
static void *yield_thread2(void *arg)
{
	for (;;) {
		sched_yield();
		touch();
	}

	return NULL;
}
244
245static struct actions yield_actions = {
246	.setup = yield_setup,
247	.thread1 = yield_thread1,
248	.thread2 = yield_thread2,
249};
250
/*
 * Thin wrapper that issues the raw futex system call through syscall().
 * The six arguments are forwarded unchanged and the value syscall()
 * returns is handed back to the caller.
 */
static long sys_futex(void *addr1, int op, int val1, struct timespec *timeout,
		      void *addr2, int val3)
{
	long ret;

	ret = syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3);

	return ret;
}
256
/*
 * Atomic compare-and-swap (strong variant, sequentially consistent).
 *
 * If *p equals @expected it is replaced by @desired. Either way the value
 * that was found in *p is returned, so the swap took place exactly when
 * the return value equals @expected.
 */
static unsigned long cmpxchg(unsigned long *p, unsigned long expected,
			     unsigned long desired)
{
	unsigned long seen = expected;

	/* On failure the builtin stores the current contents of *p in seen. */
	if (__atomic_compare_exchange_n(p, &seen, desired, 0,
					__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
		return expected;

	return seen;
}
266
/*
 * Atomically store @val in *p (sequentially consistent) and return the
 * value *p held before the store.
 */
static unsigned long xchg(unsigned long *p, unsigned long val)
{
	unsigned long previous;

	__atomic_exchange(p, &val, &previous, __ATOMIC_SEQ_CST);

	return previous;
}
271
/* Non-zero when --process was given: main() then forks instead of threading. */
static int processes;

/*
 * Take the futex based lock at @m. Always returns 0.
 *
 * The lock word is used with three values: 0 (free), 1 (held, no waiter
 * announced) and 2 (held, a waiter may be asleep). A free lock is taken
 * with a single compare-and-swap; otherwise the word is forced to 2 and
 * the caller waits on the futex until an exchange finds the word at 0.
 *
 * In thread mode the private futex variant is requested.
 *
 * NOTE(review): futex(2) describes the futex word as 32 bits wide while
 * *m is an unsigned long; on a 64-bit big-endian target the kernel would
 * compare a different half of the word than the code updates -- confirm.
 */
static int mutex_lock(unsigned long *m)
{
	const int wait_op = processes ? FUTEX_WAIT
				      : (FUTEX_WAIT | FUTEX_PRIVATE_FLAG);
	int state = cmpxchg(m, 0, 1);

	/* Fast path: the lock was free and is now ours. */
	if (state == 0)
		return 0;

	/* Held without an announced waiter: announce one. */
	if (state == 1)
		state = xchg(m, 2);

	/* Sleep while the word is 2, then try again to grab it (as 2). */
	for (; state != 0; state = xchg(m, 2))
		sys_futex(m, wait_op, 2, NULL, NULL, 0);

	return 0;
}
295
296static int mutex_unlock(unsigned long *m)
297{
298	int flags = FUTEX_WAKE;
299	if (!processes)
300		flags |= FUTEX_PRIVATE_FLAG;
301
302	if (*m == 2)
303		*m = 0;
304	else if (xchg(m, 0) == 1)
305		return 0;
306
307	sys_futex(m, flags, 1, NULL, NULL, 0);
308
309	return 0;
310}
311
312static unsigned long *m1, *m2;
313
314static void futex_setup(int cpu1, int cpu2)
315{
316	if (!processes) {
317		static unsigned long _m1, _m2;
318		m1 = &_m1;
319		m2 = &_m2;
320	} else {
321		int shmid;
322		void *shmaddr;
323
324		shmid = shmget(IPC_PRIVATE, getpagesize(), SHM_R | SHM_W);
325		if (shmid < 0) {
326			perror("shmget");
327			exit(1);
328		}
329
330		shmaddr = shmat(shmid, NULL, 0);
331		if (shmaddr == (char *)-1) {
332			perror("shmat");
333			shmctl(shmid, IPC_RMID, NULL);
334			exit(1);
335		}
336
337		shmctl(shmid, IPC_RMID, NULL);
338
339		m1 = shmaddr;
340		m2 = shmaddr + sizeof(*m1);
341	}
342
343	*m1 = 0;
344	*m2 = 0;
345
346	mutex_lock(m1);
347	mutex_lock(m2);
348}
349
350static void *futex_thread1(void *arg)
351{
352	signal(SIGALRM, sigalrm_handler);
353	alarm(1);
354
355	while (1) {
356		mutex_lock(m2);
357		mutex_unlock(m1);
358
359		iterations += 2;
360	}
361
362	return NULL;
363}
364
365static void *futex_thread2(void *arg)
366{
367	while (1) {
368		mutex_unlock(m2);
369		mutex_lock(m1);
370	}
371
372	return NULL;
373}
374
375static struct actions futex_actions = {
376	.setup = futex_setup,
377	.thread1 = futex_thread1,
378	.thread2 = futex_thread2,
379};
380
381static struct option options[] = {
382	{ "test", required_argument, 0, 't' },
383	{ "process", no_argument, &processes, 1 },
384	{ "timeout", required_argument, 0, 's' },
385	{ "vdso", no_argument, &touch_vdso, 1 },
386	{ "no-fp", no_argument, &touch_fp, 0 },
387#ifdef __powerpc__
388	{ "no-altivec", no_argument, &touch_altivec, 0 },
389#endif
390	{ "no-vector", no_argument, &touch_vector, 0 },
391	{ 0, },
392};
393
/*
 * Write the command line summary to stderr. The --no-altivec line is only
 * present in powerpc builds.
 */
static void usage(void)
{
	fputs("Usage: context_switch2 <options> CPU1 CPU2\n\n", stderr);
	fputs("\t\t--test=X\tpipe, futex or yield (default)\n", stderr);
	fputs("\t\t--process\tUse processes (default threads)\n", stderr);
	fputs("\t\t--timeout=X\tDuration in seconds to run (default 30)\n", stderr);
	fputs("\t\t--vdso\t\ttouch VDSO\n", stderr);
	fputs("\t\t--no-fp\t\tDon't touch FP\n", stderr);
#ifdef __powerpc__
	fputs("\t\t--no-altivec\tDon't touch altivec\n", stderr);
#endif
	fputs("\t\t--no-vector\tDon't touch vector\n", stderr);
}
407
408int main(int argc, char *argv[])
409{
410	signed char c;
411	struct actions *actions = &yield_actions;
412	int cpu1;
413	int cpu2;
414	static void (*start_fn)(void *(*fn)(void *), void *arg, unsigned long cpu);
415
416	while (1) {
417		int option_index = 0;
418
419		c = getopt_long(argc, argv, "", options, &option_index);
420
421		if (c == -1)
422			break;
423
424		switch (c) {
425		case 0:
426			if (options[option_index].flag != 0)
427				break;
428
429			usage();
430			exit(1);
431			break;
432
433		case 't':
434			if (!strcmp(optarg, "pipe")) {
435				actions = &pipe_actions;
436			} else if (!strcmp(optarg, "yield")) {
437				actions = &yield_actions;
438			} else if (!strcmp(optarg, "futex")) {
439				actions = &futex_actions;
440			} else {
441				usage();
442				exit(1);
443			}
444			break;
445
446		case 's':
447			timeout = atoi(optarg);
448			break;
449
450		default:
451			usage();
452			exit(1);
453		}
454	}
455
456	if (processes)
457		start_fn = start_process_on;
458	else
459		start_fn = start_thread_on;
460
461	if (((argc - optind) != 2)) {
462		cpu1 = cpu2 = pick_online_cpu();
463	} else {
464		cpu1 = atoi(argv[optind++]);
465		cpu2 = atoi(argv[optind++]);
466	}
467
468	printf("Using %s with ", processes ? "processes" : "threads");
469
470	if (actions == &pipe_actions)
471		printf("pipe");
472	else if (actions == &yield_actions)
473		printf("yield");
474	else
475		printf("futex");
476
477	printf(" on cpus %d/%d touching FP:%s altivec:%s vector:%s vdso:%s\n",
478	       cpu1, cpu2, touch_fp ?  "yes" : "no", touch_altivec ? "yes" : "no",
479	       touch_vector ? "yes" : "no", touch_vdso ? "yes" : "no");
480
481	/* Create a new process group so we can signal everyone for exit */
482	setpgid(getpid(), getpid());
483
484	signal(SIGUSR1, sigusr1_handler);
485
486	actions->setup(cpu1, cpu2);
487
488	start_fn(actions->thread1, NULL, cpu1);
489	start_fn(actions->thread2, NULL, cpu2);
490
491	while (1)
492		sleep(3600);
493
494	return 0;
495}
496