1/*
2 * 32-bit syscall ABI conformance test.
3 *
4 * Copyright (c) 2015 Denys Vlasenko
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * General Public License for more details.
14 */
15/*
16 * Can be built statically:
17 * gcc -Os -Wall -static -m32 test_syscall_vdso.c thunks_32.S
18 */
19#undef _GNU_SOURCE
20#define _GNU_SOURCE 1
21#undef __USE_GNU
22#define __USE_GNU 1
23#include <unistd.h>
24#include <stdlib.h>
25#include <string.h>
26#include <stdio.h>
27#include <signal.h>
28#include <sys/types.h>
29#include <sys/select.h>
30#include <sys/time.h>
31#include <elf.h>
32#include <sys/ptrace.h>
33#include <sys/wait.h>
34
35#if !defined(__i386__)
/*
 * Fallback entry point used when the build does not target 32-bit x86:
 * report that the test is skipped and exit successfully.
 */
int main(int argc, char **argv, char **envp)
{
	fputs("[SKIP]\tNot a 32-bit x86 userspace\n", stdout);
	return 0;
}
41#else
42
/* Address that run_syscall() calls through to enter the kernel. */
long syscall_addr;

/*
 * Look up AT_SYSINFO in the ELF auxiliary vector.
 *
 * The vector is read from the memory that immediately follows the NULL
 * pointer terminating the envp[] array.
 *
 * Returns the AT_SYSINFO value, or 0 (after printing a warning) when
 * the vector holds no such entry.
 */
long get_syscall(char **envp)
{
	Elf32_auxv_t *entry;

	/* Step over every environment string, then over the final NULL. */
	while (*envp != NULL)
		envp++;
	envp++;

	entry = (void *)envp;
	while (entry->a_type != AT_NULL) {
		if (entry->a_type == AT_SYSINFO)
			return entry->a_un.a_val;
		entry++;
	}

	printf("[WARN]\tAT_SYSINFO not supplied\n");
	return 0;
}
55
/*
 * int80: minimal out-of-line stub that executes "int $0x80" and returns.
 * run_syscall_twice() stores its address in syscall_addr so that the
 * test syscall is issued through INT 80.
 */
__asm__ (
	"	.pushsection .text\n"
	"	.global	int80\n"
	"int80:\n"
	"	int	$0x80\n"
	"	ret\n"
	"	.popsection\n"
);
/* Declared as a data symbol; the code in this file only takes its address. */
extern char int80;
65
/*
 * Snapshot of the 64-bit general purpose registers.
 *
 * The member order must not change: get_regs64() stores each register
 * into a fixed 8-byte slot (index * 8), and check_regs64() walks
 * r8..r15 as eight consecutive uint64_t values.
 */
struct regs64 {
	uint64_t rax;	/* slot  0 */
	uint64_t rbx;	/* slot  1 */
	uint64_t rcx;	/* slot  2 */
	uint64_t rdx;	/* slot  3 */
	uint64_t rsi;	/* slot  4 */
	uint64_t rdi;	/* slot  5 */
	uint64_t rbp;	/* slot  6 */
	uint64_t rsp;	/* slot  7 */
	uint64_t r8;	/* slot  8 */
	uint64_t r9;	/* slot  9 */
	uint64_t r10;	/* slot 10 */
	uint64_t r11;	/* slot 11 */
	uint64_t r12;	/* slot 12 */
	uint64_t r13;	/* slot 13 */
	uint64_t r14;	/* slot 14 */
	uint64_t r15;	/* slot 15 */
};

/* The one snapshot instance; the assembly helpers address it by name. */
struct regs64 regs64;

/* Non-zero when main() decided that the kernel is 64-bit. */
int kernel_is_64bit;
74
/*
 * 64-bit helpers, assembled with .code64 inside this object file and
 * invoked through call64_from_32().
 *
 * get_regs64:    store RAX..R15 into the global regs64, one register per
 *                8-byte slot, in struct regs64 member order.
 * poison_regs64: load 0x7f7f7f7f7f7f7f7f into each of R8..R15.
 */
__asm__ (
	"	.pushsection .text\n"
	"	.code64\n"
	"get_regs64:\n"
	/* RAX becomes the base pointer, so park its value on the stack... */
	"	push	%rax\n"
	"	mov	$regs64, %eax\n"
	/* ...and pop it straight into slot 0. */
	"	pop	0*8(%rax)\n"
	"	movq	%rbx, 1*8(%rax)\n"
	"	movq	%rcx, 2*8(%rax)\n"
	"	movq	%rdx, 3*8(%rax)\n"
	"	movq	%rsi, 4*8(%rax)\n"
	"	movq	%rdi, 5*8(%rax)\n"
	"	movq	%rbp, 6*8(%rax)\n"
	"	movq	%rsp, 7*8(%rax)\n"
	"	movq	%r8,  8*8(%rax)\n"
	"	movq	%r9,  9*8(%rax)\n"
	"	movq	%r10, 10*8(%rax)\n"
	"	movq	%r11, 11*8(%rax)\n"
	"	movq	%r12, 12*8(%rax)\n"
	"	movq	%r13, 13*8(%rax)\n"
	"	movq	%r14, 14*8(%rax)\n"
	"	movq	%r15, 15*8(%rax)\n"
	"	ret\n"
	"poison_regs64:\n"
	/* Build the 64-bit pattern from two 32-bit halves in R8... */
	"	movq	$0x7f7f7f7f, %r8\n"
	"	shl	$32, %r8\n"
	"	orq	$0x7f7f7f7f, %r8\n"
	/* ...then copy it into R9..R15. */
	"	movq	%r8, %r9\n"
	"	movq	%r8, %r10\n"
	"	movq	%r8, %r11\n"
	"	movq	%r8, %r12\n"
	"	movq	%r8, %r13\n"
	"	movq	%r8, %r14\n"
	"	movq	%r8, %r15\n"
	"	ret\n"
	/* Switch the assembler back to 32-bit mode for the rest of the file. */
	"	.code32\n"
	"	.popsection\n"
);

/* C-visible names of the two assembly routines above. */
extern void get_regs64(void);
extern void poison_regs64(void);

/*
 * Not defined in this file.
 * NOTE(review): presumably provided by thunks_32.S, which the build
 * command in the header comment lists - confirm.
 */
extern unsigned long call64_from_32(void (*function)(void));
116void print_regs64(void)
117{
118	if (!kernel_is_64bit)
119		return;
120	printf("ax:%016llx bx:%016llx cx:%016llx dx:%016llx\n", regs64.rax,  regs64.rbx,  regs64.rcx,  regs64.rdx);
121	printf("si:%016llx di:%016llx bp:%016llx sp:%016llx\n", regs64.rsi,  regs64.rdi,  regs64.rbp,  regs64.rsp);
122	printf(" 8:%016llx  9:%016llx 10:%016llx 11:%016llx\n", regs64.r8 ,  regs64.r9 ,  regs64.r10,  regs64.r11);
123	printf("12:%016llx 13:%016llx 14:%016llx 15:%016llx\n", regs64.r12,  regs64.r13,  regs64.r14,  regs64.r15);
124}
125
126int check_regs64(void)
127{
128	int err = 0;
129	int num = 8;
130	uint64_t *r64 = &regs64.r8;
131
132	if (!kernel_is_64bit)
133		return 0;
134
135	do {
136		if (*r64 == 0x7f7f7f7f7f7f7f7fULL)
137			continue; /* register did not change */
138		if (syscall_addr != (long)&int80) {
139			/*
140			 * Non-INT80 syscall entrypoints are allowed to clobber R8+ regs:
141			 * either clear them to 0, or for R11, load EFLAGS.
142			 */
143			if (*r64 == 0)
144				continue;
145			if (num == 11) {
146				printf("[NOTE]\tR11 has changed:%016llx - assuming clobbered by SYSRET insn\n", *r64);
147				continue;
148			}
149		} else {
150			/* INT80 syscall entrypoint can be used by
151			 * 64-bit programs too, unlike SYSCALL/SYSENTER.
152			 * Therefore it must preserve R12+
153			 * (they are callee-saved registers in 64-bit C ABI).
154			 *
155			 * This was probably historically not intended,
156			 * but R8..11 are clobbered (cleared to 0).
157			 * IOW: they are the only registers which aren't
158			 * preserved across INT80 syscall.
159			 */
160			if (*r64 == 0 && num <= 11)
161				continue;
162		}
163		printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64);
164		err++;
165	} while (r64++, ++num < 16);
166
167	if (!err)
168		printf("[OK]\tR8..R15 did not leak kernel data\n");
169	return err;
170}
171
/*
 * Arguments of the pselect call issued by run_syscall(), filled in by
 * prep_args().  They are file-scope objects with external linkage
 * because the inline assembly refers to them by symbol name.
 */
int nfds;			/* arg1 */
fd_set rfds, wfds, efds;	/* arg2, arg3, arg4 */
struct timespec timeout;	/* arg5 */

/* Signal mask that the descriptor below points at. */
sigset_t sigmask;

/* arg6: pointer to the signal mask together with its size in bytes. */
struct {
	sigset_t *sp;
	int sz;
} sigmask_desc;
182
183void prep_args()
184{
185	nfds = 42;
186	FD_ZERO(&rfds);
187	FD_ZERO(&wfds);
188	FD_ZERO(&efds);
189	FD_SET(0, &rfds);
190	FD_SET(1, &wfds);
191	FD_SET(2, &efds);
192	timeout.tv_sec = 0;
193	timeout.tv_nsec = 123;
194	sigemptyset(&sigmask);
195	sigaddset(&sigmask, SIGINT);
196	sigaddset(&sigmask, SIGUSR2);
197	sigaddset(&sigmask, SIGRTMAX);
198	sigmask_desc.sp = &sigmask;
199	sigmask_desc.sz = 8; /* bytes */
200}
201
/*
 * Print "<name>=<value as 16 hex digits> " followed by a symbolic
 * rendering of flag bits 21..0, most significant first, to stdout.
 *
 * name: text printed in front of the value.
 * r:    flags word to decode; if any bit above bit 21 is set, the text
 *       "(extra bits are set) " is printed before the decoded bits.
 *
 * The label for bit 0 carries the terminating newline.
 */
static void print_flags(const char *name, unsigned long r)
{
	/* label[bit][0] is used when the bit is clear, label[bit][1] when set. */
	static const char *const label[22][2] = {
		{ "\n", "c\n"  },	/* Carry Flag */
		{ "0 ", "1 "   },	/* Bit 1 - always on */
		{ "",   "p "   },	/* Parity Flag */
		{ "0 ", "3? "  },
		{ "",   "a "   },	/* Auxiliary carry Flag */
		{ "0 ", "5? "  },
		{ "",   "z "   },	/* Zero Flag */
		{ "",   "s "   },	/* Sign Flag */
		{ "",   "t "   },	/* Trap Flag */
		{ "",   "i "   },	/* Interrupt Flag */
		{ "",   "d "   },	/* Direction Flag */
		{ "",   "o "   },	/* Overflow Flag */
		{ "0 ", "1 "   },	/* I/O Privilege Level (2 bits) */
		{ "0",  "1"    },	/* I/O Privilege Level (2 bits) */
		{ "",   "n "   },	/* Nested Task */
		{ "0 ", "15? " },
		{ "",   "r "   },	/* Resume Flag */
		{ "",   "v "   },	/* Virtual Mode */
		{ "",   "ac "  },	/* Alignment Check/Access Control */
		{ "",   "vif " },	/* Virtual Interrupt Flag */
		{ "",   "vip " },	/* Virtual Interrupt Pending */
		{ "",   "id "  },	/* CPUID detection */
	};
	int bit;

	printf("%s=%016lx ", name, r);
	if ((r >> 22) != 0)
		printf("(extra bits are set) ");

	for (bit = 21; bit >= 0; bit--) {
		const char *text = label[bit][(r >> bit) & 1];

		/* Empty labels mean "say nothing for this state". */
		if (text[0] != '\0')
			fputs(text, stdout);
	}
}
244
245int run_syscall(void)
246{
247	long flags, bad_arg;
248
249	prep_args();
250
251	if (kernel_is_64bit)
252		call64_from_32(poison_regs64);
253	/*print_regs64();*/
254
255	asm("\n"
256	/* Try 6-arg syscall: pselect. It should return quickly */
257	"	push	%%ebp\n"
258	"	mov	$308, %%eax\n"     /* PSELECT */
259	"	mov	nfds, %%ebx\n"     /* ebx  arg1 */
260	"	mov	$rfds, %%ecx\n"    /* ecx  arg2 */
261	"	mov	$wfds, %%edx\n"    /* edx  arg3 */
262	"	mov	$efds, %%esi\n"    /* esi  arg4 */
263	"	mov	$timeout, %%edi\n" /* edi  arg5 */
264	"	mov	$sigmask_desc, %%ebp\n" /* %ebp arg6 */
265	"	push	$0x200ed7\n"      /* set almost all flags */
266	"	popf\n"		/* except TF, IOPL, NT, RF, VM, AC, VIF, VIP */
267	"	call	*syscall_addr\n"
268	/* Check that registers are not clobbered */
269	"	pushf\n"
270	"	pop	%%eax\n"
271	"	cld\n"
272	"	cmp	nfds, %%ebx\n"     /* ebx  arg1 */
273	"	mov	$1, %%ebx\n"
274	"	jne	1f\n"
275	"	cmp	$rfds, %%ecx\n"    /* ecx  arg2 */
276	"	mov	$2, %%ebx\n"
277	"	jne	1f\n"
278	"	cmp	$wfds, %%edx\n"    /* edx  arg3 */
279	"	mov	$3, %%ebx\n"
280	"	jne	1f\n"
281	"	cmp	$efds, %%esi\n"    /* esi  arg4 */
282	"	mov	$4, %%ebx\n"
283	"	jne	1f\n"
284	"	cmp	$timeout, %%edi\n" /* edi  arg5 */
285	"	mov	$5, %%ebx\n"
286	"	jne	1f\n"
287	"	cmpl	$sigmask_desc, %%ebp\n" /* %ebp arg6 */
288	"	mov	$6, %%ebx\n"
289	"	jne	1f\n"
290	"	mov	$0, %%ebx\n"
291	"1:\n"
292	"	pop	%%ebp\n"
293	: "=a" (flags), "=b" (bad_arg)
294	:
295	: "cx", "dx", "si", "di"
296	);
297
298	if (kernel_is_64bit) {
299		memset(&regs64, 0x77, sizeof(regs64));
300		call64_from_32(get_regs64);
301		/*print_regs64();*/
302	}
303
304	/*
305	 * On paravirt kernels, flags are not preserved across syscalls.
306	 * Thus, we do not consider it a bug if some are changed.
307	 * We just show ones which do.
308	 */
309	if ((0x200ed7 ^ flags) != 0) {
310		print_flags("[WARN]\tFlags before", 0x200ed7);
311		print_flags("[WARN]\tFlags  after", flags);
312		print_flags("[WARN]\tFlags change", (0x200ed7 ^ flags));
313	}
314
315	if (bad_arg) {
316		printf("[FAIL]\targ#%ld clobbered\n", bad_arg);
317		return 1;
318	}
319	printf("[OK]\tArguments are preserved across syscall\n");
320
321	return check_regs64();
322}
323
324int run_syscall_twice()
325{
326	int exitcode = 0;
327	long sv;
328
329	if (syscall_addr) {
330		printf("[RUN]\tExecuting 6-argument 32-bit syscall via VDSO\n");
331		exitcode = run_syscall();
332	}
333	sv = syscall_addr;
334	syscall_addr = (long)&int80;
335	printf("[RUN]\tExecuting 6-argument 32-bit syscall via INT 80\n");
336	exitcode += run_syscall();
337	syscall_addr = sv;
338	return exitcode;
339}
340
/*
 * Fork; the child asks to be traced, stops itself with SIGSTOP and then
 * returns to the caller, so whatever the caller does next runs under
 * ptrace.  The parent never returns: it resumes the child with
 * PTRACE_SYSCALL after every stop and finally exits with
 *   - the child's exit status, if the child exited;
 *   - the terminating signal number, if the child was killed;
 *   - 255, if waitpid() fails or reports an unexpected state;
 *   - 1, if fork() itself failed.
 *
 * A child that cannot enable tracing exits with status 0.
 */
void ptrace_me()
{
	pid_t pid;

	/* Flush buffered output so it is not emitted twice after fork(). */
	fflush(NULL);
	pid = fork();
	if (pid < 0)
		exit(1);
	if (pid == 0) {
		/* child */
		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) != 0)
			exit(0);
		raise(SIGSTOP);
		return;
	}
	/* parent */
	printf("[RUN]\tRunning tests under ptrace\n");
	while (1) {
		int status = 0;

		pid = waitpid(-1, &status, __WALL);
		/*
		 * Check the return value before looking at status: when
		 * waitpid() fails, status holds no meaningful value.
		 */
		if (pid <= 0)
			exit(255);
		if (WIFEXITED(status))
			exit(WEXITSTATUS(status));
		if (WIFSIGNALED(status))
			exit(WTERMSIG(status));
		if (!WIFSTOPPED(status)) /* paranoia */
			exit(255);
		/*
		 * Note: we do not inject sig = WSTOPSIG(status).
		 * We probably should, but careful: do not inject SIGTRAP
		 * generated by syscall entry/exit stops.
		 * That kills the child.
		 */
		ptrace(PTRACE_SYSCALL, pid, 0L, 0L /*sig*/);
	}
}
376
377int main(int argc, char **argv, char **envp)
378{
379	int exitcode = 0;
380	int cs;
381
382	asm("\n"
383	"	movl	%%cs, %%eax\n"
384	: "=a" (cs)
385	);
386	kernel_is_64bit = (cs == 0x23);
387	if (!kernel_is_64bit)
388		printf("[NOTE]\tNot a 64-bit kernel, won't test R8..R15 leaks\n");
389
390	/* This only works for non-static builds:
391	 * syscall_addr = dlsym(dlopen("linux-gate.so.1", RTLD_NOW), "__kernel_vsyscall");
392	 */
393	syscall_addr = get_syscall(envp);
394
395	exitcode += run_syscall_twice();
396	ptrace_me();
397	exitcode += run_syscall_twice();
398
399	return exitcode;
400}
401#endif
402