1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "sandbox/linux/seccomp-bpf/syscall.h"
6
7#include <asm/unistd.h>
8#include <errno.h>
9
10#include "base/basictypes.h"
11#include "base/logging.h"
12#include "sandbox/linux/seccomp-bpf/linux_seccomp.h"
13
14namespace sandbox {
15
16namespace {
17
18#if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \
19    defined(ARCH_CPU_MIPS_FAMILY)
20// Number that's not currently used by any Linux kernel ABIs.
21const int kInvalidSyscallNumber = 0x351d3;
22#else
23#error Unrecognized architecture
24#endif
25
26asm(// We need to be able to tell the kernel exactly where we made a
27    // system call. The C++ compiler likes to sometimes clone or
28    // inline code, which would inadvertently end up duplicating
29    // the entry point.
30    // "gcc" can suppress code duplication with suitable function
31    // attributes, but "clang" doesn't have this ability.
32    // The "clang" developer mailing list suggested that the correct
33    // and portable solution is a file-scope assembly block.
34    // N.B. We do mark our code as a proper function so that backtraces
35    // work correctly. But we make absolutely no attempt to use the
36    // ABI's calling conventions for passing arguments. We will only
37    // ever be called from assembly code and thus can pick more
38    // suitable calling conventions.
39#if defined(__i386__)
40    ".text\n"
41    ".align 16, 0x90\n"
42    ".type SyscallAsm, @function\n"
43    "SyscallAsm:.cfi_startproc\n"
44    // Check if "%eax" is negative. If so, do not attempt to make a
45    // system call. Instead, compute the return address that is visible
46    // to the kernel after we execute "int $0x80". This address can be
47    // used as a marker that BPF code inspects.
48    "test %eax, %eax\n"
49    "jge  1f\n"
50    // Always, make sure that our code is position-independent, or
51    // address space randomization might not work on i386. This means,
52    // we can't use "lea", but instead have to rely on "call/pop".
53    "call 0f;   .cfi_adjust_cfa_offset  4\n"
54    "0:pop  %eax; .cfi_adjust_cfa_offset -4\n"
55    "addl $2f-0b, %eax\n"
56    "ret\n"
57    // Save register that we don't want to clobber. On i386, we need to
58    // save relatively aggressively, as there are a couple or registers
59    // that are used internally (e.g. %ebx for position-independent
60    // code, and %ebp for the frame pointer), and as we need to keep at
61    // least a few registers available for the register allocator.
62    "1:push %esi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset esi, 0\n"
63    "push %edi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset edi, 0\n"
64    "push %ebx; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebx, 0\n"
65    "push %ebp; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebp, 0\n"
66    // Copy entries from the array holding the arguments into the
67    // correct CPU registers.
68    "movl  0(%edi), %ebx\n"
69    "movl  4(%edi), %ecx\n"
70    "movl  8(%edi), %edx\n"
71    "movl 12(%edi), %esi\n"
72    "movl 20(%edi), %ebp\n"
73    "movl 16(%edi), %edi\n"
74    // Enter the kernel.
75    "int  $0x80\n"
76    // This is our "magic" return address that the BPF filter sees.
77    "2:"
78    // Restore any clobbered registers that we didn't declare to the
79    // compiler.
80    "pop  %ebp; .cfi_restore ebp; .cfi_adjust_cfa_offset -4\n"
81    "pop  %ebx; .cfi_restore ebx; .cfi_adjust_cfa_offset -4\n"
82    "pop  %edi; .cfi_restore edi; .cfi_adjust_cfa_offset -4\n"
83    "pop  %esi; .cfi_restore esi; .cfi_adjust_cfa_offset -4\n"
84    "ret\n"
85    ".cfi_endproc\n"
86    "9:.size SyscallAsm, 9b-SyscallAsm\n"
87#elif defined(__x86_64__)
88    ".text\n"
89    ".align 16, 0x90\n"
90    ".type SyscallAsm, @function\n"
91    "SyscallAsm:.cfi_startproc\n"
92    // Check if "%rax" is negative. If so, do not attempt to make a
93    // system call. Instead, compute the return address that is visible
94    // to the kernel after we execute "syscall". This address can be
95    // used as a marker that BPF code inspects.
96    "test %rax, %rax\n"
97    "jge  1f\n"
98    // Always make sure that our code is position-independent, or the
99    // linker will throw a hissy fit on x86-64.
100    "call 0f;   .cfi_adjust_cfa_offset  8\n"
101    "0:pop  %rax; .cfi_adjust_cfa_offset -8\n"
102    "addq $2f-0b, %rax\n"
103    "ret\n"
104    // We declared all clobbered registers to the compiler. On x86-64,
105    // there really isn't much of a problem with register pressure. So,
106    // we can go ahead and directly copy the entries from the arguments
107    // array into the appropriate CPU registers.
108    "1:movq  0(%r12), %rdi\n"
109    "movq  8(%r12), %rsi\n"
110    "movq 16(%r12), %rdx\n"
111    "movq 24(%r12), %r10\n"
112    "movq 32(%r12), %r8\n"
113    "movq 40(%r12), %r9\n"
114    // Enter the kernel.
115    "syscall\n"
116    // This is our "magic" return address that the BPF filter sees.
117    "2:ret\n"
118    ".cfi_endproc\n"
119    "9:.size SyscallAsm, 9b-SyscallAsm\n"
120#elif defined(__arm__)
121    // Throughout this file, we use the same mode (ARM vs. thumb)
122    // that the C++ compiler uses. This means, when transfering control
123    // from C++ to assembly code, we do not need to switch modes (e.g.
124    // by using the "bx" instruction). It also means that our assembly
125    // code should not be invoked directly from code that lives in
126    // other compilation units, as we don't bother implementing thumb
127    // interworking. That's OK, as we don't make any of the assembly
128    // symbols public. They are all local to this file.
129    ".text\n"
130    ".align 2\n"
131    ".type SyscallAsm, %function\n"
132#if defined(__thumb__)
133    ".thumb_func\n"
134#else
135    ".arm\n"
136#endif
137    "SyscallAsm:.fnstart\n"
138    "@ args = 0, pretend = 0, frame = 8\n"
139    "@ frame_needed = 1, uses_anonymous_args = 0\n"
140#if defined(__thumb__)
141    ".cfi_startproc\n"
142    "push {r7, lr}\n"
143    ".cfi_offset 14, -4\n"
144    ".cfi_offset  7, -8\n"
145    "mov r7, sp\n"
146    ".cfi_def_cfa_register 7\n"
147    ".cfi_def_cfa_offset 8\n"
148#else
149    "stmfd sp!, {fp, lr}\n"
150    "add fp, sp, #4\n"
151#endif
152    // Check if "r0" is negative. If so, do not attempt to make a
153    // system call. Instead, compute the return address that is visible
154    // to the kernel after we execute "swi 0". This address can be
155    // used as a marker that BPF code inspects.
156    "cmp r0, #0\n"
157    "bge 1f\n"
158    "adr r0, 2f\n"
159    "b   2f\n"
160    // We declared (almost) all clobbered registers to the compiler. On
161    // ARM there is no particular register pressure. So, we can go
162    // ahead and directly copy the entries from the arguments array
163    // into the appropriate CPU registers.
164    "1:ldr r5, [r6, #20]\n"
165    "ldr r4, [r6, #16]\n"
166    "ldr r3, [r6, #12]\n"
167    "ldr r2, [r6, #8]\n"
168    "ldr r1, [r6, #4]\n"
169    "mov r7, r0\n"
170    "ldr r0, [r6, #0]\n"
171    // Enter the kernel
172    "swi 0\n"
173// Restore the frame pointer. Also restore the program counter from
174// the link register; this makes us return to the caller.
175#if defined(__thumb__)
176    "2:pop {r7, pc}\n"
177    ".cfi_endproc\n"
178#else
179    "2:ldmfd sp!, {fp, pc}\n"
180#endif
181    ".fnend\n"
182    "9:.size SyscallAsm, 9b-SyscallAsm\n"
183#elif defined(__mips__)
184    ".text\n"
185    ".align 4\n"
186    ".type SyscallAsm, @function\n"
187    "SyscallAsm:.ent SyscallAsm\n"
188    ".frame  $sp, 40, $ra\n"
189    ".set   push\n"
190    ".set   noreorder\n"
191    "addiu  $sp, $sp, -40\n"
192    "sw     $ra, 36($sp)\n"
193    // Check if "v0" is negative. If so, do not attempt to make a
194    // system call. Instead, compute the return address that is visible
195    // to the kernel after we execute "syscall". This address can be
196    // used as a marker that BPF code inspects.
197    "bgez   $v0, 1f\n"
198    " nop\n"
199    "la     $v0, 2f\n"
200    "b      2f\n"
201    " nop\n"
202    // On MIPS first four arguments go to registers a0 - a3 and any
203    // argument after that goes to stack. We can go ahead and directly
204    // copy the entries from the arguments array into the appropriate
205    // CPU registers and on the stack.
206    "1:lw     $a3, 28($a0)\n"
207    "lw     $a2, 24($a0)\n"
208    "lw     $a1, 20($a0)\n"
209    "lw     $t0, 16($a0)\n"
210    "sw     $a3, 28($sp)\n"
211    "sw     $a2, 24($sp)\n"
212    "sw     $a1, 20($sp)\n"
213    "sw     $t0, 16($sp)\n"
214    "lw     $a3, 12($a0)\n"
215    "lw     $a2, 8($a0)\n"
216    "lw     $a1, 4($a0)\n"
217    "lw     $a0, 0($a0)\n"
218    // Enter the kernel
219    "syscall\n"
220    // This is our "magic" return address that the BPF filter sees.
221    // Restore the return address from the stack.
222    "2:lw     $ra, 36($sp)\n"
223    "jr     $ra\n"
224    " addiu  $sp, $sp, 40\n"
225    ".set    pop\n"
226    ".end    SyscallAsm\n"
227    ".size   SyscallAsm,.-SyscallAsm\n"
228#elif defined(__aarch64__)
229    ".text\n"
230    ".align 2\n"
231    ".type SyscallAsm, %function\n"
232    "SyscallAsm:\n"
233    ".cfi_startproc\n"
234    "cmp x0, #0\n"
235    "b.ge 1f\n"
236    "adr x0,2f\n"
237    "b 2f\n"
238    "1:ldr x5, [x6, #40]\n"
239    "ldr x4, [x6, #32]\n"
240    "ldr x3, [x6, #24]\n"
241    "ldr x2, [x6, #16]\n"
242    "ldr x1, [x6, #8]\n"
243    "mov x8, x0\n"
244    "ldr x0, [x6, #0]\n"
245    // Enter the kernel
246    "svc 0\n"
247    "2:ret\n"
248    ".cfi_endproc\n"
249    ".size SyscallAsm, .-SyscallAsm\n"
250#endif
251    );  // asm
252
253}  // namespace
254
255intptr_t Syscall::InvalidCall() {
256  // Explicitly pass eight zero arguments just in case.
257  return Call(kInvalidSyscallNumber, 0, 0, 0, 0, 0, 0, 0, 0);
258}
259
260intptr_t Syscall::Call(int nr,
261                       intptr_t p0,
262                       intptr_t p1,
263                       intptr_t p2,
264                       intptr_t p3,
265                       intptr_t p4,
266                       intptr_t p5,
267                       intptr_t p6,
268                       intptr_t p7) {
269  // We rely on "intptr_t" to be the exact size as a "void *". This is
270  // typically true, but just in case, we add a check. The language
271  // specification allows platforms some leeway in cases, where
272  // "sizeof(void *)" is not the same as "sizeof(void (*)())". We expect
273  // that this would only be an issue for IA64, which we are currently not
274  // planning on supporting. And it is even possible that this would work
275  // on IA64, but for lack of actual hardware, I cannot test.
276  COMPILE_ASSERT(sizeof(void*) == sizeof(intptr_t),
277                 pointer_types_and_intptr_must_be_exactly_the_same_size);
278
279  // TODO(nedeljko): Enable use of more than six parameters on architectures
280  //                 where that makes sense.
281#if defined(__mips__)
282  const intptr_t args[8] = {p0, p1, p2, p3, p4, p5, p6, p7};
283#else
284  DCHECK_EQ(p6, 0) << " Support for syscalls with more than six arguments not "
285                      "added for this architecture";
286  DCHECK_EQ(p7, 0) << " Support for syscalls with more than six arguments not "
287                      "added for this architecture";
288  const intptr_t args[6] = {p0, p1, p2, p3, p4, p5};
289#endif  // defined(__mips__)
290
291// Invoke our file-scope assembly code. The constraints have been picked
292// carefully to match what the rest of the assembly code expects in input,
293// output, and clobbered registers.
294#if defined(__i386__)
295  intptr_t ret = nr;
296  asm volatile(
297      "call SyscallAsm\n"
298      // N.B. These are not the calling conventions normally used by the ABI.
299      : "=a"(ret)
300      : "0"(ret), "D"(args)
301      : "cc", "esp", "memory", "ecx", "edx");
302#elif defined(__x86_64__)
303  intptr_t ret = nr;
304  {
305    register const intptr_t* data __asm__("r12") = args;
306    asm volatile(
307        "lea  -128(%%rsp), %%rsp\n"  // Avoid red zone.
308        "call SyscallAsm\n"
309        "lea  128(%%rsp), %%rsp\n"
310        // N.B. These are not the calling conventions normally used by the ABI.
311        : "=a"(ret)
312        : "0"(ret), "r"(data)
313        : "cc",
314          "rsp",
315          "memory",
316          "rcx",
317          "rdi",
318          "rsi",
319          "rdx",
320          "r8",
321          "r9",
322          "r10",
323          "r11");
324  }
325#elif defined(__arm__)
326  intptr_t ret;
327  {
328    register intptr_t inout __asm__("r0") = nr;
329    register const intptr_t* data __asm__("r6") = args;
330    asm volatile(
331        "bl SyscallAsm\n"
332        // N.B. These are not the calling conventions normally used by the ABI.
333        : "=r"(inout)
334        : "0"(inout), "r"(data)
335        : "cc",
336          "lr",
337          "memory",
338          "r1",
339          "r2",
340          "r3",
341          "r4",
342          "r5"
343#if !defined(__thumb__)
344          // In thumb mode, we cannot use "r7" as a general purpose register, as
345          // it is our frame pointer. We have to manually manage and preserve
346          // it.
347          // In ARM mode, we have a dedicated frame pointer register and "r7" is
348          // thus available as a general purpose register. We don't preserve it,
349          // but instead mark it as clobbered.
350          ,
351          "r7"
352#endif  // !defined(__thumb__)
353        );
354    ret = inout;
355  }
356#elif defined(__mips__)
357  int err_status;
358  intptr_t ret = Syscall::SandboxSyscallRaw(nr, args, &err_status);
359
360  if (err_status) {
361    // On error, MIPS returns errno from syscall instead of -errno.
362    // The purpose of this negation is for SandboxSyscall() to behave
363    // more like it would on other architectures.
364    ret = -ret;
365  }
366#elif defined(__aarch64__)
367  intptr_t ret;
368  {
369    register intptr_t inout __asm__("x0") = nr;
370    register const intptr_t* data __asm__("x6") = args;
371    asm volatile("bl SyscallAsm\n"
372                 : "=r"(inout)
373                 : "0"(inout), "r"(data)
374                 : "memory", "x1", "x2", "x3", "x4", "x5", "x8", "x30");
375    ret = inout;
376  }
377
378#else
379#error "Unimplemented architecture"
380#endif
381  return ret;
382}
383
384void Syscall::PutValueInUcontext(intptr_t ret_val, ucontext_t* ctx) {
385#if defined(__mips__)
386  // Mips ABI states that on error a3 CPU register has non zero value and if
387  // there is no error, it should be zero.
388  if (ret_val <= -1 && ret_val >= -4095) {
389    // |ret_val| followes the Syscall::Call() convention of being -errno on
390    // errors. In order to write correct value to return register this sign
391    // needs to be changed back.
392    ret_val = -ret_val;
393    SECCOMP_PARM4(ctx) = 1;
394  } else
395    SECCOMP_PARM4(ctx) = 0;
396#endif
397  SECCOMP_RESULT(ctx) = static_cast<greg_t>(ret_val);
398}
399
400#if defined(__mips__)
401intptr_t Syscall::SandboxSyscallRaw(int nr,
402                                    const intptr_t* args,
403                                    intptr_t* err_ret) {
404  register intptr_t ret __asm__("v0") = nr;
405  // a3 register becomes non zero on error.
406  register intptr_t err_stat __asm__("a3") = 0;
407  {
408    register const intptr_t* data __asm__("a0") = args;
409    asm volatile(
410        "la $t9, SyscallAsm\n"
411        "jalr $t9\n"
412        " nop\n"
413        : "=r"(ret), "=r"(err_stat)
414        : "0"(ret),
415          "r"(data)
416          // a2 is in the clober list so inline assembly can not change its
417          // value.
418        : "memory", "ra", "t9", "a2");
419  }
420
421  // Set an error status so it can be used outside of this function
422  *err_ret = err_stat;
423
424  return ret;
425}
426#endif  // defined(__mips__)
427
428}  // namespace sandbox
429