1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "sandbox/linux/seccomp-bpf/syscall.h"
6
7#include <errno.h>
8#include <stdint.h>
9
10#include "base/logging.h"
11#include "sandbox/linux/bpf_dsl/seccomp_macros.h"
12
13namespace sandbox {
14
namespace {

#if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \
    defined(ARCH_CPU_MIPS_FAMILY)
// Number that's not currently used by any Linux kernel ABIs. Passed by
// Syscall::InvalidCall() to deliberately trigger the invalid-syscall path.
const int kInvalidSyscallNumber = 0x351d3;
#else
#error Unrecognized architecture
#endif

asm(// We need to be able to tell the kernel exactly where we made a
    // system call. The C++ compiler likes to sometimes clone or
    // inline code, which would inadvertently end up duplicating
    // the entry point.
    // "gcc" can suppress code duplication with suitable function
    // attributes, but "clang" doesn't have this ability.
    // The "clang" developer mailing list suggested that the correct
    // and portable solution is a file-scope assembly block.
    // N.B. We do mark our code as a proper function so that backtraces
    // work correctly. But we make absolutely no attempt to use the
    // ABI's calling conventions for passing arguments. We will only
    // ever be called from assembly code and thus can pick more
    // suitable calling conventions.
#if defined(__i386__)
    ".text\n"
    ".align 16, 0x90\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.cfi_startproc\n"
    // Check if "%eax" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "int $0x80". This address can be
    // used as a marker that BPF code inspects.
    "test %eax, %eax\n"
    "jge  1f\n"
    // Always make sure that our code is position-independent, or
    // address space randomization might not work on i386. This means,
    // we can't use "lea", but instead have to rely on "call/pop".
    "call 0f;   .cfi_adjust_cfa_offset  4\n"
    "0:pop  %eax; .cfi_adjust_cfa_offset -4\n"
    "addl $2f-0b, %eax\n"
    "ret\n"
    // Save registers that we don't want to clobber. On i386, we need to
    // save relatively aggressively, as there are a couple of registers
    // that are used internally (e.g. %ebx for position-independent
    // code, and %ebp for the frame pointer), and as we need to keep at
    // least a few registers available for the register allocator.
    "1:push %esi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset esi, 0\n"
    "push %edi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset edi, 0\n"
    "push %ebx; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebx, 0\n"
    "push %ebp; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebp, 0\n"
    // Copy entries from the array holding the arguments into the
    // correct CPU registers. %edi (which holds the array pointer) is
    // loaded last, as it is itself an argument register.
    "movl  0(%edi), %ebx\n"
    "movl  4(%edi), %ecx\n"
    "movl  8(%edi), %edx\n"
    "movl 12(%edi), %esi\n"
    "movl 20(%edi), %ebp\n"
    "movl 16(%edi), %edi\n"
    // Enter the kernel.
    "int  $0x80\n"
    // This is our "magic" return address that the BPF filter sees.
    "2:"
    // Restore any clobbered registers that we didn't declare to the
    // compiler.
    "pop  %ebp; .cfi_restore ebp; .cfi_adjust_cfa_offset -4\n"
    "pop  %ebx; .cfi_restore ebx; .cfi_adjust_cfa_offset -4\n"
    "pop  %edi; .cfi_restore edi; .cfi_adjust_cfa_offset -4\n"
    "pop  %esi; .cfi_restore esi; .cfi_adjust_cfa_offset -4\n"
    "ret\n"
    ".cfi_endproc\n"
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__x86_64__)
    ".text\n"
    ".align 16, 0x90\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.cfi_startproc\n"
    // Check if "%rdi" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "syscall". This address can be
    // used as a marker that BPF code inspects.
    "test %rdi, %rdi\n"
    "jge  1f\n"
    // Always make sure that our code is position-independent, or the
    // linker will throw a hissy fit on x86-64.
    "lea 2f(%rip), %rax\n"
    "ret\n"
    // Now we load the registers used to pass arguments to the system
    // call: system call number in %rax, and arguments in %rdi, %rsi,
    // %rdx, %r10, %r8, %r9. Note: These are all caller-save registers
    // (only %rbx, %rbp, %rsp, and %r12-%r15 are callee-save), so no
    // need to worry here about spilling registers or CFI directives.
    "1:movq %rdi, %rax\n"
    "movq  0(%rsi), %rdi\n"
    "movq 16(%rsi), %rdx\n"
    "movq 24(%rsi), %r10\n"
    "movq 32(%rsi), %r8\n"
    "movq 40(%rsi), %r9\n"
    "movq  8(%rsi), %rsi\n"
    // Enter the kernel.
    "syscall\n"
    // This is our "magic" return address that the BPF filter sees.
    "2:ret\n"
    ".cfi_endproc\n"
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__arm__)
    // Throughout this file, we use the same mode (ARM vs. thumb)
    // that the C++ compiler uses. This means, when transferring control
    // from C++ to assembly code, we do not need to switch modes (e.g.
    // by using the "bx" instruction). It also means that our assembly
    // code should not be invoked directly from code that lives in
    // other compilation units, as we don't bother implementing thumb
    // interworking. That's OK, as we don't make any of the assembly
    // symbols public. They are all local to this file.
    ".text\n"
    ".align 2\n"
    ".type SyscallAsm, %function\n"
#if defined(__thumb__)
    ".thumb_func\n"
#else
    ".arm\n"
#endif
    "SyscallAsm:\n"
#if !defined(__native_client_nonsfi__)
    // The .fnstart and .fnend pseudo operations create an unwind table.
    // They also create a reference to the symbol __aeabi_unwind_cpp_pr0,
    // which is not provided by the PNaCl toolchain, so they are disabled
    // for that toolchain.
    ".fnstart\n"
#endif
    "@ args = 0, pretend = 0, frame = 8\n"
    "@ frame_needed = 1, uses_anonymous_args = 0\n"
#if defined(__thumb__)
    ".cfi_startproc\n"
    "push {r7, lr}\n"
    ".save {r7, lr}\n"
    ".cfi_offset 14, -4\n"
    ".cfi_offset  7, -8\n"
    ".cfi_def_cfa_offset 8\n"
#else
    "stmfd sp!, {fp, lr}\n"
    "add fp, sp, #4\n"
#endif
    // Check if "r0" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "swi 0". This address can be
    // used as a marker that BPF code inspects.
    "cmp r0, #0\n"
    "bge 1f\n"
    "adr r0, 2f\n"
    "b   2f\n"
    // We declared (almost) all clobbered registers to the compiler. On
    // ARM there is no particular register pressure. So, we can go
    // ahead and directly copy the entries from the arguments array
    // into the appropriate CPU registers.
    "1:ldr r5, [r6, #20]\n"
    "ldr r4, [r6, #16]\n"
    "ldr r3, [r6, #12]\n"
    "ldr r2, [r6, #8]\n"
    "ldr r1, [r6, #4]\n"
    "mov r7, r0\n"
    "ldr r0, [r6, #0]\n"
    // Enter the kernel
    "swi 0\n"
// Restore the frame pointer. Also restore the program counter from
// the link register; this makes us return to the caller.
#if defined(__thumb__)
    "2:pop {r7, pc}\n"
    ".cfi_endproc\n"
#else
    "2:ldmfd sp!, {fp, pc}\n"
#endif
#if !defined(__native_client_nonsfi__)
    // Do not use .fnstart and .fnend for the PNaCl toolchain. See above
    // comment, for more details.
    ".fnend\n"
#endif
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__mips__)
    ".text\n"
    ".align 4\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.ent SyscallAsm\n"
    ".frame  $sp, 40, $ra\n"
    ".set   push\n"
    ".set   noreorder\n"
    "addiu  $sp, $sp, -40\n"
    "sw     $ra, 36($sp)\n"
    // Check if "v0" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "syscall". This address can be
    // used as a marker that BPF code inspects.
    "bgez   $v0, 1f\n"
    " nop\n"
    "la     $v0, 2f\n"
    "b      2f\n"
    " nop\n"
    // On MIPS first four arguments go to registers a0 - a3 and any
    // argument after that goes to stack. We can go ahead and directly
    // copy the entries from the arguments array into the appropriate
    // CPU registers and on the stack.
    "1:lw     $a3, 28($a0)\n"
    "lw     $a2, 24($a0)\n"
    "lw     $a1, 20($a0)\n"
    "lw     $t0, 16($a0)\n"
    "sw     $a3, 28($sp)\n"
    "sw     $a2, 24($sp)\n"
    "sw     $a1, 20($sp)\n"
    "sw     $t0, 16($sp)\n"
    "lw     $a3, 12($a0)\n"
    "lw     $a2, 8($a0)\n"
    "lw     $a1, 4($a0)\n"
    "lw     $a0, 0($a0)\n"
    // Enter the kernel
    "syscall\n"
    // This is our "magic" return address that the BPF filter sees.
    // Restore the return address from the stack.
    "2:lw     $ra, 36($sp)\n"
    "jr     $ra\n"
    " addiu  $sp, $sp, 40\n"
    ".set    pop\n"
    ".end    SyscallAsm\n"
    ".size   SyscallAsm,.-SyscallAsm\n"
#elif defined(__aarch64__)
    ".text\n"
    ".align 2\n"
    ".type SyscallAsm, %function\n"
    "SyscallAsm:\n"
    ".cfi_startproc\n"
    // Check if "x0" is negative. If so, do not attempt to make a
    // system call. Instead, load the address of the "magic" return
    // marker (label 2) into x0 and return it, so that BPF code can
    // inspect it.
    "cmp x0, #0\n"
    "b.ge 1f\n"
    "adr x0,2f\n"
    "b 2f\n"
    // Copy the entries from the arguments array (pointed to by x6) into
    // the argument registers x0-x5, and move the system call number
    // from x0 into x8, where the aarch64 kernel ABI expects it.
    "1:ldr x5, [x6, #40]\n"
    "ldr x4, [x6, #32]\n"
    "ldr x3, [x6, #24]\n"
    "ldr x2, [x6, #16]\n"
    "ldr x1, [x6, #8]\n"
    "mov x8, x0\n"
    "ldr x0, [x6, #0]\n"
    // Enter the kernel
    "svc 0\n"
    // This is our "magic" return address that the BPF filter sees.
    "2:ret\n"
    ".cfi_endproc\n"
    ".size SyscallAsm, .-SyscallAsm\n"
#endif
    );  // asm

#if defined(__x86_64__)
// On x86-64, SyscallAsm happens to take its inputs (system call number,
// then a pointer to the argument array) in the same registers the regular
// ABI uses for the first two function arguments, so it can be declared and
// called like an ordinary C function.
extern "C" {
intptr_t SyscallAsm(intptr_t nr, const intptr_t args[6]);
}
#endif

}  // namespace
268
269intptr_t Syscall::InvalidCall() {
270  // Explicitly pass eight zero arguments just in case.
271  return Call(kInvalidSyscallNumber, 0, 0, 0, 0, 0, 0, 0, 0);
272}
273
// Invokes system call |nr| with the given arguments and returns the raw
// kernel result (-errno on failure; the MIPS branch below negates the
// kernel's positive errno to match). If |nr| is negative, SyscallAsm skips
// the kernel entry entirely and returns the "magic" marker address instead;
// see the file-scope assembly above. Only MIPS supports more than six
// arguments.
intptr_t Syscall::Call(int nr,
                       intptr_t p0,
                       intptr_t p1,
                       intptr_t p2,
                       intptr_t p3,
                       intptr_t p4,
                       intptr_t p5,
                       intptr_t p6,
                       intptr_t p7) {
  // We rely on "intptr_t" to be the exact size as a "void *". This is
  // typically true, but just in case, we add a check. The language
  // specification allows platforms some leeway in cases, where
  // "sizeof(void *)" is not the same as "sizeof(void (*)())". We expect
  // that this would only be an issue for IA64, which we are currently not
  // planning on supporting. And it is even possible that this would work
  // on IA64, but for lack of actual hardware, I cannot test.
  static_assert(sizeof(void*) == sizeof(intptr_t),
                "pointer types and intptr_t must be exactly the same size");

  // TODO(nedeljko): Enable use of more than six parameters on architectures
  //                 where that makes sense.
#if defined(__mips__)
  const intptr_t args[8] = {p0, p1, p2, p3, p4, p5, p6, p7};
#else
  DCHECK_EQ(p6, 0) << " Support for syscalls with more than six arguments not "
                      "added for this architecture";
  DCHECK_EQ(p7, 0) << " Support for syscalls with more than six arguments not "
                      "added for this architecture";
  const intptr_t args[6] = {p0, p1, p2, p3, p4, p5};
#endif  // defined(__mips__)

// Invoke our file-scope assembly code. The constraints have been picked
// carefully to match what the rest of the assembly code expects in input,
// output, and clobbered registers.
#if defined(__i386__)
  intptr_t ret = nr;
  asm volatile(
      "call SyscallAsm\n"
      // N.B. These are not the calling conventions normally used by the ABI.
      // The syscall number goes in (and the result comes back in) %eax; the
      // argument array pointer goes in %edi.
      : "=a"(ret)
      : "0"(ret), "D"(args)
      : "cc", "esp", "memory", "ecx", "edx");
#elif defined(__x86_64__)
  // On x86-64, SyscallAsm is compatible with the regular calling convention
  // and can be called directly; see the extern "C" declaration above.
  intptr_t ret = SyscallAsm(nr, args);
#elif defined(__arm__)
  intptr_t ret;
  {
    // The syscall number goes in (and the result comes back in) r0; the
    // argument array pointer goes in r6.
    register intptr_t inout __asm__("r0") = nr;
    register const intptr_t* data __asm__("r6") = args;
    asm volatile(
        "bl SyscallAsm\n"
        // N.B. These are not the calling conventions normally used by the ABI.
        : "=r"(inout)
        : "0"(inout), "r"(data)
        : "cc",
          "lr",
          "memory",
          "r1",
          "r2",
          "r3",
          "r4",
          "r5"
#if !defined(__thumb__)
          // In thumb mode, we cannot use "r7" as a general purpose register, as
          // it is our frame pointer. We have to manually manage and preserve
          // it.
          // In ARM mode, we have a dedicated frame pointer register and "r7" is
          // thus available as a general purpose register. We don't preserve it,
          // but instead mark it as clobbered.
          ,
          "r7"
#endif  // !defined(__thumb__)
        );
    ret = inout;
  }
#elif defined(__mips__)
  int err_status;
  intptr_t ret = Syscall::SandboxSyscallRaw(nr, args, &err_status);

  if (err_status) {
    // On error, MIPS returns errno from syscall instead of -errno.
    // The purpose of this negation is for SandboxSyscall() to behave
    // more like it would on other architectures.
    ret = -ret;
  }
#elif defined(__aarch64__)
  intptr_t ret;
  {
    // The syscall number goes in (and the result comes back in) x0; the
    // argument array pointer goes in x6.
    register intptr_t inout __asm__("x0") = nr;
    register const intptr_t* data __asm__("x6") = args;
    asm volatile("bl SyscallAsm\n"
                 : "=r"(inout)
                 : "0"(inout), "r"(data)
                 : "memory", "x1", "x2", "x3", "x4", "x5", "x8", "x30");
    ret = inout;
  }

#else
#error "Unimplemented architecture"
#endif
  return ret;
}
376
377void Syscall::PutValueInUcontext(intptr_t ret_val, ucontext_t* ctx) {
378#if defined(__mips__)
379  // Mips ABI states that on error a3 CPU register has non zero value and if
380  // there is no error, it should be zero.
381  if (ret_val <= -1 && ret_val >= -4095) {
382    // |ret_val| followes the Syscall::Call() convention of being -errno on
383    // errors. In order to write correct value to return register this sign
384    // needs to be changed back.
385    ret_val = -ret_val;
386    SECCOMP_PARM4(ctx) = 1;
387  } else
388    SECCOMP_PARM4(ctx) = 0;
389#endif
390  SECCOMP_RESULT(ctx) = static_cast<greg_t>(ret_val);
391}
392
#if defined(__mips__)
// Performs a raw MIPS system call through SyscallAsm. Unlike Syscall::Call(),
// the return value is not negated on error; instead, |*err_ret| is set to a
// non-zero value when the kernel reported an error (MIPS flags errors via the
// a3 register rather than via a negative return value).
intptr_t Syscall::SandboxSyscallRaw(int nr,
                                    const intptr_t* args,
                                    intptr_t* err_ret) {
  // The system call number goes to SyscallAsm in v0, which also receives the
  // kernel's return value.
  register intptr_t ret __asm__("v0") = nr;
  // a3 register becomes non zero on error.
  register intptr_t err_stat __asm__("a3") = 0;
  {
    // SyscallAsm expects the pointer to the argument array in a0.
    register const intptr_t* data __asm__("a0") = args;
    asm volatile(
        "la $t9, SyscallAsm\n"
        "jalr $t9\n"
        " nop\n"
        : "=r"(ret), "=r"(err_stat)
        : "0"(ret),
          "r"(data)
          // a2 is in the clobber list so that the compiler does not rely on
          // it retaining its value across the call.
        : "memory", "ra", "t9", "a2");
  }

  // Set an error status so it can be used outside of this function
  *err_ret = err_stat;

  return ret;
}
#endif  // defined(__mips__)
420
421}  // namespace sandbox
422