1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "sandbox/linux/seccomp-bpf/syscall.h" 6 7#include <errno.h> 8#include <stdint.h> 9 10#include "base/logging.h" 11#include "sandbox/linux/bpf_dsl/seccomp_macros.h" 12 13namespace sandbox { 14 15namespace { 16 17#if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \ 18 defined(ARCH_CPU_MIPS_FAMILY) 19// Number that's not currently used by any Linux kernel ABIs. 20const int kInvalidSyscallNumber = 0x351d3; 21#else 22#error Unrecognized architecture 23#endif 24 25asm(// We need to be able to tell the kernel exactly where we made a 26 // system call. The C++ compiler likes to sometimes clone or 27 // inline code, which would inadvertently end up duplicating 28 // the entry point. 29 // "gcc" can suppress code duplication with suitable function 30 // attributes, but "clang" doesn't have this ability. 31 // The "clang" developer mailing list suggested that the correct 32 // and portable solution is a file-scope assembly block. 33 // N.B. We do mark our code as a proper function so that backtraces 34 // work correctly. But we make absolutely no attempt to use the 35 // ABI's calling conventions for passing arguments. We will only 36 // ever be called from assembly code and thus can pick more 37 // suitable calling conventions. 38#if defined(__i386__) 39 ".text\n" 40 ".align 16, 0x90\n" 41 ".type SyscallAsm, @function\n" 42 "SyscallAsm:.cfi_startproc\n" 43 // Check if "%eax" is negative. If so, do not attempt to make a 44 // system call. Instead, compute the return address that is visible 45 // to the kernel after we execute "int $0x80". This address can be 46 // used as a marker that BPF code inspects. 47 "test %eax, %eax\n" 48 "jge 1f\n" 49 // Always, make sure that our code is position-independent, or 50 // address space randomization might not work on i386. This means, 51 // we can't use "lea", but instead have to rely on "call/pop". 52 "call 0f; .cfi_adjust_cfa_offset 4\n" 53 "0:pop %eax; .cfi_adjust_cfa_offset -4\n" 54 "addl $2f-0b, %eax\n" 55 "ret\n" 56 // Save register that we don't want to clobber. On i386, we need to 57 // save relatively aggressively, as there are a couple or registers 58 // that are used internally (e.g. %ebx for position-independent 59 // code, and %ebp for the frame pointer), and as we need to keep at 60 // least a few registers available for the register allocator. 61 "1:push %esi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset esi, 0\n" 62 "push %edi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset edi, 0\n" 63 "push %ebx; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebx, 0\n" 64 "push %ebp; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebp, 0\n" 65 // Copy entries from the array holding the arguments into the 66 // correct CPU registers. 67 "movl 0(%edi), %ebx\n" 68 "movl 4(%edi), %ecx\n" 69 "movl 8(%edi), %edx\n" 70 "movl 12(%edi), %esi\n" 71 "movl 20(%edi), %ebp\n" 72 "movl 16(%edi), %edi\n" 73 // Enter the kernel. 74 "int $0x80\n" 75 // This is our "magic" return address that the BPF filter sees. 76 "2:" 77 // Restore any clobbered registers that we didn't declare to the 78 // compiler. 79 "pop %ebp; .cfi_restore ebp; .cfi_adjust_cfa_offset -4\n" 80 "pop %ebx; .cfi_restore ebx; .cfi_adjust_cfa_offset -4\n" 81 "pop %edi; .cfi_restore edi; .cfi_adjust_cfa_offset -4\n" 82 "pop %esi; .cfi_restore esi; .cfi_adjust_cfa_offset -4\n" 83 "ret\n" 84 ".cfi_endproc\n" 85 "9:.size SyscallAsm, 9b-SyscallAsm\n" 86#elif defined(__x86_64__) 87 ".text\n" 88 ".align 16, 0x90\n" 89 ".type SyscallAsm, @function\n" 90 "SyscallAsm:.cfi_startproc\n" 91 // Check if "%rdi" is negative. If so, do not attempt to make a 92 // system call. Instead, compute the return address that is visible 93 // to the kernel after we execute "syscall". This address can be 94 // used as a marker that BPF code inspects. 95 "test %rdi, %rdi\n" 96 "jge 1f\n" 97 // Always make sure that our code is position-independent, or the 98 // linker will throw a hissy fit on x86-64. 99 "lea 2f(%rip), %rax\n" 100 "ret\n" 101 // Now we load the registers used to pass arguments to the system 102 // call: system call number in %rax, and arguments in %rdi, %rsi, 103 // %rdx, %r10, %r8, %r9. Note: These are all caller-save registers 104 // (only %rbx, %rbp, %rsp, and %r12-%r15 are callee-save), so no 105 // need to worry here about spilling registers or CFI directives. 106 "1:movq %rdi, %rax\n" 107 "movq 0(%rsi), %rdi\n" 108 "movq 16(%rsi), %rdx\n" 109 "movq 24(%rsi), %r10\n" 110 "movq 32(%rsi), %r8\n" 111 "movq 40(%rsi), %r9\n" 112 "movq 8(%rsi), %rsi\n" 113 // Enter the kernel. 114 "syscall\n" 115 // This is our "magic" return address that the BPF filter sees. 116 "2:ret\n" 117 ".cfi_endproc\n" 118 "9:.size SyscallAsm, 9b-SyscallAsm\n" 119#elif defined(__arm__) 120 // Throughout this file, we use the same mode (ARM vs. thumb) 121 // that the C++ compiler uses. This means, when transfering control 122 // from C++ to assembly code, we do not need to switch modes (e.g. 123 // by using the "bx" instruction). It also means that our assembly 124 // code should not be invoked directly from code that lives in 125 // other compilation units, as we don't bother implementing thumb 126 // interworking. That's OK, as we don't make any of the assembly 127 // symbols public. They are all local to this file. 128 ".text\n" 129 ".align 2\n" 130 ".type SyscallAsm, %function\n" 131#if defined(__thumb__) 132 ".thumb_func\n" 133#else 134 ".arm\n" 135#endif 136 "SyscallAsm:\n" 137#if !defined(__native_client_nonsfi__) 138 // .fnstart and .fnend pseudo operations creates unwind table. 139 // It also creates a reference to the symbol __aeabi_unwind_cpp_pr0, which 140 // is not provided by PNaCl toolchain. Disable it. 141 ".fnstart\n" 142#endif 143 "@ args = 0, pretend = 0, frame = 8\n" 144 "@ frame_needed = 1, uses_anonymous_args = 0\n" 145#if defined(__thumb__) 146 ".cfi_startproc\n" 147 "push {r7, lr}\n" 148 ".save {r7, lr}\n" 149 ".cfi_offset 14, -4\n" 150 ".cfi_offset 7, -8\n" 151 ".cfi_def_cfa_offset 8\n" 152#else 153 "stmfd sp!, {fp, lr}\n" 154 "add fp, sp, #4\n" 155#endif 156 // Check if "r0" is negative. If so, do not attempt to make a 157 // system call. Instead, compute the return address that is visible 158 // to the kernel after we execute "swi 0". This address can be 159 // used as a marker that BPF code inspects. 160 "cmp r0, #0\n" 161 "bge 1f\n" 162 "adr r0, 2f\n" 163 "b 2f\n" 164 // We declared (almost) all clobbered registers to the compiler. On 165 // ARM there is no particular register pressure. So, we can go 166 // ahead and directly copy the entries from the arguments array 167 // into the appropriate CPU registers. 168 "1:ldr r5, [r6, #20]\n" 169 "ldr r4, [r6, #16]\n" 170 "ldr r3, [r6, #12]\n" 171 "ldr r2, [r6, #8]\n" 172 "ldr r1, [r6, #4]\n" 173 "mov r7, r0\n" 174 "ldr r0, [r6, #0]\n" 175 // Enter the kernel 176 "swi 0\n" 177// Restore the frame pointer. Also restore the program counter from 178// the link register; this makes us return to the caller. 179#if defined(__thumb__) 180 "2:pop {r7, pc}\n" 181 ".cfi_endproc\n" 182#else 183 "2:ldmfd sp!, {fp, pc}\n" 184#endif 185#if !defined(__native_client_nonsfi__) 186 // Do not use .fnstart and .fnend for PNaCl toolchain. See above comment, 187 // for more details. 188 ".fnend\n" 189#endif 190 "9:.size SyscallAsm, 9b-SyscallAsm\n" 191#elif defined(__mips__) 192 ".text\n" 193 ".align 4\n" 194 ".type SyscallAsm, @function\n" 195 "SyscallAsm:.ent SyscallAsm\n" 196 ".frame $sp, 40, $ra\n" 197 ".set push\n" 198 ".set noreorder\n" 199 "addiu $sp, $sp, -40\n" 200 "sw $ra, 36($sp)\n" 201 // Check if "v0" is negative. If so, do not attempt to make a 202 // system call. Instead, compute the return address that is visible 203 // to the kernel after we execute "syscall". This address can be 204 // used as a marker that BPF code inspects. 205 "bgez $v0, 1f\n" 206 " nop\n" 207 "la $v0, 2f\n" 208 "b 2f\n" 209 " nop\n" 210 // On MIPS first four arguments go to registers a0 - a3 and any 211 // argument after that goes to stack. We can go ahead and directly 212 // copy the entries from the arguments array into the appropriate 213 // CPU registers and on the stack. 214 "1:lw $a3, 28($a0)\n" 215 "lw $a2, 24($a0)\n" 216 "lw $a1, 20($a0)\n" 217 "lw $t0, 16($a0)\n" 218 "sw $a3, 28($sp)\n" 219 "sw $a2, 24($sp)\n" 220 "sw $a1, 20($sp)\n" 221 "sw $t0, 16($sp)\n" 222 "lw $a3, 12($a0)\n" 223 "lw $a2, 8($a0)\n" 224 "lw $a1, 4($a0)\n" 225 "lw $a0, 0($a0)\n" 226 // Enter the kernel 227 "syscall\n" 228 // This is our "magic" return address that the BPF filter sees. 229 // Restore the return address from the stack. 230 "2:lw $ra, 36($sp)\n" 231 "jr $ra\n" 232 " addiu $sp, $sp, 40\n" 233 ".set pop\n" 234 ".end SyscallAsm\n" 235 ".size SyscallAsm,.-SyscallAsm\n" 236#elif defined(__aarch64__) 237 ".text\n" 238 ".align 2\n" 239 ".type SyscallAsm, %function\n" 240 "SyscallAsm:\n" 241 ".cfi_startproc\n" 242 "cmp x0, #0\n" 243 "b.ge 1f\n" 244 "adr x0,2f\n" 245 "b 2f\n" 246 "1:ldr x5, [x6, #40]\n" 247 "ldr x4, [x6, #32]\n" 248 "ldr x3, [x6, #24]\n" 249 "ldr x2, [x6, #16]\n" 250 "ldr x1, [x6, #8]\n" 251 "mov x8, x0\n" 252 "ldr x0, [x6, #0]\n" 253 // Enter the kernel 254 "svc 0\n" 255 "2:ret\n" 256 ".cfi_endproc\n" 257 ".size SyscallAsm, .-SyscallAsm\n" 258#endif 259 ); // asm 260 261#if defined(__x86_64__) 262extern "C" { 263intptr_t SyscallAsm(intptr_t nr, const intptr_t args[6]); 264} 265#endif 266 267} // namespace 268 269intptr_t Syscall::InvalidCall() { 270 // Explicitly pass eight zero arguments just in case. 271 return Call(kInvalidSyscallNumber, 0, 0, 0, 0, 0, 0, 0, 0); 272} 273 274intptr_t Syscall::Call(int nr, 275 intptr_t p0, 276 intptr_t p1, 277 intptr_t p2, 278 intptr_t p3, 279 intptr_t p4, 280 intptr_t p5, 281 intptr_t p6, 282 intptr_t p7) { 283 // We rely on "intptr_t" to be the exact size as a "void *". This is 284 // typically true, but just in case, we add a check. The language 285 // specification allows platforms some leeway in cases, where 286 // "sizeof(void *)" is not the same as "sizeof(void (*)())". We expect 287 // that this would only be an issue for IA64, which we are currently not 288 // planning on supporting. And it is even possible that this would work 289 // on IA64, but for lack of actual hardware, I cannot test. 290 static_assert(sizeof(void*) == sizeof(intptr_t), 291 "pointer types and intptr_t must be exactly the same size"); 292 293 // TODO(nedeljko): Enable use of more than six parameters on architectures 294 // where that makes sense. 295#if defined(__mips__) 296 const intptr_t args[8] = {p0, p1, p2, p3, p4, p5, p6, p7}; 297#else 298 DCHECK_EQ(p6, 0) << " Support for syscalls with more than six arguments not " 299 "added for this architecture"; 300 DCHECK_EQ(p7, 0) << " Support for syscalls with more than six arguments not " 301 "added for this architecture"; 302 const intptr_t args[6] = {p0, p1, p2, p3, p4, p5}; 303#endif // defined(__mips__) 304 305// Invoke our file-scope assembly code. The constraints have been picked 306// carefully to match what the rest of the assembly code expects in input, 307// output, and clobbered registers. 308#if defined(__i386__) 309 intptr_t ret = nr; 310 asm volatile( 311 "call SyscallAsm\n" 312 // N.B. These are not the calling conventions normally used by the ABI. 313 : "=a"(ret) 314 : "0"(ret), "D"(args) 315 : "cc", "esp", "memory", "ecx", "edx"); 316#elif defined(__x86_64__) 317 intptr_t ret = SyscallAsm(nr, args); 318#elif defined(__arm__) 319 intptr_t ret; 320 { 321 register intptr_t inout __asm__("r0") = nr; 322 register const intptr_t* data __asm__("r6") = args; 323 asm volatile( 324 "bl SyscallAsm\n" 325 // N.B. These are not the calling conventions normally used by the ABI. 326 : "=r"(inout) 327 : "0"(inout), "r"(data) 328 : "cc", 329 "lr", 330 "memory", 331 "r1", 332 "r2", 333 "r3", 334 "r4", 335 "r5" 336#if !defined(__thumb__) 337 // In thumb mode, we cannot use "r7" as a general purpose register, as 338 // it is our frame pointer. We have to manually manage and preserve 339 // it. 340 // In ARM mode, we have a dedicated frame pointer register and "r7" is 341 // thus available as a general purpose register. We don't preserve it, 342 // but instead mark it as clobbered. 343 , 344 "r7" 345#endif // !defined(__thumb__) 346 ); 347 ret = inout; 348 } 349#elif defined(__mips__) 350 int err_status; 351 intptr_t ret = Syscall::SandboxSyscallRaw(nr, args, &err_status); 352 353 if (err_status) { 354 // On error, MIPS returns errno from syscall instead of -errno. 355 // The purpose of this negation is for SandboxSyscall() to behave 356 // more like it would on other architectures. 357 ret = -ret; 358 } 359#elif defined(__aarch64__) 360 intptr_t ret; 361 { 362 register intptr_t inout __asm__("x0") = nr; 363 register const intptr_t* data __asm__("x6") = args; 364 asm volatile("bl SyscallAsm\n" 365 : "=r"(inout) 366 : "0"(inout), "r"(data) 367 : "memory", "x1", "x2", "x3", "x4", "x5", "x8", "x30"); 368 ret = inout; 369 } 370 371#else 372#error "Unimplemented architecture" 373#endif 374 return ret; 375} 376 377void Syscall::PutValueInUcontext(intptr_t ret_val, ucontext_t* ctx) { 378#if defined(__mips__) 379 // Mips ABI states that on error a3 CPU register has non zero value and if 380 // there is no error, it should be zero. 381 if (ret_val <= -1 && ret_val >= -4095) { 382 // |ret_val| followes the Syscall::Call() convention of being -errno on 383 // errors. In order to write correct value to return register this sign 384 // needs to be changed back. 385 ret_val = -ret_val; 386 SECCOMP_PARM4(ctx) = 1; 387 } else 388 SECCOMP_PARM4(ctx) = 0; 389#endif 390 SECCOMP_RESULT(ctx) = static_cast<greg_t>(ret_val); 391} 392 393#if defined(__mips__) 394intptr_t Syscall::SandboxSyscallRaw(int nr, 395 const intptr_t* args, 396 intptr_t* err_ret) { 397 register intptr_t ret __asm__("v0") = nr; 398 // a3 register becomes non zero on error. 399 register intptr_t err_stat __asm__("a3") = 0; 400 { 401 register const intptr_t* data __asm__("a0") = args; 402 asm volatile( 403 "la $t9, SyscallAsm\n" 404 "jalr $t9\n" 405 " nop\n" 406 : "=r"(ret), "=r"(err_stat) 407 : "0"(ret), 408 "r"(data) 409 // a2 is in the clober list so inline assembly can not change its 410 // value. 411 : "memory", "ra", "t9", "a2"); 412 } 413 414 // Set an error status so it can be used outside of this function 415 *err_ret = err_stat; 416 417 return ret; 418} 419#endif // defined(__mips__) 420 421} // namespace sandbox 422