// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "sandbox/linux/seccomp-bpf/syscall.h"

#include <asm/unistd.h>
#include <errno.h>

#include "base/basictypes.h"
#include "base/logging.h"
#include "sandbox/linux/seccomp-bpf/linux_seccomp.h"

namespace sandbox {

namespace {

#if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \
    defined(ARCH_CPU_MIPS_FAMILY)
// Number that's not currently used by any Linux kernel ABIs.
const int kInvalidSyscallNumber = 0x351d3;
#else
#error Unrecognized architecture
#endif

asm(// We need to be able to tell the kernel exactly where we made a
    // system call. The C++ compiler likes to sometimes clone or
    // inline code, which would inadvertently end up duplicating
    // the entry point.
    // "gcc" can suppress code duplication with suitable function
    // attributes, but "clang" doesn't have this ability.
    // The "clang" developer mailing list suggested that the correct
    // and portable solution is a file-scope assembly block.
    // N.B. We do mark our code as a proper function so that backtraces
    // work correctly. But we make absolutely no attempt to use the
    // ABI's calling conventions for passing arguments. We will only
    // ever be called from assembly code and thus can pick more
    // suitable calling conventions.
#if defined(__i386__)
    ".text\n"
    ".align 16, 0x90\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.cfi_startproc\n"
    // Check if "%eax" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "int $0x80". This address can be
    // used as a marker that BPF code inspects.
    "test %eax, %eax\n"
    "jge 1f\n"
    // Always make sure that our code is position-independent, or
    // address space randomization might not work on i386. This means,
    // we can't use "lea", but instead have to rely on "call/pop".
    "call 0f; .cfi_adjust_cfa_offset 4\n"
    "0:pop %eax; .cfi_adjust_cfa_offset -4\n"
    "addl $2f-0b, %eax\n"
    "ret\n"
    // Save registers that we don't want to clobber. On i386, we need to
    // save relatively aggressively, as there are a couple of registers
    // that are used internally (e.g. %ebx for position-independent
    // code, and %ebp for the frame pointer), and as we need to keep at
    // least a few registers available for the register allocator.
    "1:push %esi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset esi, 0\n"
    "push %edi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset edi, 0\n"
    "push %ebx; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebx, 0\n"
    "push %ebp; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebp, 0\n"
    // Copy entries from the array holding the arguments into the
    // correct CPU registers.
    "movl 0(%edi), %ebx\n"
    "movl 4(%edi), %ecx\n"
    "movl 8(%edi), %edx\n"
    "movl 12(%edi), %esi\n"
    "movl 20(%edi), %ebp\n"
    // %edi is loaded last, as it is the base pointer for the argument
    // array itself.
    "movl 16(%edi), %edi\n"
    // Enter the kernel.
    "int $0x80\n"
    // This is our "magic" return address that the BPF filter sees.
    "2:"
    // Restore any clobbered registers that we didn't declare to the
    // compiler.
    "pop %ebp; .cfi_restore ebp; .cfi_adjust_cfa_offset -4\n"
    "pop %ebx; .cfi_restore ebx; .cfi_adjust_cfa_offset -4\n"
    "pop %edi; .cfi_restore edi; .cfi_adjust_cfa_offset -4\n"
    "pop %esi; .cfi_restore esi; .cfi_adjust_cfa_offset -4\n"
    "ret\n"
    ".cfi_endproc\n"
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__x86_64__)
    ".text\n"
    ".align 16, 0x90\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.cfi_startproc\n"
    // Check if "%rax" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "syscall". This address can be
    // used as a marker that BPF code inspects.
    "test %rax, %rax\n"
    "jge 1f\n"
    // Always make sure that our code is position-independent, or the
    // linker will throw a hissy fit on x86-64.
    "call 0f; .cfi_adjust_cfa_offset 8\n"
    "0:pop %rax; .cfi_adjust_cfa_offset -8\n"
    "addq $2f-0b, %rax\n"
    "ret\n"
    // We declared all clobbered registers to the compiler. On x86-64,
    // there really isn't much of a problem with register pressure. So,
    // we can go ahead and directly copy the entries from the arguments
    // array into the appropriate CPU registers.
    "1:movq 0(%r12), %rdi\n"
    "movq 8(%r12), %rsi\n"
    "movq 16(%r12), %rdx\n"
    "movq 24(%r12), %r10\n"
    "movq 32(%r12), %r8\n"
    "movq 40(%r12), %r9\n"
    // Enter the kernel.
    "syscall\n"
    // This is our "magic" return address that the BPF filter sees.
    "2:ret\n"
    ".cfi_endproc\n"
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__arm__)
    // Throughout this file, we use the same mode (ARM vs. thumb)
    // that the C++ compiler uses. This means, when transferring control
    // from C++ to assembly code, we do not need to switch modes (e.g.
    // by using the "bx" instruction). It also means that our assembly
    // code should not be invoked directly from code that lives in
    // other compilation units, as we don't bother implementing thumb
    // interworking. That's OK, as we don't make any of the assembly
    // symbols public. They are all local to this file.
    ".text\n"
    ".align 2\n"
    ".type SyscallAsm, %function\n"
#if defined(__thumb__)
    ".thumb_func\n"
#else
    ".arm\n"
#endif
    "SyscallAsm:.fnstart\n"
    "@ args = 0, pretend = 0, frame = 8\n"
    "@ frame_needed = 1, uses_anonymous_args = 0\n"
#if defined(__thumb__)
    ".cfi_startproc\n"
    "push {r7, lr}\n"
    ".cfi_offset 14, -4\n"
    ".cfi_offset 7, -8\n"
    "mov r7, sp\n"
    ".cfi_def_cfa_register 7\n"
    ".cfi_def_cfa_offset 8\n"
#else
    "stmfd sp!, {fp, lr}\n"
    "add fp, sp, #4\n"
#endif
    // Check if "r0" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "swi 0". This address can be
    // used as a marker that BPF code inspects.
    "cmp r0, #0\n"
    "bge 1f\n"
    "adr r0, 2f\n"
    "b 2f\n"
    // We declared (almost) all clobbered registers to the compiler. On
    // ARM there is no particular register pressure. So, we can go
    // ahead and directly copy the entries from the arguments array
    // into the appropriate CPU registers.
    "1:ldr r5, [r6, #20]\n"
    "ldr r4, [r6, #16]\n"
    "ldr r3, [r6, #12]\n"
    "ldr r2, [r6, #8]\n"
    "ldr r1, [r6, #4]\n"
    // The system call number goes into r7; in thumb mode r7 was the
    // frame pointer, which we saved on entry above.
    "mov r7, r0\n"
    "ldr r0, [r6, #0]\n"
    // Enter the kernel
    "swi 0\n"
// Restore the frame pointer. Also restore the program counter from
// the link register; this makes us return to the caller.
#if defined(__thumb__)
    "2:pop {r7, pc}\n"
    ".cfi_endproc\n"
#else
    "2:ldmfd sp!, {fp, pc}\n"
#endif
    ".fnend\n"
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__mips__)
    ".text\n"
    ".align 4\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.ent SyscallAsm\n"
    ".frame $sp, 40, $ra\n"
    ".set push\n"
    ".set noreorder\n"
    // Space for the four stack-passed arguments plus the saved return
    // address ($ra at offset 36).
    "addiu $sp, $sp, -40\n"
    "sw $ra, 36($sp)\n"
    // Check if "v0" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "syscall". This address can be
    // used as a marker that BPF code inspects.
    "bgez $v0, 1f\n"
    " nop\n"
    "la $v0, 2f\n"
    "b 2f\n"
    " nop\n"
    // On MIPS first four arguments go to registers a0 - a3 and any
    // argument after that goes to stack. We can go ahead and directly
    // copy the entries from the arguments array into the appropriate
    // CPU registers and on the stack.
    "1:lw $a3, 28($a0)\n"
    "lw $a2, 24($a0)\n"
    "lw $a1, 20($a0)\n"
    "lw $t0, 16($a0)\n"
    "sw $a3, 28($sp)\n"
    "sw $a2, 24($sp)\n"
    "sw $a1, 20($sp)\n"
    "sw $t0, 16($sp)\n"
    "lw $a3, 12($a0)\n"
    "lw $a2, 8($a0)\n"
    "lw $a1, 4($a0)\n"
    // $a0 is loaded last, as it is the base pointer for the argument
    // array itself.
    "lw $a0, 0($a0)\n"
    // Enter the kernel
    "syscall\n"
    // This is our "magic" return address that the BPF filter sees.
    // Restore the return address from the stack.
    "2:lw $ra, 36($sp)\n"
    "jr $ra\n"
    // Stack adjustment happens in the branch delay slot of "jr".
    " addiu $sp, $sp, 40\n"
    ".set pop\n"
    ".end SyscallAsm\n"
    ".size SyscallAsm,.-SyscallAsm\n"
#elif defined(__aarch64__)
    ".text\n"
    ".align 2\n"
    ".type SyscallAsm, %function\n"
    "SyscallAsm:\n"
    ".cfi_startproc\n"
    // Check if "x0" is negative. If so, do not attempt to make a
    // system call. Instead, return the address of the "magic" return
    // address at label 2 so that BPF code can use it as a marker.
    "cmp x0, #0\n"
    "b.ge 1f\n"
    "adr x0,2f\n"
    "b 2f\n"
    // Copy the entries from the arguments array (base pointer in x6)
    // into the appropriate CPU registers; the system call number moves
    // from x0 into x8.
    "1:ldr x5, [x6, #40]\n"
    "ldr x4, [x6, #32]\n"
    "ldr x3, [x6, #24]\n"
    "ldr x2, [x6, #16]\n"
    "ldr x1, [x6, #8]\n"
    "mov x8, x0\n"
    "ldr x0, [x6, #0]\n"
    // Enter the kernel
    "svc 0\n"
    // This is our "magic" return address that the BPF filter sees.
    "2:ret\n"
    ".cfi_endproc\n"
    ".size SyscallAsm, .-SyscallAsm\n"
#endif
    );  // asm

}  // namespace

// Makes a system call with a number that no Linux kernel ABI uses.
// Useful for triggering the BPF filter in a predictable way.
intptr_t Syscall::InvalidCall() {
  // Explicitly pass eight zero arguments just in case.
  return Call(kInvalidSyscallNumber, 0, 0, 0, 0, 0, 0, 0, 0);
}

// Invokes system call |nr| with arguments |p0|..|p7| by transferring
// control to the file-scope SyscallAsm stub above (bypassing libc).
// Returns the raw kernel return value: the result on success, or
// -errno on failure (on MIPS the sign is fixed up below so that the
// convention matches the other architectures).
// If |nr| is negative, no system call is made; instead the address of
// the syscall instruction's return site is returned so that BPF code
// can use it as a marker.
intptr_t Syscall::Call(int nr,
                       intptr_t p0,
                       intptr_t p1,
                       intptr_t p2,
                       intptr_t p3,
                       intptr_t p4,
                       intptr_t p5,
                       intptr_t p6,
                       intptr_t p7) {
  // We rely on "intptr_t" to be the exact size as a "void *". This is
  // typically true, but just in case, we add a check. The language
  // specification allows platforms some leeway in cases, where
  // "sizeof(void *)" is not the same as "sizeof(void (*)())". We expect
  // that this would only be an issue for IA64, which we are currently not
  // planning on supporting. And it is even possible that this would work
  // on IA64, but for lack of actual hardware, I cannot test.
  COMPILE_ASSERT(sizeof(void*) == sizeof(intptr_t),
                 pointer_types_and_intptr_must_be_exactly_the_same_size);

  // TODO(nedeljko): Enable use of more than six parameters on architectures
  // where that makes sense.
#if defined(__mips__)
  const intptr_t args[8] = {p0, p1, p2, p3, p4, p5, p6, p7};
#else
  DCHECK_EQ(p6, 0) << " Support for syscalls with more than six arguments not "
                      "added for this architecture";
  DCHECK_EQ(p7, 0) << " Support for syscalls with more than six arguments not "
                      "added for this architecture";
  const intptr_t args[6] = {p0, p1, p2, p3, p4, p5};
#endif  // defined(__mips__)

// Invoke our file-scope assembly code. The constraints have been picked
// carefully to match what the rest of the assembly code expects in input,
// output, and clobbered registers.
#if defined(__i386__)
  intptr_t ret = nr;
  asm volatile(
      "call SyscallAsm\n"
      // N.B. These are not the calling conventions normally used by the ABI.
      : "=a"(ret)
      : "0"(ret), "D"(args)
      : "cc", "esp", "memory", "ecx", "edx");
#elif defined(__x86_64__)
  intptr_t ret = nr;
  {
    // SyscallAsm expects the argument array in r12 (a callee-saved
    // register, so the kernel entry cannot clobber it).
    register const intptr_t* data __asm__("r12") = args;
    asm volatile(
        "lea -128(%%rsp), %%rsp\n"  // Avoid red zone.
        "call SyscallAsm\n"
        "lea 128(%%rsp), %%rsp\n"
        // N.B. These are not the calling conventions normally used by the ABI.
        : "=a"(ret)
        : "0"(ret), "r"(data)
        : "cc",
          "rsp",
          "memory",
          "rcx",
          "rdi",
          "rsi",
          "rdx",
          "r8",
          "r9",
          "r10",
          "r11");
  }
#elif defined(__arm__)
  intptr_t ret;
  {
    // SyscallAsm takes the syscall number in r0 and the argument array
    // in r6, and returns the result in r0.
    register intptr_t inout __asm__("r0") = nr;
    register const intptr_t* data __asm__("r6") = args;
    asm volatile(
        "bl SyscallAsm\n"
        // N.B. These are not the calling conventions normally used by the ABI.
        : "=r"(inout)
        : "0"(inout), "r"(data)
        : "cc",
          "lr",
          "memory",
          "r1",
          "r2",
          "r3",
          "r4",
          "r5"
#if !defined(__thumb__)
          // In thumb mode, we cannot use "r7" as a general purpose register, as
          // it is our frame pointer. We have to manually manage and preserve
          // it.
          // In ARM mode, we have a dedicated frame pointer register and "r7" is
          // thus available as a general purpose register. We don't preserve it,
          // but instead mark it as clobbered.
          ,
          "r7"
#endif  // !defined(__thumb__)
        );
    ret = inout;
  }
#elif defined(__mips__)
  int err_status;
  intptr_t ret = Syscall::SandboxSyscallRaw(nr, args, &err_status);

  if (err_status) {
    // On error, MIPS returns errno from syscall instead of -errno.
    // The purpose of this negation is for SandboxSyscall() to behave
    // more like it would on other architectures.
    ret = -ret;
  }
#elif defined(__aarch64__)
  intptr_t ret;
  {
    // SyscallAsm takes the syscall number in x0 and the argument array
    // in x6, and returns the result in x0.
    register intptr_t inout __asm__("x0") = nr;
    register const intptr_t* data __asm__("x6") = args;
    asm volatile("bl SyscallAsm\n"
                 : "=r"(inout)
                 : "0"(inout), "r"(data)
                 : "memory", "x1", "x2", "x3", "x4", "x5", "x8", "x30");
    ret = inout;
  }

#else
#error "Unimplemented architecture"
#endif
  return ret;
}

// Writes |ret_val| into the CPU register(s) of |ctx| that hold a system
// call's result, following each architecture's kernel convention.
void Syscall::PutValueInUcontext(intptr_t ret_val, ucontext_t* ctx) {
#if defined(__mips__)
  // Mips ABI states that on error a3 CPU register has non zero value and if
  // there is no error, it should be zero.
  if (ret_val <= -1 && ret_val >= -4095) {
    // |ret_val| follows the Syscall::Call() convention of being -errno on
    // errors. In order to write correct value to return register this sign
    // needs to be changed back.
    ret_val = -ret_val;
    SECCOMP_PARM4(ctx) = 1;
  } else
    SECCOMP_PARM4(ctx) = 0;
#endif
  SECCOMP_RESULT(ctx) = static_cast<greg_t>(ret_val);
}

#if defined(__mips__)
// Invokes SyscallAsm for MIPS and reports the raw result plus the
// kernel's error flag (a3 register) through |err_ret|. The caller is
// responsible for negating the result on error (see Syscall::Call).
intptr_t Syscall::SandboxSyscallRaw(int nr,
                                    const intptr_t* args,
                                    intptr_t* err_ret) {
  register intptr_t ret __asm__("v0") = nr;
  // a3 register becomes non zero on error.
  register intptr_t err_stat __asm__("a3") = 0;
  {
    register const intptr_t* data __asm__("a0") = args;
    asm volatile(
        // Indirect call through $t9, as required for position-independent
        // MIPS calls.
        "la $t9, SyscallAsm\n"
        "jalr $t9\n"
        " nop\n"
        : "=r"(ret), "=r"(err_stat)
        : "0"(ret),
          "r"(data)
          // a2 is in the clobber list so inline assembly can not change its
          // value.
        : "memory", "ra", "t9", "a2");
  }

  // Set an error status so it can be used outside of this function
  *err_ret = err_stat;

  return ret;
}
#endif  // defined(__mips__)

}  // namespace sandbox