1// Copyright 2014 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "components/nacl/loader/nonsfi/nonsfi_sandbox.h" 6 7#include <errno.h> 8#include <fcntl.h> 9#include <linux/futex.h> 10#include <linux/net.h> 11#include <sys/mman.h> 12#include <sys/prctl.h> 13#include <sys/ptrace.h> 14#include <sys/socket.h> 15#include <sys/syscall.h> 16#include <sys/time.h> 17 18#include "base/basictypes.h" 19#include "base/logging.h" 20#include "base/time/time.h" 21#include "build/build_config.h" 22#include "content/public/common/sandbox_init.h" 23#include "sandbox/linux/bpf_dsl/bpf_dsl.h" 24#include "sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h" 25#include "sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.h" 26#include "sandbox/linux/services/linux_syscalls.h" 27 28#if defined(__arm__) && !defined(MAP_STACK) 29// Chrome OS Daisy (ARM) build environment has old headers. 30#define MAP_STACK 0x20000 31#endif 32 33#define CASES SANDBOX_BPF_DSL_CASES 34 35using sandbox::CrashSIGSYS; 36using sandbox::CrashSIGSYSClone; 37using sandbox::CrashSIGSYSFutex; 38using sandbox::CrashSIGSYSPrctl; 39using sandbox::bpf_dsl::Allow; 40using sandbox::bpf_dsl::Arg; 41using sandbox::bpf_dsl::BoolExpr; 42using sandbox::bpf_dsl::Error; 43using sandbox::bpf_dsl::If; 44using sandbox::bpf_dsl::ResultExpr; 45 46namespace nacl { 47namespace nonsfi { 48namespace { 49 50ResultExpr RestrictFcntlCommands() { 51 const Arg<int> cmd(1); 52 const Arg<long> long_arg(2); 53 54 // We allow following cases: 55 // 1. F_SETFD + FD_CLOEXEC: libevent's epoll_init uses this. 56 // 2. F_GETFL: Used by SetNonBlocking in 57 // message_pump_libevent.cc and Channel::ChannelImpl::CreatePipe 58 // in ipc_channel_posix.cc. Note that the latter does not work 59 // with EPERM. 60 // 3. F_SETFL: Used by evutil_make_socket_nonblocking in 61 // libevent and SetNonBlocking. As the latter mix O_NONBLOCK to 62 // the return value of F_GETFL, so we need to allow O_ACCMODE in 63 // addition to O_NONBLOCK. 64 const uint64_t kAllowedMask = O_ACCMODE | O_NONBLOCK; 65 return If((cmd == F_SETFD && long_arg == FD_CLOEXEC) || cmd == F_GETFL || 66 (cmd == F_SETFL && (long_arg & ~kAllowedMask) == 0), 67 Allow()).Else(CrashSIGSYS()); 68} 69 70ResultExpr RestrictClone() { 71 // We allow clone only for new thread creation. 72 const Arg<int> flags(0); 73 return If(flags == (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | 74 CLONE_THREAD | CLONE_SYSVSEM | CLONE_SETTLS | 75 CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID), 76 Allow()).Else(CrashSIGSYSClone()); 77} 78 79ResultExpr RestrictFutexOperation() { 80 // TODO(hamaji): Allow only FUTEX_PRIVATE_FLAG futexes. 81 const uint64_t kAllowedFutexFlags = FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME; 82 const Arg<int> op(1); 83 return Switch(op & ~kAllowedFutexFlags) 84 .CASES((FUTEX_WAIT, 85 FUTEX_WAKE, 86 FUTEX_REQUEUE, 87 FUTEX_CMP_REQUEUE, 88 FUTEX_WAKE_OP, 89 FUTEX_WAIT_BITSET, 90 FUTEX_WAKE_BITSET), 91 Allow()) 92 .Default(CrashSIGSYSFutex()); 93} 94 95ResultExpr RestrictPrctl() { 96 // base::PlatformThread::SetName() uses PR_SET_NAME so we return 97 // EPERM for it. Otherwise, we will raise SIGSYS. 98 const Arg<int> option(0); 99 return If(option == PR_SET_NAME, Error(EPERM)).Else(CrashSIGSYSPrctl()); 100} 101 102#if defined(__i386__) 103ResultExpr RestrictSocketcall() { 104 // We only allow socketpair, sendmsg, and recvmsg. 105 const Arg<int> call(0); 106 return If(call == SYS_SOCKETPAIR || call == SYS_SHUTDOWN || 107 call == SYS_SENDMSG || call == SYS_RECVMSG, 108 Allow()).Else(CrashSIGSYS()); 109} 110#endif 111 112ResultExpr RestrictMprotect() { 113 // TODO(jln, keescook, drewry): Limit the use of mprotect by adding 114 // some features to linux kernel. 115 const uint64_t kAllowedMask = PROT_READ | PROT_WRITE | PROT_EXEC; 116 const Arg<int> prot(2); 117 return If((prot & ~kAllowedMask) == 0, Allow()).Else(CrashSIGSYS()); 118} 119 120ResultExpr RestrictMmap() { 121 const uint64_t kAllowedFlagMask = 122 MAP_SHARED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK | MAP_FIXED; 123 // When PROT_EXEC is specified, IRT mmap of Non-SFI NaCl helper 124 // calls mmap without PROT_EXEC and then adds PROT_EXEC by mprotect, 125 // so we do not need to allow PROT_EXEC in mmap. 126 const uint64_t kAllowedProtMask = PROT_READ | PROT_WRITE; 127 const Arg<int> prot(2), flags(3); 128 return If((prot & ~kAllowedProtMask) == 0 && (flags & ~kAllowedFlagMask) == 0, 129 Allow()).Else(CrashSIGSYS()); 130} 131 132#if defined(__x86_64__) || defined(__arm__) 133ResultExpr RestrictSocketpair() { 134 // Only allow AF_UNIX, PF_UNIX. Crash if anything else is seen. 135 COMPILE_ASSERT(AF_UNIX == PF_UNIX, af_unix_pf_unix_different); 136 const Arg<int> domain(0); 137 return If(domain == AF_UNIX, Allow()).Else(CrashSIGSYS()); 138} 139#endif 140 141bool IsGracefullyDenied(int sysno) { 142 switch (sysno) { 143 // libevent tries this first and then falls back to poll if 144 // epoll_create fails. 145 case __NR_epoll_create: 146 // third_party/libevent uses them, but we can just return -1 from 147 // them as it is just checking getuid() != geteuid() and 148 // getgid() != getegid() 149#if defined(__i386__) || defined(__arm__) 150 case __NR_getegid32: 151 case __NR_geteuid32: 152 case __NR_getgid32: 153 case __NR_getuid32: 154#endif 155 case __NR_getegid: 156 case __NR_geteuid: 157 case __NR_getgid: 158 case __NR_getuid: 159 // tcmalloc calls madvise in TCMalloc_SystemRelease. 160 case __NR_madvise: 161 // EPERM instead of SIGSYS as glibc tries to open files in /proc. 162 // openat via opendir via get_nprocs_conf and open via get_nprocs. 163 // TODO(hamaji): Remove this when we switch to newlib. 164 case __NR_open: 165 case __NR_openat: 166 // For RunSandboxSanityChecks(). 167 case __NR_ptrace: 168 // glibc uses this for its pthread implementation. If we return 169 // EPERM for this, glibc will stop using this. 170 // TODO(hamaji): newlib does not use this. Make this SIGTRAP once 171 // we have switched to newlib. 172 case __NR_set_robust_list: 173 // This is obsolete in ARM EABI, but x86 glibc indirectly calls 174 // this in sysconf. 175#if defined(__i386__) || defined(__x86_64__) 176 case __NR_time: 177#endif 178 return true; 179 180 default: 181 return false; 182 } 183} 184 185void RunSandboxSanityChecks() { 186 errno = 0; 187 // Make a ptrace request with an invalid PID. 188 long ptrace_ret = ptrace(PTRACE_PEEKUSER, -1 /* pid */, NULL, NULL); 189 CHECK_EQ(-1, ptrace_ret); 190 // Without the sandbox on, this ptrace call would ESRCH instead. 191 CHECK_EQ(EPERM, errno); 192} 193 194} // namespace 195 196ResultExpr NaClNonSfiBPFSandboxPolicy::EvaluateSyscall(int sysno) const { 197 switch (sysno) { 198 // Allowed syscalls. 199#if defined(__i386__) || defined(__arm__) 200 case __NR__llseek: 201#elif defined(__x86_64__) 202 case __NR_lseek: 203#endif 204 case __NR_close: 205 case __NR_dup: 206 case __NR_dup2: 207 case __NR_exit: 208 case __NR_exit_group: 209#if defined(__i386__) || defined(__arm__) 210 case __NR_fstat64: 211#elif defined(__x86_64__) 212 case __NR_fstat: 213#endif 214 // TODO(hamaji): Remove the need of gettid. Currently, this is 215 // called from PlatformThread::CurrentId(). 216 case __NR_gettid: 217 case __NR_gettimeofday: 218 case __NR_munmap: 219 case __NR_nanosleep: 220 // TODO(hamaji): Remove the need of pipe. Currently, this is 221 // called from base::MessagePumpLibevent::Init(). 222 case __NR_pipe: 223 case __NR_poll: 224 case __NR_pread64: 225 case __NR_pwrite64: 226 case __NR_read: 227 case __NR_restart_syscall: 228 case __NR_sched_yield: 229 // __NR_times needed as clock() is called by CommandBufferHelper, which is 230 // used by NaCl applications that use Pepper's 3D interfaces. 231 // See crbug.com/264856 for details. 232 case __NR_times: 233 case __NR_write: 234#if defined(__arm__) 235 case __ARM_NR_cacheflush: 236#endif 237 return Allow(); 238 239 case __NR_clock_getres: 240 case __NR_clock_gettime: 241 return sandbox::RestrictClockID(); 242 243 case __NR_clone: 244 return RestrictClone(); 245 246#if defined(__x86_64__) 247 case __NR_fcntl: 248#endif 249#if defined(__i386__) || defined(__arm__) 250 case __NR_fcntl64: 251#endif 252 return RestrictFcntlCommands(); 253 254 case __NR_futex: 255 return RestrictFutexOperation(); 256 257#if defined(__x86_64__) 258 case __NR_mmap: 259#endif 260#if defined(__i386__) || defined(__arm__) 261 case __NR_mmap2: 262#endif 263 return RestrictMmap(); 264 case __NR_mprotect: 265 return RestrictMprotect(); 266 267 case __NR_prctl: 268 return RestrictPrctl(); 269 270#if defined(__i386__) 271 case __NR_socketcall: 272 return RestrictSocketcall(); 273#endif 274#if defined(__x86_64__) || defined(__arm__) 275 case __NR_recvmsg: 276 case __NR_sendmsg: 277 case __NR_shutdown: 278 return Allow(); 279 case __NR_socketpair: 280 return RestrictSocketpair(); 281#endif 282 283 case __NR_brk: 284 // The behavior of brk on Linux is different from other system 285 // calls. It does not return errno but the current break on 286 // failure. glibc thinks brk failed if the return value of brk 287 // is less than the requested address (i.e., brk(addr) < addr). 288 // So, glibc thinks brk succeeded if we return -EPERM and we 289 // need to return zero instead. 290 return Error(0); 291 292 default: 293 if (IsGracefullyDenied(sysno)) 294 return Error(EPERM); 295 return CrashSIGSYS(); 296 } 297} 298 299ResultExpr NaClNonSfiBPFSandboxPolicy::InvalidSyscall() const { 300 return CrashSIGSYS(); 301} 302 303bool InitializeBPFSandbox() { 304 bool sandbox_is_initialized = content::InitializeSandbox( 305 scoped_ptr<sandbox::bpf_dsl::SandboxBPFDSLPolicy>( 306 new nacl::nonsfi::NaClNonSfiBPFSandboxPolicy())); 307 if (!sandbox_is_initialized) 308 return false; 309 RunSandboxSanityChecks(); 310 return true; 311} 312 313} // namespace nonsfi 314} // namespace nacl 315