1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/nacl/loader/nonsfi/nonsfi_sandbox.h"
6
7#include <errno.h>
8#include <fcntl.h>
9#include <linux/futex.h>
10#include <linux/net.h>
11#include <sys/mman.h>
12#include <sys/prctl.h>
13#include <sys/ptrace.h>
14#include <sys/socket.h>
15#include <sys/syscall.h>
16#include <sys/time.h>
17
18#include "base/basictypes.h"
19#include "base/logging.h"
20#include "base/time/time.h"
21#include "build/build_config.h"
22#include "content/public/common/sandbox_init.h"
23#include "sandbox/linux/bpf_dsl/bpf_dsl.h"
24#include "sandbox/linux/seccomp-bpf-helpers/sigsys_handlers.h"
25#include "sandbox/linux/seccomp-bpf-helpers/syscall_parameters_restrictions.h"
26#include "sandbox/linux/services/linux_syscalls.h"
27
28#if defined(__arm__) && !defined(MAP_STACK)
29// Chrome OS Daisy (ARM) build environment has old headers.
30#define MAP_STACK 0x20000
31#endif
32
33#define CASES SANDBOX_BPF_DSL_CASES
34
35using sandbox::CrashSIGSYS;
36using sandbox::CrashSIGSYSClone;
37using sandbox::CrashSIGSYSFutex;
38using sandbox::CrashSIGSYSPrctl;
39using sandbox::bpf_dsl::Allow;
40using sandbox::bpf_dsl::Arg;
41using sandbox::bpf_dsl::BoolExpr;
42using sandbox::bpf_dsl::Error;
43using sandbox::bpf_dsl::If;
44using sandbox::bpf_dsl::ResultExpr;
45
46namespace nacl {
47namespace nonsfi {
48namespace {
49
50ResultExpr RestrictFcntlCommands() {
51  const Arg<int> cmd(1);
52  const Arg<long> long_arg(2);
53
54  // We allow following cases:
55  // 1. F_SETFD + FD_CLOEXEC: libevent's epoll_init uses this.
56  // 2. F_GETFL: Used by SetNonBlocking in
57  // message_pump_libevent.cc and Channel::ChannelImpl::CreatePipe
58  // in ipc_channel_posix.cc. Note that the latter does not work
59  // with EPERM.
60  // 3. F_SETFL: Used by evutil_make_socket_nonblocking in
61  // libevent and SetNonBlocking. As the latter mix O_NONBLOCK to
62  // the return value of F_GETFL, so we need to allow O_ACCMODE in
63  // addition to O_NONBLOCK.
64  const uint64_t kAllowedMask = O_ACCMODE | O_NONBLOCK;
65  return If((cmd == F_SETFD && long_arg == FD_CLOEXEC) || cmd == F_GETFL ||
66                (cmd == F_SETFL && (long_arg & ~kAllowedMask) == 0),
67            Allow()).Else(CrashSIGSYS());
68}
69
70ResultExpr RestrictClone() {
71  // We allow clone only for new thread creation.
72  const Arg<int> flags(0);
73  return If(flags == (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
74                      CLONE_THREAD | CLONE_SYSVSEM | CLONE_SETTLS |
75                      CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID),
76            Allow()).Else(CrashSIGSYSClone());
77}
78
79ResultExpr RestrictFutexOperation() {
80  // TODO(hamaji): Allow only FUTEX_PRIVATE_FLAG futexes.
81  const uint64_t kAllowedFutexFlags = FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME;
82  const Arg<int> op(1);
83  return Switch(op & ~kAllowedFutexFlags)
84      .CASES((FUTEX_WAIT,
85              FUTEX_WAKE,
86              FUTEX_REQUEUE,
87              FUTEX_CMP_REQUEUE,
88              FUTEX_WAKE_OP,
89              FUTEX_WAIT_BITSET,
90              FUTEX_WAKE_BITSET),
91             Allow())
92      .Default(CrashSIGSYSFutex());
93}
94
95ResultExpr RestrictPrctl() {
96  // base::PlatformThread::SetName() uses PR_SET_NAME so we return
97  // EPERM for it. Otherwise, we will raise SIGSYS.
98  const Arg<int> option(0);
99  return If(option == PR_SET_NAME, Error(EPERM)).Else(CrashSIGSYSPrctl());
100}
101
#if defined(__i386__)
// Builds the policy for the multiplexed socketcall syscall. Only the
// socketpair, shutdown, sendmsg and recvmsg sub-calls are allowed; any
// other sub-call raises SIGSYS.
ResultExpr RestrictSocketcall() {
  const Arg<int> subcall(0);
  return If(subcall == SYS_SOCKETPAIR || subcall == SYS_SHUTDOWN ||
                subcall == SYS_SENDMSG || subcall == SYS_RECVMSG,
            Allow())
      .Else(CrashSIGSYS());
}
#endif  // defined(__i386__)
111
112ResultExpr RestrictMprotect() {
113  // TODO(jln, keescook, drewry): Limit the use of mprotect by adding
114  // some features to linux kernel.
115  const uint64_t kAllowedMask = PROT_READ | PROT_WRITE | PROT_EXEC;
116  const Arg<int> prot(2);
117  return If((prot & ~kAllowedMask) == 0, Allow()).Else(CrashSIGSYS());
118}
119
120ResultExpr RestrictMmap() {
121  const uint64_t kAllowedFlagMask =
122      MAP_SHARED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK | MAP_FIXED;
123  // When PROT_EXEC is specified, IRT mmap of Non-SFI NaCl helper
124  // calls mmap without PROT_EXEC and then adds PROT_EXEC by mprotect,
125  // so we do not need to allow PROT_EXEC in mmap.
126  const uint64_t kAllowedProtMask = PROT_READ | PROT_WRITE;
127  const Arg<int> prot(2), flags(3);
128  return If((prot & ~kAllowedProtMask) == 0 && (flags & ~kAllowedFlagMask) == 0,
129            Allow()).Else(CrashSIGSYS());
130}
131
132#if defined(__x86_64__) || defined(__arm__)
133ResultExpr RestrictSocketpair() {
134  // Only allow AF_UNIX, PF_UNIX. Crash if anything else is seen.
135  COMPILE_ASSERT(AF_UNIX == PF_UNIX, af_unix_pf_unix_different);
136  const Arg<int> domain(0);
137  return If(domain == AF_UNIX, Allow()).Else(CrashSIGSYS());
138}
139#endif
140
// Returns true if |sysno| is one of the syscalls on the list below and
// false for any other number. EvaluateSyscall() answers the listed
// syscalls with EPERM instead of raising SIGSYS.
bool IsGracefullyDenied(int sysno) {
  switch (sysno) {
    // libevent probes epoll_create first and falls back to poll when it
    // fails.
    case __NR_epoll_create:

    // third_party/libevent calls these, but only to check
    // getuid() != geteuid() and getgid() != getegid(), so it is fine for
    // all of them to return -1.
    case __NR_getuid:
    case __NR_geteuid:
    case __NR_getgid:
    case __NR_getegid:
#if defined(__i386__) || defined(__arm__)
    case __NR_getuid32:
    case __NR_geteuid32:
    case __NR_getgid32:
    case __NR_getegid32:
#endif

    // tcmalloc calls madvise in TCMalloc_SystemRelease.
    case __NR_madvise:

    // glibc tries to open files in /proc: openat via opendir via
    // get_nprocs_conf, and open via get_nprocs. Use EPERM rather than
    // SIGSYS for them.
    // TODO(hamaji): Remove this when we switch to newlib.
    case __NR_open:
    case __NR_openat:

    // RunSandboxSanityChecks() relies on ptrace failing with EPERM.
    case __NR_ptrace:

    // glibc uses this for its pthread implementation and stops using it
    // once it has seen EPERM.
    // TODO(hamaji): newlib does not use this. Make this SIGTRAP once
    // we have switched to newlib.
    case __NR_set_robust_list:

#if defined(__i386__) || defined(__x86_64__)
    // Obsolete in ARM EABI, but x86 glibc indirectly calls this in sysconf.
    case __NR_time:
#endif
      return true;
  }

  // Anything not listed above is not gracefully denied.
  return false;
}
184
185void RunSandboxSanityChecks() {
186  errno = 0;
187  // Make a ptrace request with an invalid PID.
188  long ptrace_ret = ptrace(PTRACE_PEEKUSER, -1 /* pid */, NULL, NULL);
189  CHECK_EQ(-1, ptrace_ret);
190  // Without the sandbox on, this ptrace call would ESRCH instead.
191  CHECK_EQ(EPERM, errno);
192}
193
194}  // namespace
195
196ResultExpr NaClNonSfiBPFSandboxPolicy::EvaluateSyscall(int sysno) const {
197  switch (sysno) {
198    // Allowed syscalls.
199#if defined(__i386__) || defined(__arm__)
200    case __NR__llseek:
201#elif defined(__x86_64__)
202    case __NR_lseek:
203#endif
204    case __NR_close:
205    case __NR_dup:
206    case __NR_dup2:
207    case __NR_exit:
208    case __NR_exit_group:
209#if defined(__i386__) || defined(__arm__)
210    case __NR_fstat64:
211#elif defined(__x86_64__)
212    case __NR_fstat:
213#endif
214    // TODO(hamaji): Remove the need of gettid. Currently, this is
215    // called from PlatformThread::CurrentId().
216    case __NR_gettid:
217    case __NR_gettimeofday:
218    case __NR_munmap:
219    case __NR_nanosleep:
220    // TODO(hamaji): Remove the need of pipe. Currently, this is
221    // called from base::MessagePumpLibevent::Init().
222    case __NR_pipe:
223    case __NR_poll:
224    case __NR_pread64:
225    case __NR_pwrite64:
226    case __NR_read:
227    case __NR_restart_syscall:
228    case __NR_sched_yield:
229    // __NR_times needed as clock() is called by CommandBufferHelper, which is
230    // used by NaCl applications that use Pepper's 3D interfaces.
231    // See crbug.com/264856 for details.
232    case __NR_times:
233    case __NR_write:
234#if defined(__arm__)
235    case __ARM_NR_cacheflush:
236#endif
237      return Allow();
238
239    case __NR_clock_getres:
240    case __NR_clock_gettime:
241      return sandbox::RestrictClockID();
242
243    case __NR_clone:
244      return RestrictClone();
245
246#if defined(__x86_64__)
247    case __NR_fcntl:
248#endif
249#if defined(__i386__) || defined(__arm__)
250    case __NR_fcntl64:
251#endif
252      return RestrictFcntlCommands();
253
254    case __NR_futex:
255      return RestrictFutexOperation();
256
257#if defined(__x86_64__)
258    case __NR_mmap:
259#endif
260#if defined(__i386__) || defined(__arm__)
261    case __NR_mmap2:
262#endif
263      return RestrictMmap();
264    case __NR_mprotect:
265      return RestrictMprotect();
266
267    case __NR_prctl:
268      return RestrictPrctl();
269
270#if defined(__i386__)
271    case __NR_socketcall:
272      return RestrictSocketcall();
273#endif
274#if defined(__x86_64__) || defined(__arm__)
275    case __NR_recvmsg:
276    case __NR_sendmsg:
277    case __NR_shutdown:
278      return Allow();
279    case __NR_socketpair:
280      return RestrictSocketpair();
281#endif
282
283    case __NR_brk:
284      // The behavior of brk on Linux is different from other system
285      // calls. It does not return errno but the current break on
286      // failure. glibc thinks brk failed if the return value of brk
287      // is less than the requested address (i.e., brk(addr) < addr).
288      // So, glibc thinks brk succeeded if we return -EPERM and we
289      // need to return zero instead.
290      return Error(0);
291
292    default:
293      if (IsGracefullyDenied(sysno))
294        return Error(EPERM);
295      return CrashSIGSYS();
296  }
297}
298
299ResultExpr NaClNonSfiBPFSandboxPolicy::InvalidSyscall() const {
300  return CrashSIGSYS();
301}
302
303bool InitializeBPFSandbox() {
304  bool sandbox_is_initialized = content::InitializeSandbox(
305      scoped_ptr<sandbox::bpf_dsl::SandboxBPFDSLPolicy>(
306          new nacl::nonsfi::NaClNonSfiBPFSandboxPolicy()));
307  if (!sandbox_is_initialized)
308    return false;
309  RunSandboxSanityChecks();
310  return true;
311}
312
313}  // namespace nonsfi
314}  // namespace nacl
315