1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
6
7#include <errno.h>
8#include <stdint.h>
9#include <sys/prctl.h>
10#include <sys/types.h>
11#include <unistd.h>
12
13#include "base/compiler_specific.h"
14#include "base/files/scoped_file.h"
15#include "base/logging.h"
16#include "base/macros.h"
17#include "base/memory/scoped_ptr.h"
18#include "base/posix/eintr_wrapper.h"
19#include "sandbox/linux/bpf_dsl/bpf_dsl.h"
20#include "sandbox/linux/bpf_dsl/codegen.h"
21#include "sandbox/linux/bpf_dsl/policy.h"
22#include "sandbox/linux/bpf_dsl/policy_compiler.h"
23#include "sandbox/linux/bpf_dsl/seccomp_macros.h"
24#include "sandbox/linux/bpf_dsl/syscall_set.h"
25#include "sandbox/linux/seccomp-bpf/die.h"
26#include "sandbox/linux/seccomp-bpf/syscall.h"
27#include "sandbox/linux/seccomp-bpf/trap.h"
28#include "sandbox/linux/services/proc_util.h"
29#include "sandbox/linux/services/syscall_wrappers.h"
30#include "sandbox/linux/services/thread_helpers.h"
31#include "sandbox/linux/system_headers/linux_filter.h"
32#include "sandbox/linux/system_headers/linux_seccomp.h"
33#include "sandbox/linux/system_headers/linux_syscalls.h"
34#include "third_party/valgrind/valgrind.h"
35
36namespace sandbox {
37
38namespace {
39
40bool IsRunningOnValgrind() { return RUNNING_ON_VALGRIND; }
41
42bool IsSingleThreaded(int proc_fd) {
43  return ThreadHelpers::IsSingleThreaded(proc_fd);
44}
45
46// Check if the kernel supports seccomp-filter (a.k.a. seccomp mode 2) via
47// prctl().
48bool KernelSupportsSeccompBPF() {
49  errno = 0;
50  const int rv = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, nullptr);
51
52  if (rv == -1 && EFAULT == errno) {
53    return true;
54  }
55  return false;
56}
57
// Detects kernels carrying LG's buggy sys_set_media_ext syscall, which shares
// its syscall number with seccomp. Returns true when the running kernel has
// that bug; always returns false on non-Android builds.
//
// To stay compatible with future versions of seccomp, the probe uses bogus
// flags that the kernel is unlikely to ever assign a meaning to. A normal
// kernel would answer with -EINVAL, whereas a buggy LG kernel would return 1.
bool KernelHasLGBug() {
#if defined(OS_ANDROID)
  // sys_set_media interprets this value as NULL, which should be a safe
  // (non-crashing) way to invoke it. A genuine seccomp syscall interprets it
  // as SECCOMP_SET_MODE_STRICT.
  const unsigned int operation = 0;
  // Chosen by fair dice roll. Guaranteed to be random.
  const unsigned int flags = 0xf7a46a5c;
  const int rv = sys_seccomp(operation, flags, nullptr);
  // The real seccomp would fail with -EINVAL (rv == -1, errno == EINVAL), or
  // at the very least with some error (rv == -1). Anything else means that
  // whatever code handled the syscall was not the real seccomp.
  return rv != -1;
#else
  return false;
#endif  // defined(OS_ANDROID)
}
84
85// Check if the kernel supports seccomp-filter via the seccomp system call
86// and the TSYNC feature to enable seccomp on all threads.
87bool KernelSupportsSeccompTsync() {
88  if (KernelHasLGBug()) {
89    return false;
90  }
91
92  errno = 0;
93  const int rv =
94      sys_seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, nullptr);
95
96  if (rv == -1 && errno == EFAULT) {
97    return true;
98  } else {
99    // TODO(jln): turn these into DCHECK after 417888 is considered fixed.
100    CHECK_EQ(-1, rv);
101    CHECK(ENOSYS == errno || EINVAL == errno);
102    return false;
103  }
104}
105
106uint64_t EscapePC() {
107  intptr_t rv = Syscall::Call(-1);
108  if (rv == -1 && errno == ENOSYS) {
109    return 0;
110  }
111  return static_cast<uint64_t>(static_cast<uintptr_t>(rv));
112}
113
114intptr_t SandboxPanicTrap(const struct arch_seccomp_data&, void* aux) {
115  SANDBOX_DIE(static_cast<const char*>(aux));
116}
117
118bpf_dsl::ResultExpr SandboxPanic(const char* error) {
119  return bpf_dsl::Trap(SandboxPanicTrap, error);
120}
121
122}  // namespace
123
124SandboxBPF::SandboxBPF(bpf_dsl::Policy* policy)
125    : proc_fd_(), sandbox_has_started_(false), policy_(policy) {
126}
127
128SandboxBPF::~SandboxBPF() {
129}
130
131// static
132bool SandboxBPF::SupportsSeccompSandbox(SeccompLevel level) {
133  // Never pretend to support seccomp with Valgrind, as it
134  // throws the tool off.
135  if (IsRunningOnValgrind()) {
136    return false;
137  }
138
139  switch (level) {
140    case SeccompLevel::SINGLE_THREADED:
141      return KernelSupportsSeccompBPF();
142    case SeccompLevel::MULTI_THREADED:
143      return KernelSupportsSeccompTsync();
144  }
145  NOTREACHED();
146  return false;
147}
148
149bool SandboxBPF::StartSandbox(SeccompLevel seccomp_level) {
150  DCHECK(policy_);
151  CHECK(seccomp_level == SeccompLevel::SINGLE_THREADED ||
152        seccomp_level == SeccompLevel::MULTI_THREADED);
153
154  if (sandbox_has_started_) {
155    SANDBOX_DIE(
156        "Cannot repeatedly start sandbox. Create a separate Sandbox "
157        "object instead.");
158    return false;
159  }
160
161  if (!proc_fd_.is_valid()) {
162    SetProcFd(ProcUtil::OpenProc());
163  }
164
165  const bool supports_tsync = KernelSupportsSeccompTsync();
166
167  if (seccomp_level == SeccompLevel::SINGLE_THREADED) {
168    // Wait for /proc/self/task/ to update if needed and assert the
169    // process is single threaded.
170    ThreadHelpers::AssertSingleThreaded(proc_fd_.get());
171  } else if (seccomp_level == SeccompLevel::MULTI_THREADED) {
172    if (IsSingleThreaded(proc_fd_.get())) {
173      SANDBOX_DIE("Cannot start sandbox; "
174                  "process may be single-threaded when reported as not");
175      return false;
176    }
177    if (!supports_tsync) {
178      SANDBOX_DIE("Cannot start sandbox; kernel does not support synchronizing "
179                  "filters for a threadgroup");
180      return false;
181    }
182  }
183
184  // We no longer need access to any files in /proc. We want to do this
185  // before installing the filters, just in case that our policy denies
186  // close().
187  if (proc_fd_.is_valid()) {
188    proc_fd_.reset();
189  }
190
191  // Install the filters.
192  InstallFilter(supports_tsync ||
193                seccomp_level == SeccompLevel::MULTI_THREADED);
194
195  return true;
196}
197
198void SandboxBPF::SetProcFd(base::ScopedFD proc_fd) {
199  proc_fd_.swap(proc_fd);
200}
201
202// static
203bool SandboxBPF::IsValidSyscallNumber(int sysnum) {
204  return SyscallSet::IsValid(sysnum);
205}
206
207// static
208bool SandboxBPF::IsRequiredForUnsafeTrap(int sysno) {
209  return bpf_dsl::PolicyCompiler::IsRequiredForUnsafeTrap(sysno);
210}
211
212// static
213intptr_t SandboxBPF::ForwardSyscall(const struct arch_seccomp_data& args) {
214  return Syscall::Call(
215      args.nr, static_cast<intptr_t>(args.args[0]),
216      static_cast<intptr_t>(args.args[1]), static_cast<intptr_t>(args.args[2]),
217      static_cast<intptr_t>(args.args[3]), static_cast<intptr_t>(args.args[4]),
218      static_cast<intptr_t>(args.args[5]));
219}
220
221CodeGen::Program SandboxBPF::AssembleFilter() {
222  DCHECK(policy_);
223
224  bpf_dsl::PolicyCompiler compiler(policy_.get(), Trap::Registry());
225  if (Trap::SandboxDebuggingAllowedByUser()) {
226    compiler.DangerousSetEscapePC(EscapePC());
227  }
228  compiler.SetPanicFunc(SandboxPanic);
229  return compiler.Compile();
230}
231
232void SandboxBPF::InstallFilter(bool must_sync_threads) {
233  // We want to be very careful in not imposing any requirements on the
234  // policies that are set with SetSandboxPolicy(). This means, as soon as
235  // the sandbox is active, we shouldn't be relying on libraries that could
236  // be making system calls. This, for example, means we should avoid
237  // using the heap and we should avoid using STL functions.
238  // Temporarily copy the contents of the "program" vector into a
239  // stack-allocated array; and then explicitly destroy that object.
240  // This makes sure we don't ex- or implicitly call new/delete after we
241  // installed the BPF filter program in the kernel. Depending on the
242  // system memory allocator that is in effect, these operators can result
243  // in system calls to things like munmap() or brk().
244  CodeGen::Program program = AssembleFilter();
245
246  struct sock_filter bpf[program.size()];
247  const struct sock_fprog prog = {static_cast<unsigned short>(program.size()),
248                                  bpf};
249  memcpy(bpf, &program[0], sizeof(bpf));
250  CodeGen::Program().swap(program);  // vector swap trick
251
252  // Make an attempt to release memory that is no longer needed here, rather
253  // than in the destructor. Try to avoid as much as possible to presume of
254  // what will be possible to do in the new (sandboxed) execution environment.
255  policy_.reset();
256
257  if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
258    SANDBOX_DIE("Kernel refuses to enable no-new-privs");
259  }
260
261  // Install BPF filter program. If the thread state indicates multi-threading
262  // support, then the kernel hass the seccomp system call. Otherwise, fall
263  // back on prctl, which requires the process to be single-threaded.
264  if (must_sync_threads) {
265    int rv =
266        sys_seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &prog);
267    if (rv) {
268      SANDBOX_DIE(
269          "Kernel refuses to turn on and synchronize threads for BPF filters");
270    }
271  } else {
272    if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
273      SANDBOX_DIE("Kernel refuses to turn on BPF filters");
274    }
275  }
276
277  sandbox_has_started_ = true;
278}
279
280}  // namespace sandbox
281