1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "sandbox/linux/bpf_dsl/policy_compiler.h"
6
7#include <errno.h>
8#include <stddef.h>
9#include <stdint.h>
10#include <sys/syscall.h>
11
12#include <limits>
13
14#include "base/logging.h"
15#include "base/macros.h"
16#include "sandbox/linux/bpf_dsl/bpf_dsl.h"
17#include "sandbox/linux/bpf_dsl/bpf_dsl_impl.h"
18#include "sandbox/linux/bpf_dsl/codegen.h"
19#include "sandbox/linux/bpf_dsl/policy.h"
20#include "sandbox/linux/bpf_dsl/seccomp_macros.h"
21#include "sandbox/linux/bpf_dsl/syscall_set.h"
22#include "sandbox/linux/system_headers/linux_filter.h"
23#include "sandbox/linux/system_headers/linux_seccomp.h"
24#include "sandbox/linux/system_headers/linux_syscalls.h"
25
26namespace sandbox {
27namespace bpf_dsl {
28
29namespace {
30
// Compile-time architecture flags, consulted when validating system call
// numbers.

// True when building for an Intel target (__i386__ or __x86_64__).
const bool kIsIntel =
#if defined(__i386__) || defined(__x86_64__)
    true;
#else
    false;
#endif

// True when building for the x32 ABI (__x86_64__ together with __ILP32__).
const bool kIsX32 =
#if defined(__x86_64__) && defined(__ILP32__)
    true;
#else
    false;
#endif
41
// System calls that a policy using UnsafeTrap() must unconditionally allow;
// Compile() CHECKs each of them. The non-"rt" variants are only listed when
// the platform headers define a number for them.
const int kSyscallsRequiredForUnsafeTraps[] = {
    __NR_rt_sigprocmask,
    __NR_rt_sigreturn,
#ifdef __NR_sigprocmask
    __NR_sigprocmask,
#endif
#ifdef __NR_sigreturn
    __NR_sigreturn,
#endif
};
52
// Returns true iff exactly one bit of |x| is set.
bool HasExactlyOneBit(uint64_t x) {
  if (x == 0) {
    return false;
  }
  // Common trick (see http://stackoverflow.com/a/108329): |x - 1| clears the
  // lowest set bit of |x| and sets every bit below it, so the AND is zero
  // only when there was no other set bit.
  const uint64_t without_lowest_bit = x & (x - 1);
  return without_lowest_bit == 0;
}
57
58ResultExpr DefaultPanic(const char* error) {
59  return Kill();
60}
61
62// A Trap() handler that returns an "errno" value. The value is encoded
63// in the "aux" parameter.
64intptr_t ReturnErrno(const struct arch_seccomp_data&, void* aux) {
65  // TrapFnc functions report error by following the native kernel convention
66  // of returning an exit code in the range of -1..-4096. They do not try to
67  // set errno themselves. The glibc wrapper that triggered the SIGSYS will
68  // ultimately do so for us.
69  int err = reinterpret_cast<intptr_t>(aux) & SECCOMP_RET_DATA;
70  return -err;
71}
72
73bool HasUnsafeTraps(const Policy* policy) {
74  DCHECK(policy);
75  for (uint32_t sysnum : SyscallSet::ValidOnly()) {
76    if (policy->EvaluateSyscall(sysnum)->HasUnsafeTraps()) {
77      return true;
78    }
79  }
80  return policy->InvalidSyscall()->HasUnsafeTraps();
81}
82
83}  // namespace
84
85struct PolicyCompiler::Range {
86  uint32_t from;
87  CodeGen::Node node;
88};
89
90PolicyCompiler::PolicyCompiler(const Policy* policy, TrapRegistry* registry)
91    : policy_(policy),
92      registry_(registry),
93      escapepc_(0),
94      panic_func_(DefaultPanic),
95      gen_(),
96      has_unsafe_traps_(HasUnsafeTraps(policy_)) {
97  DCHECK(policy);
98}
99
100PolicyCompiler::~PolicyCompiler() {
101}
102
103CodeGen::Program PolicyCompiler::Compile() {
104  CHECK(policy_->InvalidSyscall()->IsDeny())
105      << "Policies should deny invalid system calls";
106
107  // If our BPF program has unsafe traps, enable support for them.
108  if (has_unsafe_traps_) {
109    CHECK_NE(0U, escapepc_) << "UnsafeTrap() requires a valid escape PC";
110
111    for (int sysnum : kSyscallsRequiredForUnsafeTraps) {
112      CHECK(policy_->EvaluateSyscall(sysnum)->IsAllow())
113          << "Policies that use UnsafeTrap() must unconditionally allow all "
114             "required system calls";
115    }
116
117    CHECK(registry_->EnableUnsafeTraps())
118        << "We'd rather die than enable unsafe traps";
119  }
120
121  // Assemble the BPF filter program.
122  return gen_.Compile(AssemblePolicy());
123}
124
125void PolicyCompiler::DangerousSetEscapePC(uint64_t escapepc) {
126  escapepc_ = escapepc;
127}
128
129void PolicyCompiler::SetPanicFunc(PanicFunc panic_func) {
130  panic_func_ = panic_func;
131}
132
133CodeGen::Node PolicyCompiler::AssemblePolicy() {
134  // A compiled policy consists of three logical parts:
135  //   1. Check that the "arch" field matches the expected architecture.
136  //   2. If the policy involves unsafe traps, check if the syscall was
137  //      invoked by Syscall::Call, and then allow it unconditionally.
138  //   3. Check the system call number and jump to the appropriate compiled
139  //      system call policy number.
140  return CheckArch(MaybeAddEscapeHatch(DispatchSyscall()));
141}
142
143CodeGen::Node PolicyCompiler::CheckArch(CodeGen::Node passed) {
144  // If the architecture doesn't match SECCOMP_ARCH, disallow the
145  // system call.
146  return gen_.MakeInstruction(
147      BPF_LD + BPF_W + BPF_ABS, SECCOMP_ARCH_IDX,
148      gen_.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, SECCOMP_ARCH, passed,
149                           CompileResult(panic_func_(
150                               "Invalid audit architecture in BPF filter"))));
151}
152
153CodeGen::Node PolicyCompiler::MaybeAddEscapeHatch(CodeGen::Node rest) {
154  // If no unsafe traps, then simply return |rest|.
155  if (!has_unsafe_traps_) {
156    return rest;
157  }
158
159  // We already enabled unsafe traps in Compile, but enable them again to give
160  // the trap registry a second chance to complain before we add the backdoor.
161  CHECK(registry_->EnableUnsafeTraps());
162
163  // Allow system calls, if they originate from our magic return address.
164  const uint32_t lopc = static_cast<uint32_t>(escapepc_);
165  const uint32_t hipc = static_cast<uint32_t>(escapepc_ >> 32);
166
167  // BPF cannot do native 64-bit comparisons, so we have to compare
168  // both 32-bit halves of the instruction pointer. If they match what
169  // we expect, we return ERR_ALLOWED. If either or both don't match,
170  // we continue evalutating the rest of the sandbox policy.
171  //
172  // For simplicity, we check the full 64-bit instruction pointer even
173  // on 32-bit architectures.
174  return gen_.MakeInstruction(
175      BPF_LD + BPF_W + BPF_ABS, SECCOMP_IP_LSB_IDX,
176      gen_.MakeInstruction(
177          BPF_JMP + BPF_JEQ + BPF_K, lopc,
178          gen_.MakeInstruction(
179              BPF_LD + BPF_W + BPF_ABS, SECCOMP_IP_MSB_IDX,
180              gen_.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, hipc,
181                                   CompileResult(Allow()), rest)),
182          rest));
183}
184
185CodeGen::Node PolicyCompiler::DispatchSyscall() {
186  // Evaluate all possible system calls and group their Nodes into
187  // ranges of identical codes.
188  Ranges ranges;
189  FindRanges(&ranges);
190
191  // Compile the system call ranges to an optimized BPF jumptable
192  CodeGen::Node jumptable = AssembleJumpTable(ranges.begin(), ranges.end());
193
194  // Grab the system call number, so that we can check it and then
195  // execute the jump table.
196  return gen_.MakeInstruction(
197      BPF_LD + BPF_W + BPF_ABS, SECCOMP_NR_IDX, CheckSyscallNumber(jumptable));
198}
199
200CodeGen::Node PolicyCompiler::CheckSyscallNumber(CodeGen::Node passed) {
201  if (kIsIntel) {
202    // On Intel architectures, verify that system call numbers are in the
203    // expected number range.
204    CodeGen::Node invalidX32 =
205        CompileResult(panic_func_("Illegal mixing of system call ABIs"));
206    if (kIsX32) {
207      // The newer x32 API always sets bit 30.
208      return gen_.MakeInstruction(
209          BPF_JMP + BPF_JSET + BPF_K, 0x40000000, passed, invalidX32);
210    } else {
211      // The older i386 and x86-64 APIs clear bit 30 on all system calls.
212      return gen_.MakeInstruction(
213          BPF_JMP + BPF_JSET + BPF_K, 0x40000000, invalidX32, passed);
214    }
215  }
216
217  // TODO(mdempsky): Similar validation for other architectures?
218  return passed;
219}
220
221void PolicyCompiler::FindRanges(Ranges* ranges) {
222  // Please note that "struct seccomp_data" defines system calls as a signed
223  // int32_t, but BPF instructions always operate on unsigned quantities. We
224  // deal with this disparity by enumerating from MIN_SYSCALL to MAX_SYSCALL,
225  // and then verifying that the rest of the number range (both positive and
226  // negative) all return the same Node.
227  const CodeGen::Node invalid_node = CompileResult(policy_->InvalidSyscall());
228  uint32_t old_sysnum = 0;
229  CodeGen::Node old_node =
230      SyscallSet::IsValid(old_sysnum)
231          ? CompileResult(policy_->EvaluateSyscall(old_sysnum))
232          : invalid_node;
233
234  for (uint32_t sysnum : SyscallSet::All()) {
235    CodeGen::Node node =
236        SyscallSet::IsValid(sysnum)
237            ? CompileResult(policy_->EvaluateSyscall(static_cast<int>(sysnum)))
238            : invalid_node;
239    // N.B., here we rely on CodeGen folding (i.e., returning the same
240    // node value for) identical code sequences, otherwise our jump
241    // table will blow up in size.
242    if (node != old_node) {
243      ranges->push_back(Range{old_sysnum, old_node});
244      old_sysnum = sysnum;
245      old_node = node;
246    }
247  }
248  ranges->push_back(Range{old_sysnum, old_node});
249}
250
251CodeGen::Node PolicyCompiler::AssembleJumpTable(Ranges::const_iterator start,
252                                                Ranges::const_iterator stop) {
253  // We convert the list of system call ranges into jump table that performs
254  // a binary search over the ranges.
255  // As a sanity check, we need to have at least one distinct ranges for us
256  // to be able to build a jump table.
257  CHECK(start < stop) << "Invalid iterator range";
258  const auto n = stop - start;
259  if (n == 1) {
260    // If we have narrowed things down to a single range object, we can
261    // return from the BPF filter program.
262    return start->node;
263  }
264
265  // Pick the range object that is located at the mid point of our list.
266  // We compare our system call number against the lowest valid system call
267  // number in this range object. If our number is lower, it is outside of
268  // this range object. If it is greater or equal, it might be inside.
269  Ranges::const_iterator mid = start + n / 2;
270
271  // Sub-divide the list of ranges and continue recursively.
272  CodeGen::Node jf = AssembleJumpTable(start, mid);
273  CodeGen::Node jt = AssembleJumpTable(mid, stop);
274  return gen_.MakeInstruction(BPF_JMP + BPF_JGE + BPF_K, mid->from, jt, jf);
275}
276
277CodeGen::Node PolicyCompiler::CompileResult(const ResultExpr& res) {
278  return res->Compile(this);
279}
280
281CodeGen::Node PolicyCompiler::MaskedEqual(int argno,
282                                          size_t width,
283                                          uint64_t mask,
284                                          uint64_t value,
285                                          CodeGen::Node passed,
286                                          CodeGen::Node failed) {
287  // Sanity check that arguments make sense.
288  CHECK(argno >= 0 && argno < 6) << "Invalid argument number " << argno;
289  CHECK(width == 4 || width == 8) << "Invalid argument width " << width;
290  CHECK_NE(0U, mask) << "Zero mask is invalid";
291  CHECK_EQ(value, value & mask) << "Value contains masked out bits";
292  if (sizeof(void*) == 4) {
293    CHECK_EQ(4U, width) << "Invalid width on 32-bit platform";
294  }
295  if (width == 4) {
296    CHECK_EQ(0U, mask >> 32) << "Mask exceeds argument size";
297    CHECK_EQ(0U, value >> 32) << "Value exceeds argument size";
298  }
299
300  // We want to emit code to check "(arg & mask) == value" where arg, mask, and
301  // value are 64-bit values, but the BPF machine is only 32-bit. We implement
302  // this by independently testing the upper and lower 32-bits and continuing to
303  // |passed| if both evaluate true, or to |failed| if either evaluate false.
304  return MaskedEqualHalf(argno, width, mask, value, ArgHalf::UPPER,
305                         MaskedEqualHalf(argno, width, mask, value,
306                                         ArgHalf::LOWER, passed, failed),
307                         failed);
308}
309
310CodeGen::Node PolicyCompiler::MaskedEqualHalf(int argno,
311                                              size_t width,
312                                              uint64_t full_mask,
313                                              uint64_t full_value,
314                                              ArgHalf half,
315                                              CodeGen::Node passed,
316                                              CodeGen::Node failed) {
317  if (width == 4 && half == ArgHalf::UPPER) {
318    // Special logic for sanity checking the upper 32-bits of 32-bit system
319    // call arguments.
320
321    // TODO(mdempsky): Compile Unexpected64bitArgument() just per program.
322    CodeGen::Node invalid_64bit = Unexpected64bitArgument();
323
324    const uint32_t upper = SECCOMP_ARG_MSB_IDX(argno);
325    const uint32_t lower = SECCOMP_ARG_LSB_IDX(argno);
326
327    if (sizeof(void*) == 4) {
328      // On 32-bit platforms, the upper 32-bits should always be 0:
329      //   LDW  [upper]
330      //   JEQ  0, passed, invalid
331      return gen_.MakeInstruction(
332          BPF_LD + BPF_W + BPF_ABS,
333          upper,
334          gen_.MakeInstruction(
335              BPF_JMP + BPF_JEQ + BPF_K, 0, passed, invalid_64bit));
336    }
337
338    // On 64-bit platforms, the upper 32-bits may be 0 or ~0; but we only allow
339    // ~0 if the sign bit of the lower 32-bits is set too:
340    //   LDW  [upper]
341    //   JEQ  0, passed, (next)
342    //   JEQ  ~0, (next), invalid
343    //   LDW  [lower]
344    //   JSET (1<<31), passed, invalid
345    //
346    // TODO(mdempsky): The JSET instruction could perhaps jump to passed->next
347    // instead, as the first instruction of passed should be "LDW [lower]".
348    return gen_.MakeInstruction(
349        BPF_LD + BPF_W + BPF_ABS,
350        upper,
351        gen_.MakeInstruction(
352            BPF_JMP + BPF_JEQ + BPF_K,
353            0,
354            passed,
355            gen_.MakeInstruction(
356                BPF_JMP + BPF_JEQ + BPF_K,
357                std::numeric_limits<uint32_t>::max(),
358                gen_.MakeInstruction(
359                    BPF_LD + BPF_W + BPF_ABS,
360                    lower,
361                    gen_.MakeInstruction(BPF_JMP + BPF_JSET + BPF_K,
362                                         1U << 31,
363                                         passed,
364                                         invalid_64bit)),
365                invalid_64bit)));
366  }
367
368  const uint32_t idx = (half == ArgHalf::UPPER) ? SECCOMP_ARG_MSB_IDX(argno)
369                                                : SECCOMP_ARG_LSB_IDX(argno);
370  const uint32_t mask = (half == ArgHalf::UPPER) ? full_mask >> 32 : full_mask;
371  const uint32_t value =
372      (half == ArgHalf::UPPER) ? full_value >> 32 : full_value;
373
374  // Emit a suitable instruction sequence for (arg & mask) == value.
375
376  // For (arg & 0) == 0, just return passed.
377  if (mask == 0) {
378    CHECK_EQ(0U, value);
379    return passed;
380  }
381
382  // For (arg & ~0) == value, emit:
383  //   LDW  [idx]
384  //   JEQ  value, passed, failed
385  if (mask == std::numeric_limits<uint32_t>::max()) {
386    return gen_.MakeInstruction(
387        BPF_LD + BPF_W + BPF_ABS,
388        idx,
389        gen_.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, value, passed, failed));
390  }
391
392  // For (arg & mask) == 0, emit:
393  //   LDW  [idx]
394  //   JSET mask, failed, passed
395  // (Note: failed and passed are intentionally swapped.)
396  if (value == 0) {
397    return gen_.MakeInstruction(
398        BPF_LD + BPF_W + BPF_ABS,
399        idx,
400        gen_.MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, mask, failed, passed));
401  }
402
403  // For (arg & x) == x where x is a single-bit value, emit:
404  //   LDW  [idx]
405  //   JSET mask, passed, failed
406  if (mask == value && HasExactlyOneBit(mask)) {
407    return gen_.MakeInstruction(
408        BPF_LD + BPF_W + BPF_ABS,
409        idx,
410        gen_.MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, mask, passed, failed));
411  }
412
413  // Generic fallback:
414  //   LDW  [idx]
415  //   AND  mask
416  //   JEQ  value, passed, failed
417  return gen_.MakeInstruction(
418      BPF_LD + BPF_W + BPF_ABS,
419      idx,
420      gen_.MakeInstruction(
421          BPF_ALU + BPF_AND + BPF_K,
422          mask,
423          gen_.MakeInstruction(
424              BPF_JMP + BPF_JEQ + BPF_K, value, passed, failed)));
425}
426
427CodeGen::Node PolicyCompiler::Unexpected64bitArgument() {
428  return CompileResult(panic_func_("Unexpected 64bit argument detected"));
429}
430
431CodeGen::Node PolicyCompiler::Return(uint32_t ret) {
432  if (has_unsafe_traps_ && (ret & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
433    // When inside an UnsafeTrap() callback, we want to allow all system calls.
434    // This means, we must conditionally disable the sandbox -- and that's not
435    // something that kernel-side BPF filters can do, as they cannot inspect
436    // any state other than the syscall arguments.
437    // But if we redirect all error handlers to user-space, then we can easily
438    // make this decision.
439    // The performance penalty for this extra round-trip to user-space is not
440    // actually that bad, as we only ever pay it for denied system calls; and a
441    // typical program has very few of these.
442    return Trap(ReturnErrno, reinterpret_cast<void*>(ret & SECCOMP_RET_DATA),
443                true);
444  }
445
446  return gen_.MakeInstruction(BPF_RET + BPF_K, ret);
447}
448
449CodeGen::Node PolicyCompiler::Trap(TrapRegistry::TrapFnc fnc,
450                                   const void* aux,
451                                   bool safe) {
452  uint16_t trap_id = registry_->Add(fnc, aux, safe);
453  return gen_.MakeInstruction(BPF_RET + BPF_K, SECCOMP_RET_TRAP + trap_id);
454}
455
456bool PolicyCompiler::IsRequiredForUnsafeTrap(int sysno) {
457  for (size_t i = 0; i < arraysize(kSyscallsRequiredForUnsafeTraps); ++i) {
458    if (sysno == kSyscallsRequiredForUnsafeTraps[i]) {
459      return true;
460    }
461  }
462  return false;
463}
464
465}  // namespace bpf_dsl
466}  // namespace sandbox
467