1f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// Use of this source code is governed by a BSD-style license that can be
3f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// found in the LICENSE file.
4f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
5f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko#include "sandbox/linux/bpf_dsl/codegen.h"
6f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
724854748fba09df2a29f0d08d558c3acea70e7a1Alex Vakulenko#include <stddef.h>
824854748fba09df2a29f0d08d558c3acea70e7a1Alex Vakulenko#include <stdint.h>
924854748fba09df2a29f0d08d558c3acea70e7a1Alex Vakulenko
10f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko#include <limits>
11f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko#include <utility>
12f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
13f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko#include "base/logging.h"
14f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko#include "sandbox/linux/system_headers/linux_filter.h"
15f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
16f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// This CodeGen implementation strives for simplicity while still
17f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// generating acceptable BPF programs under typical usage patterns
18f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// (e.g., by PolicyCompiler).
19f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//
20f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// The key to its simplicity is that BPF programs only support forward
21f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// jumps/branches, which allows constraining the DAG construction API
22f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// to make instruction nodes immutable. Immutable nodes admits a
23f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// simple greedy approach of emitting new instructions as needed and
24f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// then reusing existing ones that have already been emitted. This
25f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// cleanly avoids any need to compute basic blocks or apply
26f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// topological sorting because the API effectively sorts instructions
27f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// for us (e.g., before MakeInstruction() can be called to emit a
28f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// branch instruction, it must have already been called for each
29f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// branch path).
30f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//
31f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// This greedy algorithm is not without (theoretical) weakness though:
32f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//
33f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//   1. In the general case, we don't eliminate dead code.  If needed,
34f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//      we could trace back through the program in Compile() and elide
35f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//      any unneeded instructions, but in practice we only emit live
36f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//      instructions anyway.
37f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//
38f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//   2. By not dividing instructions into basic blocks and sorting, we
39f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//      lose an opportunity to move non-branch/non-return instructions
40f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//      adjacent to their successor instructions, which means we might
41f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//      need to emit additional jumps. But in practice, they'll
42f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//      already be nearby as long as callers don't go out of their way
43f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//      to interleave MakeInstruction() calls for unrelated code
44f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//      sequences.
45f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
46f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenkonamespace sandbox {
47f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
48f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// kBranchRange is the maximum value that can be stored in
49f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// sock_filter's 8-bit jt and jf fields.
50f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenkoconst size_t kBranchRange = std::numeric_limits<uint8_t>::max();
51f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
52f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenkoconst CodeGen::Node CodeGen::kNullNode;
53f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
54f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex VakulenkoCodeGen::CodeGen() : program_(), equivalent_(), memos_() {
55f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko}
56f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
57f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex VakulenkoCodeGen::~CodeGen() {
58f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko}
59f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
6024854748fba09df2a29f0d08d558c3acea70e7a1Alex VakulenkoCodeGen::Program CodeGen::Compile(CodeGen::Node head) {
6124854748fba09df2a29f0d08d558c3acea70e7a1Alex Vakulenko  return Program(program_.rbegin() + Offset(head), program_.rend());
62f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko}
63f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
64f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex VakulenkoCodeGen::Node CodeGen::MakeInstruction(uint16_t code,
65f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko                                       uint32_t k,
66f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko                                       Node jt,
67f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko                                       Node jf) {
68f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // To avoid generating redundant code sequences, we memoize the
69f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // results from AppendInstruction().
70f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  auto res = memos_.insert(std::make_pair(MemoKey(code, k, jt, jf), kNullNode));
71f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  CodeGen::Node* node = &res.first->second;
72f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  if (res.second) {  // Newly inserted memo entry.
73f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    *node = AppendInstruction(code, k, jt, jf);
74f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  }
75f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  return *node;
76f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko}
77f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
78f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex VakulenkoCodeGen::Node CodeGen::AppendInstruction(uint16_t code,
79f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko                                         uint32_t k,
80f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko                                         Node jt,
81f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko                                         Node jf) {
82f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  if (BPF_CLASS(code) == BPF_JMP) {
83f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    CHECK_NE(BPF_JA, BPF_OP(code)) << "CodeGen inserts JAs as needed";
84f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
85f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    // Optimally adding jumps is rather tricky, so we use a quick
86f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    // approximation: by artificially reducing |jt|'s range, |jt| will
87f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    // stay within its true range even if we add a jump for |jf|.
88f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    jt = WithinRange(jt, kBranchRange - 1);
89f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    jf = WithinRange(jf, kBranchRange);
90f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    return Append(code, k, Offset(jt), Offset(jf));
91f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  }
92f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
93f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  CHECK_EQ(kNullNode, jf) << "Non-branch instructions shouldn't provide jf";
94f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  if (BPF_CLASS(code) == BPF_RET) {
95f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    CHECK_EQ(kNullNode, jt) << "Return instructions shouldn't provide jt";
96f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  } else {
97f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    // For non-branch/non-return instructions, execution always
98f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    // proceeds to the next instruction; so we need to arrange for
99f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    // that to be |jt|.
100f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    jt = WithinRange(jt, 0);
101f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    CHECK_EQ(0U, Offset(jt)) << "ICE: Failed to setup next instruction";
102f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  }
103f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  return Append(code, k, 0, 0);
104f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko}
105f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
106f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex VakulenkoCodeGen::Node CodeGen::WithinRange(Node target, size_t range) {
107f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // Just use |target| if it's already within range.
108f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  if (Offset(target) <= range) {
109f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    return target;
110f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  }
111f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
112f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // Alternatively, look for an equivalent instruction within range.
113f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  if (Offset(equivalent_.at(target)) <= range) {
114f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    return equivalent_.at(target);
115f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  }
116f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
117f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // Otherwise, fall back to emitting a jump instruction.
118f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  Node jump = Append(BPF_JMP | BPF_JA, Offset(target), 0, 0);
119f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  equivalent_.at(target) = jump;
120f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  return jump;
121f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko}
122f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
123f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex VakulenkoCodeGen::Node CodeGen::Append(uint16_t code, uint32_t k, size_t jt, size_t jf) {
124f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
125f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    CHECK_LE(jt, kBranchRange);
126f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    CHECK_LE(jf, kBranchRange);
127f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  } else {
128f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    CHECK_EQ(0U, jt);
129f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    CHECK_EQ(0U, jf);
130f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  }
131f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
132f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  CHECK_LT(program_.size(), static_cast<size_t>(BPF_MAXINSNS));
133f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  CHECK_EQ(program_.size(), equivalent_.size());
134f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
135f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  Node res = program_.size();
136f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  program_.push_back(sock_filter{
137f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko      code, static_cast<uint8_t>(jt), static_cast<uint8_t>(jf), k});
138f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  equivalent_.push_back(res);
139f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  return res;
140f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko}
141f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
142f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenkosize_t CodeGen::Offset(Node target) const {
143f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  CHECK_LT(target, program_.size()) << "Bogus offset target node";
144f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  return (program_.size() - 1) - target;
145f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko}
146f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
147f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko}  // namespace sandbox
148