// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "sandbox/linux/bpf_dsl/codegen.h"

#include <stddef.h>
#include <stdint.h>

#include <limits>
#include <utility>

#include "base/logging.h"
#include "sandbox/linux/system_headers/linux_filter.h"

// This CodeGen implementation strives for simplicity while still
// generating acceptable BPF programs under typical usage patterns
// (e.g., by PolicyCompiler).
//
// The key to its simplicity is that BPF programs only support forward
// jumps/branches, which allows constraining the DAG construction API
// to make instruction nodes immutable. Immutable nodes admit a
// simple greedy approach of emitting new instructions as needed and
// then reusing existing ones that have already been emitted. This
// cleanly avoids any need to compute basic blocks or apply
// topological sorting because the API effectively sorts instructions
// for us (e.g., before MakeInstruction() can be called to emit a
// branch instruction, it must have already been called for each
// branch path).
//
// This greedy algorithm is not without (theoretical) weaknesses, though:
//
//   1. In the general case, we don't eliminate dead code. If needed,
//      we could trace back through the program in Compile() and elide
//      any unneeded instructions, but in practice we only emit live
//      instructions anyway.
//
//   2. By not dividing instructions into basic blocks and sorting, we
//      lose an opportunity to move non-branch/non-return instructions
//      adjacent to their successor instructions, which means we might
//      need to emit additional jumps. But in practice, they'll
//      already be nearby as long as callers don't go out of their way
//      to interleave MakeInstruction() calls for unrelated code
//      sequences.
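//
// To make the bottom-up construction order concrete, a tiny filter could
// be assembled roughly as follows. This is a hypothetical sketch only:
// kAllowValue, kTrapValue, kSomeSyscallNr and kSyscallNrOffset are
// illustrative placeholders rather than constants defined here, and the
// two-argument calls assume the kNullNode defaults for jt/jf from
// codegen.h.
//
//   CodeGen gen;
//   // Both branch targets must exist before the branch itself.
//   CodeGen::Node allow = gen.MakeInstruction(BPF_RET | BPF_K, kAllowValue);
//   CodeGen::Node trap = gen.MakeInstruction(BPF_RET | BPF_K, kTrapValue);
//   // The conditional jump references the already-emitted targets.
//   CodeGen::Node branch = gen.MakeInstruction(
//       BPF_JMP | BPF_JEQ | BPF_K, kSomeSyscallNr, allow, trap);
//   // The load falls through to the branch, so the branch is its |jt|.
//   CodeGen::Node head = gen.MakeInstruction(
//       BPF_LD | BPF_W | BPF_ABS, kSyscallNrOffset, branch);
//   CodeGen::Program program = gen.Compile(head);
//
// Because MakeInstruction() memoizes on (code, k, jt, jf), repeating any
// of these calls with identical arguments returns the existing node
// instead of emitting a duplicate instruction.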

namespace sandbox {

// kBranchRange is the maximum value that can be stored in
// sock_filter's 8-bit jt and jf fields.
const size_t kBranchRange = std::numeric_limits<uint8_t>::max();

const CodeGen::Node CodeGen::kNullNode;

CodeGen::CodeGen() : program_(), equivalent_(), memos_() {
}

CodeGen::~CodeGen() {
}

CodeGen::Program CodeGen::Compile(CodeGen::Node head) {
  return Program(program_.rbegin() + Offset(head), program_.rend());
}

CodeGen::Node CodeGen::MakeInstruction(uint16_t code,
                                       uint32_t k,
                                       Node jt,
                                       Node jf) {
  // To avoid generating redundant code sequences, we memoize the
  // results from AppendInstruction().
  auto res = memos_.insert(std::make_pair(MemoKey(code, k, jt, jf), kNullNode));
  CodeGen::Node* node = &res.first->second;
  if (res.second) {  // Newly inserted memo entry.
    *node = AppendInstruction(code, k, jt, jf);
  }
  return *node;
}

CodeGen::Node CodeGen::AppendInstruction(uint16_t code,
                                         uint32_t k,
                                         Node jt,
                                         Node jf) {
  if (BPF_CLASS(code) == BPF_JMP) {
    CHECK_NE(BPF_JA, BPF_OP(code)) << "CodeGen inserts JAs as needed";

    // Optimally adding jumps is rather tricky, so we use a quick
    // approximation: by artificially reducing |jt|'s range, |jt| will
    // stay within its true range even if we add a jump for |jf|.
    jt = WithinRange(jt, kBranchRange - 1);
    jf = WithinRange(jf, kBranchRange);
    return Append(code, k, Offset(jt), Offset(jf));
  }

  CHECK_EQ(kNullNode, jf) << "Non-branch instructions shouldn't provide jf";
  if (BPF_CLASS(code) == BPF_RET) {
    CHECK_EQ(kNullNode, jt) << "Return instructions shouldn't provide jt";
  } else {
    // For non-branch/non-return instructions, execution always
    // proceeds to the next instruction; so we need to arrange for
    // that to be |jt|.
    jt = WithinRange(jt, 0);
    CHECK_EQ(0U, Offset(jt)) << "ICE: Failed to setup next instruction";
  }
  return Append(code, k, 0, 0);
}

CodeGen::Node CodeGen::WithinRange(Node target, size_t range) {
  // Just use |target| if it's already within range.
  if (Offset(target) <= range) {
    return target;
  }

  // Alternatively, look for an equivalent instruction within range.
  if (Offset(equivalent_.at(target)) <= range) {
    return equivalent_.at(target);
  }

  // Otherwise, fall back to emitting a jump instruction.
  Node jump = Append(BPF_JMP | BPF_JA, Offset(target), 0, 0);
  equivalent_.at(target) = jump;
  return jump;
}

CodeGen::Node CodeGen::Append(uint16_t code, uint32_t k, size_t jt, size_t jf) {
  if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
    CHECK_LE(jt, kBranchRange);
    CHECK_LE(jf, kBranchRange);
  } else {
    CHECK_EQ(0U, jt);
    CHECK_EQ(0U, jf);
  }

  CHECK_LT(program_.size(), static_cast<size_t>(BPF_MAXINSNS));
  CHECK_EQ(program_.size(), equivalent_.size());

  Node res = program_.size();
  program_.push_back(sock_filter{
      code, static_cast<uint8_t>(jt), static_cast<uint8_t>(jf), k});
  equivalent_.push_back(res);
  return res;
}

size_t CodeGen::Offset(Node target) const {
  CHECK_LT(target, program_.size()) << "Bogus offset target node";
  return (program_.size() - 1) - target;
}

}  // namespace sandbox