1f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// Use of this source code is governed by a BSD-style license that can be 3f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// found in the LICENSE file. 4f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 5f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko#include "sandbox/linux/bpf_dsl/codegen.h" 6f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 724854748fba09df2a29f0d08d558c3acea70e7a1Alex Vakulenko#include <stddef.h> 824854748fba09df2a29f0d08d558c3acea70e7a1Alex Vakulenko#include <stdint.h> 924854748fba09df2a29f0d08d558c3acea70e7a1Alex Vakulenko 10f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko#include <limits> 11f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko#include <utility> 12f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 13f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko#include "base/logging.h" 14f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko#include "sandbox/linux/system_headers/linux_filter.h" 15f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 16f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// This CodeGen implementation strives for simplicity while still 17f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// generating acceptable BPF programs under typical usage patterns 18f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// (e.g., by PolicyCompiler). 19f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// 20f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// The key to its simplicity is that BPF programs only support forward 21f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// jumps/branches, which allows constraining the DAG construction API 22f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// to make instruction nodes immutable. Immutable nodes admits a 23f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// simple greedy approach of emitting new instructions as needed and 24f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// then reusing existing ones that have already been emitted. This 25f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// cleanly avoids any need to compute basic blocks or apply 26f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// topological sorting because the API effectively sorts instructions 27f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// for us (e.g., before MakeInstruction() can be called to emit a 28f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// branch instruction, it must have already been called for each 29f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// branch path). 30f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// 31f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// This greedy algorithm is not without (theoretical) weakness though: 32f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// 33f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// 1. In the general case, we don't eliminate dead code. If needed, 34f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// we could trace back through the program in Compile() and elide 35f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// any unneeded instructions, but in practice we only emit live 36f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// instructions anyway. 37f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// 38f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// 2. By not dividing instructions into basic blocks and sorting, we 39f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// lose an opportunity to move non-branch/non-return instructions 40f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// adjacent to their successor instructions, which means we might 41f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// need to emit additional jumps. But in practice, they'll 42f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// already be nearby as long as callers don't go out of their way 43f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// to interleave MakeInstruction() calls for unrelated code 44f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// sequences. 45f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 46f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenkonamespace sandbox { 47f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 48f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// kBranchRange is the maximum value that can be stored in 49f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// sock_filter's 8-bit jt and jf fields. 50f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenkoconst size_t kBranchRange = std::numeric_limits<uint8_t>::max(); 51f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 52f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenkoconst CodeGen::Node CodeGen::kNullNode; 53f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 54f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex VakulenkoCodeGen::CodeGen() : program_(), equivalent_(), memos_() { 55f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko} 56f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 57f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex VakulenkoCodeGen::~CodeGen() { 58f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko} 59f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 6024854748fba09df2a29f0d08d558c3acea70e7a1Alex VakulenkoCodeGen::Program CodeGen::Compile(CodeGen::Node head) { 6124854748fba09df2a29f0d08d558c3acea70e7a1Alex Vakulenko return Program(program_.rbegin() + Offset(head), program_.rend()); 62f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko} 63f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 64f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex VakulenkoCodeGen::Node CodeGen::MakeInstruction(uint16_t code, 65f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko uint32_t k, 66f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko Node jt, 67f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko Node jf) { 68f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko // To avoid generating redundant code sequences, we memoize the 69f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko // results from AppendInstruction(). 70f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko auto res = memos_.insert(std::make_pair(MemoKey(code, k, jt, jf), kNullNode)); 71f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko CodeGen::Node* node = &res.first->second; 72f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko if (res.second) { // Newly inserted memo entry. 73f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko *node = AppendInstruction(code, k, jt, jf); 74f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko } 75f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko return *node; 76f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko} 77f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 78f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex VakulenkoCodeGen::Node CodeGen::AppendInstruction(uint16_t code, 79f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko uint32_t k, 80f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko Node jt, 81f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko Node jf) { 82f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko if (BPF_CLASS(code) == BPF_JMP) { 83f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko CHECK_NE(BPF_JA, BPF_OP(code)) << "CodeGen inserts JAs as needed"; 84f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 85f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko // Optimally adding jumps is rather tricky, so we use a quick 86f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko // approximation: by artificially reducing |jt|'s range, |jt| will 87f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko // stay within its true range even if we add a jump for |jf|. 88f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko jt = WithinRange(jt, kBranchRange - 1); 89f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko jf = WithinRange(jf, kBranchRange); 90f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko return Append(code, k, Offset(jt), Offset(jf)); 91f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko } 92f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 93f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko CHECK_EQ(kNullNode, jf) << "Non-branch instructions shouldn't provide jf"; 94f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko if (BPF_CLASS(code) == BPF_RET) { 95f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko CHECK_EQ(kNullNode, jt) << "Return instructions shouldn't provide jt"; 96f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko } else { 97f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko // For non-branch/non-return instructions, execution always 98f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko // proceeds to the next instruction; so we need to arrange for 99f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko // that to be |jt|. 100f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko jt = WithinRange(jt, 0); 101f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko CHECK_EQ(0U, Offset(jt)) << "ICE: Failed to setup next instruction"; 102f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko } 103f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko return Append(code, k, 0, 0); 104f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko} 105f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 106f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex VakulenkoCodeGen::Node CodeGen::WithinRange(Node target, size_t range) { 107f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko // Just use |target| if it's already within range. 108f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko if (Offset(target) <= range) { 109f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko return target; 110f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko } 111f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 112f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko // Alternatively, look for an equivalent instruction within range. 113f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko if (Offset(equivalent_.at(target)) <= range) { 114f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko return equivalent_.at(target); 115f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko } 116f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 117f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko // Otherwise, fall back to emitting a jump instruction. 118f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko Node jump = Append(BPF_JMP | BPF_JA, Offset(target), 0, 0); 119f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko equivalent_.at(target) = jump; 120f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko return jump; 121f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko} 122f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 123f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex VakulenkoCodeGen::Node CodeGen::Append(uint16_t code, uint32_t k, size_t jt, size_t jf) { 124f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) { 125f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko CHECK_LE(jt, kBranchRange); 126f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko CHECK_LE(jf, kBranchRange); 127f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko } else { 128f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko CHECK_EQ(0U, jt); 129f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko CHECK_EQ(0U, jf); 130f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko } 131f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 132f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko CHECK_LT(program_.size(), static_cast<size_t>(BPF_MAXINSNS)); 133f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko CHECK_EQ(program_.size(), equivalent_.size()); 134f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 135f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko Node res = program_.size(); 136f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko program_.push_back(sock_filter{ 137f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko code, static_cast<uint8_t>(jt), static_cast<uint8_t>(jf), k}); 138f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko equivalent_.push_back(res); 139f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko return res; 140f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko} 141f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 142f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenkosize_t CodeGen::Offset(Node target) const { 143f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko CHECK_LT(target, program_.size()) << "Bogus offset target node"; 144f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko return (program_.size() - 1) - target; 145f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko} 146f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko 147f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko} // namespace sandbox 148