1f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// Use of this source code is governed by a BSD-style license that can be
3f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// found in the LICENSE file.
4f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
5f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko#ifndef SANDBOX_LINUX_BPF_DSL_CODEGEN_H__
6f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko#define SANDBOX_LINUX_BPF_DSL_CODEGEN_H__
7f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
8f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko#include <stddef.h>
9f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko#include <stdint.h>
10f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
11f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko#include <map>
12f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko#include <vector>
13f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
14f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko#include "base/macros.h"
15f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko#include "base/tuple.h"
16f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko#include "sandbox/sandbox_export.h"
17f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
18f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenkostruct sock_filter;
19f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
20f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenkonamespace sandbox {
21f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
22f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// The code generator implements a basic assembler that can convert a
23f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// graph of BPF instructions into a well-formed array of BPF
24f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// instructions. Most notably, it ensures that jumps are always
25f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// forward and don't exceed the limit of 255 instructions imposed by
26f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// the instruction set.
27f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//
28f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// Callers would typically create a new CodeGen object and then use it
29f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// to build a DAG of instruction nodes. They'll eventually call
30f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko// Compile() to convert this DAG to a Program.
31f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//
32f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//   CodeGen gen;
33f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//   CodeGen::Node allow, branch, dag;
34f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//
35f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//   allow =
36f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//     gen.MakeInstruction(BPF_RET+BPF_K,
37f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//                         ErrorCode(ErrorCode::ERR_ALLOWED).err()));
38f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//   branch =
39f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//     gen.MakeInstruction(BPF_JMP+BPF_EQ+BPF_K, __NR_getpid,
40f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//                         Trap(GetPidHandler, NULL), allow);
41f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//   dag =
42f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//     gen.MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
43f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//                         offsetof(struct arch_seccomp_data, nr), branch);
44f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//
45f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//   // Simplified code follows; in practice, it is important to avoid calling
46f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//   // any C++ destructors after starting the sandbox.
4724854748fba09df2a29f0d08d558c3acea70e7a1Alex Vakulenko//   CodeGen::Program program = gen.Compile(dag);
48f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//   const struct sock_fprog prog = {
4924854748fba09df2a29f0d08d558c3acea70e7a1Alex Vakulenko//     static_cast<unsigned short>(program.size()), &program[0] };
50f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//   prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
51f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko//
52f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenkoclass SANDBOX_EXPORT CodeGen {
53f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko public:
54f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // A vector of BPF instructions that need to be installed as a filter
55f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // program in the kernel.
56f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  typedef std::vector<struct sock_filter> Program;
57f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
58f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // Node represents a node within the instruction DAG being compiled.
59f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  using Node = Program::size_type;
60f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
61f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // kNullNode represents the "null" node; i.e., the reserved node
62f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // value guaranteed to not equal any actual nodes.
63f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  static const Node kNullNode = -1;
64f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
65f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  CodeGen();
66f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  ~CodeGen();
67f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
68f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // MakeInstruction creates a node representing the specified
69f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // instruction, or returns and existing equivalent node if one
70f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // exists. For details on the possible parameters refer to
71f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // https://www.kernel.org/doc/Documentation/networking/filter.txt.
72f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // TODO(mdempsky): Reconsider using default arguments here.
73f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  Node MakeInstruction(uint16_t code,
74f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko                       uint32_t k,
75f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko                       Node jt = kNullNode,
76f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko                       Node jf = kNullNode);
77f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
78f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // Compile linearizes the instruction DAG rooted at |head| into a
79f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // program that can be executed by a BPF virtual machine.
8024854748fba09df2a29f0d08d558c3acea70e7a1Alex Vakulenko  Program Compile(Node head);
81f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
82f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko private:
83f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  using MemoKey = base::Tuple<uint16_t, uint32_t, Node, Node>;
84f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  struct MemoKeyLess {
85f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko    bool operator()(const MemoKey& lhs, const MemoKey& rhs) const;
86f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  };
87f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
88f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // AppendInstruction adds a new instruction, ensuring that |jt| and
89f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // |jf| are within range as necessary for |code|.
90f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  Node AppendInstruction(uint16_t code, uint32_t k, Node jt, Node jf);
91f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
92f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // WithinRange returns a node equivalent to |next| that is at most
93f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // |range| instructions away from the (logical) beginning of the
94f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // program.
95f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  Node WithinRange(Node next, size_t range);
96f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
97f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // Append appends a new instruction to the physical end (i.e.,
98f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // logical beginning) of |program_|.
99f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  Node Append(uint16_t code, uint32_t k, size_t jt, size_t jf);
100f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
101f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // Offset returns how many instructions exist in |program_| after |target|.
102f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  size_t Offset(Node target) const;
103f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
104f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // NOTE: program_ is the compiled program in *reverse*, so that
105f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // indices remain stable as we add instructions.
106f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  Program program_;
107f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
108f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // equivalent_ stores the most recent semantically-equivalent node for each
109f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // instruction in program_. A node is defined as semantically-equivalent to N
110f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // if it has the same instruction code and constant as N and its successor
111f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // nodes (if any) are semantically-equivalent to N's successor nodes, or
112f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  // if it's an unconditional jump to a node semantically-equivalent to N.
113f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  std::vector<Node> equivalent_;
114f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
115f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  std::map<MemoKey, Node, MemoKeyLess> memos_;
116f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
117f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko  DISALLOW_COPY_AND_ASSIGN(CodeGen);
118f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko};
119f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
120f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko}  // namespace sandbox
121f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko
122f6024733c0d1eed88f68520b5e6a20b96e212ad6Alex Vakulenko#endif  // SANDBOX_LINUX_BPF_DSL_CODEGEN_H__
123