1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "sandbox/linux/bpf_dsl/codegen.h"
6
7#include <stddef.h>
8#include <stdint.h>
9
10#include <map>
11#include <utility>
12#include <vector>
13
14#include "base/macros.h"
15#include "base/md5.h"
16#include "base/strings/string_piece.h"
17#include "sandbox/linux/system_headers/linux_filter.h"
18#include "testing/gtest/include/gtest/gtest.h"
19
20namespace sandbox {
21namespace {
22
23// Hash provides an abstraction for building "hash trees" from BPF
24// control flow graphs, and efficiently identifying equivalent graphs.
25//
26// For simplicity, we use MD5, because base happens to provide a
27// convenient API for its use. However, any collision-resistant hash
28// should suffice.
29class Hash {
30 public:
31  static const Hash kZero;
32
33  Hash() : digest_() {}
34
35  Hash(uint16_t code,
36       uint32_t k,
37       const Hash& jt = kZero,
38       const Hash& jf = kZero)
39      : digest_() {
40    base::MD5Context ctx;
41    base::MD5Init(&ctx);
42    HashValue(&ctx, code);
43    HashValue(&ctx, k);
44    HashValue(&ctx, jt);
45    HashValue(&ctx, jf);
46    base::MD5Final(&digest_, &ctx);
47  }
48
49  Hash(const Hash& hash) = default;
50  Hash& operator=(const Hash& rhs) = default;
51
52  friend bool operator==(const Hash& lhs, const Hash& rhs) {
53    return lhs.Base16() == rhs.Base16();
54  }
55  friend bool operator!=(const Hash& lhs, const Hash& rhs) {
56    return !(lhs == rhs);
57  }
58
59 private:
60  template <typename T>
61  void HashValue(base::MD5Context* ctx, const T& value) {
62    base::MD5Update(ctx,
63                    base::StringPiece(reinterpret_cast<const char*>(&value),
64                                      sizeof(value)));
65  }
66
67  std::string Base16() const {
68    return base::MD5DigestToBase16(digest_);
69  }
70
71  base::MD5Digest digest_;
72};
73
74const Hash Hash::kZero;
75
76// Sanity check that equality and inequality work on Hash as required.
77TEST(CodeGen, HashSanity) {
78  std::vector<Hash> hashes;
79
80  // Push a bunch of logically distinct hashes.
81  hashes.push_back(Hash::kZero);
82  for (int i = 0; i < 4; ++i) {
83    hashes.push_back(Hash(i & 1, i & 2));
84  }
85  for (int i = 0; i < 16; ++i) {
86    hashes.push_back(Hash(i & 1, i & 2, Hash(i & 4, i & 8)));
87  }
88  for (int i = 0; i < 64; ++i) {
89    hashes.push_back(
90        Hash(i & 1, i & 2, Hash(i & 4, i & 8), Hash(i & 16, i & 32)));
91  }
92
93  for (const Hash& a : hashes) {
94    for (const Hash& b : hashes) {
95      // Hashes should equal themselves, but not equal all others.
96      if (&a == &b) {
97        EXPECT_EQ(a, b);
98      } else {
99        EXPECT_NE(a, b);
100      }
101    }
102  }
103}
104
105// ProgramTest provides a fixture for writing compiling sample
106// programs with CodeGen and verifying the linearized output matches
107// the input DAG.
108class ProgramTest : public ::testing::Test {
109 protected:
110  ProgramTest() : gen_(), node_hashes_() {}
111
112  // MakeInstruction calls CodeGen::MakeInstruction() and associated
113  // the returned address with a hash of the instruction.
114  CodeGen::Node MakeInstruction(uint16_t code,
115                                uint32_t k,
116                                CodeGen::Node jt = CodeGen::kNullNode,
117                                CodeGen::Node jf = CodeGen::kNullNode) {
118    CodeGen::Node res = gen_.MakeInstruction(code, k, jt, jf);
119    EXPECT_NE(CodeGen::kNullNode, res);
120
121    Hash digest(code, k, Lookup(jt), Lookup(jf));
122    auto it = node_hashes_.insert(std::make_pair(res, digest));
123    EXPECT_EQ(digest, it.first->second);
124
125    return res;
126  }
127
128  // RunTest compiles the program and verifies that the output matches
129  // what is expected.  It should be called at the end of each program
130  // test case.
131  void RunTest(CodeGen::Node head) {
132    // Compile the program
133    CodeGen::Program program = gen_.Compile(head);
134
135    // Walk the program backwards, and compute the hash for each instruction.
136    std::vector<Hash> prog_hashes(program.size());
137    for (size_t i = program.size(); i > 0; --i) {
138      const sock_filter& insn = program.at(i - 1);
139      Hash& hash = prog_hashes.at(i - 1);
140
141      if (BPF_CLASS(insn.code) == BPF_JMP) {
142        if (BPF_OP(insn.code) == BPF_JA) {
143          // The compiler adds JA instructions as needed, so skip them.
144          hash = prog_hashes.at(i + insn.k);
145        } else {
146          hash = Hash(insn.code, insn.k, prog_hashes.at(i + insn.jt),
147                      prog_hashes.at(i + insn.jf));
148        }
149      } else if (BPF_CLASS(insn.code) == BPF_RET) {
150        hash = Hash(insn.code, insn.k);
151      } else {
152        hash = Hash(insn.code, insn.k, prog_hashes.at(i));
153      }
154    }
155
156    EXPECT_EQ(Lookup(head), prog_hashes.at(0));
157  }
158
159 private:
160  const Hash& Lookup(CodeGen::Node next) const {
161    if (next == CodeGen::kNullNode) {
162      return Hash::kZero;
163    }
164    auto it = node_hashes_.find(next);
165    if (it == node_hashes_.end()) {
166      ADD_FAILURE() << "No hash found for node " << next;
167      return Hash::kZero;
168    }
169    return it->second;
170  }
171
172  CodeGen gen_;
173  std::map<CodeGen::Node, Hash> node_hashes_;
174
175  DISALLOW_COPY_AND_ASSIGN(ProgramTest);
176};
177
178TEST_F(ProgramTest, OneInstruction) {
179  // Create the most basic valid BPF program:
180  //    RET 0
181  CodeGen::Node head = MakeInstruction(BPF_RET + BPF_K, 0);
182  RunTest(head);
183}
184
185TEST_F(ProgramTest, SimpleBranch) {
186  // Create a program with a single branch:
187  //    JUMP if eq 42 then $0 else $1
188  // 0: RET 1
189  // 1: RET 0
190  CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42,
191                                       MakeInstruction(BPF_RET + BPF_K, 1),
192                                       MakeInstruction(BPF_RET + BPF_K, 0));
193  RunTest(head);
194}
195
196TEST_F(ProgramTest, AtypicalBranch) {
197  // Create a program with a single branch:
198  //    JUMP if eq 42 then $0 else $0
199  // 0: RET 0
200
201  CodeGen::Node ret = MakeInstruction(BPF_RET + BPF_K, 0);
202  CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, ret, ret);
203
204  // N.B.: As the instructions in both sides of the branch are already
205  //       the same object, we do not actually have any "mergeable" branches.
206  //       This needs to be reflected in our choice of "flags".
207  RunTest(head);
208}
209
210TEST_F(ProgramTest, Complex) {
211  // Creates a basic BPF program that we'll use to test some of the code:
212  //    JUMP if eq 42 the $0 else $1     (insn6)
213  // 0: LD 23                            (insn5)
214  // 1: JUMP if eq 42 then $2 else $4    (insn4)
215  // 2: JUMP to $3                       (insn2)
216  // 3: LD 42                            (insn1)
217  //    RET 42                           (insn0)
218  // 4: LD 42                            (insn3)
219  //    RET 42                           (insn3+)
220  CodeGen::Node insn0 = MakeInstruction(BPF_RET + BPF_K, 42);
221  CodeGen::Node insn1 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, insn0);
222  CodeGen::Node insn2 = insn1;  // Implicit JUMP
223
224  // We explicitly duplicate instructions to test that they're merged.
225  CodeGen::Node insn3 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42,
226                                        MakeInstruction(BPF_RET + BPF_K, 42));
227  EXPECT_EQ(insn2, insn3);
228
229  CodeGen::Node insn4 =
230      MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn2, insn3);
231  CodeGen::Node insn5 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 23, insn4);
232
233  // Force a basic block that ends in neither a jump instruction nor a return
234  // instruction. It only contains "insn5". This exercises one of the less
235  // common code paths in the topo-sort algorithm.
236  // This also gives us a diamond-shaped pattern in our graph, which stresses
237  // another aspect of the topo-sort algorithm (namely, the ability to
238  // correctly count the incoming branches for subtrees that are not disjunct).
239  CodeGen::Node insn6 =
240      MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn5, insn4);
241
242  RunTest(insn6);
243}
244
245TEST_F(ProgramTest, ConfusingTails) {
246  // This simple program demonstrates https://crbug.com/351103/
247  // The two "LOAD 0" instructions are blocks of their own. MergeTails() could
248  // be tempted to merge them since they are the same. However, they are
249  // not mergeable because they fall-through to non semantically equivalent
250  // blocks.
251  // Without the fix for this bug, this program should trigger the check in
252  // CompileAndCompare: the serialized graphs from the program and its compiled
253  // version will differ.
254  //
255  //  0) LOAD 1  // ???
256  //  1) if A == 0x1; then JMP 2 else JMP 3
257  //  2) LOAD 0  // System call number
258  //  3) if A == 0x2; then JMP 4 else JMP 5
259  //  4) LOAD 0  // System call number
260  //  5) if A == 0x1; then JMP 6 else JMP 7
261  //  6) RET 0
262  //  7) RET 1
263
264  CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1);
265  CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0);
266  CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7);
267  CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5);
268  CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
269  CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3);
270  CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
271  CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
272
273  RunTest(i0);
274}
275
276TEST_F(ProgramTest, ConfusingTailsBasic) {
277  // Without the fix for https://crbug.com/351103/, (see
278  // SampleProgramConfusingTails()), this would generate a cyclic graph and
279  // crash as the two "LOAD 0" instructions would get merged.
280  //
281  // 0) LOAD 1  // ???
282  // 1) if A == 0x1; then JMP 2 else JMP 3
283  // 2) LOAD 0  // System call number
284  // 3) if A == 0x2; then JMP 4 else JMP 5
285  // 4) LOAD 0  // System call number
286  // 5) RET 1
287
288  CodeGen::Node i5 = MakeInstruction(BPF_RET + BPF_K, 1);
289  CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5);
290  CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
291  CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3);
292  CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
293  CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
294
295  RunTest(i0);
296}
297
298TEST_F(ProgramTest, ConfusingTailsMergeable) {
299  // This is similar to SampleProgramConfusingTails(), except that
300  // instructions 2 and 4 are now RET instructions.
301  // In PointerCompare(), this exercises the path where two blocks are of the
302  // same length and identical and the last instruction is a JMP or RET, so the
303  // following blocks don't need to be looked at and the blocks are mergeable.
304  //
305  // 0) LOAD 1  // ???
306  // 1) if A == 0x1; then JMP 2 else JMP 3
307  // 2) RET 42
308  // 3) if A == 0x2; then JMP 4 else JMP 5
309  // 4) RET 42
310  // 5) if A == 0x1; then JMP 6 else JMP 7
311  // 6) RET 0
312  // 7) RET 1
313
314  CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1);
315  CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0);
316  CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7);
317  CodeGen::Node i4 = MakeInstruction(BPF_RET + BPF_K, 42);
318  CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5);
319  CodeGen::Node i2 = MakeInstruction(BPF_RET + BPF_K, 42);
320  CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3);
321  CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1);
322
323  RunTest(i0);
324}
325
326TEST_F(ProgramTest, InstructionFolding) {
327  // Check that simple instructions are folded as expected.
328  CodeGen::Node a = MakeInstruction(BPF_RET + BPF_K, 0);
329  EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0));
330  CodeGen::Node b = MakeInstruction(BPF_RET + BPF_K, 1);
331  EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0));
332  EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1));
333  EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1));
334
335  // Check that complex sequences are folded too.
336  CodeGen::Node c =
337      MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0,
338                      MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b));
339  EXPECT_EQ(c, MakeInstruction(
340                   BPF_LD + BPF_W + BPF_ABS, 0,
341                   MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b)));
342
343  RunTest(c);
344}
345
346TEST_F(ProgramTest, FarBranches) {
347  // BPF instructions use 8-bit fields for branch offsets, which means
348  // branch targets must be within 255 instructions of the branch
349  // instruction. CodeGen abstracts away this detail by inserting jump
350  // instructions as needed, which we test here by generating programs
351  // that should trigger any interesting boundary conditions.
352
353  // Populate with 260 initial instruction nodes.
354  std::vector<CodeGen::Node> nodes;
355  nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0));
356  for (size_t i = 1; i < 260; ++i) {
357    nodes.push_back(
358        MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back()));
359  }
360
361  // Exhaustively test branch offsets near BPF's limits.
362  for (size_t jt = 250; jt < 260; ++jt) {
363    for (size_t jf = 250; jf < 260; ++jf) {
364      nodes.push_back(MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0,
365                                      nodes.rbegin()[jt], nodes.rbegin()[jf]));
366      RunTest(nodes.back());
367    }
368  }
369}
370
371TEST_F(ProgramTest, JumpReuse) {
372  // As a code size optimization, we try to reuse jumps when possible
373  // instead of emitting new ones. Here we make sure that optimization
374  // is working as intended.
375  //
376  // NOTE: To simplify testing, we rely on implementation details
377  // about what CodeGen::Node values indicate (i.e., vector indices),
378  // but CodeGen users should treat them as opaque values.
379
380  // Populate with 260 initial instruction nodes.
381  std::vector<CodeGen::Node> nodes;
382  nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0));
383  for (size_t i = 1; i < 260; ++i) {
384    nodes.push_back(
385        MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back()));
386  }
387
388  // Branching to nodes[0] and nodes[1] should require 3 new
389  // instructions: two far jumps plus the branch itself.
390  CodeGen::Node one =
391      MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0, nodes[0], nodes[1]);
392  EXPECT_EQ(nodes.back() + 3, one);  // XXX: Implementation detail!
393  RunTest(one);
394
395  // Branching again to the same target nodes should require only one
396  // new instruction, as we can reuse the previous branch's jumps.
397  CodeGen::Node two =
398      MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, nodes[0], nodes[1]);
399  EXPECT_EQ(one + 1, two);  // XXX: Implementation detail!
400  RunTest(two);
401}
402
403}  // namespace
404}  // namespace sandbox
405