1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <algorithm>
6#include <set>
7#include <vector>
8
9#include "sandbox/linux/seccomp-bpf/codegen.h"
10#include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
11#include "sandbox/linux/tests/unit_tests.h"
12
13namespace playground2 {
14
15class SandboxUnittestHelper : public Sandbox {
16 public:
17  typedef Sandbox::Program Program;
18};
19
20// We want to access some of the private methods in the code generator. We
21// do so by defining a "friend" that makes these methods public for us.
22class CodeGenUnittestHelper : public CodeGen {
23 public:
24  void FindBranchTargets(const Instruction& instructions,
25                         BranchTargets *branch_targets) {
26    CodeGen::FindBranchTargets(instructions, branch_targets);
27  }
28
29  BasicBlock *CutGraphIntoBasicBlocks(Instruction *insns,
30                                      const BranchTargets& branch_targets,
31                                      TargetsToBlocks *blocks) {
32    return CodeGen::CutGraphIntoBasicBlocks(insns, branch_targets, blocks);
33  }
34
35  void MergeTails(TargetsToBlocks *blocks) {
36    CodeGen::MergeTails(blocks);
37  }
38};
39
40enum { NO_FLAGS            = 0x0000,
41       HAS_MERGEABLE_TAILS = 0x0001,
42};
43
44Instruction *SampleProgramOneInstruction(CodeGen *codegen, int *flags) {
45  // Create the most basic valid BPF program:
46  //    RET ERR_ALLOWED
47  *flags = NO_FLAGS;
48  return codegen->MakeInstruction(BPF_RET+BPF_K,
49                                  ErrorCode(ErrorCode::ERR_ALLOWED));
50}
51
52Instruction *SampleProgramSimpleBranch(CodeGen *codegen, int *flags) {
53  // Create a program with a single branch:
54  //    JUMP if eq 42 then $0 else $1
55  // 0: RET EPERM
56  // 1: RET ERR_ALLOWED
57  *flags = NO_FLAGS;
58  return codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42,
59         codegen->MakeInstruction(BPF_RET+BPF_K,
60                                  ErrorCode(EPERM)),
61         codegen->MakeInstruction(BPF_RET+BPF_K,
62                                  ErrorCode(ErrorCode::ERR_ALLOWED)));
63}
64
65Instruction *SampleProgramAtypicalBranch(CodeGen *codegen, int *flags) {
66  // Create a program with a single branch:
67  //    JUMP if eq 42 then $0 else $0
68  // 0: RET ERR_ALLOWED
69
70  // N.B.: As the instructions in both sides of the branch are already
71  //       the same object, we do not actually have any "mergeable" branches.
72  //       This needs to be reflected in our choice of "flags".
73  *flags = NO_FLAGS;
74
75  Instruction *ret =
76    codegen->MakeInstruction(BPF_RET+BPF_K,
77                             ErrorCode(ErrorCode::ERR_ALLOWED));
78  return codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42, ret, ret);
79}
80
81Instruction *SampleProgramComplex(CodeGen *codegen, int *flags) {
82  // Creates a basic BPF program that we'll use to test some of the code:
83  //    JUMP if eq 42 the $0 else $1     (insn6)
84  // 0: LD 23                            (insn5)
85  // 1: JUMP if eq 42 then $2 else $4    (insn4)
86  // 2: JUMP to $3                       (insn1)
87  // 3: LD 42                            (insn0)
88  //    RET ErrorCode(42)                (insn2)
89  // 4: LD 42                            (insn3)
90  //    RET ErrorCode(42)                (insn3+)
91  *flags = HAS_MERGEABLE_TAILS;
92
93  Instruction *insn0 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 42);
94  SANDBOX_ASSERT(insn0);
95  SANDBOX_ASSERT(insn0->code == BPF_LD+BPF_W+BPF_ABS);
96  SANDBOX_ASSERT(insn0->k == 42);
97  SANDBOX_ASSERT(insn0->next == NULL);
98
99  Instruction *insn1 = codegen->MakeInstruction(BPF_JMP+BPF_JA, 0, insn0);
100  SANDBOX_ASSERT(insn1);
101  SANDBOX_ASSERT(insn1->code == BPF_JMP+BPF_JA);
102  SANDBOX_ASSERT(insn1->jt_ptr == insn0);
103
104  Instruction *insn2 = codegen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(42));
105  SANDBOX_ASSERT(insn2);
106  SANDBOX_ASSERT(insn2->code == BPF_RET+BPF_K);
107  SANDBOX_ASSERT(insn2->next == NULL);
108
109  // We explicitly duplicate instructions so that MergeTails() can coalesce
110  // them later.
111  Instruction *insn3 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS, 42,
112    codegen->MakeInstruction(BPF_RET+BPF_K, ErrorCode(42)));
113
114  Instruction *insn4 = codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42,
115                                                insn1, insn3);
116  SANDBOX_ASSERT(insn4);
117  SANDBOX_ASSERT(insn4->code == BPF_JMP+BPF_JEQ+BPF_K);
118  SANDBOX_ASSERT(insn4->k == 42);
119  SANDBOX_ASSERT(insn4->jt_ptr == insn1);
120  SANDBOX_ASSERT(insn4->jf_ptr == insn3);
121
122  codegen->JoinInstructions(insn0, insn2);
123  SANDBOX_ASSERT(insn0->next == insn2);
124
125  Instruction *insn5 = codegen->MakeInstruction(BPF_LD+BPF_W+BPF_ABS,
126                                                23, insn4);
127  SANDBOX_ASSERT(insn5);
128  SANDBOX_ASSERT(insn5->code == BPF_LD+BPF_W+BPF_ABS);
129  SANDBOX_ASSERT(insn5->k == 23);
130  SANDBOX_ASSERT(insn5->next == insn4);
131
132  // Force a basic block that ends in neither a jump instruction nor a return
133  // instruction. It only contains "insn5". This exercises one of the less
134  // common code paths in the topo-sort algorithm.
135  // This also gives us a diamond-shaped pattern in our graph, which stresses
136  // another aspect of the topo-sort algorithm (namely, the ability to
137  // correctly count the incoming branches for subtrees that are not disjunct).
138  Instruction *insn6 = codegen->MakeInstruction(BPF_JMP+BPF_JEQ+BPF_K, 42,
139                                                insn5, insn4);
140
141  return insn6;
142}
143
144void ForAllPrograms(void (*test)(CodeGenUnittestHelper *, Instruction *, int)){
145  Instruction *(*function_table[])(CodeGen *codegen, int *flags) = {
146    SampleProgramOneInstruction,
147    SampleProgramSimpleBranch,
148    SampleProgramAtypicalBranch,
149    SampleProgramComplex,
150  };
151
152  for (size_t i = 0; i < arraysize(function_table); ++i) {
153    CodeGenUnittestHelper codegen;
154    int flags = NO_FLAGS;
155    Instruction *prg = function_table[i](&codegen, &flags);
156    test(&codegen, prg, flags);
157  }
158}
159
160void MakeInstruction(CodeGenUnittestHelper *codegen,
161                     Instruction *program, int) {
162  // Nothing to do here
163}
164
165SANDBOX_TEST(CodeGen, MakeInstruction) {
166  ForAllPrograms(MakeInstruction);
167}
168
169void FindBranchTargets(CodeGenUnittestHelper *codegen, Instruction *prg, int) {
170  BranchTargets branch_targets;
171  codegen->FindBranchTargets(*prg, &branch_targets);
172
173  // Verifying the general properties that should be true for every
174  // well-formed BPF program.
175  // Perform a depth-first traversal of the BPF program an verify that all
176  // targets of BPF_JMP instructions are represented in the "branch_targets".
177  // At the same time, compute a set of both the branch targets and all the
178  // instructions in the program.
179  std::vector<Instruction *> stack;
180  std::set<Instruction *> all_instructions;
181  std::set<Instruction *> target_instructions;
182  BranchTargets::const_iterator end = branch_targets.end();
183  for (Instruction *insn = prg;;) {
184    all_instructions.insert(insn);
185    if (BPF_CLASS(insn->code) == BPF_JMP) {
186      target_instructions.insert(insn->jt_ptr);
187      SANDBOX_ASSERT(insn->jt_ptr != NULL);
188      SANDBOX_ASSERT(branch_targets.find(insn->jt_ptr) != end);
189      if (BPF_OP(insn->code) != BPF_JA) {
190        target_instructions.insert(insn->jf_ptr);
191        SANDBOX_ASSERT(insn->jf_ptr != NULL);
192        SANDBOX_ASSERT(branch_targets.find(insn->jf_ptr) != end);
193        stack.push_back(insn->jf_ptr);
194      }
195      insn = insn->jt_ptr;
196    } else if (BPF_CLASS(insn->code) == BPF_RET) {
197      SANDBOX_ASSERT(insn->next == NULL);
198      if (stack.empty()) {
199        break;
200      }
201      insn = stack.back();
202      stack.pop_back();
203    } else {
204      SANDBOX_ASSERT(insn->next != NULL);
205      insn = insn->next;
206    }
207  }
208  SANDBOX_ASSERT(target_instructions.size() == branch_targets.size());
209
210  // We can now subtract the set of the branch targets from the set of all
211  // instructions. This gives us a set with the instructions that nobody
212  // ever jumps to. Verify that they are no included in the
213  // "branch_targets" that FindBranchTargets() computed for us.
214  Instructions non_target_instructions(all_instructions.size() -
215                                       target_instructions.size());
216  set_difference(all_instructions.begin(), all_instructions.end(),
217                 target_instructions.begin(), target_instructions.end(),
218                 non_target_instructions.begin());
219  for (Instructions::const_iterator iter = non_target_instructions.begin();
220       iter != non_target_instructions.end();
221       ++iter) {
222    SANDBOX_ASSERT(branch_targets.find(*iter) == end);
223  }
224}
225
226SANDBOX_TEST(CodeGen, FindBranchTargets) {
227  ForAllPrograms(FindBranchTargets);
228}
229
230void CutGraphIntoBasicBlocks(CodeGenUnittestHelper *codegen,
231                             Instruction *prg, int) {
232  BranchTargets branch_targets;
233  codegen->FindBranchTargets(*prg, &branch_targets);
234  TargetsToBlocks all_blocks;
235  BasicBlock *first_block =
236    codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks);
237  SANDBOX_ASSERT(first_block != NULL);
238  SANDBOX_ASSERT(first_block->instructions.size() > 0);
239  Instruction *first_insn = first_block->instructions[0];
240
241  // Basic blocks are supposed to start with a branch target and end with
242  // either a jump or a return instruction. It can also end, if the next
243  // instruction forms the beginning of a new basic block. There should be
244  // no other jumps or return instructions in the middle of a basic block.
245  for (TargetsToBlocks::const_iterator bb_iter = all_blocks.begin();
246       bb_iter != all_blocks.end();
247       ++bb_iter) {
248    BasicBlock *bb = bb_iter->second;
249    SANDBOX_ASSERT(bb != NULL);
250    SANDBOX_ASSERT(bb->instructions.size() > 0);
251    Instruction *insn = bb->instructions[0];
252    SANDBOX_ASSERT(insn == first_insn ||
253                   branch_targets.find(insn) != branch_targets.end());
254    for (Instructions::const_iterator insn_iter = bb->instructions.begin();;){
255      insn = *insn_iter;
256      if (++insn_iter != bb->instructions.end()) {
257        SANDBOX_ASSERT(BPF_CLASS(insn->code) != BPF_JMP);
258        SANDBOX_ASSERT(BPF_CLASS(insn->code) != BPF_RET);
259      } else {
260        SANDBOX_ASSERT(BPF_CLASS(insn->code) == BPF_JMP ||
261                       BPF_CLASS(insn->code) == BPF_RET ||
262                       branch_targets.find(insn->next) !=
263                         branch_targets.end());
264        break;
265      }
266      SANDBOX_ASSERT(branch_targets.find(*insn_iter) == branch_targets.end());
267    }
268  }
269}
270
271SANDBOX_TEST(CodeGen, CutGraphIntoBasicBlocks) {
272  ForAllPrograms(CutGraphIntoBasicBlocks);
273}
274
275void MergeTails(CodeGenUnittestHelper *codegen, Instruction *prg,
276                int flags) {
277  BranchTargets branch_targets;
278  codegen->FindBranchTargets(*prg, &branch_targets);
279  TargetsToBlocks all_blocks;
280  BasicBlock *first_block =
281    codegen->CutGraphIntoBasicBlocks(prg, branch_targets, &all_blocks);
282
283  // The shape of our graph and thus the function of our program should
284  // still be unchanged after we run MergeTails(). We verify this by
285  // serializing the graph and verifying that it is still the same.
286  // We also verify that at least some of the edges changed because of
287  // tail merging.
288  std::string graph[2];
289  std::string edges[2];
290
291  // The loop executes twice. After the first run, we call MergeTails() on
292  // our graph.
293  for (int i = 0;;) {
294    // Traverse the entire program in depth-first order.
295    std::vector<BasicBlock *> stack;
296    for (BasicBlock *bb = first_block;;) {
297      // Serialize the instructions in this basic block. In general, we only
298      // need to serialize "code" and "k"; except for a BPF_JA instruction
299      // where "k" isn't set.
300      // The stream of instructions should be unchanged after MergeTails().
301      for (Instructions::const_iterator iter = bb->instructions.begin();
302           iter != bb->instructions.end();
303           ++iter) {
304        graph[i].append(reinterpret_cast<char *>(&(*iter)->code),
305                        sizeof((*iter)->code));
306        if (BPF_CLASS((*iter)->code) != BPF_JMP ||
307            BPF_OP((*iter)->code) != BPF_JA) {
308          graph[i].append(reinterpret_cast<char *>(&(*iter)->k),
309                          sizeof((*iter)->k));
310        }
311      }
312
313      // Also serialize the addresses the basic blocks as we encounter them.
314      // This will change as basic blocks are coalesed by MergeTails().
315      edges[i].append(reinterpret_cast<char *>(&bb), sizeof(bb));
316
317      // Depth-first traversal of the graph. We only ever need to look at the
318      // very last instruction in the basic block, as that is the only one that
319      // can change code flow.
320      Instruction *insn = bb->instructions.back();
321      if (BPF_CLASS(insn->code) == BPF_JMP) {
322        // For jump instructions, we need to remember the "false" branch while
323        // traversing the "true" branch. This is not necessary for BPF_JA which
324        // only has a single branch.
325        if (BPF_OP(insn->code) != BPF_JA) {
326          stack.push_back(all_blocks[insn->jf_ptr]);
327        }
328        bb = all_blocks[insn->jt_ptr];
329      } else if (BPF_CLASS(insn->code) == BPF_RET) {
330        // After a BPF_RET, see if we need to back track.
331        if (stack.empty()) {
332          break;
333        }
334        bb = stack.back();
335        stack.pop_back();
336      } else {
337        // For "normal" instructions, just follow to the next basic block.
338        bb = all_blocks[insn->next];
339      }
340    }
341
342    // Our loop runs exactly two times.
343    if (++i > 1) {
344      break;
345    }
346    codegen->MergeTails(&all_blocks);
347  }
348  SANDBOX_ASSERT(graph[0] == graph[1]);
349  if (flags & HAS_MERGEABLE_TAILS) {
350    SANDBOX_ASSERT(edges[0] != edges[1]);
351  } else {
352    SANDBOX_ASSERT(edges[0] == edges[1]);
353  }
354}
355
356SANDBOX_TEST(CodeGen, MergeTails) {
357  ForAllPrograms(MergeTails);
358}
359
360void CompileAndCompare(CodeGenUnittestHelper *codegen, Instruction *prg, int) {
361  // TopoSortBasicBlocks() has internal checks that cause it to fail, if it
362  // detects a problem. Typically, if anything goes wrong, this looks to the
363  // TopoSort algorithm as if there had been cycles in the input data.
364  // This provides a pretty good unittest.
365  // We hand-crafted the program returned by SampleProgram() to exercise
366  // several of the more interesting code-paths. See comments in
367  // SampleProgram() for details.
368  // In addition to relying on the internal consistency checks in the compiler,
369  // we also serialize the graph and the resulting BPF program and compare
370  // them. With the exception of BPF_JA instructions that might have been
371  // inserted, both instruction streams should be equivalent.
372  // As Compile() modifies the instructions, we have to serialize the graph
373  // before calling Compile().
374  std::string source;
375  Instructions source_stack;
376  for (const Instruction *insn = prg, *next; insn; insn = next) {
377    if (BPF_CLASS(insn->code) == BPF_JMP) {
378      if (BPF_OP(insn->code) == BPF_JA) {
379        // Do not serialize BPF_JA instructions (see above).
380        next = insn->jt_ptr;
381        continue;
382      } else {
383        source_stack.push_back(insn->jf_ptr);
384        next = insn->jt_ptr;
385      }
386    } else if (BPF_CLASS(insn->code) == BPF_RET) {
387      if (source_stack.empty()) {
388        next = NULL;
389      } else {
390        next = source_stack.back();
391        source_stack.pop_back();
392      }
393    } else {
394      next = insn->next;
395    }
396    // Only serialize "code" and "k". That's all the information we need to
397    // compare. The rest of the information is encoded in the order of
398    // instructions.
399    source.append(reinterpret_cast<const char *>(&insn->code),
400                  sizeof(insn->code));
401    source.append(reinterpret_cast<const char *>(&insn->k),
402                  sizeof(insn->k));
403  }
404
405  // Compile the program
406  SandboxUnittestHelper::Program bpf;
407  codegen->Compile(prg, &bpf);
408
409  // Serialize the resulting BPF instructions.
410  std::string assembly;
411  std::vector<int> assembly_stack;
412  for (int idx = 0; idx >= 0;) {
413    SANDBOX_ASSERT(idx < (int)bpf.size());
414    struct sock_filter& insn = bpf[idx];
415    if (BPF_CLASS(insn.code) == BPF_JMP) {
416      if (BPF_OP(insn.code) == BPF_JA) {
417        // Do not serialize BPF_JA instructions (see above).
418        idx += insn.k + 1;
419        continue;
420      } else {
421        assembly_stack.push_back(idx + insn.jf + 1);
422        idx += insn.jt + 1;
423      }
424    } else if (BPF_CLASS(insn.code) == BPF_RET) {
425      if (assembly_stack.empty()) {
426        idx = -1;
427      } else {
428        idx = assembly_stack.back();
429        assembly_stack.pop_back();
430      }
431    } else {
432      ++idx;
433    }
434    // Serialize the same information that we serialized before compilation.
435    assembly.append(reinterpret_cast<char *>(&insn.code), sizeof(insn.code));
436    assembly.append(reinterpret_cast<char *>(&insn.k),    sizeof(insn.k));
437  }
438  SANDBOX_ASSERT(source == assembly);
439}
440
441SANDBOX_TEST(CodeGen, All) {
442  ForAllPrograms(CompileAndCompare);
443}
444
445}  // namespace playground2
446