// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "sandbox/linux/seccomp-bpf/codegen.h"

#include <stdio.h>

#include <set>

#include "base/logging.h"
#include "sandbox/linux/seccomp-bpf/basicblock.h"
#include "sandbox/linux/seccomp-bpf/die.h"
#include "sandbox/linux/seccomp-bpf/instruction.h"
#include "sandbox/linux/seccomp-bpf/linux_seccomp.h"
namespace {

// Helper function for Traverse(). Performs a depth-first traversal of the
// instruction graph, adding every instruction reachable from "instruction"
// to the "visited" set exactly once.
void TraverseRecursively(std::set<sandbox::Instruction*>* visited,
                         sandbox::Instruction* instruction) {
  if (visited->find(instruction) == visited->end()) {
    visited->insert(instruction);
    switch (BPF_CLASS(instruction->code)) {
      case BPF_JMP:
        // "Always" jumps only ever use the "true" branch target.
        if (BPF_OP(instruction->code) != BPF_JA) {
          TraverseRecursively(visited, instruction->jf_ptr);
        }
        TraverseRecursively(visited, instruction->jt_ptr);
        break;
      case BPF_RET:
        // Return instructions terminate the program and have no successor.
        break;
      default:
        TraverseRecursively(visited, instruction->next);
        break;
    }
  }
}

}  // namespace

namespace sandbox {

CodeGen::CodeGen() : compiled_(false) {}

CodeGen::~CodeGen() {
  for (Instructions::iterator iter = instructions_.begin();
       iter != instructions_.end();
       ++iter) {
    delete *iter;
  }
  for (BasicBlocks::iterator iter = basic_blocks_.begin();
       iter != basic_blocks_.end();
       ++iter) {
    delete *iter;
  }
}

void CodeGen::PrintProgram(const SandboxBPF::Program& program) {
  for (SandboxBPF::Program::const_iterator iter = program.begin();
       iter != program.end();
       ++iter) {
    int ip = (int)(iter - program.begin());
    fprintf(stderr, "%3d) ", ip);
    switch (BPF_CLASS(iter->code)) {
      case BPF_LD:
        if (iter->code == BPF_LD + BPF_W + BPF_ABS) {
          fprintf(stderr, "LOAD %d  // ", (int)iter->k);
          if (iter->k == offsetof(struct arch_seccomp_data, nr)) {
            fprintf(stderr, "System call number\n");
          } else if (iter->k == offsetof(struct arch_seccomp_data, arch)) {
            fprintf(stderr, "Architecture\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer)) {
            fprintf(stderr, "Instruction pointer (LSB)\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer) +
                         4) {
            fprintf(stderr, "Instruction pointer (MSB)\n");
          } else if (iter->k >= offsetof(struct arch_seccomp_data, args) &&
                     iter->k < offsetof(struct arch_seccomp_data, args) + 48 &&
                     (iter->k - offsetof(struct arch_seccomp_data, args)) % 4 ==
                         0) {
            fprintf(
                stderr,
                "Argument %d (%cSB)\n",
                (int)(iter->k - offsetof(struct arch_seccomp_data, args)) / 8,
                (iter->k - offsetof(struct arch_seccomp_data, args)) % 8 ? 'M'
                                                                         : 'L');
          } else {
            fprintf(stderr, "???\n");
          }
        } else {
          fprintf(stderr, "LOAD ???\n");
        }
        break;
      case BPF_JMP:
        if (BPF_OP(iter->code) == BPF_JA) {
          fprintf(stderr, "JMP %d\n", ip + iter->k + 1);
        } else {
          fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n",
              BPF_OP(iter->code) == BPF_JSET ? "&" :
              BPF_OP(iter->code) == BPF_JEQ ? "==" :
              BPF_OP(iter->code) == BPF_JGE ? ">=" :
              BPF_OP(iter->code) == BPF_JGT ? ">"  : "???",
              (int)iter->k,
              ip + iter->jt + 1, ip + iter->jf + 1);
        }
        break;
      case BPF_RET:
        fprintf(stderr, "RET 0x%x  // ", iter->k);
        if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP) {
          fprintf(stderr, "Trap #%d\n", iter->k & SECCOMP_RET_DATA);
        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
          fprintf(stderr, "errno = %d\n", iter->k & SECCOMP_RET_DATA);
        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRACE) {
          fprintf(stderr, "Trace #%d\n", iter->k & SECCOMP_RET_DATA);
        } else if (iter->k == SECCOMP_RET_ALLOW) {
          fprintf(stderr, "Allowed\n");
        } else {
          fprintf(stderr, "???\n");
        }
        break;
      case BPF_ALU:
        fprintf(stderr, BPF_OP(iter->code) == BPF_NEG
            ? "A := -A\n" : "A := A %s 0x%x\n",
            BPF_OP(iter->code) == BPF_ADD ? "+"  :
            BPF_OP(iter->code) == BPF_SUB ? "-"  :
            BPF_OP(iter->code) == BPF_MUL ? "*"  :
            BPF_OP(iter->code) == BPF_DIV ? "/"  :
            BPF_OP(iter->code) == BPF_MOD ? "%"  :
            BPF_OP(iter->code) == BPF_OR  ? "|"  :
            BPF_OP(iter->code) == BPF_XOR ? "^"  :
            BPF_OP(iter->code) == BPF_AND ? "&"  :
            BPF_OP(iter->code) == BPF_LSH ? "<<" :
            BPF_OP(iter->code) == BPF_RSH ? ">>" : "???",
            (int)iter->k);
        break;
      default:
        fprintf(stderr, "???\n");
        break;
    }
  }
}
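
// For illustration, a small hypothetical filter prints roughly like this
// (the exact constants depend on the architecture and the policy):
//
//     0) LOAD 0  // System call number
//     1) if A == 0x4e; then JMP 2 else JMP 3
//     2) RET 0x7fff0000  // Allowed
//     3) RET 0x50026  // errno = 38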

Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* next) {
  // We can handle non-jumping instructions and "always" jumps. Both of
  // them are followed by exactly one "next" instruction.
  // We allow callers to defer specifying "next", but then they must call
  // JoinInstructions() later.
  if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
    SANDBOX_DIE(
        "Must provide both \"true\" and \"false\" branch "
        "for a BPF_JMP");
  }
  if (next && BPF_CLASS(code) == BPF_RET) {
    SANDBOX_DIE("Cannot append instructions after a return statement");
  }
  if (BPF_CLASS(code) == BPF_JMP) {
    // "Always" jumps use the "true" branch target, only.
    Instruction* insn = new Instruction(code, 0, next, NULL);
    instructions_.push_back(insn);
    return insn;
  } else {
    // Non-jumping instructions do not use any of the branch targets.
    Instruction* insn = new Instruction(code, k, next);
    instructions_.push_back(insn);
    return insn;
  }
}

Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* jt,
                                      Instruction* jf) {
  // We can handle all conditional jumps. They are followed by both a
  // "true" and a "false" branch.
  if (BPF_CLASS(code) != BPF_JMP || BPF_OP(code) == BPF_JA) {
    SANDBOX_DIE("Expected a BPF_JMP instruction");
  }
  if (!jt || !jf) {
    SANDBOX_DIE("Branches must jump to a valid instruction");
  }
  Instruction* insn = new Instruction(code, k, jt, jf);
  instructions_.push_back(insn);
  return insn;
}
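
// A minimal usage sketch, assuming the standard seccomp_data layout; the
// system call number and the two-way policy shown are purely illustrative.
// This builds a filter that loads the system call number and then either
// allows the call or fails it with EPERM:
//
//   CodeGen gen;
//   Instruction* allow =
//       gen.MakeInstruction(BPF_RET + BPF_K, SECCOMP_RET_ALLOW, NULL);
//   Instruction* deny =
//       gen.MakeInstruction(BPF_RET + BPF_K, SECCOMP_RET_ERRNO + EPERM, NULL);
//   Instruction* check =
//       gen.MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, __NR_getpid,
//                           allow, deny);
//   Instruction* head =
//       gen.MakeInstruction(BPF_LD + BPF_W + BPF_ABS,
//                           offsetof(struct arch_seccomp_data, nr), check);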

void CodeGen::Traverse(Instruction* instruction,
                       void (*fnc)(Instruction*, void*),
                       void* aux) {
  std::set<Instruction*> visited;
  TraverseRecursively(&visited, instruction);
  for (std::set<Instruction*>::const_iterator iter = visited.begin();
       iter != visited.end();
       ++iter) {
    fnc(*iter, aux);
  }
}
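
// For example, Traverse() can drive a simple whole-graph pass, such as
// counting all reachable instructions (a sketch; "CountInstruction" is a
// hypothetical callback, not part of this file):
//
//   static void CountInstruction(Instruction*, void* aux) {
//     ++*static_cast<size_t*>(aux);
//   }
//
//   size_t num_instructions = 0;
//   gen.Traverse(head, CountInstruction, &num_instructions);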

void CodeGen::FindBranchTargets(const Instruction& instructions,
                                BranchTargets* branch_targets) {
  // Follow all possible paths through the "instructions" graph and compute
  // a list of branch targets. This will later be needed to compute the
  // boundaries of basic blocks.
  // We maintain a set of all instructions that we have previously seen. This
  // set ultimately converges on all instructions in the program.
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  for (const Instruction* insn = &instructions; insn;) {
    seen_instructions.insert(insn);
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // Found a jump. Increase count of incoming edges for each of the jump
      // targets.
      ++(*branch_targets)[insn->jt_ptr];
      if (BPF_OP(insn->code) != BPF_JA) {
        ++(*branch_targets)[insn->jf_ptr];
        stack.push_back(const_cast<Instruction*>(insn));
      }
      // Start a recursive descent for depth-first traversal.
      if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
        // We haven't seen the "true" branch yet. Traverse it now. We have
        // already remembered the "false" branch on the stack and will
        // traverse it later.
        insn = insn->jt_ptr;
        continue;
      } else {
        // Now try traversing the "false" branch.
        insn = NULL;
      }
    } else {
      // This is a non-jump instruction, just continue to the next instruction
      // (if any). It's OK if "insn" becomes NULL when reaching a return
      // instruction. The check below verifies that "next" is NULL if and
      // only if this is a return instruction.
      if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) {
        SANDBOX_DIE(
            "Internal compiler error; return instruction must be at "
            "the end of the BPF program");
      }
      if (seen_instructions.find(insn->next) == seen_instructions.end()) {
        insn = insn->next;
      } else {
        // We have seen this instruction before. That could happen if it is
        // a branch target. No need to continue processing.
        insn = NULL;
      }
    }
    while (!insn && !stack.empty()) {
      // We are done processing all the way to a leaf node. Backtrack up the
      // stack to any branches that we haven't processed yet. By definition,
      // this has to be a "false" branch, as we always process the "true"
      // branches right away.
      insn = stack.back();
      stack.pop_back();
      if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) {
        // We haven't seen the "false" branch yet. So, that's where we'll
        // go now.
        insn = insn->jf_ptr;
      } else {
        // We have seen both the "true" and the "false" branch, continue
        // up the stack.
        if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
          SANDBOX_DIE(
              "Internal compiler error; cannot find all "
              "branch targets");
        }
        insn = NULL;
      }
    }
  }
}
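
// For illustration, consider a graph where a single conditional jump
// selects between two return instructions:
//
//   LD --> JEQ --(true)--> RET A
//           |
//           +----(false)-> RET B
//
// FindBranchTargets() maps both "RET A" and "RET B" to an incoming edge
// count of 1; "LD" and "JEQ" are not branch targets at all.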

BasicBlock* CodeGen::MakeBasicBlock(Instruction* head, Instruction* tail) {
  // Iterate over all the instructions between "head" and "tail" and
  // insert them into a new basic block.
  BasicBlock* bb = new BasicBlock;
  for (;; head = head->next) {
    bb->instructions.push_back(head);
    if (head == tail) {
      break;
    }
    if (BPF_CLASS(head->code) == BPF_JMP) {
      SANDBOX_DIE("Found a jump inside of a basic block");
    }
  }
  basic_blocks_.push_back(bb);
  return bb;
}

void CodeGen::AddBasicBlock(Instruction* head,
                            Instruction* tail,
                            const BranchTargets& branch_targets,
                            TargetsToBlocks* basic_blocks,
                            BasicBlock** firstBlock) {
  // Add a new basic block to "basic_blocks". Also set "firstBlock", if it
  // has not been set before.
  BranchTargets::const_iterator iter = branch_targets.find(head);
  // Sanity check: only the first block we ever add may lack incoming jumps,
  // and "*firstBlock" must be unset exactly as long as "basic_blocks" is
  // still empty.
  if ((iter == branch_targets.end()) != !*firstBlock ||
      !*firstBlock != basic_blocks->empty()) {
    SANDBOX_DIE(
        "Only the very first basic block should have no "
        "incoming jumps");
  }
  BasicBlock* bb = MakeBasicBlock(head, tail);
  if (!*firstBlock) {
    *firstBlock = bb;
  }
  (*basic_blocks)[head] = bb;
}

BasicBlock* CodeGen::CutGraphIntoBasicBlocks(
    Instruction* instructions,
    const BranchTargets& branch_targets,
    TargetsToBlocks* basic_blocks) {
  // Textbook implementation of a basic block generator. All basic blocks
  // start with a branch target and end with either a return statement or
  // a jump (or are followed by an instruction that forms the beginning of a
  // new block). Both conditional and "always" jumps are supported.
  BasicBlock* first_block = NULL;
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  Instruction* tail = NULL;
  Instruction* head = instructions;
  for (Instruction* insn = head; insn;) {
    if (seen_instructions.find(insn) != seen_instructions.end()) {
      // We somehow went in a circle. This should never be possible. Not even
      // cyclic graphs are supposed to confuse us this much.
      SANDBOX_DIE("Internal compiler error; cannot compute basic blocks");
    }
    seen_instructions.insert(insn);
    if (tail && branch_targets.find(insn) != branch_targets.end()) {
      // We reached a branch target. Start a new basic block (which means
      // flushing the previous basic block first).
      AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
      head = insn;
    }
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // We reached a jump instruction; this completes our current basic
      // block. Flush it and continue by traversing both the true and the
      // false branch of the jump. We need to maintain a stack to do so.
      AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block);
      if (BPF_OP(insn->code) != BPF_JA) {
        stack.push_back(insn->jf_ptr);
      }
      insn = insn->jt_ptr;

      // If we are jumping to an instruction that we have previously
      // processed, we are done with this branch. Continue by backtracking
      // up the stack.
      while (seen_instructions.find(insn) != seen_instructions.end()) {
      backtracking:
        if (stack.empty()) {
          // We successfully traversed all reachable instructions.
          return first_block;
        } else {
          // Going up the stack.
          insn = stack.back();
          stack.pop_back();
        }
      }
      // Starting a new basic block.
      tail = NULL;
      head = insn;
    } else {
      // We found a non-jumping instruction, append it to the current basic
      // block.
      tail = insn;
      insn = insn->next;
      if (!insn) {
        // We reached a return statement, flush the current basic block and
        // backtrack up the stack.
        AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
        goto backtracking;
      }
    }
  }
  return first_block;
}
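
// Using the example graph from FindBranchTargets() above, the program is
// cut into three basic blocks: {LD, JEQ}, {RET A} and {RET B}. The block
// {LD, JEQ} is returned as the entry block.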

// We define a comparator that inspects the sequence of instructions in our
// basic block and any blocks referenced by this block. This function can be
// used in a "less" comparator for the purpose of storing pointers to basic
// blocks in STL containers; this gives an easy option to use STL to find
// shared tail sequences of basic blocks.
static int PointerCompare(const BasicBlock* block1,
                          const BasicBlock* block2,
                          const TargetsToBlocks& blocks) {
  // Return <0, 0, or >0 depending on the ordering of "block1" and "block2".
  // If we are looking at the exact same block, this is trivial and we don't
  // need to do a full comparison.
  if (block1 == block2) {
    return 0;
  }

  // We compare the sequence of instructions in both basic blocks.
  const Instructions& insns1 = block1->instructions;
  const Instructions& insns2 = block2->instructions;
  // Basic blocks should never be empty.
  CHECK(!insns1.empty());
  CHECK(!insns2.empty());

  Instructions::const_iterator iter1 = insns1.begin();
  Instructions::const_iterator iter2 = insns2.begin();
  for (;; ++iter1, ++iter2) {
    // If we have reached the end of the sequence of instructions in one or
    // both basic blocks, we know the relative ordering between the two blocks
    // and can return.
    if (iter1 == insns1.end() || iter2 == insns2.end()) {
      if (iter1 != insns1.end()) {
        return 1;
      }
      if (iter2 != insns2.end()) {
        return -1;
      }

      // If the two blocks are the same length (and have elementwise-equal
      // code and k fields) and their last instructions are neither a JMP nor
      // a RET (which is the only way we can reach this point), then we must
      // compare their successors.
      Instruction* const insns1_last = insns1.back();
      Instruction* const insns2_last = insns2.back();
      CHECK(BPF_CLASS(insns1_last->code) != BPF_JMP &&
            BPF_CLASS(insns1_last->code) != BPF_RET);

      // Non-jumping instructions will always have a valid next instruction.
      CHECK(insns1_last->next);
      CHECK(insns2_last->next);
      return PointerCompare(blocks.find(insns1_last->next)->second,
                            blocks.find(insns2_last->next)->second,
                            blocks);
    }

    // Compare the individual fields for both instructions.
    const Instruction& insn1 = **iter1;
    const Instruction& insn2 = **iter2;
    if (insn1.code != insn2.code) {
      return insn1.code - insn2.code;
    }
    if (insn1.k != insn2.k) {
      // "k" is an unsigned 32-bit value, so returning the difference could
      // overflow an int and yield the wrong sign. Compare explicitly.
      return insn1.k < insn2.k ? -1 : 1;
    }

    // Sanity check: If we're looking at a JMP or RET instruction, by
    // definition it should be the last instruction of the basic block.
    if (BPF_CLASS(insn1.code) == BPF_JMP || BPF_CLASS(insn1.code) == BPF_RET) {
      CHECK_EQ(insns1.back(), &insn1);
      CHECK_EQ(insns2.back(), &insn2);
    }

    // RET instructions terminate execution, and only JMP instructions use the
    // jt_ptr and jf_ptr fields.  Anything else can continue to the next
    // instruction in the basic block.
    if (BPF_CLASS(insn1.code) == BPF_RET) {
      return 0;
    } else if (BPF_CLASS(insn1.code) != BPF_JMP) {
      continue;
    }

    // Recursively compare the "true" and "false" branches.
    // A well-formed BPF program can't have any cycles, so we know
    // that our recursive algorithm will ultimately terminate.
    // In the unlikely event that the programmer made a mistake and
    // went out of the way to give us a cyclic program, we will crash
    // with a stack overflow. We are OK with that.
    if (BPF_OP(insn1.code) != BPF_JA) {
      int c = PointerCompare(blocks.find(insn1.jf_ptr)->second,
                             blocks.find(insn2.jf_ptr)->second,
                             blocks);
      if (c != 0) {
        return c;
      }
    }
    return PointerCompare(blocks.find(insn1.jt_ptr)->second,
                          blocks.find(insn2.jt_ptr)->second,
                          blocks);
  }
}
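
// In effect, two basic blocks compare as equal exactly when the instruction
// streams reachable from them are identical. For example, two distinct
// blocks that each consist of the single instruction "RET SECCOMP_RET_ALLOW"
// compare as equal and thus become candidates for merging.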

void CodeGen::MergeTails(TargetsToBlocks* blocks) {
  // We enter all of our basic blocks into a set using the BasicBlock::Less()
  // comparator. This naturally results in blocks with identical tails of
  // instructions mapping to the same entry in the set. Whenever we discover
  // that a particular chain of instructions is already in the set, we merge
  // the basic blocks and update the pointer in the "blocks" map.
  // N.B. We don't merge instructions on a granularity that is finer than
  //      a basic block. In practice, this is sufficiently rare that we don't
  //      incur a big cost.
  //      Similarly, we currently don't merge anything other than tails. In
  //      the future, we might decide to revisit this decision and attempt to
  //      merge arbitrary sub-sequences of instructions.
  BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare);
  typedef std::set<BasicBlock*, BasicBlock::Less<TargetsToBlocks> > Set;
  Set seen_basic_blocks(less);
  for (TargetsToBlocks::iterator iter = blocks->begin(); iter != blocks->end();
       ++iter) {
    BasicBlock* bb = iter->second;
    Set::const_iterator entry = seen_basic_blocks.find(bb);
    if (entry == seen_basic_blocks.end()) {
      // This is the first time we see this particular sequence of
      // instructions. Enter the basic block into the set of known
      // basic blocks.
      seen_basic_blocks.insert(bb);
    } else {
      // We have previously seen another basic block that defines the same
      // sequence of instructions. Merge the two blocks and update the
      // pointer in the "blocks" map.
      iter->second = *entry;
    }
  }
}
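
// For illustration: if two policy branches independently end in a block
// consisting of "RET SECCOMP_RET_ALLOW", both entries in "blocks" end up
// pointing at one shared copy of that block; the duplicate simply becomes
// unreachable and is never emitted.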

void CodeGen::ComputeIncomingBranches(BasicBlock* block,
                                      const TargetsToBlocks& targets_to_blocks,
                                      IncomingBranches* incoming_branches) {
  // We increment the number of incoming branches each time we encounter a
  // basic block. But we only traverse recursively the very first time we
  // encounter a new block. This is necessary to make topological sorting
  // work correctly.
  if (++(*incoming_branches)[block] == 1) {
    Instruction* last_insn = block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->jt_ptr)->second,
                              targets_to_blocks,
                              incoming_branches);
      if (BPF_OP(last_insn->code) != BPF_JA) {
        ComputeIncomingBranches(
            targets_to_blocks.find(last_insn->jf_ptr)->second,
            targets_to_blocks,
            incoming_branches);
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second,
                              targets_to_blocks,
                              incoming_branches);
    }
  }
}
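
// For a diamond-shaped graph of basic blocks
//
//   A --> B --> D
//    \         ^
//     +--> C --+
//
// the resulting counts are A:1 (from the initial call), B:1, C:1 and D:2.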

void CodeGen::TopoSortBasicBlocks(BasicBlock* first_block,
                                  const TargetsToBlocks& blocks,
                                  BasicBlocks* basic_blocks) {
  // Textbook implementation of a toposort. We keep looking for basic blocks
  // that don't have any incoming branches (initially, this is just the
  // "first_block") and add them to the topologically sorted list of
  // "basic_blocks". As we do so, we remove outgoing branches. This
  // potentially ends up making our descendants eligible for the sorted list.
  // The sorting algorithm terminates when there are no more basic blocks
  // that have no incoming branches. If we didn't move all blocks from the
  // set of "unordered_blocks" to the sorted list of "basic_blocks", there
  // must have been a cyclic dependency. This should never happen in a BPF
  // program, as well-formed BPF programs only ever have forward branches.
  IncomingBranches unordered_blocks;
  ComputeIncomingBranches(first_block, blocks, &unordered_blocks);

  std::set<BasicBlock*> heads;
  for (;;) {
    // Move block from "unordered_blocks" to "basic_blocks".
    basic_blocks->push_back(first_block);

    // Inspect the last instruction in the basic block. This is typically
    // either a jump or a return statement. But it could also be a "normal"
    // instruction that is followed by a jump target.
    Instruction* last_insn = first_block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      // Remove outgoing branches. This might end up moving our descendants
      // into the set of "head" nodes that no longer have any incoming
      // branches.
      TargetsToBlocks::const_iterator iter;
      if (BPF_OP(last_insn->code) != BPF_JA) {
        iter = blocks.find(last_insn->jf_ptr);
        if (!--unordered_blocks[iter->second]) {
          heads.insert(iter->second);
        }
      }
      iter = blocks.find(last_insn->jt_ptr);
      if (!--unordered_blocks[iter->second]) {
        first_block = iter->second;
        continue;
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      // We encountered an instruction that doesn't change code flow. Try to
      // pick the next "first_block" from "last_insn->next", if possible.
      TargetsToBlocks::const_iterator iter;
      iter = blocks.find(last_insn->next);
      if (!--unordered_blocks[iter->second]) {
        first_block = iter->second;
        continue;
      } else {
        // Our basic block is supposed to be followed by "last_insn->next",
        // but dependencies prevent this from happening. Insert a BPF_JA
        // instruction to correct the code flow.
        Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, last_insn->next);
        first_block->instructions.push_back(ja);
        last_insn->next = ja;
      }
    }
    if (heads.empty()) {
      if (unordered_blocks.size() != basic_blocks->size()) {
        SANDBOX_DIE("Internal compiler error; cyclic graph detected");
      }
      return;
    }
    // Proceed by picking an arbitrary node from the set of basic blocks that
    // do not have any incoming branches.
    first_block = *heads.begin();
    heads.erase(heads.begin());
  }
}
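
// With the diamond graph from ComputeIncomingBranches() above, the blocks
// are emitted in the order A, B, C, D. When B is emitted, D still has an
// incoming branch from C, so a BPF_JA from B to D is inserted; once C has
// been emitted, D's count drops to zero and D follows immediately.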

void CodeGen::ComputeRelativeJumps(BasicBlocks* basic_blocks,
                                   const TargetsToBlocks& targets_to_blocks) {
  // While we previously used pointers in jt_ptr and jf_ptr to link jump
  // instructions to their targets, we now convert these jumps to relative
  // jumps that are suitable for loading the BPF program into the kernel.
  int offset = 0;

  // Since we just completed a toposort, all jump targets are guaranteed to
  // go forward. This means that iterating over the basic blocks in reverse
  // makes it trivial to compute the correct offsets.
  BasicBlock* bb = NULL;
  BasicBlock* last_bb = NULL;
  for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin();
       iter != basic_blocks->rend();
       ++iter) {
    last_bb = bb;
    bb = *iter;
    Instruction* insn = bb->instructions.back();
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // Basic block ended in a jump instruction. We can now compute the
      // appropriate offsets.
      if (BPF_OP(insn->code) == BPF_JA) {
        // "Always" jumps use the 32-bit "k" field for the offset, instead
        // of the 8-bit "jt" and "jf" fields.
        int jmp = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        insn->k = jmp;
        insn->jt = insn->jf = 0;
      } else {
        // The offset computations for conditional jumps are just the same
        // as for "always" jumps.
        int jt = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        int jf = offset - targets_to_blocks.find(insn->jf_ptr)->second->offset;

        // There is an added complication, because conditional relative jumps
        // can only jump at most 255 instructions forward. If we have to jump
        // further, insert an extra "always" jump. The "jt == 255" case is
        // included because inserting an extra jump for an out-of-range "jf"
        // below would push "jt" past the limit.
        Instructions::size_type jmp = bb->instructions.size();
        if (jt > 255 || (jt == 255 && jf > 255)) {
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jt_ptr);
          bb->instructions.push_back(ja);
          ja->k = jt;
          ja->jt = ja->jf = 0;

          // The newly inserted "always" jump, of course, requires us to adjust
          // the jump targets in the original conditional jump.
          jt = 0;
          ++jf;
        }
        if (jf > 255) {
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jf_ptr);
          bb->instructions.insert(bb->instructions.begin() + jmp, ja);
          ja->k = jf;
          ja->jt = ja->jf = 0;

          // Again, we have to adjust the jump targets in the original
          // conditional jump.
          ++jt;
          jf = 0;
        }

        // Now we can finally set the relative jump targets in the conditional
        // jump instruction. Afterwards, we must no longer access the jt_ptr
        // and jf_ptr fields.
        insn->jt = jt;
        insn->jf = jf;
      }
    } else if (BPF_CLASS(insn->code) != BPF_RET &&
               targets_to_blocks.find(insn->next)->second != last_bb) {
      SANDBOX_DIE("Internal compiler error; invalid basic block encountered");
    }

    // Proceed to next basic block.
    offset += bb->instructions.size();
    bb->offset = offset;
  }
}
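
// A worked example of the offset arithmetic: consider a program made up of
// block X (two instructions, ending in a conditional jump to Y or Z),
// followed by block Y (one instruction) and block Z (one instruction).
// Iterating in reverse, Z->offset becomes 1 and Y->offset becomes 2. When
// X is processed, "offset" is 2, so the conditional jump is assigned
// jt = 2 - Y->offset = 0 (fall through into Y) and jf = 2 - Z->offset = 1
// (skip over Y's single instruction).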

void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks,
                                     SandboxBPF::Program* program) {
  // Our basic blocks have been sorted and relative jump offsets have been
  // computed. The last remaining step is to concatenate all the instructions
  // in our basic blocks into a BPF program.
  program->clear();
  for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin();
       bb_iter != basic_blocks.end();
       ++bb_iter) {
    const BasicBlock& bb = **bb_iter;
    for (Instructions::const_iterator insn_iter = bb.instructions.begin();
         insn_iter != bb.instructions.end();
         ++insn_iter) {
      const Instruction& insn = **insn_iter;
      program->push_back(
          (struct sock_filter) {insn.code, insn.jt, insn.jf, insn.k});
    }
  }
}

void CodeGen::Compile(Instruction* instructions, SandboxBPF::Program* program) {
  if (compiled_) {
    SANDBOX_DIE(
        "Cannot call Compile() multiple times. Create a new code "
        "generator instead");
  }
  compiled_ = true;

  BranchTargets branch_targets;
  FindBranchTargets(*instructions, &branch_targets);
  TargetsToBlocks all_blocks;
  BasicBlock* first_block =
      CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks);
  MergeTails(&all_blocks);
  BasicBlocks basic_blocks;
  TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks);
  ComputeRelativeJumps(&basic_blocks, all_blocks);
  ConcatenateBasicBlocks(basic_blocks, program);
}
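
// A minimal end-to-end sketch of how this class is driven (error handling
// omitted; the two-instruction policy is hypothetical):
//
//   CodeGen gen;
//   Instruction* allow =
//       gen.MakeInstruction(BPF_RET + BPF_K, SECCOMP_RET_ALLOW, NULL);
//   Instruction* head =
//       gen.MakeInstruction(BPF_LD + BPF_W + BPF_ABS,
//                           offsetof(struct arch_seccomp_data, nr), allow);
//   SandboxBPF::Program program;
//   gen.Compile(head, &program);
//   // "program" now holds sock_filter entries ready to be loaded into the
//   // kernel.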

}  // namespace sandbox