1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <stdio.h>
6
7#include "sandbox/linux/seccomp-bpf/codegen.h"
8
9
10namespace {
11
12// Helper function for Traverse().
13void TraverseRecursively(std::set<playground2::Instruction *> *visited,
14                         playground2::Instruction *instruction) {
15  if (visited->find(instruction) == visited->end()) {
16    visited->insert(instruction);
17    switch (BPF_CLASS(instruction->code)) {
18    case BPF_JMP:
19      if (BPF_OP(instruction->code) != BPF_JA) {
20        TraverseRecursively(visited, instruction->jf_ptr);
21      }
22      TraverseRecursively(visited, instruction->jt_ptr);
23      break;
24    case BPF_RET:
25      break;
26    default:
27      TraverseRecursively(visited, instruction->next);
28      break;
29    }
30  }
31}
32
33}  // namespace
34
35namespace playground2 {
36
37CodeGen::CodeGen()
38    : compiled_(false) {
39}
40
41CodeGen::~CodeGen() {
42  for (Instructions::iterator iter = instructions_.begin();
43       iter != instructions_.end();
44       ++iter) {
45    delete *iter;
46  }
47  for (BasicBlocks::iterator iter = basic_blocks_.begin();
48       iter != basic_blocks_.end();
49       ++iter) {
50    delete *iter;
51  }
52}
53
54void CodeGen::PrintProgram(const Sandbox::Program& program) {
55  for (Sandbox::Program::const_iterator iter = program.begin();
56       iter != program.end();
57       ++iter) {
58    int ip = (int)(iter - program.begin());
59    fprintf(stderr, "%3d) ", ip);
60    switch (BPF_CLASS(iter->code)) {
61    case BPF_LD:
62      if (iter->code == BPF_LD+BPF_W+BPF_ABS) {
63        fprintf(stderr, "LOAD %d  // ", (int)iter->k);
64        if (iter->k == offsetof(struct arch_seccomp_data, nr)) {
65          fprintf(stderr, "System call number\n");
66        } else if (iter->k == offsetof(struct arch_seccomp_data, arch)) {
67          fprintf(stderr, "Architecture\n");
68        } else if (iter->k == offsetof(struct arch_seccomp_data,
69                                       instruction_pointer)) {
70          fprintf(stderr, "Instruction pointer (LSB)\n");
71        } else if (iter->k == offsetof(struct arch_seccomp_data,
72                                       instruction_pointer) + 4) {
73          fprintf(stderr, "Instruction pointer (MSB)\n");
74        } else if (iter->k >= offsetof(struct arch_seccomp_data, args) &&
75                   iter->k <  offsetof(struct arch_seccomp_data, args)+48 &&
76                   (iter->k-offsetof(struct arch_seccomp_data, args))%4 == 0) {
77          fprintf(stderr, "Argument %d (%cSB)\n",
78                  (int)(iter->k-offsetof(struct arch_seccomp_data, args))/8,
79                  (iter->k-offsetof(struct arch_seccomp_data,
80                                    args))%8 ? 'M' : 'L');
81        } else {
82          fprintf(stderr, "???\n");
83        }
84      } else {
85        fprintf(stderr, "LOAD ???\n");
86      }
87      break;
88    case BPF_JMP:
89      if (BPF_OP(iter->code) == BPF_JA) {
90        fprintf(stderr, "JMP %d\n", ip + iter->k + 1);
91      } else {
92        fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n",
93                BPF_OP(iter->code) == BPF_JSET ? "&" :
94                BPF_OP(iter->code) == BPF_JEQ ? "==" :
95                BPF_OP(iter->code) == BPF_JGE ? ">=" :
96                BPF_OP(iter->code) == BPF_JGT ? ">"  : "???",
97                (int)iter->k,
98                ip + iter->jt + 1, ip + iter->jf + 1);
99      }
100      break;
101    case BPF_RET:
102      fprintf(stderr, "RET 0x%x  // ", iter->k);
103      if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP) {
104        fprintf(stderr, "Trap #%d\n", iter->k & SECCOMP_RET_DATA);
105      } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
106        fprintf(stderr, "errno = %d\n", iter->k & SECCOMP_RET_DATA);
107      } else if (iter->k == SECCOMP_RET_ALLOW) {
108        fprintf(stderr, "Allowed\n");
109      } else {
110        fprintf(stderr, "???\n");
111      }
112      break;
113    case BPF_ALU:
114      fprintf(stderr, BPF_OP(iter->code) == BPF_NEG
115              ? "A := -A\n" : "A := A %s 0x%x\n",
116              BPF_OP(iter->code) == BPF_ADD ? "+"  :
117              BPF_OP(iter->code) == BPF_SUB ? "-"  :
118              BPF_OP(iter->code) == BPF_MUL ? "*"  :
119              BPF_OP(iter->code) == BPF_DIV ? "/"  :
120              BPF_OP(iter->code) == BPF_MOD ? "%"  :
121              BPF_OP(iter->code) == BPF_OR  ? "|"  :
122              BPF_OP(iter->code) == BPF_XOR ? "^"  :
123              BPF_OP(iter->code) == BPF_AND ? "&"  :
124              BPF_OP(iter->code) == BPF_LSH ? "<<" :
125              BPF_OP(iter->code) == BPF_RSH ? ">>" : "???",
126              (int)iter->k);
127      break;
128    default:
129      fprintf(stderr, "???\n");
130      break;
131    }
132  }
133  return;
134}
135
136Instruction *CodeGen::MakeInstruction(uint16_t code, uint32_t k,
137                                      Instruction *next) {
138  // We can handle non-jumping instructions and "always" jumps. Both of
139  // them are followed by exactly one "next" instruction.
140  // We allow callers to defer specifying "next", but then they must call
141  // "joinInstructions" later.
142  if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
143    SANDBOX_DIE("Must provide both \"true\" and \"false\" branch "
144                "for a BPF_JMP");
145  }
146  if (next && BPF_CLASS(code) == BPF_RET) {
147    SANDBOX_DIE("Cannot append instructions after a return statement");
148  }
149  if (BPF_CLASS(code) == BPF_JMP) {
150    // "Always" jumps use the "true" branch target, only.
151    Instruction *insn = new Instruction(code, 0, next, NULL);
152    instructions_.push_back(insn);
153    return insn;
154  } else {
155    // Non-jumping instructions do not use any of the branch targets.
156    Instruction *insn = new Instruction(code, k, next);
157    instructions_.push_back(insn);
158    return insn;
159  }
160}
161
162Instruction *CodeGen::MakeInstruction(uint16_t code, const ErrorCode& err) {
163  if (BPF_CLASS(code) != BPF_RET) {
164    SANDBOX_DIE("ErrorCodes can only be used in return expressions");
165  }
166  if (err.error_type_ != ErrorCode::ET_SIMPLE &&
167      err.error_type_ != ErrorCode::ET_TRAP) {
168    SANDBOX_DIE("ErrorCode is not suitable for returning from a BPF program");
169  }
170  return MakeInstruction(code, err.err_);
171}
172
173Instruction *CodeGen::MakeInstruction(uint16_t code, uint32_t k,
174                                      Instruction *jt, Instruction *jf) {
175  // We can handle all conditional jumps. They are followed by both a
176  // "true" and a "false" branch.
177  if (BPF_CLASS(code) != BPF_JMP || BPF_OP(code) == BPF_JA) {
178    SANDBOX_DIE("Expected a BPF_JMP instruction");
179  }
180  if (!jt && !jf) {
181    // We allow callers to defer specifying exactly one of the branch
182    // targets. It must then be set later by calling "JoinInstructions".
183    SANDBOX_DIE("Branches must jump to a valid instruction");
184  }
185  Instruction *insn = new Instruction(code, k, jt, jf);
186  instructions_.push_back(insn);
187  return insn;
188}
189
190void CodeGen::JoinInstructions(Instruction *head, Instruction *tail) {
191  // Merge two instructions, or set the branch target for an "always" jump.
192  // This function should be called, if the caller didn't initially provide
193  // a value for "next" when creating the instruction.
194  if (BPF_CLASS(head->code) == BPF_JMP) {
195    if (BPF_OP(head->code) == BPF_JA) {
196      if (head->jt_ptr) {
197        SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
198      }
199      head->jt_ptr = tail;
200    } else {
201      if (!head->jt_ptr && head->jf_ptr) {
202        head->jt_ptr = tail;
203      } else if (!head->jf_ptr && head->jt_ptr) {
204        head->jf_ptr = tail;
205      } else {
206        SANDBOX_DIE("Cannot append instructions after a jump");
207      }
208    }
209  } else if (BPF_CLASS(head->code) == BPF_RET) {
210    SANDBOX_DIE("Cannot append instructions after a return statement");
211  } else if (head->next) {
212    SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
213  } else {
214    head->next = tail;
215  }
216  return;
217}
218
219void CodeGen::Traverse(Instruction *instruction,
220                       void (*fnc)(Instruction *, void *), void *aux) {
221  std::set<Instruction *> visited;
222  TraverseRecursively(&visited, instruction);
223  for (std::set<Instruction *>::const_iterator iter = visited.begin();
224       iter != visited.end();
225       ++iter) {
226    fnc(*iter, aux);
227  }
228}
229
230void CodeGen::FindBranchTargets(const Instruction& instructions,
231                                BranchTargets *branch_targets) {
232  // Follow all possible paths through the "instructions" graph and compute
233  // a list of branch targets. This will later be needed to compute the
234  // boundaries of basic blocks.
235  // We maintain a set of all instructions that we have previously seen. This
236  // set ultimately converges on all instructions in the program.
237  std::set<const Instruction *> seen_instructions;
238  Instructions stack;
239  for (const Instruction *insn = &instructions; insn; ) {
240    seen_instructions.insert(insn);
241    if (BPF_CLASS(insn->code) == BPF_JMP) {
242      // Found a jump. Increase count of incoming edges for each of the jump
243      // targets.
244      ++(*branch_targets)[insn->jt_ptr];
245      if (BPF_OP(insn->code) != BPF_JA) {
246        ++(*branch_targets)[insn->jf_ptr];
247        stack.push_back(const_cast<Instruction *>(insn));
248      }
249      // Start a recursive decent for depth-first traversal.
250      if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
251        // We haven't seen the "true" branch yet. Traverse it now. We have
252        // already remembered the "false" branch on the stack and will
253        // traverse it later.
254        insn = insn->jt_ptr;
255        continue;
256      } else {
257        // Now try traversing the "false" branch.
258        insn = NULL;
259      }
260    } else {
261      // This is a non-jump instruction, just continue to the next instruction
262      // (if any). It's OK if "insn" becomes NULL when reaching a return
263      // instruction.
264      if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) {
265        SANDBOX_DIE("Internal compiler error; return instruction must be at "
266                    "the end of the BPF program");
267      }
268      if (seen_instructions.find(insn->next) == seen_instructions.end()) {
269        insn = insn->next;
270      } else {
271        // We have seen this instruction before. That could happen if it is
272        // a branch target. No need to continue processing.
273        insn = NULL;
274      }
275    }
276    while (!insn && !stack.empty()) {
277      // We are done processing all the way to a leaf node, backtrack up the
278      // stack to any branches that we haven't processed yet. By definition,
279      // this has to be a "false" branch, as we always process the "true"
280      // branches right away.
281      insn = stack.back();
282      stack.pop_back();
283      if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) {
284        // We haven't seen the "false" branch yet. So, that's where we'll
285        // go now.
286        insn = insn->jf_ptr;
287      } else {
288        // We have seen both the "true" and the "false" branch, continue
289        // up the stack.
290        if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
291          SANDBOX_DIE("Internal compiler error; cannot find all "
292                      "branch targets");
293        }
294        insn = NULL;
295      }
296    }
297  }
298  return;
299}
300
301BasicBlock *CodeGen::MakeBasicBlock(Instruction *head,
302                                    Instruction *tail) {
303  // Iterate over all the instructions between "head" and "tail" and
304  // insert them into a new basic block.
305  BasicBlock *bb = new BasicBlock;
306  for (;; head = head->next) {
307    bb->instructions.push_back(head);
308    if (head == tail) {
309      break;
310    }
311    if (BPF_CLASS(head->code) == BPF_JMP) {
312      SANDBOX_DIE("Found a jump inside of a basic block");
313    }
314  }
315  basic_blocks_.push_back(bb);
316  return bb;
317}
318
319void CodeGen::AddBasicBlock(Instruction *head,
320                            Instruction *tail,
321                            const BranchTargets& branch_targets,
322                            TargetsToBlocks *basic_blocks,
323                            BasicBlock **firstBlock) {
324  // Add a new basic block to "basic_blocks". Also set "firstBlock", if it
325  // has not been set before.
326  BranchTargets::const_iterator iter = branch_targets.find(head);
327  if ((iter == branch_targets.end()) != !*firstBlock ||
328      !*firstBlock != basic_blocks->empty()) {
329    SANDBOX_DIE("Only the very first basic block should have no "
330                "incoming jumps");
331  }
332  BasicBlock *bb = MakeBasicBlock(head, tail);
333  if (!*firstBlock) {
334    *firstBlock = bb;
335  }
336  (*basic_blocks)[head] = bb;
337  return;
338}
339
340BasicBlock *CodeGen::CutGraphIntoBasicBlocks(
341    Instruction *instructions, const BranchTargets& branch_targets,
342    TargetsToBlocks *basic_blocks) {
343  // Textbook implementation of a basic block generator. All basic blocks
344  // start with a branch target and end with either a return statement or
345  // a jump (or are followed by an instruction that forms the beginning of a
346  // new block). Both conditional and "always" jumps are supported.
347  BasicBlock *first_block = NULL;
348  std::set<const Instruction *> seen_instructions;
349  Instructions stack;
350  Instruction *tail = NULL;
351  Instruction *head = instructions;
352  for (Instruction *insn = head; insn; ) {
353    if (seen_instructions.find(insn) != seen_instructions.end()) {
354      // We somehow went in a circle. This should never be possible. Not even
355      // cyclic graphs are supposed to confuse us this much.
356      SANDBOX_DIE("Internal compiler error; cannot compute basic blocks");
357    }
358    seen_instructions.insert(insn);
359    if (tail && branch_targets.find(insn) != branch_targets.end()) {
360      // We reached a branch target. Start a new basic block (this means,
361      // flushing the previous basic block first).
362      AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
363      head = insn;
364    }
365    if (BPF_CLASS(insn->code) == BPF_JMP) {
366      // We reached a jump instruction, this completes our current basic
367      // block. Flush it and continue by traversing both the true and the
368      // false branch of the jump. We need to maintain a stack to do so.
369      AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block);
370      if (BPF_OP(insn->code) != BPF_JA) {
371        stack.push_back(insn->jf_ptr);
372      }
373      insn = insn->jt_ptr;
374
375      // If we are jumping to an instruction that we have previously
376      // processed, we are done with this branch. Continue by backtracking
377      // up the stack.
378      while (seen_instructions.find(insn) != seen_instructions.end()) {
379      backtracking:
380        if (stack.empty()) {
381          // We successfully traversed all reachable instructions.
382          return first_block;
383        } else {
384          // Going up the stack.
385          insn = stack.back();
386          stack.pop_back();
387        }
388      }
389      // Starting a new basic block.
390      tail = NULL;
391      head = insn;
392    } else {
393      // We found a non-jumping instruction, append it to current basic
394      // block.
395      tail = insn;
396      insn = insn->next;
397      if (!insn) {
398        // We reached a return statement, flush the current basic block and
399        // backtrack up the stack.
400        AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
401        goto backtracking;
402      }
403    }
404  }
405  return first_block;
406}
407
408// We define a comparator that inspects the sequence of instructions in our
409// basic block and any blocks referenced by this block. This function can be
410// used in a "less" comparator for the purpose of storing pointers to basic
411// blocks in STL containers; this gives an easy option to use STL to find
412// shared tail  sequences of basic blocks.
413static int PointerCompare(const BasicBlock *block1, const BasicBlock *block2,
414                          const TargetsToBlocks& blocks) {
415  // Return <0, 0, or >0 depending on the ordering of "block1" and "block2".
416  // If we are looking at the exact same block, this is trivial and we don't
417  // need to do a full comparison.
418  if (block1 == block2) {
419    return 0;
420  }
421
422  // We compare the sequence of instructions in both basic blocks.
423  const Instructions& insns1 = block1->instructions;
424  const Instructions& insns2 = block2->instructions;
425  Instructions::const_iterator iter1 = insns1.begin();
426  Instructions::const_iterator iter2 = insns2.begin();
427  for (;; ++iter1, ++iter2) {
428    // If we have reached the end of the sequence of instructions in one or
429    // both basic blocks, we know the relative ordering between the two blocks
430    // and can return.
431    if (iter1 == insns1.end()) {
432      return iter2 == insns2.end() ? 0 : -1;
433    } else if (iter2 == insns2.end()) {
434      return 1;
435    }
436
437    // Compare the individual fields for both instructions.
438    const Instruction& insn1 = **iter1;
439    const Instruction& insn2 = **iter2;
440    if (insn1.code == insn2.code) {
441      if (insn1.k == insn2.k) {
442        // Only conditional jump instructions use the jt_ptr and jf_ptr
443        // fields.
444        if (BPF_CLASS(insn1.code) == BPF_JMP) {
445          if (BPF_OP(insn1.code) != BPF_JA) {
446            // Recursively compare the "true" and "false" branches.
447            // A well-formed BPF program can't have any cycles, so we know
448            // that our recursive algorithm will ultimately terminate.
449            // In the unlikely event that the programmer made a mistake and
450            // went out of the way to give us a cyclic program, we will crash
451            // with a stack overflow. We are OK with that.
452            int c = PointerCompare(blocks.find(insn1.jt_ptr)->second,
453                                   blocks.find(insn2.jt_ptr)->second,
454                                   blocks);
455            if (c == 0) {
456              c = PointerCompare(blocks.find(insn1.jf_ptr)->second,
457                                 blocks.find(insn2.jf_ptr)->second,
458                                 blocks);
459              if (c == 0) {
460                continue;
461              } else {
462                return c;
463              }
464            } else {
465              return c;
466            }
467          } else {
468            int c = PointerCompare(blocks.find(insn1.jt_ptr)->second,
469                                   blocks.find(insn2.jt_ptr)->second,
470                                   blocks);
471            if (c == 0) {
472              continue;
473            } else {
474              return c;
475            }
476          }
477        } else {
478          continue;
479        }
480      } else {
481        return insn1.k - insn2.k;
482      }
483    } else {
484      return insn1.code - insn2.code;
485    }
486  }
487}
488
489void CodeGen::MergeTails(TargetsToBlocks *blocks) {
490  // We enter all of our basic blocks into a set using the BasicBlock::Less()
491  // comparator. This naturally results in blocks with identical tails of
492  // instructions to map to the same entry in the set. Whenever we discover
493  // that a particular chain of instructions is already in the set, we merge
494  // the basic blocks and update the pointer in the "blocks" map.
495  // Returns the number of unique basic blocks.
496  // N.B. We don't merge instructions on a granularity that is finer than
497  //      a basic block. In practice, this is sufficiently rare that we don't
498  //      incur a big cost.
499  //      Similarly, we currently don't merge anything other than tails. In
500  //      the future, we might decide to revisit this decision and attempt to
501  //      merge arbitrary sub-sequences of instructions.
502  BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare);
503  typedef std::set<BasicBlock *, BasicBlock::Less<TargetsToBlocks> > Set;
504  Set seen_basic_blocks(less);
505  for (TargetsToBlocks::iterator iter = blocks->begin();
506       iter != blocks->end();
507       ++iter) {
508    BasicBlock *bb = iter->second;
509    Set::const_iterator entry = seen_basic_blocks.find(bb);
510    if (entry == seen_basic_blocks.end()) {
511      // This is the first time we see this particular sequence of
512      // instructions. Enter the basic block into the set of known
513      // basic blocks.
514      seen_basic_blocks.insert(bb);
515    } else {
516      // We have previously seen another basic block that defines the same
517      // sequence of instructions. Merge the two blocks and update the
518      // pointer in the "blocks" map.
519      iter->second = *entry;
520    }
521  }
522}
523
524void CodeGen::ComputeIncomingBranches(BasicBlock *block,
525                                      const TargetsToBlocks& targets_to_blocks,
526                                      IncomingBranches *incoming_branches) {
527  // We increment the number of incoming branches each time we encounter a
528  // basic block. But we only traverse recursively the very first time we
529  // encounter a new block. This is necessary to make topological sorting
530  // work correctly.
531  if (++(*incoming_branches)[block] == 1) {
532    Instruction *last_insn = block->instructions.back();
533    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
534      ComputeIncomingBranches(
535                             targets_to_blocks.find(last_insn->jt_ptr)->second,
536                             targets_to_blocks, incoming_branches);
537      if (BPF_OP(last_insn->code) != BPF_JA) {
538        ComputeIncomingBranches(
539                             targets_to_blocks.find(last_insn->jf_ptr)->second,
540                             targets_to_blocks, incoming_branches);
541      }
542    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
543      ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second,
544                              targets_to_blocks, incoming_branches);
545    }
546  }
547}
548
549void CodeGen::TopoSortBasicBlocks(BasicBlock *first_block,
550                                  const TargetsToBlocks& blocks,
551                                  BasicBlocks *basic_blocks) {
552  // Textbook implementation of a toposort. We keep looking for basic blocks
553  // that don't have any incoming branches (initially, this is just the
554  // "first_block") and add them to the topologically sorted list of
555  // "basic_blocks". As we do so, we remove outgoing branches. This potentially
556  // ends up making our descendants eligible for the sorted list. The
557  // sorting algorithm terminates when there are no more basic blocks that have
558  // no incoming branches. If we didn't move all blocks from the set of
559  // "unordered_blocks" to the sorted list of "basic_blocks", there must have
560  // been a cyclic dependency. This should never happen in a BPF program, as
561  // well-formed BPF programs only ever have forward branches.
562  IncomingBranches unordered_blocks;
563  ComputeIncomingBranches(first_block, blocks, &unordered_blocks);
564
565  std::set<BasicBlock *> heads;
566  for (;;) {
567    // Move block from "unordered_blocks" to "basic_blocks".
568    basic_blocks->push_back(first_block);
569
570    // Inspect last instruction in the basic block. This is typically either a
571    // jump or a return statement. But it could also be a "normal" instruction
572    // that is followed by a jump target.
573    Instruction *last_insn = first_block->instructions.back();
574    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
575      // Remove outgoing branches. This might end up moving our descendants
576      // into set of "head" nodes that no longer have any incoming branches.
577      TargetsToBlocks::const_iterator iter;
578      if (BPF_OP(last_insn->code) != BPF_JA) {
579        iter = blocks.find(last_insn->jf_ptr);
580        if (!--unordered_blocks[iter->second]) {
581          heads.insert(iter->second);
582        }
583      }
584      iter = blocks.find(last_insn->jt_ptr);
585      if (!--unordered_blocks[iter->second]) {
586        first_block = iter->second;
587        continue;
588      }
589    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
590      // We encountered an instruction that doesn't change code flow. Try to
591      // pick the next "first_block" from "last_insn->next", if possible.
592      TargetsToBlocks::const_iterator iter;
593      iter = blocks.find(last_insn->next);
594      if (!--unordered_blocks[iter->second]) {
595        first_block = iter->second;
596        continue;
597      } else {
598        // Our basic block is supposed to be followed by "last_insn->next",
599        // but dependencies prevent this from happening. Insert a BPF_JA
600        // instruction to correct the code flow.
601        Instruction *ja = MakeInstruction(BPF_JMP+BPF_JA, 0, last_insn->next);
602        first_block->instructions.push_back(ja);
603        last_insn->next = ja;
604      }
605    }
606    if (heads.empty()) {
607      if (unordered_blocks.size() != basic_blocks->size()) {
608        SANDBOX_DIE("Internal compiler error; cyclic graph detected");
609      }
610      return;
611    }
612    // Proceed by picking an arbitrary node from the set of basic blocks that
613    // do not have any incoming branches.
614    first_block = *heads.begin();
615    heads.erase(heads.begin());
616  }
617}
618
619void CodeGen::ComputeRelativeJumps(BasicBlocks *basic_blocks,
620                                   const TargetsToBlocks& targets_to_blocks) {
621  // While we previously used pointers in jt_ptr and jf_ptr to link jump
622  // instructions to their targets, we now convert these jumps to relative
623  // jumps that are suitable for loading the BPF program into the kernel.
624  int offset = 0;
625
626  // Since we just completed a toposort, all jump targets are guaranteed to
627  // go forward. This means, iterating over the basic blocks in reverse makes
628  // it trivial to compute the correct offsets.
629  BasicBlock *bb = NULL;
630  BasicBlock *last_bb = NULL;
631  for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin();
632       iter != basic_blocks->rend();
633       ++iter) {
634    last_bb = bb;
635    bb = *iter;
636    Instruction *insn = bb->instructions.back();
637    if (BPF_CLASS(insn->code) == BPF_JMP) {
638      // Basic block ended in a jump instruction. We can now compute the
639      // appropriate offsets.
640      if (BPF_OP(insn->code) == BPF_JA) {
641        // "Always" jumps use the 32bit "k" field for the offset, instead
642        // of the 8bit "jt" and "jf" fields.
643        int jmp =
644          offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
645        insn->k  = jmp;
646        insn->jt = insn->jf = 0;
647      } else {
648        // The offset computations for conditional jumps are just the same
649        // as for "always" jumps.
650        int jt = offset-targets_to_blocks.find(insn->jt_ptr)->second->offset;
651        int jf = offset-targets_to_blocks.find(insn->jf_ptr)->second->offset;
652
653        // There is an added complication, because conditional relative jumps
654        // can only jump at most 255 instructions forward. If we have to jump
655        // further, insert an extra "always" jump.
656        Instructions::size_type jmp = bb->instructions.size();
657        if (jt > 255 || (jt == 255 && jf > 255)) {
658          Instruction *ja = MakeInstruction(BPF_JMP+BPF_JA, 0, insn->jt_ptr);
659          bb->instructions.push_back(ja);
660          ja->k  = jt;
661          ja->jt = ja->jf = 0;
662
663          // The newly inserted "always" jump, of course, requires us to adjust
664          // the jump targets in the original conditional jump.
665          jt = 0;
666          ++jf;
667        }
668        if (jf > 255) {
669          Instruction *ja = MakeInstruction(BPF_JMP+BPF_JA, 0, insn->jf_ptr);
670          bb->instructions.insert(bb->instructions.begin() + jmp, ja);
671          ja->k  = jf;
672          ja->jt = ja->jf = 0;
673
674          // Again, we have to adjust the jump targets in the original
675          // conditional jump.
676          ++jt;
677          jf = 0;
678        }
679
680        // Now we can finally set the relative jump targets in the conditional
681        // jump instruction. Afterwards, we must no longer access the jt_ptr
682        // and jf_ptr fields.
683        insn->jt = jt;
684        insn->jf = jf;
685      }
686    } else if (BPF_CLASS(insn->code) != BPF_RET &&
687               targets_to_blocks.find(insn->next)->second != last_bb) {
688      SANDBOX_DIE("Internal compiler error; invalid basic block encountered");
689    }
690
691    // Proceed to next basic block.
692    offset += bb->instructions.size();
693    bb->offset = offset;
694  }
695  return;
696}
697
698void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks,
699                                     Sandbox::Program *program) {
700  // Our basic blocks have been sorted and relative jump offsets have been
701  // computed. The last remaining step is for all the instructions in our
702  // basic blocks to be concatenated into a BPF program.
703  program->clear();
704  for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin();
705       bb_iter != basic_blocks.end();
706       ++bb_iter) {
707    const BasicBlock& bb = **bb_iter;
708    for (Instructions::const_iterator insn_iter = bb.instructions.begin();
709         insn_iter != bb.instructions.end();
710         ++insn_iter) {
711      const Instruction& insn = **insn_iter;
712      program->push_back(
713        (struct sock_filter) { insn.code, insn.jt, insn.jf, insn.k });
714    }
715  }
716  return;
717}
718
719void CodeGen::Compile(Instruction *instructions, Sandbox::Program *program) {
720  if (compiled_) {
721    SANDBOX_DIE("Cannot call Compile() multiple times. Create a new code "
722                "generator instead");
723  }
724  compiled_ = true;
725
726  BranchTargets branch_targets;
727  FindBranchTargets(*instructions, &branch_targets);
728  TargetsToBlocks all_blocks;
729  BasicBlock *first_block =
730    CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks);
731  MergeTails(&all_blocks);
732  BasicBlocks basic_blocks;
733  TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks);
734  ComputeRelativeJumps(&basic_blocks, all_blocks);
735  ConcatenateBasicBlocks(basic_blocks, program);
736  return;
737}
738
739}  // namespace
740