15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
52a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include <stdio.h>
62a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "sandbox/linux/seccomp-bpf/codegen.h"
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
92a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)namespace {
102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// Helper function for Traverse().
12a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)void TraverseRecursively(std::set<sandbox::Instruction*>* visited,
13a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)                         sandbox::Instruction* instruction) {
142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (visited->find(instruction) == visited->end()) {
152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    visited->insert(instruction);
162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    switch (BPF_CLASS(instruction->code)) {
17f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      case BPF_JMP:
18f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        if (BPF_OP(instruction->code) != BPF_JA) {
19f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          TraverseRecursively(visited, instruction->jf_ptr);
20f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        }
21f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        TraverseRecursively(visited, instruction->jt_ptr);
22f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        break;
23f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      case BPF_RET:
24f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        break;
25f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      default:
26f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        TraverseRecursively(visited, instruction->next);
27f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        break;
282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}  // namespace
332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
34a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)namespace sandbox {
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
36f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)CodeGen::CodeGen() : compiled_(false) {}
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)CodeGen::~CodeGen() {
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (Instructions::iterator iter = instructions_.begin();
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       iter != instructions_.end();
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++iter) {
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete *iter;
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (BasicBlocks::iterator iter = basic_blocks_.begin();
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       iter != basic_blocks_.end();
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++iter) {
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete *iter;
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
51a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)void CodeGen::PrintProgram(const SandboxBPF::Program& program) {
52a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  for (SandboxBPF::Program::const_iterator iter = program.begin();
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       iter != program.end();
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++iter) {
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int ip = (int)(iter - program.begin());
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    fprintf(stderr, "%3d) ", ip);
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    switch (BPF_CLASS(iter->code)) {
58f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      case BPF_LD:
59f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        if (iter->code == BPF_LD + BPF_W + BPF_ABS) {
60f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          fprintf(stderr, "LOAD %d  // ", (int)iter->k);
61f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          if (iter->k == offsetof(struct arch_seccomp_data, nr)) {
62f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            fprintf(stderr, "System call number\n");
63f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          } else if (iter->k == offsetof(struct arch_seccomp_data, arch)) {
64f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            fprintf(stderr, "Architecture\n");
65f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          } else if (iter->k ==
66f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                     offsetof(struct arch_seccomp_data, instruction_pointer)) {
67f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            fprintf(stderr, "Instruction pointer (LSB)\n");
68f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          } else if (iter->k ==
69f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                     offsetof(struct arch_seccomp_data, instruction_pointer) +
70f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                         4) {
71f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            fprintf(stderr, "Instruction pointer (MSB)\n");
72f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          } else if (iter->k >= offsetof(struct arch_seccomp_data, args) &&
73f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                     iter->k < offsetof(struct arch_seccomp_data, args) + 48 &&
74f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                     (iter->k - offsetof(struct arch_seccomp_data, args)) % 4 ==
75f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                         0) {
76f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            fprintf(
77f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                stderr,
78f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                "Argument %d (%cSB)\n",
79f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                (int)(iter->k - offsetof(struct arch_seccomp_data, args)) / 8,
80f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                (iter->k - offsetof(struct arch_seccomp_data, args)) % 8 ? 'M'
81f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                                                         : 'L');
82f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          } else {
83f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            fprintf(stderr, "???\n");
84f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          }
85f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        } else {
86f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          fprintf(stderr, "LOAD ???\n");
87f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        }
88f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        break;
89f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      case BPF_JMP:
90f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        if (BPF_OP(iter->code) == BPF_JA) {
91f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          fprintf(stderr, "JMP %d\n", ip + iter->k + 1);
92f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        } else {
93f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n",
94f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)              BPF_OP(iter->code) == BPF_JSET ? "&" :
95f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)              BPF_OP(iter->code) == BPF_JEQ ? "==" :
96f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)              BPF_OP(iter->code) == BPF_JGE ? ">=" :
97f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)              BPF_OP(iter->code) == BPF_JGT ? ">"  : "???",
98f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)              (int)iter->k,
99f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)              ip + iter->jt + 1, ip + iter->jf + 1);
100f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        }
101f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        break;
102f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      case BPF_RET:
103f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        fprintf(stderr, "RET 0x%x  // ", iter->k);
104f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP) {
105f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          fprintf(stderr, "Trap #%d\n", iter->k & SECCOMP_RET_DATA);
106f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
107f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          fprintf(stderr, "errno = %d\n", iter->k & SECCOMP_RET_DATA);
108f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        } else if (iter->k == SECCOMP_RET_ALLOW) {
109f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          fprintf(stderr, "Allowed\n");
1102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        } else {
1112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          fprintf(stderr, "???\n");
1122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        }
113f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        break;
114f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      case BPF_ALU:
115f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        fprintf(stderr, BPF_OP(iter->code) == BPF_NEG
116f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            ? "A := -A\n" : "A := A %s 0x%x\n",
117f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            BPF_OP(iter->code) == BPF_ADD ? "+"  :
118f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            BPF_OP(iter->code) == BPF_SUB ? "-"  :
119f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            BPF_OP(iter->code) == BPF_MUL ? "*"  :
120f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            BPF_OP(iter->code) == BPF_DIV ? "/"  :
121f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            BPF_OP(iter->code) == BPF_MOD ? "%"  :
122f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            BPF_OP(iter->code) == BPF_OR  ? "|"  :
123f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            BPF_OP(iter->code) == BPF_XOR ? "^"  :
124f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            BPF_OP(iter->code) == BPF_AND ? "&"  :
125f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            BPF_OP(iter->code) == BPF_LSH ? "<<" :
126f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            BPF_OP(iter->code) == BPF_RSH ? ">>" : "???",
127f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            (int)iter->k);
128f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        break;
129f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      default:
1302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        fprintf(stderr, "???\n");
131f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        break;
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
137f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)Instruction* CodeGen::MakeInstruction(uint16_t code,
138f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                      uint32_t k,
139f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                      Instruction* next) {
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We can handle non-jumping instructions and "always" jumps. Both of
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // them are followed by exactly one "next" instruction.
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We allow callers to defer specifying "next", but then they must call
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // "joinInstructions" later.
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
145f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    SANDBOX_DIE(
146f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        "Must provide both \"true\" and \"false\" branch "
147f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        "for a BPF_JMP");
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (next && BPF_CLASS(code) == BPF_RET) {
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SANDBOX_DIE("Cannot append instructions after a return statement");
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (BPF_CLASS(code) == BPF_JMP) {
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // "Always" jumps use the "true" branch target, only.
154f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    Instruction* insn = new Instruction(code, 0, next, NULL);
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    instructions_.push_back(insn);
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return insn;
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Non-jumping instructions do not use any of the branch targets.
159f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    Instruction* insn = new Instruction(code, k, next);
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    instructions_.push_back(insn);
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return insn;
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
165f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)Instruction* CodeGen::MakeInstruction(uint16_t code, const ErrorCode& err) {
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (BPF_CLASS(code) != BPF_RET) {
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SANDBOX_DIE("ErrorCodes can only be used in return expressions");
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (err.error_type_ != ErrorCode::ET_SIMPLE &&
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      err.error_type_ != ErrorCode::ET_TRAP) {
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SANDBOX_DIE("ErrorCode is not suitable for returning from a BPF program");
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return MakeInstruction(code, err.err_);
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
176f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)Instruction* CodeGen::MakeInstruction(uint16_t code,
177f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                      uint32_t k,
178f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                      Instruction* jt,
179f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                      Instruction* jf) {
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We can handle all conditional jumps. They are followed by both a
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // "true" and a "false" branch.
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (BPF_CLASS(code) != BPF_JMP || BPF_OP(code) == BPF_JA) {
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SANDBOX_DIE("Expected a BPF_JMP instruction");
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!jt && !jf) {
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // We allow callers to defer specifying exactly one of the branch
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // targets. It must then be set later by calling "JoinInstructions".
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SANDBOX_DIE("Branches must jump to a valid instruction");
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
190f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  Instruction* insn = new Instruction(code, k, jt, jf);
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  instructions_.push_back(insn);
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return insn;
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
195f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)void CodeGen::JoinInstructions(Instruction* head, Instruction* tail) {
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Merge two instructions, or set the branch target for an "always" jump.
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This function should be called, if the caller didn't initially provide
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // a value for "next" when creating the instruction.
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (BPF_CLASS(head->code) == BPF_JMP) {
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BPF_OP(head->code) == BPF_JA) {
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (head->jt_ptr) {
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      head->jt_ptr = tail;
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!head->jt_ptr && head->jf_ptr) {
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        head->jt_ptr = tail;
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else if (!head->jf_ptr && head->jt_ptr) {
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        head->jf_ptr = tail;
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        SANDBOX_DIE("Cannot append instructions after a jump");
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else if (BPF_CLASS(head->code) == BPF_RET) {
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SANDBOX_DIE("Cannot append instructions after a return statement");
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else if (head->next) {
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    head->next = tail;
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
224f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)void CodeGen::Traverse(Instruction* instruction,
225f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                       void (*fnc)(Instruction*, void*),
226f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                       void* aux) {
227f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  std::set<Instruction*> visited;
2282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  TraverseRecursively(&visited, instruction);
229f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  for (std::set<Instruction*>::const_iterator iter = visited.begin();
2302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)       iter != visited.end();
2312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)       ++iter) {
2322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    fnc(*iter, aux);
2332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
2342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
2352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void CodeGen::FindBranchTargets(const Instruction& instructions,
237f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                BranchTargets* branch_targets) {
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Follow all possible paths through the "instructions" graph and compute
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // a list of branch targets. This will later be needed to compute the
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // boundaries of basic blocks.
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We maintain a set of all instructions that we have previously seen. This
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // set ultimately converges on all instructions in the program.
243f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  std::set<const Instruction*> seen_instructions;
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Instructions stack;
245f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  for (const Instruction* insn = &instructions; insn;) {
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    seen_instructions.insert(insn);
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BPF_CLASS(insn->code) == BPF_JMP) {
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Found a jump. Increase count of incoming edges for each of the jump
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // targets.
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++(*branch_targets)[insn->jt_ptr];
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (BPF_OP(insn->code) != BPF_JA) {
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++(*branch_targets)[insn->jf_ptr];
253f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        stack.push_back(const_cast<Instruction*>(insn));
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Start a recursive decent for depth-first traversal.
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // We haven't seen the "true" branch yet. Traverse it now. We have
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // already remembered the "false" branch on the stack and will
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // traverse it later.
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn = insn->jt_ptr;
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // Now try traversing the "false" branch.
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn = NULL;
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // This is a non-jump instruction, just continue to the next instruction
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // (if any). It's OK if "insn" becomes NULL when reaching a return
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // instruction.
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) {
271f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        SANDBOX_DIE(
272f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            "Internal compiler error; return instruction must be at "
273f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            "the end of the BPF program");
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (seen_instructions.find(insn->next) == seen_instructions.end()) {
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn = insn->next;
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // We have seen this instruction before. That could happen if it is
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // a branch target. No need to continue processing.
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn = NULL;
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (!insn && !stack.empty()) {
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We are done processing all the way to a leaf node, backtrack up the
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // stack to any branches that we haven't processed yet. By definition,
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // this has to be a "false" branch, as we always process the "true"
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // branches right away.
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      insn = stack.back();
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      stack.pop_back();
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) {
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // We haven't seen the "false" branch yet. So, that's where we'll
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // go now.
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn = insn->jf_ptr;
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // We have seen both the "true" and the "false" branch, continue
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // up the stack.
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
298f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          SANDBOX_DIE(
299f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)              "Internal compiler error; cannot find all "
300f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)              "branch targets");
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn = NULL;
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
309f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)BasicBlock* CodeGen::MakeBasicBlock(Instruction* head, Instruction* tail) {
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Iterate over all the instructions between "head" and "tail" and
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // insert them into a new basic block.
312f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  BasicBlock* bb = new BasicBlock;
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (;; head = head->next) {
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bb->instructions.push_back(head);
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (head == tail) {
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BPF_CLASS(head->code) == BPF_JMP) {
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SANDBOX_DIE("Found a jump inside of a basic block");
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  basic_blocks_.push_back(bb);
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return bb;
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
326f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)void CodeGen::AddBasicBlock(Instruction* head,
327f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                            Instruction* tail,
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                            const BranchTargets& branch_targets,
329f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                            TargetsToBlocks* basic_blocks,
330f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                            BasicBlock** firstBlock) {
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Add a new basic block to "basic_blocks". Also set "firstBlock", if it
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // has not been set before.
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BranchTargets::const_iterator iter = branch_targets.find(head);
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if ((iter == branch_targets.end()) != !*firstBlock ||
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      !*firstBlock != basic_blocks->empty()) {
336f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    SANDBOX_DIE(
337f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        "Only the very first basic block should have no "
338f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        "incoming jumps");
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
340f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  BasicBlock* bb = MakeBasicBlock(head, tail);
3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!*firstBlock) {
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *firstBlock = bb;
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  (*basic_blocks)[head] = bb;
3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
348f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)BasicBlock* CodeGen::CutGraphIntoBasicBlocks(
349f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    Instruction* instructions,
350f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    const BranchTargets& branch_targets,
351f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    TargetsToBlocks* basic_blocks) {
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Textbook implementation of a basic block generator. All basic blocks
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // start with a branch target and end with either a return statement or
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // a jump (or are followed by an instruction that forms the beginning of a
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // new block). Both conditional and "always" jumps are supported.
356f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  BasicBlock* first_block = NULL;
357f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  std::set<const Instruction*> seen_instructions;
3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Instructions stack;
359f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  Instruction* tail = NULL;
360f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  Instruction* head = instructions;
361f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  for (Instruction* insn = head; insn;) {
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (seen_instructions.find(insn) != seen_instructions.end()) {
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We somehow went in a circle. This should never be possible. Not even
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // cyclic graphs are supposed to confuse us this much.
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SANDBOX_DIE("Internal compiler error; cannot compute basic blocks");
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    seen_instructions.insert(insn);
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (tail && branch_targets.find(insn) != branch_targets.end()) {
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We reached a branch target. Start a new basic block (this means,
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // flushing the previous basic block first).
3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      head = insn;
3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BPF_CLASS(insn->code) == BPF_JMP) {
3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We reached a jump instruction, this completes our current basic
3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // block. Flush it and continue by traversing both the true and the
3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // false branch of the jump. We need to maintain a stack to do so.
3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block);
3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (BPF_OP(insn->code) != BPF_JA) {
3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        stack.push_back(insn->jf_ptr);
3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      insn = insn->jt_ptr;
3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // If we are jumping to an instruction that we have previously
3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // processed, we are done with this branch. Continue by backtracking
3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // up the stack.
3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (seen_instructions.find(insn) != seen_instructions.end()) {
3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      backtracking:
3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (stack.empty()) {
3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // We successfully traversed all reachable instructions.
3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          return first_block;
3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else {
3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // Going up the stack.
3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          insn = stack.back();
3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          stack.pop_back();
3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Starting a new basic block.
3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      tail = NULL;
4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      head = insn;
4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We found a non-jumping instruction, append it to current basic
4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // block.
4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      tail = insn;
4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      insn = insn->next;
4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!insn) {
4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // We reached a return statement, flush the current basic block and
4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // backtrack up the stack.
4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        goto backtracking;
4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return first_block;
4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// We define a comparator that inspects the sequence of instructions in our
4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// basic block and any blocks referenced by this block. This function can be
4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// used in a "less" comparator for the purpose of storing pointers to basic
4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// blocks in STL containers; this gives an easy option to use STL to find
4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// shared tail  sequences of basic blocks.
422f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)static int PointerCompare(const BasicBlock* block1,
423f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                          const BasicBlock* block2,
4245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                          const TargetsToBlocks& blocks) {
4255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Return <0, 0, or >0 depending on the ordering of "block1" and "block2".
4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If we are looking at the exact same block, this is trivial and we don't
4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // need to do a full comparison.
4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (block1 == block2) {
4295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return 0;
4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We compare the sequence of instructions in both basic blocks.
4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const Instructions& insns1 = block1->instructions;
4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const Instructions& insns2 = block2->instructions;
4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Instructions::const_iterator iter1 = insns1.begin();
4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Instructions::const_iterator iter2 = insns2.begin();
4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (;; ++iter1, ++iter2) {
4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // If we have reached the end of the sequence of instructions in one or
4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // both basic blocks, we know the relative ordering between the two blocks
4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // and can return.
4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (iter1 == insns1.end()) {
4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return iter2 == insns2.end() ? 0 : -1;
4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (iter2 == insns2.end()) {
4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return 1;
4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Compare the individual fields for both instructions.
4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const Instruction& insn1 = **iter1;
4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const Instruction& insn2 = **iter2;
4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (insn1.code == insn2.code) {
4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (insn1.k == insn2.k) {
4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // Only conditional jump instructions use the jt_ptr and jf_ptr
4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // fields.
4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (BPF_CLASS(insn1.code) == BPF_JMP) {
4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          if (BPF_OP(insn1.code) != BPF_JA) {
4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            // Recursively compare the "true" and "false" branches.
4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            // A well-formed BPF program can't have any cycles, so we know
4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            // that our recursive algorithm will ultimately terminate.
4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            // In the unlikely event that the programmer made a mistake and
4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            // went out of the way to give us a cyclic program, we will crash
4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            // with a stack overflow. We are OK with that.
4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            int c = PointerCompare(blocks.find(insn1.jt_ptr)->second,
4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                   blocks.find(insn2.jt_ptr)->second,
4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                   blocks);
4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if (c == 0) {
4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              c = PointerCompare(blocks.find(insn1.jf_ptr)->second,
4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 blocks.find(insn2.jf_ptr)->second,
4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 blocks);
4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              if (c == 0) {
4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                continue;
4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              } else {
4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                return c;
4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              }
4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            } else {
4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              return c;
4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            }
4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          } else {
4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            int c = PointerCompare(blocks.find(insn1.jt_ptr)->second,
4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                   blocks.find(insn2.jt_ptr)->second,
4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                   blocks);
4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if (c == 0) {
4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              continue;
4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            } else {
4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              return c;
4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            }
4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          }
4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else {
4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          continue;
4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
4905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return insn1.k - insn2.k;
4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return insn1.code - insn2.code;
4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
499f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)void CodeGen::MergeTails(TargetsToBlocks* blocks) {
5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We enter all of our basic blocks into a set using the BasicBlock::Less()
5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // comparator. This naturally results in blocks with identical tails of
5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // instructions to map to the same entry in the set. Whenever we discover
5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // that a particular chain of instructions is already in the set, we merge
5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the basic blocks and update the pointer in the "blocks" map.
5055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns the number of unique basic blocks.
5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // N.B. We don't merge instructions on a granularity that is finer than
5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //      a basic block. In practice, this is sufficiently rare that we don't
5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //      incur a big cost.
5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //      Similarly, we currently don't merge anything other than tails. In
5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //      the future, we might decide to revisit this decision and attempt to
5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //      merge arbitrary sub-sequences of instructions.
5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare);
513f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  typedef std::set<BasicBlock*, BasicBlock::Less<TargetsToBlocks> > Set;
5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Set seen_basic_blocks(less);
515f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  for (TargetsToBlocks::iterator iter = blocks->begin(); iter != blocks->end();
5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++iter) {
517f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    BasicBlock* bb = iter->second;
5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Set::const_iterator entry = seen_basic_blocks.find(bb);
5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (entry == seen_basic_blocks.end()) {
5205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // This is the first time we see this particular sequence of
5215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // instructions. Enter the basic block into the set of known
5225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // basic blocks.
5235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      seen_basic_blocks.insert(bb);
5245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
5255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We have previously seen another basic block that defines the same
5265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // sequence of instructions. Merge the two blocks and update the
5275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // pointer in the "blocks" map.
5285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      iter->second = *entry;
5295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
533f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)void CodeGen::ComputeIncomingBranches(BasicBlock* block,
5345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                      const TargetsToBlocks& targets_to_blocks,
535f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                      IncomingBranches* incoming_branches) {
5365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We increment the number of incoming branches each time we encounter a
5375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // basic block. But we only traverse recursively the very first time we
5385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // encounter a new block. This is necessary to make topological sorting
5395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // work correctly.
5405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (++(*incoming_branches)[block] == 1) {
541f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    Instruction* last_insn = block->instructions.back();
5425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
543f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      ComputeIncomingBranches(targets_to_blocks.find(last_insn->jt_ptr)->second,
544f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                              targets_to_blocks,
545f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                              incoming_branches);
5465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (BPF_OP(last_insn->code) != BPF_JA) {
5475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ComputeIncomingBranches(
548f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            targets_to_blocks.find(last_insn->jf_ptr)->second,
549f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            targets_to_blocks,
550f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            incoming_branches);
5515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
5525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
5535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second,
554f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                              targets_to_blocks,
555f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                              incoming_branches);
5565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
560f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)void CodeGen::TopoSortBasicBlocks(BasicBlock* first_block,
5615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                  const TargetsToBlocks& blocks,
562f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                  BasicBlocks* basic_blocks) {
5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Textbook implementation of a toposort. We keep looking for basic blocks
5645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // that don't have any incoming branches (initially, this is just the
5655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // "first_block") and add them to the topologically sorted list of
5665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // "basic_blocks". As we do so, we remove outgoing branches. This potentially
5675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // ends up making our descendants eligible for the sorted list. The
5685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // sorting algorithm terminates when there are no more basic blocks that have
5695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // no incoming branches. If we didn't move all blocks from the set of
5705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // "unordered_blocks" to the sorted list of "basic_blocks", there must have
5715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // been a cyclic dependency. This should never happen in a BPF program, as
5725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // well-formed BPF programs only ever have forward branches.
5735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  IncomingBranches unordered_blocks;
5745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ComputeIncomingBranches(first_block, blocks, &unordered_blocks);
5755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
576f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  std::set<BasicBlock*> heads;
5775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (;;) {
5785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Move block from "unordered_blocks" to "basic_blocks".
5795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    basic_blocks->push_back(first_block);
5805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Inspect last instruction in the basic block. This is typically either a
5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // jump or a return statement. But it could also be a "normal" instruction
5835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // that is followed by a jump target.
584f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    Instruction* last_insn = first_block->instructions.back();
5855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
5865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Remove outgoing branches. This might end up moving our descendants
5875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // into set of "head" nodes that no longer have any incoming branches.
5885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      TargetsToBlocks::const_iterator iter;
5895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (BPF_OP(last_insn->code) != BPF_JA) {
5905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        iter = blocks.find(last_insn->jf_ptr);
5915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (!--unordered_blocks[iter->second]) {
5925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          heads.insert(iter->second);
5935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
5945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
5955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      iter = blocks.find(last_insn->jt_ptr);
5965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!--unordered_blocks[iter->second]) {
5975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        first_block = iter->second;
5985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
5995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
6005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
6015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We encountered an instruction that doesn't change code flow. Try to
6025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // pick the next "first_block" from "last_insn->next", if possible.
6035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      TargetsToBlocks::const_iterator iter;
6045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      iter = blocks.find(last_insn->next);
6055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!--unordered_blocks[iter->second]) {
6065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        first_block = iter->second;
6075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
6085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
6095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // Our basic block is supposed to be followed by "last_insn->next",
6105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // but dependencies prevent this from happening. Insert a BPF_JA
6115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // instruction to correct the code flow.
612f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, last_insn->next);
6135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        first_block->instructions.push_back(ja);
6145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        last_insn->next = ja;
6155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
6165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
6175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (heads.empty()) {
6185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (unordered_blocks.size() != basic_blocks->size()) {
6195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        SANDBOX_DIE("Internal compiler error; cyclic graph detected");
6205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
6215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return;
6225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
6235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Proceed by picking an arbitrary node from the set of basic blocks that
6245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // do not have any incoming branches.
6255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    first_block = *heads.begin();
6265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    heads.erase(heads.begin());
6275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
630f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)void CodeGen::ComputeRelativeJumps(BasicBlocks* basic_blocks,
6315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                   const TargetsToBlocks& targets_to_blocks) {
6325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // While we previously used pointers in jt_ptr and jf_ptr to link jump
6335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // instructions to their targets, we now convert these jumps to relative
6345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // jumps that are suitable for loading the BPF program into the kernel.
6355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int offset = 0;
6365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Since we just completed a toposort, all jump targets are guaranteed to
6385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // go forward. This means, iterating over the basic blocks in reverse makes
6395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // it trivial to compute the correct offsets.
640f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  BasicBlock* bb = NULL;
641f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  BasicBlock* last_bb = NULL;
6425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin();
6435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       iter != basic_blocks->rend();
6445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++iter) {
6455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    last_bb = bb;
6465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bb = *iter;
647f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    Instruction* insn = bb->instructions.back();
6485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BPF_CLASS(insn->code) == BPF_JMP) {
6495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Basic block ended in a jump instruction. We can now compute the
6505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // appropriate offsets.
6515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (BPF_OP(insn->code) == BPF_JA) {
6525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // "Always" jumps use the 32bit "k" field for the offset, instead
6535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // of the 8bit "jt" and "jf" fields.
654f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        int jmp = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
655f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        insn->k = jmp;
6565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn->jt = insn->jf = 0;
6575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
6585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // The offset computations for conditional jumps are just the same
6595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // as for "always" jumps.
660f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        int jt = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
661f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        int jf = offset - targets_to_blocks.find(insn->jf_ptr)->second->offset;
6625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // There is an added complication, because conditional relative jumps
6645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // can only jump at most 255 instructions forward. If we have to jump
6655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // further, insert an extra "always" jump.
6665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        Instructions::size_type jmp = bb->instructions.size();
6675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (jt > 255 || (jt == 255 && jf > 255)) {
668f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jt_ptr);
6695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          bb->instructions.push_back(ja);
670f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          ja->k = jt;
6715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          ja->jt = ja->jf = 0;
6725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // The newly inserted "always" jump, of course, requires us to adjust
6745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // the jump targets in the original conditional jump.
6755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          jt = 0;
6765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          ++jf;
6775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
6785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (jf > 255) {
679f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jf_ptr);
6805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          bb->instructions.insert(bb->instructions.begin() + jmp, ja);
681f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          ja->k = jf;
6825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          ja->jt = ja->jf = 0;
6835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // Again, we have to adjust the jump targets in the original
6855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // conditional jump.
6865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          ++jt;
6875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          jf = 0;
6885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
6895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // Now we can finally set the relative jump targets in the conditional
6915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // jump instruction. Afterwards, we must no longer access the jt_ptr
6925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // and jf_ptr fields.
6935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn->jt = jt;
6945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn->jf = jf;
6955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
6965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (BPF_CLASS(insn->code) != BPF_RET &&
6975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               targets_to_blocks.find(insn->next)->second != last_bb) {
6985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SANDBOX_DIE("Internal compiler error; invalid basic block encountered");
6995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
7005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Proceed to next basic block.
7025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    offset += bb->instructions.size();
7035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bb->offset = offset;
7045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
7055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
7065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks,
709a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)                                     SandboxBPF::Program* program) {
7105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Our basic blocks have been sorted and relative jump offsets have been
7115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // computed. The last remaining step is for all the instructions in our
7125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // basic blocks to be concatenated into a BPF program.
7135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  program->clear();
7145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin();
7155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       bb_iter != basic_blocks.end();
7165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++bb_iter) {
7175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const BasicBlock& bb = **bb_iter;
7185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (Instructions::const_iterator insn_iter = bb.instructions.begin();
7195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         insn_iter != bb.instructions.end();
7205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         ++insn_iter) {
7215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const Instruction& insn = **insn_iter;
7225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      program->push_back(
723f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          (struct sock_filter) {insn.code, insn.jt, insn.jf, insn.k});
7245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
7255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
7265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
7275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
729a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)void CodeGen::Compile(Instruction* instructions, SandboxBPF::Program* program) {
7305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (compiled_) {
731f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    SANDBOX_DIE(
732f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        "Cannot call Compile() multiple times. Create a new code "
733f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        "generator instead");
7345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
7355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  compiled_ = true;
7365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BranchTargets branch_targets;
7385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FindBranchTargets(*instructions, &branch_targets);
7395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TargetsToBlocks all_blocks;
740f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  BasicBlock* first_block =
741f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks);
7425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  MergeTails(&all_blocks);
7435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BasicBlocks basic_blocks;
7445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks);
7455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ComputeRelativeJumps(&basic_blocks, all_blocks);
7465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ConcatenateBasicBlocks(basic_blocks, program);
7475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
7485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
750a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)}  // namespace sandbox
751