15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
51320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "sandbox/linux/seccomp-bpf/codegen.h"
61320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
72a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include <stdio.h>
82a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
91320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include <set>
101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
11effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch#include "base/logging.h"
121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "sandbox/linux/seccomp-bpf/basicblock.h"
131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "sandbox/linux/seccomp-bpf/die.h"
141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "sandbox/linux/seccomp-bpf/instruction.h"
151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "sandbox/linux/seccomp-bpf/linux_seccomp.h"
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)namespace {
182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// Helper function for Traverse().
20a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)void TraverseRecursively(std::set<sandbox::Instruction*>* visited,
21a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)                         sandbox::Instruction* instruction) {
222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (visited->find(instruction) == visited->end()) {
232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    visited->insert(instruction);
242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    switch (BPF_CLASS(instruction->code)) {
25f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      case BPF_JMP:
26f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        if (BPF_OP(instruction->code) != BPF_JA) {
27f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          TraverseRecursively(visited, instruction->jf_ptr);
28f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        }
29f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        TraverseRecursively(visited, instruction->jt_ptr);
30f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        break;
31f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      case BPF_RET:
32f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        break;
33f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      default:
34f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        TraverseRecursively(visited, instruction->next);
35f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        break;
362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}  // namespace
412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
42a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)namespace sandbox {
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
44f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)CodeGen::CodeGen() : compiled_(false) {}
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)CodeGen::~CodeGen() {
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (Instructions::iterator iter = instructions_.begin();
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       iter != instructions_.end();
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++iter) {
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete *iter;
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (BasicBlocks::iterator iter = basic_blocks_.begin();
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       iter != basic_blocks_.end();
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++iter) {
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete *iter;
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
59a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)void CodeGen::PrintProgram(const SandboxBPF::Program& program) {
60a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  for (SandboxBPF::Program::const_iterator iter = program.begin();
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       iter != program.end();
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++iter) {
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int ip = (int)(iter - program.begin());
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    fprintf(stderr, "%3d) ", ip);
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    switch (BPF_CLASS(iter->code)) {
66f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      case BPF_LD:
67f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        if (iter->code == BPF_LD + BPF_W + BPF_ABS) {
68f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          fprintf(stderr, "LOAD %d  // ", (int)iter->k);
69f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          if (iter->k == offsetof(struct arch_seccomp_data, nr)) {
70f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            fprintf(stderr, "System call number\n");
71f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          } else if (iter->k == offsetof(struct arch_seccomp_data, arch)) {
72f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            fprintf(stderr, "Architecture\n");
73f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          } else if (iter->k ==
74f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                     offsetof(struct arch_seccomp_data, instruction_pointer)) {
75f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            fprintf(stderr, "Instruction pointer (LSB)\n");
76f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          } else if (iter->k ==
77f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                     offsetof(struct arch_seccomp_data, instruction_pointer) +
78f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                         4) {
79f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            fprintf(stderr, "Instruction pointer (MSB)\n");
80f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          } else if (iter->k >= offsetof(struct arch_seccomp_data, args) &&
81f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                     iter->k < offsetof(struct arch_seccomp_data, args) + 48 &&
82f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                     (iter->k - offsetof(struct arch_seccomp_data, args)) % 4 ==
83f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                         0) {
84f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            fprintf(
85f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                stderr,
86f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                "Argument %d (%cSB)\n",
87f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                (int)(iter->k - offsetof(struct arch_seccomp_data, args)) / 8,
88f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                (iter->k - offsetof(struct arch_seccomp_data, args)) % 8 ? 'M'
89f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                                                         : 'L');
90f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          } else {
91f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            fprintf(stderr, "???\n");
92f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          }
93f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        } else {
94f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          fprintf(stderr, "LOAD ???\n");
95f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        }
96f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        break;
97f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      case BPF_JMP:
98f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        if (BPF_OP(iter->code) == BPF_JA) {
99f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          fprintf(stderr, "JMP %d\n", ip + iter->k + 1);
100f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        } else {
101f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n",
102f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)              BPF_OP(iter->code) == BPF_JSET ? "&" :
103f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)              BPF_OP(iter->code) == BPF_JEQ ? "==" :
104f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)              BPF_OP(iter->code) == BPF_JGE ? ">=" :
105f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)              BPF_OP(iter->code) == BPF_JGT ? ">"  : "???",
106f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)              (int)iter->k,
107f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)              ip + iter->jt + 1, ip + iter->jf + 1);
108f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        }
109f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        break;
110f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      case BPF_RET:
111f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        fprintf(stderr, "RET 0x%x  // ", iter->k);
112f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP) {
113f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          fprintf(stderr, "Trap #%d\n", iter->k & SECCOMP_RET_DATA);
114f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
115f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          fprintf(stderr, "errno = %d\n", iter->k & SECCOMP_RET_DATA);
116f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRACE) {
117f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)          fprintf(stderr, "Trace #%d\n", iter->k & SECCOMP_RET_DATA);
118f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        } else if (iter->k == SECCOMP_RET_ALLOW) {
119f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          fprintf(stderr, "Allowed\n");
1202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        } else {
1212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          fprintf(stderr, "???\n");
1222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        }
123f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        break;
124f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      case BPF_ALU:
125f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        fprintf(stderr, BPF_OP(iter->code) == BPF_NEG
126f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            ? "A := -A\n" : "A := A %s 0x%x\n",
127f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            BPF_OP(iter->code) == BPF_ADD ? "+"  :
128f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            BPF_OP(iter->code) == BPF_SUB ? "-"  :
129f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            BPF_OP(iter->code) == BPF_MUL ? "*"  :
130f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            BPF_OP(iter->code) == BPF_DIV ? "/"  :
131f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            BPF_OP(iter->code) == BPF_MOD ? "%"  :
132f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            BPF_OP(iter->code) == BPF_OR  ? "|"  :
133f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            BPF_OP(iter->code) == BPF_XOR ? "^"  :
134f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            BPF_OP(iter->code) == BPF_AND ? "&"  :
135f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            BPF_OP(iter->code) == BPF_LSH ? "<<" :
136f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            BPF_OP(iter->code) == BPF_RSH ? ">>" : "???",
137f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            (int)iter->k);
138f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        break;
139f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      default:
1402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        fprintf(stderr, "???\n");
141f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        break;
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
147f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)Instruction* CodeGen::MakeInstruction(uint16_t code,
148f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                      uint32_t k,
149f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                      Instruction* next) {
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We can handle non-jumping instructions and "always" jumps. Both of
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // them are followed by exactly one "next" instruction.
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We allow callers to defer specifying "next", but then they must call
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // "joinInstructions" later.
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
155f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    SANDBOX_DIE(
156f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        "Must provide both \"true\" and \"false\" branch "
157f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        "for a BPF_JMP");
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (next && BPF_CLASS(code) == BPF_RET) {
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SANDBOX_DIE("Cannot append instructions after a return statement");
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (BPF_CLASS(code) == BPF_JMP) {
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // "Always" jumps use the "true" branch target, only.
164f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    Instruction* insn = new Instruction(code, 0, next, NULL);
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    instructions_.push_back(insn);
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return insn;
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Non-jumping instructions do not use any of the branch targets.
169f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    Instruction* insn = new Instruction(code, k, next);
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    instructions_.push_back(insn);
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return insn;
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
175f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)Instruction* CodeGen::MakeInstruction(uint16_t code,
176f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                      uint32_t k,
177f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                      Instruction* jt,
178f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                      Instruction* jf) {
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We can handle all conditional jumps. They are followed by both a
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // "true" and a "false" branch.
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (BPF_CLASS(code) != BPF_JMP || BPF_OP(code) == BPF_JA) {
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SANDBOX_DIE("Expected a BPF_JMP instruction");
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  if (!jt || !jf) {
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SANDBOX_DIE("Branches must jump to a valid instruction");
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
187f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  Instruction* insn = new Instruction(code, k, jt, jf);
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  instructions_.push_back(insn);
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return insn;
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
192f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)void CodeGen::Traverse(Instruction* instruction,
193f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                       void (*fnc)(Instruction*, void*),
194f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                       void* aux) {
195f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  std::set<Instruction*> visited;
1962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  TraverseRecursively(&visited, instruction);
197f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  for (std::set<Instruction*>::const_iterator iter = visited.begin();
1982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)       iter != visited.end();
1992a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)       ++iter) {
2002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    fnc(*iter, aux);
2012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
2022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
2032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void CodeGen::FindBranchTargets(const Instruction& instructions,
205f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                BranchTargets* branch_targets) {
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Follow all possible paths through the "instructions" graph and compute
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // a list of branch targets. This will later be needed to compute the
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // boundaries of basic blocks.
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We maintain a set of all instructions that we have previously seen. This
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // set ultimately converges on all instructions in the program.
211f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  std::set<const Instruction*> seen_instructions;
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Instructions stack;
213f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  for (const Instruction* insn = &instructions; insn;) {
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    seen_instructions.insert(insn);
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BPF_CLASS(insn->code) == BPF_JMP) {
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Found a jump. Increase count of incoming edges for each of the jump
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // targets.
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++(*branch_targets)[insn->jt_ptr];
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (BPF_OP(insn->code) != BPF_JA) {
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++(*branch_targets)[insn->jf_ptr];
221f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        stack.push_back(const_cast<Instruction*>(insn));
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Start a recursive decent for depth-first traversal.
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // We haven't seen the "true" branch yet. Traverse it now. We have
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // already remembered the "false" branch on the stack and will
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // traverse it later.
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn = insn->jt_ptr;
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // Now try traversing the "false" branch.
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn = NULL;
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // This is a non-jump instruction, just continue to the next instruction
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // (if any). It's OK if "insn" becomes NULL when reaching a return
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // instruction.
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) {
239f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        SANDBOX_DIE(
240f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            "Internal compiler error; return instruction must be at "
241f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            "the end of the BPF program");
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (seen_instructions.find(insn->next) == seen_instructions.end()) {
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn = insn->next;
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // We have seen this instruction before. That could happen if it is
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // a branch target. No need to continue processing.
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn = NULL;
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (!insn && !stack.empty()) {
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We are done processing all the way to a leaf node, backtrack up the
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // stack to any branches that we haven't processed yet. By definition,
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // this has to be a "false" branch, as we always process the "true"
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // branches right away.
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      insn = stack.back();
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      stack.pop_back();
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) {
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // We haven't seen the "false" branch yet. So, that's where we'll
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // go now.
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn = insn->jf_ptr;
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // We have seen both the "true" and the "false" branch, continue
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // up the stack.
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
266f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          SANDBOX_DIE(
267f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)              "Internal compiler error; cannot find all "
268f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)              "branch targets");
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn = NULL;
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
277f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)BasicBlock* CodeGen::MakeBasicBlock(Instruction* head, Instruction* tail) {
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Iterate over all the instructions between "head" and "tail" and
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // insert them into a new basic block.
280f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  BasicBlock* bb = new BasicBlock;
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (;; head = head->next) {
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bb->instructions.push_back(head);
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (head == tail) {
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BPF_CLASS(head->code) == BPF_JMP) {
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SANDBOX_DIE("Found a jump inside of a basic block");
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  basic_blocks_.push_back(bb);
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return bb;
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
294f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)void CodeGen::AddBasicBlock(Instruction* head,
295f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                            Instruction* tail,
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                            const BranchTargets& branch_targets,
297f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                            TargetsToBlocks* basic_blocks,
298f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                            BasicBlock** firstBlock) {
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Add a new basic block to "basic_blocks". Also set "firstBlock", if it
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // has not been set before.
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BranchTargets::const_iterator iter = branch_targets.find(head);
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if ((iter == branch_targets.end()) != !*firstBlock ||
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      !*firstBlock != basic_blocks->empty()) {
304f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    SANDBOX_DIE(
305f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        "Only the very first basic block should have no "
306f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        "incoming jumps");
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
308f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  BasicBlock* bb = MakeBasicBlock(head, tail);
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!*firstBlock) {
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *firstBlock = bb;
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  (*basic_blocks)[head] = bb;
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
316f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)BasicBlock* CodeGen::CutGraphIntoBasicBlocks(
317f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    Instruction* instructions,
318f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    const BranchTargets& branch_targets,
319f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    TargetsToBlocks* basic_blocks) {
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Textbook implementation of a basic block generator. All basic blocks
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // start with a branch target and end with either a return statement or
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // a jump (or are followed by an instruction that forms the beginning of a
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // new block). Both conditional and "always" jumps are supported.
324f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  BasicBlock* first_block = NULL;
325f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  std::set<const Instruction*> seen_instructions;
3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Instructions stack;
327f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  Instruction* tail = NULL;
328f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  Instruction* head = instructions;
329f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  for (Instruction* insn = head; insn;) {
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (seen_instructions.find(insn) != seen_instructions.end()) {
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We somehow went in a circle. This should never be possible. Not even
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // cyclic graphs are supposed to confuse us this much.
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SANDBOX_DIE("Internal compiler error; cannot compute basic blocks");
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    seen_instructions.insert(insn);
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (tail && branch_targets.find(insn) != branch_targets.end()) {
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We reached a branch target. Start a new basic block (this means,
3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // flushing the previous basic block first).
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      head = insn;
3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BPF_CLASS(insn->code) == BPF_JMP) {
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We reached a jump instruction, this completes our current basic
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // block. Flush it and continue by traversing both the true and the
3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // false branch of the jump. We need to maintain a stack to do so.
3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block);
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (BPF_OP(insn->code) != BPF_JA) {
3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        stack.push_back(insn->jf_ptr);
3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      insn = insn->jt_ptr;
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // If we are jumping to an instruction that we have previously
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // processed, we are done with this branch. Continue by backtracking
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // up the stack.
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (seen_instructions.find(insn) != seen_instructions.end()) {
3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      backtracking:
3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (stack.empty()) {
3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // We successfully traversed all reachable instructions.
3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          return first_block;
3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else {
3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // Going up the stack.
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          insn = stack.back();
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          stack.pop_back();
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Starting a new basic block.
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      tail = NULL;
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      head = insn;
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We found a non-jumping instruction, append it to current basic
3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // block.
3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      tail = insn;
3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      insn = insn->next;
3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!insn) {
3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // We reached a return statement, flush the current basic block and
3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // backtrack up the stack.
3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        goto backtracking;
3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return first_block;
3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// We define a comparator that inspects the sequence of instructions in our
3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// basic block and any blocks referenced by this block. This function can be
3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// used in a "less" comparator for the purpose of storing pointers to basic
3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// blocks in STL containers; this gives an easy option to use STL to find
3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// shared tail  sequences of basic blocks.
390f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)static int PointerCompare(const BasicBlock* block1,
391f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                          const BasicBlock* block2,
3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                          const TargetsToBlocks& blocks) {
3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Return <0, 0, or >0 depending on the ordering of "block1" and "block2".
3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If we are looking at the exact same block, this is trivial and we don't
3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // need to do a full comparison.
3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (block1 == block2) {
3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return 0;
3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We compare the sequence of instructions in both basic blocks.
4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const Instructions& insns1 = block1->instructions;
4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const Instructions& insns2 = block2->instructions;
403effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  // Basic blocks should never be empty.
404effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  CHECK(!insns1.empty());
405effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  CHECK(!insns2.empty());
406effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch
4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Instructions::const_iterator iter1 = insns1.begin();
4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Instructions::const_iterator iter2 = insns2.begin();
4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (;; ++iter1, ++iter2) {
4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // If we have reached the end of the sequence of instructions in one or
4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // both basic blocks, we know the relative ordering between the two blocks
4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // and can return.
413cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    if (iter1 == insns1.end() || iter2 == insns2.end()) {
414cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      if (iter1 != insns1.end()) {
415cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)        return 1;
416cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      }
417cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      if (iter2 != insns2.end()) {
418cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)        return -1;
419effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch      }
420cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
421cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      // If the two blocks are the same length (and have elementwise-equal code
422cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      // and k fields) and their last instructions are neither a JMP nor a RET
423cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      // (which is the only way we can reach this point), then we must compare
424cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      // their successors.
425cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      Instruction* const insns1_last = insns1.back();
426cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      Instruction* const insns2_last = insns2.back();
427cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      CHECK(BPF_CLASS(insns1_last->code) != BPF_JMP &&
428cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)            BPF_CLASS(insns1_last->code) != BPF_RET);
429cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
430cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      // Non jumping instructions will always have a valid next instruction.
431cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      CHECK(insns1_last->next);
432cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      CHECK(insns2_last->next);
433cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      return PointerCompare(blocks.find(insns1_last->next)->second,
434cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)                            blocks.find(insns2_last->next)->second,
435cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)                            blocks);
4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Compare the individual fields for both instructions.
4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const Instruction& insn1 = **iter1;
4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const Instruction& insn2 = **iter2;
441cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    if (insn1.code != insn2.code) {
4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return insn1.code - insn2.code;
4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
444cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    if (insn1.k != insn2.k) {
445cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      return insn1.k - insn2.k;
446cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    }
447cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
448cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    // Sanity check: If we're looking at a JMP or RET instruction, by definition
449cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    // it should be the last instruction of the basic block.
450cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    if (BPF_CLASS(insn1.code) == BPF_JMP || BPF_CLASS(insn1.code) == BPF_RET) {
451cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      CHECK_EQ(insns1.back(), &insn1);
452cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      CHECK_EQ(insns2.back(), &insn2);
453cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    }
454cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
455cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    // RET instructions terminate execution, and only JMP instructions use the
456cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    // jt_ptr and jf_ptr fields.  Anything else can continue to the next
457cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    // instruction in the basic block.
458cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    if (BPF_CLASS(insn1.code) == BPF_RET) {
459cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      return 0;
460cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    } else if (BPF_CLASS(insn1.code) != BPF_JMP) {
461cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      continue;
462cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    }
463cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)
464cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    // Recursively compare the "true" and "false" branches.
465cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    // A well-formed BPF program can't have any cycles, so we know
466cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    // that our recursive algorithm will ultimately terminate.
467cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    // In the unlikely event that the programmer made a mistake and
468cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    // went out of the way to give us a cyclic program, we will crash
469cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    // with a stack overflow. We are OK with that.
470cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    if (BPF_OP(insn1.code) != BPF_JA) {
471cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      int c = PointerCompare(blocks.find(insn1.jf_ptr)->second,
472cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)                             blocks.find(insn2.jf_ptr)->second,
473cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)                             blocks);
474cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      if (c != 0) {
475cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)        return c;
476cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      }
477cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    }
478cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    return PointerCompare(blocks.find(insn1.jt_ptr)->second,
479cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)                          blocks.find(insn2.jt_ptr)->second,
480cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)                          blocks);
4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
484f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)void CodeGen::MergeTails(TargetsToBlocks* blocks) {
4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We enter all of our basic blocks into a set using the BasicBlock::Less()
4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // comparator. This naturally results in blocks with identical tails of
4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // instructions to map to the same entry in the set. Whenever we discover
4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // that a particular chain of instructions is already in the set, we merge
4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the basic blocks and update the pointer in the "blocks" map.
4905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns the number of unique basic blocks.
4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // N.B. We don't merge instructions on a granularity that is finer than
4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //      a basic block. In practice, this is sufficiently rare that we don't
4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //      incur a big cost.
4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //      Similarly, we currently don't merge anything other than tails. In
4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //      the future, we might decide to revisit this decision and attempt to
4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //      merge arbitrary sub-sequences of instructions.
4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare);
498f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  typedef std::set<BasicBlock*, BasicBlock::Less<TargetsToBlocks> > Set;
4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Set seen_basic_blocks(less);
500f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  for (TargetsToBlocks::iterator iter = blocks->begin(); iter != blocks->end();
5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++iter) {
502f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    BasicBlock* bb = iter->second;
5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Set::const_iterator entry = seen_basic_blocks.find(bb);
5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (entry == seen_basic_blocks.end()) {
5055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // This is the first time we see this particular sequence of
5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // instructions. Enter the basic block into the set of known
5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // basic blocks.
5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      seen_basic_blocks.insert(bb);
5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We have previously seen another basic block that defines the same
5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // sequence of instructions. Merge the two blocks and update the
5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // pointer in the "blocks" map.
5135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      iter->second = *entry;
5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
518f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)void CodeGen::ComputeIncomingBranches(BasicBlock* block,
5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                      const TargetsToBlocks& targets_to_blocks,
520f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                      IncomingBranches* incoming_branches) {
5215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We increment the number of incoming branches each time we encounter a
5225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // basic block. But we only traverse recursively the very first time we
5235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // encounter a new block. This is necessary to make topological sorting
5245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // work correctly.
5255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (++(*incoming_branches)[block] == 1) {
526f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    Instruction* last_insn = block->instructions.back();
5275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
528f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      ComputeIncomingBranches(targets_to_blocks.find(last_insn->jt_ptr)->second,
529f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                              targets_to_blocks,
530f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                              incoming_branches);
5315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (BPF_OP(last_insn->code) != BPF_JA) {
5325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ComputeIncomingBranches(
533f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            targets_to_blocks.find(last_insn->jf_ptr)->second,
534f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            targets_to_blocks,
535f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)            incoming_branches);
5365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
5375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
5385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second,
539f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                              targets_to_blocks,
540f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                              incoming_branches);
5415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
545f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)void CodeGen::TopoSortBasicBlocks(BasicBlock* first_block,
5465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                  const TargetsToBlocks& blocks,
547f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                  BasicBlocks* basic_blocks) {
5485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Textbook implementation of a toposort. We keep looking for basic blocks
5495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // that don't have any incoming branches (initially, this is just the
5505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // "first_block") and add them to the topologically sorted list of
5515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // "basic_blocks". As we do so, we remove outgoing branches. This potentially
5525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // ends up making our descendants eligible for the sorted list. The
5535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // sorting algorithm terminates when there are no more basic blocks that have
5545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // no incoming branches. If we didn't move all blocks from the set of
5555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // "unordered_blocks" to the sorted list of "basic_blocks", there must have
5565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // been a cyclic dependency. This should never happen in a BPF program, as
5575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // well-formed BPF programs only ever have forward branches.
5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  IncomingBranches unordered_blocks;
5595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ComputeIncomingBranches(first_block, blocks, &unordered_blocks);
5605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
561f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  std::set<BasicBlock*> heads;
5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (;;) {
5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Move block from "unordered_blocks" to "basic_blocks".
5645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    basic_blocks->push_back(first_block);
5655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Inspect last instruction in the basic block. This is typically either a
5675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // jump or a return statement. But it could also be a "normal" instruction
5685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // that is followed by a jump target.
569f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    Instruction* last_insn = first_block->instructions.back();
5705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
5715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Remove outgoing branches. This might end up moving our descendants
5725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // into set of "head" nodes that no longer have any incoming branches.
5735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      TargetsToBlocks::const_iterator iter;
5745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (BPF_OP(last_insn->code) != BPF_JA) {
5755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        iter = blocks.find(last_insn->jf_ptr);
5765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (!--unordered_blocks[iter->second]) {
5775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          heads.insert(iter->second);
5785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
5795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
5805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      iter = blocks.find(last_insn->jt_ptr);
5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!--unordered_blocks[iter->second]) {
5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        first_block = iter->second;
5835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
5845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
5855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
5865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We encountered an instruction that doesn't change code flow. Try to
5875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // pick the next "first_block" from "last_insn->next", if possible.
5885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      TargetsToBlocks::const_iterator iter;
5895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      iter = blocks.find(last_insn->next);
5905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!--unordered_blocks[iter->second]) {
5915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        first_block = iter->second;
5925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
5935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
5945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // Our basic block is supposed to be followed by "last_insn->next",
5955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // but dependencies prevent this from happening. Insert a BPF_JA
5965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // instruction to correct the code flow.
597f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, last_insn->next);
5985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        first_block->instructions.push_back(ja);
5995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        last_insn->next = ja;
6005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
6015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
6025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (heads.empty()) {
6035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (unordered_blocks.size() != basic_blocks->size()) {
6045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        SANDBOX_DIE("Internal compiler error; cyclic graph detected");
6055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
6065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return;
6075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
6085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Proceed by picking an arbitrary node from the set of basic blocks that
6095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // do not have any incoming branches.
6105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    first_block = *heads.begin();
6115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    heads.erase(heads.begin());
6125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
615f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)void CodeGen::ComputeRelativeJumps(BasicBlocks* basic_blocks,
6165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                   const TargetsToBlocks& targets_to_blocks) {
6175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // While we previously used pointers in jt_ptr and jf_ptr to link jump
6185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // instructions to their targets, we now convert these jumps to relative
6195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // jumps that are suitable for loading the BPF program into the kernel.
6205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int offset = 0;
6215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Since we just completed a toposort, all jump targets are guaranteed to
6235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // go forward. This means, iterating over the basic blocks in reverse makes
6245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // it trivial to compute the correct offsets.
625f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  BasicBlock* bb = NULL;
626f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  BasicBlock* last_bb = NULL;
6275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin();
6285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       iter != basic_blocks->rend();
6295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++iter) {
6305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    last_bb = bb;
6315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bb = *iter;
632f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    Instruction* insn = bb->instructions.back();
6335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BPF_CLASS(insn->code) == BPF_JMP) {
6345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Basic block ended in a jump instruction. We can now compute the
6355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // appropriate offsets.
6365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (BPF_OP(insn->code) == BPF_JA) {
6375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // "Always" jumps use the 32bit "k" field for the offset, instead
6385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // of the 8bit "jt" and "jf" fields.
639f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        int jmp = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
640f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        insn->k = jmp;
6415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn->jt = insn->jf = 0;
6425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
6435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // The offset computations for conditional jumps are just the same
6445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // as for "always" jumps.
645f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        int jt = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
646f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        int jf = offset - targets_to_blocks.find(insn->jf_ptr)->second->offset;
6475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // There is an added complication, because conditional relative jumps
6495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // can only jump at most 255 instructions forward. If we have to jump
6505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // further, insert an extra "always" jump.
6515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        Instructions::size_type jmp = bb->instructions.size();
6525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (jt > 255 || (jt == 255 && jf > 255)) {
653f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jt_ptr);
6545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          bb->instructions.push_back(ja);
655f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          ja->k = jt;
6565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          ja->jt = ja->jf = 0;
6575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // The newly inserted "always" jump, of course, requires us to adjust
6595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // the jump targets in the original conditional jump.
6605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          jt = 0;
6615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          ++jf;
6625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
6635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (jf > 255) {
664f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jf_ptr);
6655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          bb->instructions.insert(bb->instructions.begin() + jmp, ja);
666f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          ja->k = jf;
6675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          ja->jt = ja->jf = 0;
6685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // Again, we have to adjust the jump targets in the original
6705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // conditional jump.
6715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          ++jt;
6725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          jf = 0;
6735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
6745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // Now we can finally set the relative jump targets in the conditional
6765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // jump instruction. Afterwards, we must no longer access the jt_ptr
6775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // and jf_ptr fields.
6785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn->jt = jt;
6795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn->jf = jf;
6805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
6815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (BPF_CLASS(insn->code) != BPF_RET &&
6825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               targets_to_blocks.find(insn->next)->second != last_bb) {
6835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SANDBOX_DIE("Internal compiler error; invalid basic block encountered");
6845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
6855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Proceed to next basic block.
6875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    offset += bb->instructions.size();
6885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bb->offset = offset;
6895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
6915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks,
694a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)                                     SandboxBPF::Program* program) {
6955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Our basic blocks have been sorted and relative jump offsets have been
6965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // computed. The last remaining step is for all the instructions in our
6975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // basic blocks to be concatenated into a BPF program.
6985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  program->clear();
6995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin();
7005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       bb_iter != basic_blocks.end();
7015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++bb_iter) {
7025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const BasicBlock& bb = **bb_iter;
7035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (Instructions::const_iterator insn_iter = bb.instructions.begin();
7045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         insn_iter != bb.instructions.end();
7055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         ++insn_iter) {
7065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const Instruction& insn = **insn_iter;
7075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      program->push_back(
708f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)          (struct sock_filter) {insn.code, insn.jt, insn.jf, insn.k});
7095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
7105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
7115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
7125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
714a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)void CodeGen::Compile(Instruction* instructions, SandboxBPF::Program* program) {
7155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (compiled_) {
716f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    SANDBOX_DIE(
717f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        "Cannot call Compile() multiple times. Create a new code "
718f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)        "generator instead");
7195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
7205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  compiled_ = true;
7215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BranchTargets branch_targets;
7235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FindBranchTargets(*instructions, &branch_targets);
7245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TargetsToBlocks all_blocks;
725f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  BasicBlock* first_block =
726f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks);
7275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  MergeTails(&all_blocks);
7285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BasicBlocks basic_blocks;
7295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks);
7305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ComputeRelativeJumps(&basic_blocks, all_blocks);
7315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ConcatenateBasicBlocks(basic_blocks, program);
7325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
7335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
735a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)}  // namespace sandbox
736