15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
52a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include <stdio.h>
62a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "sandbox/linux/seccomp-bpf/codegen.h"
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)namespace {
112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// Helper function for Traverse().
132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void TraverseRecursively(std::set<playground2::Instruction *> *visited,
142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                         playground2::Instruction *instruction) {
152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (visited->find(instruction) == visited->end()) {
162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    visited->insert(instruction);
172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    switch (BPF_CLASS(instruction->code)) {
182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    case BPF_JMP:
192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      if (BPF_OP(instruction->code) != BPF_JA) {
202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        TraverseRecursively(visited, instruction->jf_ptr);
212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      }
222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      TraverseRecursively(visited, instruction->jt_ptr);
232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      break;
242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    case BPF_RET:
252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      break;
262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    default:
272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      TraverseRecursively(visited, instruction->next);
282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      break;
292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}  // namespace
342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace playground2 {
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)CodeGen::CodeGen()
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : compiled_(false) {
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)CodeGen::~CodeGen() {
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (Instructions::iterator iter = instructions_.begin();
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       iter != instructions_.end();
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++iter) {
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete *iter;
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (BasicBlocks::iterator iter = basic_blocks_.begin();
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       iter != basic_blocks_.end();
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++iter) {
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    delete *iter;
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void CodeGen::PrintProgram(const Sandbox::Program& program) {
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (Sandbox::Program::const_iterator iter = program.begin();
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       iter != program.end();
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++iter) {
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int ip = (int)(iter - program.begin());
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    fprintf(stderr, "%3d) ", ip);
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    switch (BPF_CLASS(iter->code)) {
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case BPF_LD:
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (iter->code == BPF_LD+BPF_W+BPF_ABS) {
632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        fprintf(stderr, "LOAD %d  // ", (int)iter->k);
642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        if (iter->k == offsetof(struct arch_seccomp_data, nr)) {
652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          fprintf(stderr, "System call number\n");
662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        } else if (iter->k == offsetof(struct arch_seccomp_data, arch)) {
672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          fprintf(stderr, "Architecture\n");
682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        } else if (iter->k == offsetof(struct arch_seccomp_data,
692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                       instruction_pointer)) {
702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          fprintf(stderr, "Instruction pointer (LSB)\n");
712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        } else if (iter->k == offsetof(struct arch_seccomp_data,
722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                       instruction_pointer) + 4) {
732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          fprintf(stderr, "Instruction pointer (MSB)\n");
742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        } else if (iter->k >= offsetof(struct arch_seccomp_data, args) &&
752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                   iter->k <  offsetof(struct arch_seccomp_data, args)+48 &&
762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                   (iter->k-offsetof(struct arch_seccomp_data, args))%4 == 0) {
772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          fprintf(stderr, "Argument %d (%cSB)\n",
782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                  (int)(iter->k-offsetof(struct arch_seccomp_data, args))/8,
792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                  (iter->k-offsetof(struct arch_seccomp_data,
802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                    args))%8 ? 'M' : 'L');
812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        } else {
822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)          fprintf(stderr, "???\n");
832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        }
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        fprintf(stderr, "LOAD ???\n");
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case BPF_JMP:
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (BPF_OP(iter->code) == BPF_JA) {
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        fprintf(stderr, "JMP %d\n", ip + iter->k + 1);
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n",
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                BPF_OP(iter->code) == BPF_JSET ? "&" :
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                BPF_OP(iter->code) == BPF_JEQ ? "==" :
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                BPF_OP(iter->code) == BPF_JGE ? ">=" :
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                BPF_OP(iter->code) == BPF_JGT ? ">"  : "???",
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                (int)iter->k,
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                ip + iter->jt + 1, ip + iter->jf + 1);
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    case BPF_RET:
1022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      fprintf(stderr, "RET 0x%x  // ", iter->k);
1032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP) {
1042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        fprintf(stderr, "Trap #%d\n", iter->k & SECCOMP_RET_DATA);
1052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
1062a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        fprintf(stderr, "errno = %d\n", iter->k & SECCOMP_RET_DATA);
1072a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      } else if (iter->k == SECCOMP_RET_ALLOW) {
1082a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        fprintf(stderr, "Allowed\n");
1092a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      } else {
1102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        fprintf(stderr, "???\n");
1112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      }
1122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      break;
1132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    case BPF_ALU:
1142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      fprintf(stderr, BPF_OP(iter->code) == BPF_NEG
1152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)              ? "A := -A\n" : "A := A %s 0x%x\n",
1162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)              BPF_OP(iter->code) == BPF_ADD ? "+"  :
1172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)              BPF_OP(iter->code) == BPF_SUB ? "-"  :
1182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)              BPF_OP(iter->code) == BPF_MUL ? "*"  :
1192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)              BPF_OP(iter->code) == BPF_DIV ? "/"  :
1202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)              BPF_OP(iter->code) == BPF_MOD ? "%"  :
1212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)              BPF_OP(iter->code) == BPF_OR  ? "|"  :
1222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)              BPF_OP(iter->code) == BPF_XOR ? "^"  :
1232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)              BPF_OP(iter->code) == BPF_AND ? "&"  :
1242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)              BPF_OP(iter->code) == BPF_LSH ? "<<" :
1252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)              BPF_OP(iter->code) == BPF_RSH ? ">>" : "???",
1262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)              (int)iter->k);
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    default:
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      fprintf(stderr, "???\n");
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)Instruction *CodeGen::MakeInstruction(uint16_t code, uint32_t k,
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                      Instruction *next) {
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We can handle non-jumping instructions and "always" jumps. Both of
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // them are followed by exactly one "next" instruction.
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We allow callers to defer specifying "next", but then they must call
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // "joinInstructions" later.
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SANDBOX_DIE("Must provide both \"true\" and \"false\" branch "
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                "for a BPF_JMP");
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (next && BPF_CLASS(code) == BPF_RET) {
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SANDBOX_DIE("Cannot append instructions after a return statement");
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (BPF_CLASS(code) == BPF_JMP) {
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // "Always" jumps use the "true" branch target, only.
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Instruction *insn = new Instruction(code, 0, next, NULL);
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    instructions_.push_back(insn);
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return insn;
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Non-jumping instructions do not use any of the branch targets.
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Instruction *insn = new Instruction(code, k, next);
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    instructions_.push_back(insn);
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return insn;
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)Instruction *CodeGen::MakeInstruction(uint16_t code, const ErrorCode& err) {
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (BPF_CLASS(code) != BPF_RET) {
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SANDBOX_DIE("ErrorCodes can only be used in return expressions");
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (err.error_type_ != ErrorCode::ET_SIMPLE &&
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      err.error_type_ != ErrorCode::ET_TRAP) {
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SANDBOX_DIE("ErrorCode is not suitable for returning from a BPF program");
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return MakeInstruction(code, err.err_);
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)Instruction *CodeGen::MakeInstruction(uint16_t code, uint32_t k,
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                      Instruction *jt, Instruction *jf) {
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We can handle all conditional jumps. They are followed by both a
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // "true" and a "false" branch.
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (BPF_CLASS(code) != BPF_JMP || BPF_OP(code) == BPF_JA) {
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SANDBOX_DIE("Expected a BPF_JMP instruction");
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!jt && !jf) {
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // We allow callers to defer specifying exactly one of the branch
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // targets. It must then be set later by calling "JoinInstructions".
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SANDBOX_DIE("Branches must jump to a valid instruction");
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Instruction *insn = new Instruction(code, k, jt, jf);
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  instructions_.push_back(insn);
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return insn;
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void CodeGen::JoinInstructions(Instruction *head, Instruction *tail) {
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Merge two instructions, or set the branch target for an "always" jump.
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This function should be called, if the caller didn't initially provide
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // a value for "next" when creating the instruction.
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (BPF_CLASS(head->code) == BPF_JMP) {
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BPF_OP(head->code) == BPF_JA) {
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (head->jt_ptr) {
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      head->jt_ptr = tail;
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!head->jt_ptr && head->jf_ptr) {
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        head->jt_ptr = tail;
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else if (!head->jf_ptr && head->jt_ptr) {
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        head->jf_ptr = tail;
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        SANDBOX_DIE("Cannot append instructions after a jump");
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else if (BPF_CLASS(head->code) == BPF_RET) {
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SANDBOX_DIE("Cannot append instructions after a return statement");
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else if (head->next) {
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SANDBOX_DIE("Cannot append instructions in the middle of a sequence");
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    head->next = tail;
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void CodeGen::Traverse(Instruction *instruction,
2202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                       void (*fnc)(Instruction *, void *), void *aux) {
2212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  std::set<Instruction *> visited;
2222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  TraverseRecursively(&visited, instruction);
2232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  for (std::set<Instruction *>::const_iterator iter = visited.begin();
2242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)       iter != visited.end();
2252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)       ++iter) {
2262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    fnc(*iter, aux);
2272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
2282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
2292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void CodeGen::FindBranchTargets(const Instruction& instructions,
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                BranchTargets *branch_targets) {
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Follow all possible paths through the "instructions" graph and compute
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // a list of branch targets. This will later be needed to compute the
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // boundaries of basic blocks.
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We maintain a set of all instructions that we have previously seen. This
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // set ultimately converges on all instructions in the program.
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::set<const Instruction *> seen_instructions;
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Instructions stack;
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (const Instruction *insn = &instructions; insn; ) {
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    seen_instructions.insert(insn);
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BPF_CLASS(insn->code) == BPF_JMP) {
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Found a jump. Increase count of incoming edges for each of the jump
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // targets.
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++(*branch_targets)[insn->jt_ptr];
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (BPF_OP(insn->code) != BPF_JA) {
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++(*branch_targets)[insn->jf_ptr];
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        stack.push_back(const_cast<Instruction *>(insn));
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Start a recursive decent for depth-first traversal.
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // We haven't seen the "true" branch yet. Traverse it now. We have
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // already remembered the "false" branch on the stack and will
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // traverse it later.
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn = insn->jt_ptr;
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // Now try traversing the "false" branch.
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn = NULL;
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // This is a non-jump instruction, just continue to the next instruction
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // (if any). It's OK if "insn" becomes NULL when reaching a return
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // instruction.
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) {
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        SANDBOX_DIE("Internal compiler error; return instruction must be at "
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    "the end of the BPF program");
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (seen_instructions.find(insn->next) == seen_instructions.end()) {
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn = insn->next;
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // We have seen this instruction before. That could happen if it is
2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // a branch target. No need to continue processing.
2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn = NULL;
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while (!insn && !stack.empty()) {
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We are done processing all the way to a leaf node, backtrack up the
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // stack to any branches that we haven't processed yet. By definition,
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // this has to be a "false" branch, as we always process the "true"
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // branches right away.
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      insn = stack.back();
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      stack.pop_back();
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) {
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // We haven't seen the "false" branch yet. So, that's where we'll
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // go now.
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn = insn->jf_ptr;
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // We have seen both the "true" and the "false" branch, continue
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // up the stack.
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          SANDBOX_DIE("Internal compiler error; cannot find all "
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                      "branch targets");
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn = NULL;
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)BasicBlock *CodeGen::MakeBasicBlock(Instruction *head,
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                    Instruction *tail) {
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Iterate over all the instructions between "head" and "tail" and
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // insert them into a new basic block.
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BasicBlock *bb = new BasicBlock;
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (;; head = head->next) {
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bb->instructions.push_back(head);
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (head == tail) {
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      break;
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BPF_CLASS(head->code) == BPF_JMP) {
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SANDBOX_DIE("Found a jump inside of a basic block");
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  basic_blocks_.push_back(bb);
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return bb;
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void CodeGen::AddBasicBlock(Instruction *head,
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                            Instruction *tail,
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                            const BranchTargets& branch_targets,
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                            TargetsToBlocks *basic_blocks,
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                            BasicBlock **firstBlock) {
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Add a new basic block to "basic_blocks". Also set "firstBlock", if it
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // has not been set before.
3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BranchTargets::const_iterator iter = branch_targets.find(head);
3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if ((iter == branch_targets.end()) != !*firstBlock ||
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      !*firstBlock != basic_blocks->empty()) {
3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SANDBOX_DIE("Only the very first basic block should have no "
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                "incoming jumps");
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BasicBlock *bb = MakeBasicBlock(head, tail);
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!*firstBlock) {
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *firstBlock = bb;
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  (*basic_blocks)[head] = bb;
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)BasicBlock *CodeGen::CutGraphIntoBasicBlocks(
3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Instruction *instructions, const BranchTargets& branch_targets,
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    TargetsToBlocks *basic_blocks) {
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Textbook implementation of a basic block generator. All basic blocks
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // start with a branch target and end with either a return statement or
3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // a jump (or are followed by an instruction that forms the beginning of a
3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // new block). Both conditional and "always" jumps are supported.
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BasicBlock *first_block = NULL;
3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::set<const Instruction *> seen_instructions;
3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Instructions stack;
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Instruction *tail = NULL;
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Instruction *head = instructions;
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (Instruction *insn = head; insn; ) {
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (seen_instructions.find(insn) != seen_instructions.end()) {
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We somehow went in a circle. This should never be possible. Not even
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // cyclic graphs are supposed to confuse us this much.
3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SANDBOX_DIE("Internal compiler error; cannot compute basic blocks");
3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    seen_instructions.insert(insn);
3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (tail && branch_targets.find(insn) != branch_targets.end()) {
3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We reached a branch target. Start a new basic block (this means,
3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // flushing the previous basic block first).
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      head = insn;
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BPF_CLASS(insn->code) == BPF_JMP) {
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We reached a jump instruction, this completes our current basic
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // block. Flush it and continue by traversing both the true and the
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // false branch of the jump. We need to maintain a stack to do so.
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block);
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (BPF_OP(insn->code) != BPF_JA) {
3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        stack.push_back(insn->jf_ptr);
3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      insn = insn->jt_ptr;
3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // If we are jumping to an instruction that we have previously
3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // processed, we are done with this branch. Continue by backtracking
3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // up the stack.
3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      while (seen_instructions.find(insn) != seen_instructions.end()) {
3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      backtracking:
3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (stack.empty()) {
3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // We successfully traversed all reachable instructions.
3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          return first_block;
3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else {
3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // Going up the stack.
3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          insn = stack.back();
3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          stack.pop_back();
3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Starting a new basic block.
3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      tail = NULL;
3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      head = insn;
3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We found a non-jumping instruction, append it to current basic
3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // block.
3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      tail = insn;
3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      insn = insn->next;
3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!insn) {
3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // We reached a return statement, flush the current basic block and
3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // backtrack up the stack.
4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        goto backtracking;
4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return first_block;
4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// We define a comparator that inspects the sequence of instructions in our
4095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// basic block and any blocks referenced by this block. This function can be
4105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// used in a "less" comparator for the purpose of storing pointers to basic
4115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// blocks in STL containers; this gives an easy option to use STL to find
4125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// shared tail  sequences of basic blocks.
4135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int PointerCompare(const BasicBlock *block1, const BasicBlock *block2,
4145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                          const TargetsToBlocks& blocks) {
4155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Return <0, 0, or >0 depending on the ordering of "block1" and "block2".
4165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If we are looking at the exact same block, this is trivial and we don't
4175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // need to do a full comparison.
4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (block1 == block2) {
4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return 0;
4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We compare the sequence of instructions in both basic blocks.
4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const Instructions& insns1 = block1->instructions;
4245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const Instructions& insns2 = block2->instructions;
4255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Instructions::const_iterator iter1 = insns1.begin();
4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Instructions::const_iterator iter2 = insns2.begin();
4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (;; ++iter1, ++iter2) {
4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // If we have reached the end of the sequence of instructions in one or
4295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // both basic blocks, we know the relative ordering between the two blocks
4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // and can return.
4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (iter1 == insns1.end()) {
4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return iter2 == insns2.end() ? 0 : -1;
4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (iter2 == insns2.end()) {
4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return 1;
4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Compare the individual fields for both instructions.
4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const Instruction& insn1 = **iter1;
4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const Instruction& insn2 = **iter2;
4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (insn1.code == insn2.code) {
4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (insn1.k == insn2.k) {
4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // Only conditional jump instructions use the jt_ptr and jf_ptr
4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // fields.
4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (BPF_CLASS(insn1.code) == BPF_JMP) {
4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          if (BPF_OP(insn1.code) != BPF_JA) {
4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            // Recursively compare the "true" and "false" branches.
4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            // A well-formed BPF program can't have any cycles, so we know
4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            // that our recursive algorithm will ultimately terminate.
4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            // In the unlikely event that the programmer made a mistake and
4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            // went out of the way to give us a cyclic program, we will crash
4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            // with a stack overflow. We are OK with that.
4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            int c = PointerCompare(blocks.find(insn1.jt_ptr)->second,
4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                   blocks.find(insn2.jt_ptr)->second,
4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                   blocks);
4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if (c == 0) {
4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              c = PointerCompare(blocks.find(insn1.jf_ptr)->second,
4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 blocks.find(insn2.jf_ptr)->second,
4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                 blocks);
4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              if (c == 0) {
4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                continue;
4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              } else {
4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                return c;
4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              }
4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            } else {
4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              return c;
4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            }
4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          } else {
4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            int c = PointerCompare(blocks.find(insn1.jt_ptr)->second,
4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                   blocks.find(insn2.jt_ptr)->second,
4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                   blocks);
4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            if (c == 0) {
4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              continue;
4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            } else {
4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)              return c;
4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)            }
4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          }
4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else {
4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          continue;
4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return insn1.k - insn2.k;
4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return insn1.code - insn2.code;
4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void CodeGen::MergeTails(TargetsToBlocks *blocks) {
4905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We enter all of our basic blocks into a set using the BasicBlock::Less()
4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // comparator. This naturally results in blocks with identical tails of
4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // instructions to map to the same entry in the set. Whenever we discover
4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // that a particular chain of instructions is already in the set, we merge
4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the basic blocks and update the pointer in the "blocks" map.
4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns the number of unique basic blocks.
4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // N.B. We don't merge instructions on a granularity that is finer than
4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //      a basic block. In practice, this is sufficiently rare that we don't
4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //      incur a big cost.
4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //      Similarly, we currently don't merge anything other than tails. In
5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //      the future, we might decide to revisit this decision and attempt to
5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //      merge arbitrary sub-sequences of instructions.
5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare);
5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  typedef std::set<BasicBlock *, BasicBlock::Less<TargetsToBlocks> > Set;
5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Set seen_basic_blocks(less);
5055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (TargetsToBlocks::iterator iter = blocks->begin();
5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       iter != blocks->end();
5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++iter) {
5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    BasicBlock *bb = iter->second;
5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Set::const_iterator entry = seen_basic_blocks.find(bb);
5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (entry == seen_basic_blocks.end()) {
5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // This is the first time we see this particular sequence of
5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // instructions. Enter the basic block into the set of known
5135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // basic blocks.
5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      seen_basic_blocks.insert(bb);
5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We have previously seen another basic block that defines the same
5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // sequence of instructions. Merge the two blocks and update the
5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // pointer in the "blocks" map.
5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      iter->second = *entry;
5205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void CodeGen::ComputeIncomingBranches(BasicBlock *block,
5255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                      const TargetsToBlocks& targets_to_blocks,
5265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                      IncomingBranches *incoming_branches) {
5275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We increment the number of incoming branches each time we encounter a
5285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // basic block. But we only traverse recursively the very first time we
5295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // encounter a new block. This is necessary to make topological sorting
5305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // work correctly.
5315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (++(*incoming_branches)[block] == 1) {
5325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Instruction *last_insn = block->instructions.back();
5335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
5345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ComputeIncomingBranches(
5355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             targets_to_blocks.find(last_insn->jt_ptr)->second,
5365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             targets_to_blocks, incoming_branches);
5375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (BPF_OP(last_insn->code) != BPF_JA) {
5385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ComputeIncomingBranches(
5395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             targets_to_blocks.find(last_insn->jf_ptr)->second,
5405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             targets_to_blocks, incoming_branches);
5415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
5425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
5435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second,
5445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                              targets_to_blocks, incoming_branches);
5455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void CodeGen::TopoSortBasicBlocks(BasicBlock *first_block,
5505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                  const TargetsToBlocks& blocks,
5515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                  BasicBlocks *basic_blocks) {
5525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Textbook implementation of a toposort. We keep looking for basic blocks
5535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // that don't have any incoming branches (initially, this is just the
5545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // "first_block") and add them to the topologically sorted list of
5555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // "basic_blocks". As we do so, we remove outgoing branches. This potentially
5565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // ends up making our descendants eligible for the sorted list. The
5575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // sorting algorithm terminates when there are no more basic blocks that have
5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // no incoming branches. If we didn't move all blocks from the set of
5595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // "unordered_blocks" to the sorted list of "basic_blocks", there must have
5605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // been a cyclic dependency. This should never happen in a BPF program, as
5615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // well-formed BPF programs only ever have forward branches.
5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  IncomingBranches unordered_blocks;
5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ComputeIncomingBranches(first_block, blocks, &unordered_blocks);
5645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::set<BasicBlock *> heads;
5665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (;;) {
5675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Move block from "unordered_blocks" to "basic_blocks".
5685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    basic_blocks->push_back(first_block);
5695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Inspect last instruction in the basic block. This is typically either a
5715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // jump or a return statement. But it could also be a "normal" instruction
5725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // that is followed by a jump target.
5735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Instruction *last_insn = first_block->instructions.back();
5745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
5755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Remove outgoing branches. This might end up moving our descendants
5765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // into set of "head" nodes that no longer have any incoming branches.
5775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      TargetsToBlocks::const_iterator iter;
5785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (BPF_OP(last_insn->code) != BPF_JA) {
5795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        iter = blocks.find(last_insn->jf_ptr);
5805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (!--unordered_blocks[iter->second]) {
5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          heads.insert(iter->second);
5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
5835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
5845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      iter = blocks.find(last_insn->jt_ptr);
5855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!--unordered_blocks[iter->second]) {
5865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        first_block = iter->second;
5875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
5885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
5895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
5905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We encountered an instruction that doesn't change code flow. Try to
5915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // pick the next "first_block" from "last_insn->next", if possible.
5925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      TargetsToBlocks::const_iterator iter;
5935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      iter = blocks.find(last_insn->next);
5945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (!--unordered_blocks[iter->second]) {
5955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        first_block = iter->second;
5965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
5975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
5985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // Our basic block is supposed to be followed by "last_insn->next",
5995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // but dependencies prevent this from happening. Insert a BPF_JA
6005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // instruction to correct the code flow.
6015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        Instruction *ja = MakeInstruction(BPF_JMP+BPF_JA, 0, last_insn->next);
6025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        first_block->instructions.push_back(ja);
6035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        last_insn->next = ja;
6045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
6055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
6065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (heads.empty()) {
6075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (unordered_blocks.size() != basic_blocks->size()) {
6085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        SANDBOX_DIE("Internal compiler error; cyclic graph detected");
6095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
6105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return;
6115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
6125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Proceed by picking an arbitrary node from the set of basic blocks that
6135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // do not have any incoming branches.
6145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    first_block = *heads.begin();
6155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    heads.erase(heads.begin());
6165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void CodeGen::ComputeRelativeJumps(BasicBlocks *basic_blocks,
6205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                   const TargetsToBlocks& targets_to_blocks) {
6215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // While we previously used pointers in jt_ptr and jf_ptr to link jump
6225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // instructions to their targets, we now convert these jumps to relative
6235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // jumps that are suitable for loading the BPF program into the kernel.
6245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int offset = 0;
6255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Since we just completed a toposort, all jump targets are guaranteed to
6275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // go forward. This means, iterating over the basic blocks in reverse makes
6285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // it trivial to compute the correct offsets.
6295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BasicBlock *bb = NULL;
6305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BasicBlock *last_bb = NULL;
6315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin();
6325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       iter != basic_blocks->rend();
6335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++iter) {
6345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    last_bb = bb;
6355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bb = *iter;
6365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Instruction *insn = bb->instructions.back();
6375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (BPF_CLASS(insn->code) == BPF_JMP) {
6385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Basic block ended in a jump instruction. We can now compute the
6395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // appropriate offsets.
6405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (BPF_OP(insn->code) == BPF_JA) {
6415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // "Always" jumps use the 32bit "k" field for the offset, instead
6425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // of the 8bit "jt" and "jf" fields.
6435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        int jmp =
6445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
6455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn->k  = jmp;
6465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn->jt = insn->jf = 0;
6475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
6485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // The offset computations for conditional jumps are just the same
6495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // as for "always" jumps.
6505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        int jt = offset-targets_to_blocks.find(insn->jt_ptr)->second->offset;
6515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        int jf = offset-targets_to_blocks.find(insn->jf_ptr)->second->offset;
6525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // There is an added complication, because conditional relative jumps
6545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // can only jump at most 255 instructions forward. If we have to jump
6555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // further, insert an extra "always" jump.
6565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        Instructions::size_type jmp = bb->instructions.size();
6575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (jt > 255 || (jt == 255 && jf > 255)) {
6585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          Instruction *ja = MakeInstruction(BPF_JMP+BPF_JA, 0, insn->jt_ptr);
6595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          bb->instructions.push_back(ja);
6605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          ja->k  = jt;
6615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          ja->jt = ja->jf = 0;
6625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // The newly inserted "always" jump, of course, requires us to adjust
6645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // the jump targets in the original conditional jump.
6655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          jt = 0;
6665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          ++jf;
6675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
6685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (jf > 255) {
6695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          Instruction *ja = MakeInstruction(BPF_JMP+BPF_JA, 0, insn->jf_ptr);
6705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          bb->instructions.insert(bb->instructions.begin() + jmp, ja);
6715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          ja->k  = jf;
6725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          ja->jt = ja->jf = 0;
6735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // Again, we have to adjust the jump targets in the original
6755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // conditional jump.
6765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          ++jt;
6775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          jf = 0;
6785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
6795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // Now we can finally set the relative jump targets in the conditional
6815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // jump instruction. Afterwards, we must no longer access the jt_ptr
6825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // and jf_ptr fields.
6835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn->jt = jt;
6845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        insn->jf = jf;
6855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
6865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (BPF_CLASS(insn->code) != BPF_RET &&
6875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               targets_to_blocks.find(insn->next)->second != last_bb) {
6885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      SANDBOX_DIE("Internal compiler error; invalid basic block encountered");
6895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
6905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Proceed to next basic block.
6925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    offset += bb->instructions.size();
6935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bb->offset = offset;
6945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
6965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks,
6995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                     Sandbox::Program *program) {
7005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Our basic blocks have been sorted and relative jump offsets have been
7015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // computed. The last remaining step is for all the instructions in our
7025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // basic blocks to be concatenated into a BPF program.
7035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  program->clear();
7045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin();
7055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       bb_iter != basic_blocks.end();
7065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++bb_iter) {
7075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const BasicBlock& bb = **bb_iter;
7085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (Instructions::const_iterator insn_iter = bb.instructions.begin();
7095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         insn_iter != bb.instructions.end();
7105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         ++insn_iter) {
7115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const Instruction& insn = **insn_iter;
7125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      program->push_back(
7135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (struct sock_filter) { insn.code, insn.jt, insn.jf, insn.k });
7145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
7155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
7165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
7175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void CodeGen::Compile(Instruction *instructions, Sandbox::Program *program) {
7205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (compiled_) {
7215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SANDBOX_DIE("Cannot call Compile() multiple times. Create a new code "
7225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                "generator instead");
7235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
7245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  compiled_ = true;
7255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BranchTargets branch_targets;
7275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FindBranchTargets(*instructions, &branch_targets);
7285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TargetsToBlocks all_blocks;
7295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BasicBlock *first_block =
7305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks);
7315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  MergeTails(&all_blocks);
7325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  BasicBlocks basic_blocks;
7335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks);
7345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ComputeRelativeJumps(&basic_blocks, all_blocks);
7355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ConcatenateBasicBlocks(basic_blocks, program);
7365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return;
7375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace
740