// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "sandbox/linux/seccomp-bpf/codegen.h"

#include <stdio.h>

#include <set>

#include "base/logging.h"
#include "sandbox/linux/seccomp-bpf/basicblock.h"
#include "sandbox/linux/seccomp-bpf/die.h"
#include "sandbox/linux/seccomp-bpf/instruction.h"
#include "sandbox/linux/seccomp-bpf/linux_seccomp.h"

namespace {

// Helper function for Traverse(). Performs a depth-first walk of the
// instruction graph, inserting every reachable instruction into "visited"
// exactly once. Conditional jumps have two successors (jt_ptr/jf_ptr),
// "always" jumps only one (jt_ptr), returns have none, and all other
// instructions fall through to "next".
void TraverseRecursively(std::set<sandbox::Instruction*>* visited,
                         sandbox::Instruction* instruction) {
  if (visited->find(instruction) == visited->end()) {
    visited->insert(instruction);
    switch (BPF_CLASS(instruction->code)) {
      case BPF_JMP:
        if (BPF_OP(instruction->code) != BPF_JA) {
          TraverseRecursively(visited, instruction->jf_ptr);
        }
        TraverseRecursively(visited, instruction->jt_ptr);
        break;
      case BPF_RET:
        break;
      default:
        TraverseRecursively(visited, instruction->next);
        break;
    }
  }
}

}  // namespace

namespace sandbox {

CodeGen::CodeGen() : compiled_(false) {}

// The code generator owns every Instruction and BasicBlock it ever created;
// release them all here.
CodeGen::~CodeGen() {
  for (Instructions::iterator iter = instructions_.begin();
       iter != instructions_.end();
       ++iter) {
    delete *iter;
  }
  for (BasicBlocks::iterator iter = basic_blocks_.begin();
       iter != basic_blocks_.end();
       ++iter) {
    delete *iter;
  }
}

// Debugging aid: prints a human-readable disassembly of a fully compiled
// BPF program to stderr. Jump targets are shown as absolute instruction
// indices (the "+ 1" accounts for BPF jump offsets being relative to the
// *next* instruction).
void CodeGen::PrintProgram(const SandboxBPF::Program& program) {
  for (SandboxBPF::Program::const_iterator iter = program.begin();
       iter != program.end();
       ++iter) {
    int ip = (int)(iter - program.begin());
    fprintf(stderr, "%3d) ", ip);
    switch (BPF_CLASS(iter->code)) {
      case BPF_LD:
        if (iter->code == BPF_LD + BPF_W + BPF_ABS) {
          // Decode well-known offsets into struct arch_seccomp_data.
          fprintf(stderr, "LOAD %d // ", (int)iter->k);
          if (iter->k == offsetof(struct arch_seccomp_data, nr)) {
            fprintf(stderr, "System call number\n");
          } else if (iter->k == offsetof(struct arch_seccomp_data, arch)) {
            fprintf(stderr, "Architecture\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer)) {
            fprintf(stderr, "Instruction pointer (LSB)\n");
          } else if (iter->k ==
                     offsetof(struct arch_seccomp_data, instruction_pointer) +
                         4) {
            fprintf(stderr, "Instruction pointer (MSB)\n");
          } else if (iter->k >= offsetof(struct arch_seccomp_data, args) &&
                     iter->k < offsetof(struct arch_seccomp_data, args) + 48 &&
                     (iter->k - offsetof(struct arch_seccomp_data, args)) % 4 ==
                         0) {
            // Each of the six 64bit syscall arguments is loaded as two
            // 32bit halves; the offset parity picks MSB vs LSB.
            fprintf(
                stderr,
                "Argument %d (%cSB)\n",
                (int)(iter->k - offsetof(struct arch_seccomp_data, args)) / 8,
                (iter->k - offsetof(struct arch_seccomp_data, args)) % 8 ? 'M'
                                                                         : 'L');
          } else {
            fprintf(stderr, "???\n");
          }
        } else {
          fprintf(stderr, "LOAD ???\n");
        }
        break;
      case BPF_JMP:
        if (BPF_OP(iter->code) == BPF_JA) {
          fprintf(stderr, "JMP %d\n", ip + iter->k + 1);
        } else {
          fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n",
                  BPF_OP(iter->code) == BPF_JSET ? "&" :
                  BPF_OP(iter->code) == BPF_JEQ ? "==" :
                  BPF_OP(iter->code) == BPF_JGE ? ">=" :
                  BPF_OP(iter->code) == BPF_JGT ? ">" : "???",
                  (int)iter->k,
                  ip + iter->jt + 1, ip + iter->jf + 1);
        }
        break;
      case BPF_RET:
        fprintf(stderr, "RET 0x%x // ", iter->k);
        if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP) {
          fprintf(stderr, "Trap #%d\n", iter->k & SECCOMP_RET_DATA);
        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) {
          fprintf(stderr, "errno = %d\n", iter->k & SECCOMP_RET_DATA);
        } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRACE) {
          fprintf(stderr, "Trace #%d\n", iter->k & SECCOMP_RET_DATA);
        } else if (iter->k == SECCOMP_RET_ALLOW) {
          fprintf(stderr, "Allowed\n");
        } else {
          fprintf(stderr, "???\n");
        }
        break;
      case BPF_ALU:
        fprintf(stderr, BPF_OP(iter->code) == BPF_NEG
                            ? "A := -A\n" : "A := A %s 0x%x\n",
                BPF_OP(iter->code) == BPF_ADD ? "+" :
                BPF_OP(iter->code) == BPF_SUB ? "-" :
                BPF_OP(iter->code) == BPF_MUL ? "*" :
                BPF_OP(iter->code) == BPF_DIV ? "/" :
                BPF_OP(iter->code) == BPF_MOD ? "%" :
                BPF_OP(iter->code) == BPF_OR ? "|" :
                BPF_OP(iter->code) == BPF_XOR ? "^" :
                BPF_OP(iter->code) == BPF_AND ? "&" :
                BPF_OP(iter->code) == BPF_LSH ? "<<" :
                BPF_OP(iter->code) == BPF_RSH ? ">>" : "???",
                (int)iter->k);
        break;
      default:
        fprintf(stderr, "???\n");
        break;
    }
  }
  return;
}

// Creates a new non-branching instruction (or an unconditional "always"
// jump) that is linked to "next". The returned Instruction is owned by
// this CodeGen and freed in the destructor.
Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* next) {
  // We can handle non-jumping instructions and "always" jumps. Both of
  // them are followed by exactly one "next" instruction.
  // We allow callers to defer specifying "next", but then they must call
  // "joinInstructions" later.
  if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) {
    SANDBOX_DIE(
        "Must provide both \"true\" and \"false\" branch "
        "for a BPF_JMP");
  }
  if (next && BPF_CLASS(code) == BPF_RET) {
    SANDBOX_DIE("Cannot append instructions after a return statement");
  }
  if (BPF_CLASS(code) == BPF_JMP) {
    // "Always" jumps use the "true" branch target, only.
    Instruction* insn = new Instruction(code, 0, next, NULL);
    instructions_.push_back(insn);
    return insn;
  } else {
    // Non-jumping instructions do not use any of the branch targets.
    Instruction* insn = new Instruction(code, k, next);
    instructions_.push_back(insn);
    return insn;
  }
}

// Creates a new conditional jump instruction with explicit "true" and
// "false" branch targets. The returned Instruction is owned by this
// CodeGen and freed in the destructor.
Instruction* CodeGen::MakeInstruction(uint16_t code,
                                      uint32_t k,
                                      Instruction* jt,
                                      Instruction* jf) {
  // We can handle all conditional jumps. They are followed by both a
  // "true" and a "false" branch.
  if (BPF_CLASS(code) != BPF_JMP || BPF_OP(code) == BPF_JA) {
    SANDBOX_DIE("Expected a BPF_JMP instruction");
  }
  if (!jt || !jf) {
    SANDBOX_DIE("Branches must jump to a valid instruction");
  }
  Instruction* insn = new Instruction(code, k, jt, jf);
  instructions_.push_back(insn);
  return insn;
}

// Invokes "fnc(insn, aux)" exactly once for every instruction reachable
// from "instruction". NOTE(review): iteration happens in std::set (pointer)
// order, not in program order — callers must not rely on ordering.
void CodeGen::Traverse(Instruction* instruction,
                       void (*fnc)(Instruction*, void*),
                       void* aux) {
  std::set<Instruction*> visited;
  TraverseRecursively(&visited, instruction);
  for (std::set<Instruction*>::const_iterator iter = visited.begin();
       iter != visited.end();
       ++iter) {
    fnc(*iter, aux);
  }
}

void CodeGen::FindBranchTargets(const Instruction& instructions,
                                BranchTargets* branch_targets) {
  // Follow all possible paths through the "instructions" graph and compute
  // a list of branch targets. This will later be needed to compute the
  // boundaries of basic blocks.
  // We maintain a set of all instructions that we have previously seen. This
  // set ultimately converges on all instructions in the program.
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  for (const Instruction* insn = &instructions; insn;) {
    seen_instructions.insert(insn);
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // Found a jump. Increase count of incoming edges for each of the jump
      // targets.
      ++(*branch_targets)[insn->jt_ptr];
      if (BPF_OP(insn->code) != BPF_JA) {
        ++(*branch_targets)[insn->jf_ptr];
        stack.push_back(const_cast<Instruction*>(insn));
      }
      // Start a recursive decent for depth-first traversal.
      if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
        // We haven't seen the "true" branch yet. Traverse it now. We have
        // already remembered the "false" branch on the stack and will
        // traverse it later.
        insn = insn->jt_ptr;
        continue;
      } else {
        // Now try traversing the "false" branch.
        insn = NULL;
      }
    } else {
      // This is a non-jump instruction, just continue to the next instruction
      // (if any). It's OK if "insn" becomes NULL when reaching a return
      // instruction.
      // This condition is an XOR: exactly one of "has no successor" and
      // "is a return instruction" being true indicates a malformed program.
      if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) {
        SANDBOX_DIE(
            "Internal compiler error; return instruction must be at "
            "the end of the BPF program");
      }
      if (seen_instructions.find(insn->next) == seen_instructions.end()) {
        insn = insn->next;
      } else {
        // We have seen this instruction before. That could happen if it is
        // a branch target. No need to continue processing.
        insn = NULL;
      }
    }
    while (!insn && !stack.empty()) {
      // We are done processing all the way to a leaf node, backtrack up the
      // stack to any branches that we haven't processed yet. By definition,
      // this has to be a "false" branch, as we always process the "true"
      // branches right away.
      insn = stack.back();
      stack.pop_back();
      if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) {
        // We haven't seen the "false" branch yet. So, that's where we'll
        // go now.
        insn = insn->jf_ptr;
      } else {
        // We have seen both the "true" and the "false" branch, continue
        // up the stack.
        if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) {
          SANDBOX_DIE(
              "Internal compiler error; cannot find all "
              "branch targets");
        }
        insn = NULL;
      }
    }
  }
  return;
}

BasicBlock* CodeGen::MakeBasicBlock(Instruction* head, Instruction* tail) {
  // Iterate over all the instructions between "head" and "tail" and
  // insert them into a new basic block. The new block is owned by this
  // CodeGen and freed in the destructor.
  BasicBlock* bb = new BasicBlock;
  for (;; head = head->next) {
    bb->instructions.push_back(head);
    if (head == tail) {
      break;
    }
    // A jump may only ever appear as the very last instruction of a block.
    if (BPF_CLASS(head->code) == BPF_JMP) {
      SANDBOX_DIE("Found a jump inside of a basic block");
    }
  }
  basic_blocks_.push_back(bb);
  return bb;
}

void CodeGen::AddBasicBlock(Instruction* head,
                            Instruction* tail,
                            const BranchTargets& branch_targets,
                            TargetsToBlocks* basic_blocks,
                            BasicBlock** firstBlock) {
  // Add a new basic block to "basic_blocks". Also set "firstBlock", if it
  // has not been set before.
  // Sanity check: only the very first block we ever add (the program entry
  // point) may lack incoming jumps; all later blocks must be branch targets.
  BranchTargets::const_iterator iter = branch_targets.find(head);
  if ((iter == branch_targets.end()) != !*firstBlock ||
      !*firstBlock != basic_blocks->empty()) {
    SANDBOX_DIE(
        "Only the very first basic block should have no "
        "incoming jumps");
  }
  BasicBlock* bb = MakeBasicBlock(head, tail);
  if (!*firstBlock) {
    *firstBlock = bb;
  }
  (*basic_blocks)[head] = bb;
  return;
}

BasicBlock* CodeGen::CutGraphIntoBasicBlocks(
    Instruction* instructions,
    const BranchTargets& branch_targets,
    TargetsToBlocks* basic_blocks) {
  // Textbook implementation of a basic block generator. All basic blocks
  // start with a branch target and end with either a return statement or
  // a jump (or are followed by an instruction that forms the beginning of a
  // new block). Both conditional and "always" jumps are supported.
  BasicBlock* first_block = NULL;
  std::set<const Instruction*> seen_instructions;
  Instructions stack;
  Instruction* tail = NULL;
  Instruction* head = instructions;
  for (Instruction* insn = head; insn;) {
    if (seen_instructions.find(insn) != seen_instructions.end()) {
      // We somehow went in a circle. This should never be possible. Not even
      // cyclic graphs are supposed to confuse us this much.
      SANDBOX_DIE("Internal compiler error; cannot compute basic blocks");
    }
    seen_instructions.insert(insn);
    if (tail && branch_targets.find(insn) != branch_targets.end()) {
      // We reached a branch target. Start a new basic block (this means,
      // flushing the previous basic block first).
      AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
      head = insn;
    }
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // We reached a jump instruction, this completes our current basic
      // block. Flush it and continue by traversing both the true and the
      // false branch of the jump. We need to maintain a stack to do so.
      AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block);
      if (BPF_OP(insn->code) != BPF_JA) {
        stack.push_back(insn->jf_ptr);
      }
      insn = insn->jt_ptr;

      // If we are jumping to an instruction that we have previously
      // processed, we are done with this branch. Continue by backtracking
      // up the stack.
      while (seen_instructions.find(insn) != seen_instructions.end()) {
      // NOTE: this label is also the target of a goto from the return-
      // statement case below, which re-enters the loop to pop the stack.
      backtracking:
        if (stack.empty()) {
          // We successfully traversed all reachable instructions.
          return first_block;
        } else {
          // Going up the stack.
          insn = stack.back();
          stack.pop_back();
        }
      }
      // Starting a new basic block.
      tail = NULL;
      head = insn;
    } else {
      // We found a non-jumping instruction, append it to current basic
      // block.
      tail = insn;
      insn = insn->next;
      if (!insn) {
        // We reached a return statement, flush the current basic block and
        // backtrack up the stack.
        AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block);
        goto backtracking;
      }
    }
  }
  return first_block;
}

// We define a comparator that inspects the sequence of instructions in our
// basic block and any blocks referenced by this block. This function can be
// used in a "less" comparator for the purpose of storing pointers to basic
// blocks in STL containers; this gives an easy option to use STL to find
// shared tail sequences of basic blocks.
static int PointerCompare(const BasicBlock* block1,
                          const BasicBlock* block2,
                          const TargetsToBlocks& blocks) {
  // Return <0, 0, or >0 depending on the ordering of "block1" and "block2".
  // If we are looking at the exact same block, this is trivial and we don't
  // need to do a full comparison.
  if (block1 == block2) {
    return 0;
  }

  // We compare the sequence of instructions in both basic blocks.
  const Instructions& insns1 = block1->instructions;
  const Instructions& insns2 = block2->instructions;
  // Basic blocks should never be empty.
  CHECK(!insns1.empty());
  CHECK(!insns2.empty());

  Instructions::const_iterator iter1 = insns1.begin();
  Instructions::const_iterator iter2 = insns2.begin();
  for (;; ++iter1, ++iter2) {
    // If we have reached the end of the sequence of instructions in one or
    // both basic blocks, we know the relative ordering between the two blocks
    // and can return.
    if (iter1 == insns1.end() || iter2 == insns2.end()) {
      if (iter1 != insns1.end()) {
        return 1;
      }
      if (iter2 != insns2.end()) {
        return -1;
      }

      // If the two blocks are the same length (and have elementwise-equal code
      // and k fields) and their last instructions are neither a JMP nor a RET
      // (which is the only way we can reach this point), then we must compare
      // their successors.
      Instruction* const insns1_last = insns1.back();
      Instruction* const insns2_last = insns2.back();
      CHECK(BPF_CLASS(insns1_last->code) != BPF_JMP &&
            BPF_CLASS(insns1_last->code) != BPF_RET);

      // Non jumping instructions will always have a valid next instruction.
      CHECK(insns1_last->next);
      CHECK(insns2_last->next);
      return PointerCompare(blocks.find(insns1_last->next)->second,
                            blocks.find(insns2_last->next)->second,
                            blocks);
    }

    // Compare the individual fields for both instructions.
    const Instruction& insn1 = **iter1;
    const Instruction& insn2 = **iter2;
    if (insn1.code != insn2.code) {
      return insn1.code - insn2.code;
    }
    if (insn1.k != insn2.k) {
      return insn1.k - insn2.k;
    }

    // Sanity check: If we're looking at a JMP or RET instruction, by definition
    // it should be the last instruction of the basic block.
    if (BPF_CLASS(insn1.code) == BPF_JMP || BPF_CLASS(insn1.code) == BPF_RET) {
      CHECK_EQ(insns1.back(), &insn1);
      CHECK_EQ(insns2.back(), &insn2);
    }

    // RET instructions terminate execution, and only JMP instructions use the
    // jt_ptr and jf_ptr fields. Anything else can continue to the next
    // instruction in the basic block.
    if (BPF_CLASS(insn1.code) == BPF_RET) {
      return 0;
    } else if (BPF_CLASS(insn1.code) != BPF_JMP) {
      continue;
    }

    // Recursively compare the "true" and "false" branches.
    // A well-formed BPF program can't have any cycles, so we know
    // that our recursive algorithm will ultimately terminate.
    // In the unlikely event that the programmer made a mistake and
    // went out of the way to give us a cyclic program, we will crash
    // with a stack overflow. We are OK with that.
    if (BPF_OP(insn1.code) != BPF_JA) {
      int c = PointerCompare(blocks.find(insn1.jf_ptr)->second,
                             blocks.find(insn2.jf_ptr)->second,
                             blocks);
      if (c != 0) {
        return c;
      }
    }
    return PointerCompare(blocks.find(insn1.jt_ptr)->second,
                          blocks.find(insn2.jt_ptr)->second,
                          blocks);
  }
}

void CodeGen::MergeTails(TargetsToBlocks* blocks) {
  // We enter all of our basic blocks into a set using the BasicBlock::Less()
  // comparator. This naturally results in blocks with identical tails of
  // instructions to map to the same entry in the set. Whenever we discover
  // that a particular chain of instructions is already in the set, we merge
  // the basic blocks and update the pointer in the "blocks" map.
  // Returns the number of unique basic blocks.
  // N.B. We don't merge instructions on a granularity that is finer than
  // a basic block. In practice, this is sufficiently rare that we don't
  // incur a big cost.
  // Similarly, we currently don't merge anything other than tails. In
  // the future, we might decide to revisit this decision and attempt to
  // merge arbitrary sub-sequences of instructions.
  BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare);
  typedef std::set<BasicBlock*, BasicBlock::Less<TargetsToBlocks> > Set;
  Set seen_basic_blocks(less);
  for (TargetsToBlocks::iterator iter = blocks->begin(); iter != blocks->end();
       ++iter) {
    BasicBlock* bb = iter->second;
    Set::const_iterator entry = seen_basic_blocks.find(bb);
    if (entry == seen_basic_blocks.end()) {
      // This is the first time we see this particular sequence of
      // instructions. Enter the basic block into the set of known
      // basic blocks.
      seen_basic_blocks.insert(bb);
    } else {
      // We have previously seen another basic block that defines the same
      // sequence of instructions. Merge the two blocks and update the
      // pointer in the "blocks" map.
      iter->second = *entry;
    }
  }
}

void CodeGen::ComputeIncomingBranches(BasicBlock* block,
                                      const TargetsToBlocks& targets_to_blocks,
                                      IncomingBranches* incoming_branches) {
  // We increment the number of incoming branches each time we encounter a
  // basic block. But we only traverse recursively the very first time we
  // encounter a new block. This is necessary to make topological sorting
  // work correctly.
  if (++(*incoming_branches)[block] == 1) {
    Instruction* last_insn = block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->jt_ptr)->second,
                              targets_to_blocks,
                              incoming_branches);
      if (BPF_OP(last_insn->code) != BPF_JA) {
        ComputeIncomingBranches(
            targets_to_blocks.find(last_insn->jf_ptr)->second,
            targets_to_blocks,
            incoming_branches);
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      // Fall-through block: the sole successor is the block starting at
      // the next instruction.
      ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second,
                              targets_to_blocks,
                              incoming_branches);
    }
  }
}

void CodeGen::TopoSortBasicBlocks(BasicBlock* first_block,
                                  const TargetsToBlocks& blocks,
                                  BasicBlocks* basic_blocks) {
  // Textbook implementation of a toposort. We keep looking for basic blocks
  // that don't have any incoming branches (initially, this is just the
  // "first_block") and add them to the topologically sorted list of
  // "basic_blocks". As we do so, we remove outgoing branches. This potentially
  // ends up making our descendants eligible for the sorted list. The
  // sorting algorithm terminates when there are no more basic blocks that have
  // no incoming branches. If we didn't move all blocks from the set of
  // "unordered_blocks" to the sorted list of "basic_blocks", there must have
  // been a cyclic dependency. This should never happen in a BPF program, as
  // well-formed BPF programs only ever have forward branches.
  IncomingBranches unordered_blocks;
  ComputeIncomingBranches(first_block, blocks, &unordered_blocks);

  std::set<BasicBlock*> heads;
  for (;;) {
    // Move block from "unordered_blocks" to "basic_blocks".
    basic_blocks->push_back(first_block);

    // Inspect last instruction in the basic block. This is typically either a
    // jump or a return statement. But it could also be a "normal" instruction
    // that is followed by a jump target.
    Instruction* last_insn = first_block->instructions.back();
    if (BPF_CLASS(last_insn->code) == BPF_JMP) {
      // Remove outgoing branches. This might end up moving our descendants
      // into set of "head" nodes that no longer have any incoming branches.
      TargetsToBlocks::const_iterator iter;
      if (BPF_OP(last_insn->code) != BPF_JA) {
        iter = blocks.find(last_insn->jf_ptr);
        if (!--unordered_blocks[iter->second]) {
          heads.insert(iter->second);
        }
      }
      iter = blocks.find(last_insn->jt_ptr);
      if (!--unordered_blocks[iter->second]) {
        // Prefer emitting the "true" branch target next, so that jumps stay
        // short where possible.
        first_block = iter->second;
        continue;
      }
    } else if (BPF_CLASS(last_insn->code) != BPF_RET) {
      // We encountered an instruction that doesn't change code flow. Try to
      // pick the next "first_block" from "last_insn->next", if possible.
      TargetsToBlocks::const_iterator iter;
      iter = blocks.find(last_insn->next);
      if (!--unordered_blocks[iter->second]) {
        first_block = iter->second;
        continue;
      } else {
        // Our basic block is supposed to be followed by "last_insn->next",
        // but dependencies prevent this from happening. Insert a BPF_JA
        // instruction to correct the code flow.
        Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, last_insn->next);
        first_block->instructions.push_back(ja);
        last_insn->next = ja;
      }
    }
    if (heads.empty()) {
      if (unordered_blocks.size() != basic_blocks->size()) {
        SANDBOX_DIE("Internal compiler error; cyclic graph detected");
      }
      return;
    }
    // Proceed by picking an arbitrary node from the set of basic blocks that
    // do not have any incoming branches.
    first_block = *heads.begin();
    heads.erase(heads.begin());
  }
}

void CodeGen::ComputeRelativeJumps(BasicBlocks* basic_blocks,
                                   const TargetsToBlocks& targets_to_blocks) {
  // While we previously used pointers in jt_ptr and jf_ptr to link jump
  // instructions to their targets, we now convert these jumps to relative
  // jumps that are suitable for loading the BPF program into the kernel.
  int offset = 0;

  // Since we just completed a toposort, all jump targets are guaranteed to
  // go forward. This means, iterating over the basic blocks in reverse makes
  // it trivial to compute the correct offsets.
  BasicBlock* bb = NULL;
  BasicBlock* last_bb = NULL;
  for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin();
       iter != basic_blocks->rend();
       ++iter) {
    last_bb = bb;
    bb = *iter;
    Instruction* insn = bb->instructions.back();
    if (BPF_CLASS(insn->code) == BPF_JMP) {
      // Basic block ended in a jump instruction. We can now compute the
      // appropriate offsets.
      if (BPF_OP(insn->code) == BPF_JA) {
        // "Always" jumps use the 32bit "k" field for the offset, instead
        // of the 8bit "jt" and "jf" fields.
        int jmp = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        insn->k = jmp;
        insn->jt = insn->jf = 0;
      } else {
        // The offset computations for conditional jumps are just the same
        // as for "always" jumps.
        int jt = offset - targets_to_blocks.find(insn->jt_ptr)->second->offset;
        int jf = offset - targets_to_blocks.find(insn->jf_ptr)->second->offset;

        // There is an added complication, because conditional relative jumps
        // can only jump at most 255 instructions forward. If we have to jump
        // further, insert an extra "always" jump.
        Instructions::size_type jmp = bb->instructions.size();
        if (jt > 255 || (jt == 255 && jf > 255)) {
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jt_ptr);
          bb->instructions.push_back(ja);
          ja->k = jt;
          ja->jt = ja->jf = 0;

          // The newly inserted "always" jump, of course, requires us to adjust
          // the jump targets in the original conditional jump.
          jt = 0;
          ++jf;
        }
        if (jf > 255) {
          Instruction* ja = MakeInstruction(BPF_JMP + BPF_JA, 0, insn->jf_ptr);
          bb->instructions.insert(bb->instructions.begin() + jmp, ja);
          ja->k = jf;
          ja->jt = ja->jf = 0;

          // Again, we have to adjust the jump targets in the original
          // conditional jump.
          ++jt;
          jf = 0;
        }

        // Now we can finally set the relative jump targets in the conditional
        // jump instruction. Afterwards, we must no longer access the jt_ptr
        // and jf_ptr fields.
        insn->jt = jt;
        insn->jf = jf;
      }
    } else if (BPF_CLASS(insn->code) != BPF_RET &&
               targets_to_blocks.find(insn->next)->second != last_bb) {
      SANDBOX_DIE("Internal compiler error; invalid basic block encountered");
    }

    // Proceed to next basic block.
    offset += bb->instructions.size();
    bb->offset = offset;
  }
  return;
}

void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks,
                                     SandboxBPF::Program* program) {
  // Our basic blocks have been sorted and relative jump offsets have been
  // computed. The last remaining step is for all the instructions in our
  // basic blocks to be concatenated into a BPF program.
  program->clear();
  for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin();
       bb_iter != basic_blocks.end();
       ++bb_iter) {
    const BasicBlock& bb = **bb_iter;
    for (Instructions::const_iterator insn_iter = bb.instructions.begin();
         insn_iter != bb.instructions.end();
         ++insn_iter) {
      const Instruction& insn = **insn_iter;
      program->push_back(
          (struct sock_filter) {insn.code, insn.jt, insn.jf, insn.k});
    }
  }
  return;
}

// Top-level entry point: compiles the instruction graph rooted at
// "instructions" into a flat BPF program. Single-use: a CodeGen instance
// may only ever compile one program.
void CodeGen::Compile(Instruction* instructions, SandboxBPF::Program* program) {
  if (compiled_) {
    SANDBOX_DIE(
        "Cannot call Compile() multiple times. Create a new code "
        "generator instead");
  }
  compiled_ = true;

  BranchTargets branch_targets;
  FindBranchTargets(*instructions, &branch_targets);
  TargetsToBlocks all_blocks;
  BasicBlock* first_block =
      CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks);
  MergeTails(&all_blocks);
  BasicBlocks basic_blocks;
  TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks);
  ComputeRelativeJumps(&basic_blocks, all_blocks);
  ConcatenateBasicBlocks(basic_blocks, program);
  return;
}

}  // namespace sandbox