1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include <stdio.h> 6 7#include "sandbox/linux/seccomp-bpf/codegen.h" 8 9 10namespace { 11 12// Helper function for Traverse(). 13void TraverseRecursively(std::set<playground2::Instruction *> *visited, 14 playground2::Instruction *instruction) { 15 if (visited->find(instruction) == visited->end()) { 16 visited->insert(instruction); 17 switch (BPF_CLASS(instruction->code)) { 18 case BPF_JMP: 19 if (BPF_OP(instruction->code) != BPF_JA) { 20 TraverseRecursively(visited, instruction->jf_ptr); 21 } 22 TraverseRecursively(visited, instruction->jt_ptr); 23 break; 24 case BPF_RET: 25 break; 26 default: 27 TraverseRecursively(visited, instruction->next); 28 break; 29 } 30 } 31} 32 33} // namespace 34 35namespace playground2 { 36 37CodeGen::CodeGen() 38 : compiled_(false) { 39} 40 41CodeGen::~CodeGen() { 42 for (Instructions::iterator iter = instructions_.begin(); 43 iter != instructions_.end(); 44 ++iter) { 45 delete *iter; 46 } 47 for (BasicBlocks::iterator iter = basic_blocks_.begin(); 48 iter != basic_blocks_.end(); 49 ++iter) { 50 delete *iter; 51 } 52} 53 54void CodeGen::PrintProgram(const Sandbox::Program& program) { 55 for (Sandbox::Program::const_iterator iter = program.begin(); 56 iter != program.end(); 57 ++iter) { 58 int ip = (int)(iter - program.begin()); 59 fprintf(stderr, "%3d) ", ip); 60 switch (BPF_CLASS(iter->code)) { 61 case BPF_LD: 62 if (iter->code == BPF_LD+BPF_W+BPF_ABS) { 63 fprintf(stderr, "LOAD %d // ", (int)iter->k); 64 if (iter->k == offsetof(struct arch_seccomp_data, nr)) { 65 fprintf(stderr, "System call number\n"); 66 } else if (iter->k == offsetof(struct arch_seccomp_data, arch)) { 67 fprintf(stderr, "Architecture\n"); 68 } else if (iter->k == offsetof(struct arch_seccomp_data, 69 instruction_pointer)) { 70 fprintf(stderr, "Instruction pointer (LSB)\n"); 71 } else if (iter->k == offsetof(struct arch_seccomp_data, 72 instruction_pointer) + 4) { 73 fprintf(stderr, "Instruction pointer (MSB)\n"); 74 } else if (iter->k >= offsetof(struct arch_seccomp_data, args) && 75 iter->k < offsetof(struct arch_seccomp_data, args)+48 && 76 (iter->k-offsetof(struct arch_seccomp_data, args))%4 == 0) { 77 fprintf(stderr, "Argument %d (%cSB)\n", 78 (int)(iter->k-offsetof(struct arch_seccomp_data, args))/8, 79 (iter->k-offsetof(struct arch_seccomp_data, 80 args))%8 ? 'M' : 'L'); 81 } else { 82 fprintf(stderr, "???\n"); 83 } 84 } else { 85 fprintf(stderr, "LOAD ???\n"); 86 } 87 break; 88 case BPF_JMP: 89 if (BPF_OP(iter->code) == BPF_JA) { 90 fprintf(stderr, "JMP %d\n", ip + iter->k + 1); 91 } else { 92 fprintf(stderr, "if A %s 0x%x; then JMP %d else JMP %d\n", 93 BPF_OP(iter->code) == BPF_JSET ? "&" : 94 BPF_OP(iter->code) == BPF_JEQ ? "==" : 95 BPF_OP(iter->code) == BPF_JGE ? ">=" : 96 BPF_OP(iter->code) == BPF_JGT ? ">" : "???", 97 (int)iter->k, 98 ip + iter->jt + 1, ip + iter->jf + 1); 99 } 100 break; 101 case BPF_RET: 102 fprintf(stderr, "RET 0x%x // ", iter->k); 103 if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_TRAP) { 104 fprintf(stderr, "Trap #%d\n", iter->k & SECCOMP_RET_DATA); 105 } else if ((iter->k & SECCOMP_RET_ACTION) == SECCOMP_RET_ERRNO) { 106 fprintf(stderr, "errno = %d\n", iter->k & SECCOMP_RET_DATA); 107 } else if (iter->k == SECCOMP_RET_ALLOW) { 108 fprintf(stderr, "Allowed\n"); 109 } else { 110 fprintf(stderr, "???\n"); 111 } 112 break; 113 case BPF_ALU: 114 fprintf(stderr, BPF_OP(iter->code) == BPF_NEG 115 ? "A := -A\n" : "A := A %s 0x%x\n", 116 BPF_OP(iter->code) == BPF_ADD ? "+" : 117 BPF_OP(iter->code) == BPF_SUB ? "-" : 118 BPF_OP(iter->code) == BPF_MUL ? "*" : 119 BPF_OP(iter->code) == BPF_DIV ? "/" : 120 BPF_OP(iter->code) == BPF_MOD ? "%" : 121 BPF_OP(iter->code) == BPF_OR ? "|" : 122 BPF_OP(iter->code) == BPF_XOR ? "^" : 123 BPF_OP(iter->code) == BPF_AND ? "&" : 124 BPF_OP(iter->code) == BPF_LSH ? "<<" : 125 BPF_OP(iter->code) == BPF_RSH ? ">>" : "???", 126 (int)iter->k); 127 break; 128 default: 129 fprintf(stderr, "???\n"); 130 break; 131 } 132 } 133 return; 134} 135 136Instruction *CodeGen::MakeInstruction(uint16_t code, uint32_t k, 137 Instruction *next) { 138 // We can handle non-jumping instructions and "always" jumps. Both of 139 // them are followed by exactly one "next" instruction. 140 // We allow callers to defer specifying "next", but then they must call 141 // "joinInstructions" later. 142 if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_JA) { 143 SANDBOX_DIE("Must provide both \"true\" and \"false\" branch " 144 "for a BPF_JMP"); 145 } 146 if (next && BPF_CLASS(code) == BPF_RET) { 147 SANDBOX_DIE("Cannot append instructions after a return statement"); 148 } 149 if (BPF_CLASS(code) == BPF_JMP) { 150 // "Always" jumps use the "true" branch target, only. 151 Instruction *insn = new Instruction(code, 0, next, NULL); 152 instructions_.push_back(insn); 153 return insn; 154 } else { 155 // Non-jumping instructions do not use any of the branch targets. 156 Instruction *insn = new Instruction(code, k, next); 157 instructions_.push_back(insn); 158 return insn; 159 } 160} 161 162Instruction *CodeGen::MakeInstruction(uint16_t code, const ErrorCode& err) { 163 if (BPF_CLASS(code) != BPF_RET) { 164 SANDBOX_DIE("ErrorCodes can only be used in return expressions"); 165 } 166 if (err.error_type_ != ErrorCode::ET_SIMPLE && 167 err.error_type_ != ErrorCode::ET_TRAP) { 168 SANDBOX_DIE("ErrorCode is not suitable for returning from a BPF program"); 169 } 170 return MakeInstruction(code, err.err_); 171} 172 173Instruction *CodeGen::MakeInstruction(uint16_t code, uint32_t k, 174 Instruction *jt, Instruction *jf) { 175 // We can handle all conditional jumps. They are followed by both a 176 // "true" and a "false" branch. 177 if (BPF_CLASS(code) != BPF_JMP || BPF_OP(code) == BPF_JA) { 178 SANDBOX_DIE("Expected a BPF_JMP instruction"); 179 } 180 if (!jt && !jf) { 181 // We allow callers to defer specifying exactly one of the branch 182 // targets. It must then be set later by calling "JoinInstructions". 183 SANDBOX_DIE("Branches must jump to a valid instruction"); 184 } 185 Instruction *insn = new Instruction(code, k, jt, jf); 186 instructions_.push_back(insn); 187 return insn; 188} 189 190void CodeGen::JoinInstructions(Instruction *head, Instruction *tail) { 191 // Merge two instructions, or set the branch target for an "always" jump. 192 // This function should be called, if the caller didn't initially provide 193 // a value for "next" when creating the instruction. 194 if (BPF_CLASS(head->code) == BPF_JMP) { 195 if (BPF_OP(head->code) == BPF_JA) { 196 if (head->jt_ptr) { 197 SANDBOX_DIE("Cannot append instructions in the middle of a sequence"); 198 } 199 head->jt_ptr = tail; 200 } else { 201 if (!head->jt_ptr && head->jf_ptr) { 202 head->jt_ptr = tail; 203 } else if (!head->jf_ptr && head->jt_ptr) { 204 head->jf_ptr = tail; 205 } else { 206 SANDBOX_DIE("Cannot append instructions after a jump"); 207 } 208 } 209 } else if (BPF_CLASS(head->code) == BPF_RET) { 210 SANDBOX_DIE("Cannot append instructions after a return statement"); 211 } else if (head->next) { 212 SANDBOX_DIE("Cannot append instructions in the middle of a sequence"); 213 } else { 214 head->next = tail; 215 } 216 return; 217} 218 219void CodeGen::Traverse(Instruction *instruction, 220 void (*fnc)(Instruction *, void *), void *aux) { 221 std::set<Instruction *> visited; 222 TraverseRecursively(&visited, instruction); 223 for (std::set<Instruction *>::const_iterator iter = visited.begin(); 224 iter != visited.end(); 225 ++iter) { 226 fnc(*iter, aux); 227 } 228} 229 230void CodeGen::FindBranchTargets(const Instruction& instructions, 231 BranchTargets *branch_targets) { 232 // Follow all possible paths through the "instructions" graph and compute 233 // a list of branch targets. This will later be needed to compute the 234 // boundaries of basic blocks. 235 // We maintain a set of all instructions that we have previously seen. This 236 // set ultimately converges on all instructions in the program. 237 std::set<const Instruction *> seen_instructions; 238 Instructions stack; 239 for (const Instruction *insn = &instructions; insn; ) { 240 seen_instructions.insert(insn); 241 if (BPF_CLASS(insn->code) == BPF_JMP) { 242 // Found a jump. Increase count of incoming edges for each of the jump 243 // targets. 244 ++(*branch_targets)[insn->jt_ptr]; 245 if (BPF_OP(insn->code) != BPF_JA) { 246 ++(*branch_targets)[insn->jf_ptr]; 247 stack.push_back(const_cast<Instruction *>(insn)); 248 } 249 // Start a recursive decent for depth-first traversal. 250 if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) { 251 // We haven't seen the "true" branch yet. Traverse it now. We have 252 // already remembered the "false" branch on the stack and will 253 // traverse it later. 254 insn = insn->jt_ptr; 255 continue; 256 } else { 257 // Now try traversing the "false" branch. 258 insn = NULL; 259 } 260 } else { 261 // This is a non-jump instruction, just continue to the next instruction 262 // (if any). It's OK if "insn" becomes NULL when reaching a return 263 // instruction. 264 if (!insn->next != (BPF_CLASS(insn->code) == BPF_RET)) { 265 SANDBOX_DIE("Internal compiler error; return instruction must be at " 266 "the end of the BPF program"); 267 } 268 if (seen_instructions.find(insn->next) == seen_instructions.end()) { 269 insn = insn->next; 270 } else { 271 // We have seen this instruction before. That could happen if it is 272 // a branch target. No need to continue processing. 273 insn = NULL; 274 } 275 } 276 while (!insn && !stack.empty()) { 277 // We are done processing all the way to a leaf node, backtrack up the 278 // stack to any branches that we haven't processed yet. By definition, 279 // this has to be a "false" branch, as we always process the "true" 280 // branches right away. 281 insn = stack.back(); 282 stack.pop_back(); 283 if (seen_instructions.find(insn->jf_ptr) == seen_instructions.end()) { 284 // We haven't seen the "false" branch yet. So, that's where we'll 285 // go now. 286 insn = insn->jf_ptr; 287 } else { 288 // We have seen both the "true" and the "false" branch, continue 289 // up the stack. 290 if (seen_instructions.find(insn->jt_ptr) == seen_instructions.end()) { 291 SANDBOX_DIE("Internal compiler error; cannot find all " 292 "branch targets"); 293 } 294 insn = NULL; 295 } 296 } 297 } 298 return; 299} 300 301BasicBlock *CodeGen::MakeBasicBlock(Instruction *head, 302 Instruction *tail) { 303 // Iterate over all the instructions between "head" and "tail" and 304 // insert them into a new basic block. 305 BasicBlock *bb = new BasicBlock; 306 for (;; head = head->next) { 307 bb->instructions.push_back(head); 308 if (head == tail) { 309 break; 310 } 311 if (BPF_CLASS(head->code) == BPF_JMP) { 312 SANDBOX_DIE("Found a jump inside of a basic block"); 313 } 314 } 315 basic_blocks_.push_back(bb); 316 return bb; 317} 318 319void CodeGen::AddBasicBlock(Instruction *head, 320 Instruction *tail, 321 const BranchTargets& branch_targets, 322 TargetsToBlocks *basic_blocks, 323 BasicBlock **firstBlock) { 324 // Add a new basic block to "basic_blocks". Also set "firstBlock", if it 325 // has not been set before. 326 BranchTargets::const_iterator iter = branch_targets.find(head); 327 if ((iter == branch_targets.end()) != !*firstBlock || 328 !*firstBlock != basic_blocks->empty()) { 329 SANDBOX_DIE("Only the very first basic block should have no " 330 "incoming jumps"); 331 } 332 BasicBlock *bb = MakeBasicBlock(head, tail); 333 if (!*firstBlock) { 334 *firstBlock = bb; 335 } 336 (*basic_blocks)[head] = bb; 337 return; 338} 339 340BasicBlock *CodeGen::CutGraphIntoBasicBlocks( 341 Instruction *instructions, const BranchTargets& branch_targets, 342 TargetsToBlocks *basic_blocks) { 343 // Textbook implementation of a basic block generator. All basic blocks 344 // start with a branch target and end with either a return statement or 345 // a jump (or are followed by an instruction that forms the beginning of a 346 // new block). Both conditional and "always" jumps are supported. 347 BasicBlock *first_block = NULL; 348 std::set<const Instruction *> seen_instructions; 349 Instructions stack; 350 Instruction *tail = NULL; 351 Instruction *head = instructions; 352 for (Instruction *insn = head; insn; ) { 353 if (seen_instructions.find(insn) != seen_instructions.end()) { 354 // We somehow went in a circle. This should never be possible. Not even 355 // cyclic graphs are supposed to confuse us this much. 356 SANDBOX_DIE("Internal compiler error; cannot compute basic blocks"); 357 } 358 seen_instructions.insert(insn); 359 if (tail && branch_targets.find(insn) != branch_targets.end()) { 360 // We reached a branch target. Start a new basic block (this means, 361 // flushing the previous basic block first). 362 AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block); 363 head = insn; 364 } 365 if (BPF_CLASS(insn->code) == BPF_JMP) { 366 // We reached a jump instruction, this completes our current basic 367 // block. Flush it and continue by traversing both the true and the 368 // false branch of the jump. We need to maintain a stack to do so. 369 AddBasicBlock(head, insn, branch_targets, basic_blocks, &first_block); 370 if (BPF_OP(insn->code) != BPF_JA) { 371 stack.push_back(insn->jf_ptr); 372 } 373 insn = insn->jt_ptr; 374 375 // If we are jumping to an instruction that we have previously 376 // processed, we are done with this branch. Continue by backtracking 377 // up the stack. 378 while (seen_instructions.find(insn) != seen_instructions.end()) { 379 backtracking: 380 if (stack.empty()) { 381 // We successfully traversed all reachable instructions. 382 return first_block; 383 } else { 384 // Going up the stack. 385 insn = stack.back(); 386 stack.pop_back(); 387 } 388 } 389 // Starting a new basic block. 390 tail = NULL; 391 head = insn; 392 } else { 393 // We found a non-jumping instruction, append it to current basic 394 // block. 395 tail = insn; 396 insn = insn->next; 397 if (!insn) { 398 // We reached a return statement, flush the current basic block and 399 // backtrack up the stack. 400 AddBasicBlock(head, tail, branch_targets, basic_blocks, &first_block); 401 goto backtracking; 402 } 403 } 404 } 405 return first_block; 406} 407 408// We define a comparator that inspects the sequence of instructions in our 409// basic block and any blocks referenced by this block. This function can be 410// used in a "less" comparator for the purpose of storing pointers to basic 411// blocks in STL containers; this gives an easy option to use STL to find 412// shared tail sequences of basic blocks. 413static int PointerCompare(const BasicBlock *block1, const BasicBlock *block2, 414 const TargetsToBlocks& blocks) { 415 // Return <0, 0, or >0 depending on the ordering of "block1" and "block2". 416 // If we are looking at the exact same block, this is trivial and we don't 417 // need to do a full comparison. 418 if (block1 == block2) { 419 return 0; 420 } 421 422 // We compare the sequence of instructions in both basic blocks. 423 const Instructions& insns1 = block1->instructions; 424 const Instructions& insns2 = block2->instructions; 425 Instructions::const_iterator iter1 = insns1.begin(); 426 Instructions::const_iterator iter2 = insns2.begin(); 427 for (;; ++iter1, ++iter2) { 428 // If we have reached the end of the sequence of instructions in one or 429 // both basic blocks, we know the relative ordering between the two blocks 430 // and can return. 431 if (iter1 == insns1.end()) { 432 return iter2 == insns2.end() ? 0 : -1; 433 } else if (iter2 == insns2.end()) { 434 return 1; 435 } 436 437 // Compare the individual fields for both instructions. 438 const Instruction& insn1 = **iter1; 439 const Instruction& insn2 = **iter2; 440 if (insn1.code == insn2.code) { 441 if (insn1.k == insn2.k) { 442 // Only conditional jump instructions use the jt_ptr and jf_ptr 443 // fields. 444 if (BPF_CLASS(insn1.code) == BPF_JMP) { 445 if (BPF_OP(insn1.code) != BPF_JA) { 446 // Recursively compare the "true" and "false" branches. 447 // A well-formed BPF program can't have any cycles, so we know 448 // that our recursive algorithm will ultimately terminate. 449 // In the unlikely event that the programmer made a mistake and 450 // went out of the way to give us a cyclic program, we will crash 451 // with a stack overflow. We are OK with that. 452 int c = PointerCompare(blocks.find(insn1.jt_ptr)->second, 453 blocks.find(insn2.jt_ptr)->second, 454 blocks); 455 if (c == 0) { 456 c = PointerCompare(blocks.find(insn1.jf_ptr)->second, 457 blocks.find(insn2.jf_ptr)->second, 458 blocks); 459 if (c == 0) { 460 continue; 461 } else { 462 return c; 463 } 464 } else { 465 return c; 466 } 467 } else { 468 int c = PointerCompare(blocks.find(insn1.jt_ptr)->second, 469 blocks.find(insn2.jt_ptr)->second, 470 blocks); 471 if (c == 0) { 472 continue; 473 } else { 474 return c; 475 } 476 } 477 } else { 478 continue; 479 } 480 } else { 481 return insn1.k - insn2.k; 482 } 483 } else { 484 return insn1.code - insn2.code; 485 } 486 } 487} 488 489void CodeGen::MergeTails(TargetsToBlocks *blocks) { 490 // We enter all of our basic blocks into a set using the BasicBlock::Less() 491 // comparator. This naturally results in blocks with identical tails of 492 // instructions to map to the same entry in the set. Whenever we discover 493 // that a particular chain of instructions is already in the set, we merge 494 // the basic blocks and update the pointer in the "blocks" map. 495 // Returns the number of unique basic blocks. 496 // N.B. We don't merge instructions on a granularity that is finer than 497 // a basic block. In practice, this is sufficiently rare that we don't 498 // incur a big cost. 499 // Similarly, we currently don't merge anything other than tails. In 500 // the future, we might decide to revisit this decision and attempt to 501 // merge arbitrary sub-sequences of instructions. 502 BasicBlock::Less<TargetsToBlocks> less(*blocks, PointerCompare); 503 typedef std::set<BasicBlock *, BasicBlock::Less<TargetsToBlocks> > Set; 504 Set seen_basic_blocks(less); 505 for (TargetsToBlocks::iterator iter = blocks->begin(); 506 iter != blocks->end(); 507 ++iter) { 508 BasicBlock *bb = iter->second; 509 Set::const_iterator entry = seen_basic_blocks.find(bb); 510 if (entry == seen_basic_blocks.end()) { 511 // This is the first time we see this particular sequence of 512 // instructions. Enter the basic block into the set of known 513 // basic blocks. 514 seen_basic_blocks.insert(bb); 515 } else { 516 // We have previously seen another basic block that defines the same 517 // sequence of instructions. Merge the two blocks and update the 518 // pointer in the "blocks" map. 519 iter->second = *entry; 520 } 521 } 522} 523 524void CodeGen::ComputeIncomingBranches(BasicBlock *block, 525 const TargetsToBlocks& targets_to_blocks, 526 IncomingBranches *incoming_branches) { 527 // We increment the number of incoming branches each time we encounter a 528 // basic block. But we only traverse recursively the very first time we 529 // encounter a new block. This is necessary to make topological sorting 530 // work correctly. 531 if (++(*incoming_branches)[block] == 1) { 532 Instruction *last_insn = block->instructions.back(); 533 if (BPF_CLASS(last_insn->code) == BPF_JMP) { 534 ComputeIncomingBranches( 535 targets_to_blocks.find(last_insn->jt_ptr)->second, 536 targets_to_blocks, incoming_branches); 537 if (BPF_OP(last_insn->code) != BPF_JA) { 538 ComputeIncomingBranches( 539 targets_to_blocks.find(last_insn->jf_ptr)->second, 540 targets_to_blocks, incoming_branches); 541 } 542 } else if (BPF_CLASS(last_insn->code) != BPF_RET) { 543 ComputeIncomingBranches(targets_to_blocks.find(last_insn->next)->second, 544 targets_to_blocks, incoming_branches); 545 } 546 } 547} 548 549void CodeGen::TopoSortBasicBlocks(BasicBlock *first_block, 550 const TargetsToBlocks& blocks, 551 BasicBlocks *basic_blocks) { 552 // Textbook implementation of a toposort. We keep looking for basic blocks 553 // that don't have any incoming branches (initially, this is just the 554 // "first_block") and add them to the topologically sorted list of 555 // "basic_blocks". As we do so, we remove outgoing branches. This potentially 556 // ends up making our descendants eligible for the sorted list. The 557 // sorting algorithm terminates when there are no more basic blocks that have 558 // no incoming branches. If we didn't move all blocks from the set of 559 // "unordered_blocks" to the sorted list of "basic_blocks", there must have 560 // been a cyclic dependency. This should never happen in a BPF program, as 561 // well-formed BPF programs only ever have forward branches. 562 IncomingBranches unordered_blocks; 563 ComputeIncomingBranches(first_block, blocks, &unordered_blocks); 564 565 std::set<BasicBlock *> heads; 566 for (;;) { 567 // Move block from "unordered_blocks" to "basic_blocks". 568 basic_blocks->push_back(first_block); 569 570 // Inspect last instruction in the basic block. This is typically either a 571 // jump or a return statement. But it could also be a "normal" instruction 572 // that is followed by a jump target. 573 Instruction *last_insn = first_block->instructions.back(); 574 if (BPF_CLASS(last_insn->code) == BPF_JMP) { 575 // Remove outgoing branches. This might end up moving our descendants 576 // into set of "head" nodes that no longer have any incoming branches. 577 TargetsToBlocks::const_iterator iter; 578 if (BPF_OP(last_insn->code) != BPF_JA) { 579 iter = blocks.find(last_insn->jf_ptr); 580 if (!--unordered_blocks[iter->second]) { 581 heads.insert(iter->second); 582 } 583 } 584 iter = blocks.find(last_insn->jt_ptr); 585 if (!--unordered_blocks[iter->second]) { 586 first_block = iter->second; 587 continue; 588 } 589 } else if (BPF_CLASS(last_insn->code) != BPF_RET) { 590 // We encountered an instruction that doesn't change code flow. Try to 591 // pick the next "first_block" from "last_insn->next", if possible. 592 TargetsToBlocks::const_iterator iter; 593 iter = blocks.find(last_insn->next); 594 if (!--unordered_blocks[iter->second]) { 595 first_block = iter->second; 596 continue; 597 } else { 598 // Our basic block is supposed to be followed by "last_insn->next", 599 // but dependencies prevent this from happening. Insert a BPF_JA 600 // instruction to correct the code flow. 601 Instruction *ja = MakeInstruction(BPF_JMP+BPF_JA, 0, last_insn->next); 602 first_block->instructions.push_back(ja); 603 last_insn->next = ja; 604 } 605 } 606 if (heads.empty()) { 607 if (unordered_blocks.size() != basic_blocks->size()) { 608 SANDBOX_DIE("Internal compiler error; cyclic graph detected"); 609 } 610 return; 611 } 612 // Proceed by picking an arbitrary node from the set of basic blocks that 613 // do not have any incoming branches. 614 first_block = *heads.begin(); 615 heads.erase(heads.begin()); 616 } 617} 618 619void CodeGen::ComputeRelativeJumps(BasicBlocks *basic_blocks, 620 const TargetsToBlocks& targets_to_blocks) { 621 // While we previously used pointers in jt_ptr and jf_ptr to link jump 622 // instructions to their targets, we now convert these jumps to relative 623 // jumps that are suitable for loading the BPF program into the kernel. 624 int offset = 0; 625 626 // Since we just completed a toposort, all jump targets are guaranteed to 627 // go forward. This means, iterating over the basic blocks in reverse makes 628 // it trivial to compute the correct offsets. 629 BasicBlock *bb = NULL; 630 BasicBlock *last_bb = NULL; 631 for (BasicBlocks::reverse_iterator iter = basic_blocks->rbegin(); 632 iter != basic_blocks->rend(); 633 ++iter) { 634 last_bb = bb; 635 bb = *iter; 636 Instruction *insn = bb->instructions.back(); 637 if (BPF_CLASS(insn->code) == BPF_JMP) { 638 // Basic block ended in a jump instruction. We can now compute the 639 // appropriate offsets. 640 if (BPF_OP(insn->code) == BPF_JA) { 641 // "Always" jumps use the 32bit "k" field for the offset, instead 642 // of the 8bit "jt" and "jf" fields. 643 int jmp = 644 offset - targets_to_blocks.find(insn->jt_ptr)->second->offset; 645 insn->k = jmp; 646 insn->jt = insn->jf = 0; 647 } else { 648 // The offset computations for conditional jumps are just the same 649 // as for "always" jumps. 650 int jt = offset-targets_to_blocks.find(insn->jt_ptr)->second->offset; 651 int jf = offset-targets_to_blocks.find(insn->jf_ptr)->second->offset; 652 653 // There is an added complication, because conditional relative jumps 654 // can only jump at most 255 instructions forward. If we have to jump 655 // further, insert an extra "always" jump. 656 Instructions::size_type jmp = bb->instructions.size(); 657 if (jt > 255 || (jt == 255 && jf > 255)) { 658 Instruction *ja = MakeInstruction(BPF_JMP+BPF_JA, 0, insn->jt_ptr); 659 bb->instructions.push_back(ja); 660 ja->k = jt; 661 ja->jt = ja->jf = 0; 662 663 // The newly inserted "always" jump, of course, requires us to adjust 664 // the jump targets in the original conditional jump. 665 jt = 0; 666 ++jf; 667 } 668 if (jf > 255) { 669 Instruction *ja = MakeInstruction(BPF_JMP+BPF_JA, 0, insn->jf_ptr); 670 bb->instructions.insert(bb->instructions.begin() + jmp, ja); 671 ja->k = jf; 672 ja->jt = ja->jf = 0; 673 674 // Again, we have to adjust the jump targets in the original 675 // conditional jump. 676 ++jt; 677 jf = 0; 678 } 679 680 // Now we can finally set the relative jump targets in the conditional 681 // jump instruction. Afterwards, we must no longer access the jt_ptr 682 // and jf_ptr fields. 683 insn->jt = jt; 684 insn->jf = jf; 685 } 686 } else if (BPF_CLASS(insn->code) != BPF_RET && 687 targets_to_blocks.find(insn->next)->second != last_bb) { 688 SANDBOX_DIE("Internal compiler error; invalid basic block encountered"); 689 } 690 691 // Proceed to next basic block. 692 offset += bb->instructions.size(); 693 bb->offset = offset; 694 } 695 return; 696} 697 698void CodeGen::ConcatenateBasicBlocks(const BasicBlocks& basic_blocks, 699 Sandbox::Program *program) { 700 // Our basic blocks have been sorted and relative jump offsets have been 701 // computed. The last remaining step is for all the instructions in our 702 // basic blocks to be concatenated into a BPF program. 703 program->clear(); 704 for (BasicBlocks::const_iterator bb_iter = basic_blocks.begin(); 705 bb_iter != basic_blocks.end(); 706 ++bb_iter) { 707 const BasicBlock& bb = **bb_iter; 708 for (Instructions::const_iterator insn_iter = bb.instructions.begin(); 709 insn_iter != bb.instructions.end(); 710 ++insn_iter) { 711 const Instruction& insn = **insn_iter; 712 program->push_back( 713 (struct sock_filter) { insn.code, insn.jt, insn.jf, insn.k }); 714 } 715 } 716 return; 717} 718 719void CodeGen::Compile(Instruction *instructions, Sandbox::Program *program) { 720 if (compiled_) { 721 SANDBOX_DIE("Cannot call Compile() multiple times. Create a new code " 722 "generator instead"); 723 } 724 compiled_ = true; 725 726 BranchTargets branch_targets; 727 FindBranchTargets(*instructions, &branch_targets); 728 TargetsToBlocks all_blocks; 729 BasicBlock *first_block = 730 CutGraphIntoBasicBlocks(instructions, branch_targets, &all_blocks); 731 MergeTails(&all_blocks); 732 BasicBlocks basic_blocks; 733 TopoSortBasicBlocks(first_block, all_blocks, &basic_blocks); 734 ComputeRelativeJumps(&basic_blocks, all_blocks); 735 ConcatenateBasicBlocks(basic_blocks, program); 736 return; 737} 738 739} // namespace 740