1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "sandbox/linux/bpf_dsl/codegen.h" 6 7#include <stddef.h> 8#include <stdint.h> 9 10#include <map> 11#include <utility> 12#include <vector> 13 14#include "base/macros.h" 15#include "base/md5.h" 16#include "base/strings/string_piece.h" 17#include "sandbox/linux/system_headers/linux_filter.h" 18#include "testing/gtest/include/gtest/gtest.h" 19 20namespace sandbox { 21namespace { 22 23// Hash provides an abstraction for building "hash trees" from BPF 24// control flow graphs, and efficiently identifying equivalent graphs. 25// 26// For simplicity, we use MD5, because base happens to provide a 27// convenient API for its use. However, any collision-resistant hash 28// should suffice. 29class Hash { 30 public: 31 static const Hash kZero; 32 33 Hash() : digest_() {} 34 35 Hash(uint16_t code, 36 uint32_t k, 37 const Hash& jt = kZero, 38 const Hash& jf = kZero) 39 : digest_() { 40 base::MD5Context ctx; 41 base::MD5Init(&ctx); 42 HashValue(&ctx, code); 43 HashValue(&ctx, k); 44 HashValue(&ctx, jt); 45 HashValue(&ctx, jf); 46 base::MD5Final(&digest_, &ctx); 47 } 48 49 Hash(const Hash& hash) = default; 50 Hash& operator=(const Hash& rhs) = default; 51 52 friend bool operator==(const Hash& lhs, const Hash& rhs) { 53 return lhs.Base16() == rhs.Base16(); 54 } 55 friend bool operator!=(const Hash& lhs, const Hash& rhs) { 56 return !(lhs == rhs); 57 } 58 59 private: 60 template <typename T> 61 void HashValue(base::MD5Context* ctx, const T& value) { 62 base::MD5Update(ctx, 63 base::StringPiece(reinterpret_cast<const char*>(&value), 64 sizeof(value))); 65 } 66 67 std::string Base16() const { 68 return base::MD5DigestToBase16(digest_); 69 } 70 71 base::MD5Digest digest_; 72}; 73 74const Hash Hash::kZero; 75 76// Sanity check that equality and inequality work on Hash as required. 77TEST(CodeGen, HashSanity) { 78 std::vector<Hash> hashes; 79 80 // Push a bunch of logically distinct hashes. 81 hashes.push_back(Hash::kZero); 82 for (int i = 0; i < 4; ++i) { 83 hashes.push_back(Hash(i & 1, i & 2)); 84 } 85 for (int i = 0; i < 16; ++i) { 86 hashes.push_back(Hash(i & 1, i & 2, Hash(i & 4, i & 8))); 87 } 88 for (int i = 0; i < 64; ++i) { 89 hashes.push_back( 90 Hash(i & 1, i & 2, Hash(i & 4, i & 8), Hash(i & 16, i & 32))); 91 } 92 93 for (const Hash& a : hashes) { 94 for (const Hash& b : hashes) { 95 // Hashes should equal themselves, but not equal all others. 96 if (&a == &b) { 97 EXPECT_EQ(a, b); 98 } else { 99 EXPECT_NE(a, b); 100 } 101 } 102 } 103} 104 105// ProgramTest provides a fixture for writing compiling sample 106// programs with CodeGen and verifying the linearized output matches 107// the input DAG. 108class ProgramTest : public ::testing::Test { 109 protected: 110 ProgramTest() : gen_(), node_hashes_() {} 111 112 // MakeInstruction calls CodeGen::MakeInstruction() and associated 113 // the returned address with a hash of the instruction. 114 CodeGen::Node MakeInstruction(uint16_t code, 115 uint32_t k, 116 CodeGen::Node jt = CodeGen::kNullNode, 117 CodeGen::Node jf = CodeGen::kNullNode) { 118 CodeGen::Node res = gen_.MakeInstruction(code, k, jt, jf); 119 EXPECT_NE(CodeGen::kNullNode, res); 120 121 Hash digest(code, k, Lookup(jt), Lookup(jf)); 122 auto it = node_hashes_.insert(std::make_pair(res, digest)); 123 EXPECT_EQ(digest, it.first->second); 124 125 return res; 126 } 127 128 // RunTest compiles the program and verifies that the output matches 129 // what is expected. It should be called at the end of each program 130 // test case. 131 void RunTest(CodeGen::Node head) { 132 // Compile the program 133 CodeGen::Program program = gen_.Compile(head); 134 135 // Walk the program backwards, and compute the hash for each instruction. 136 std::vector<Hash> prog_hashes(program.size()); 137 for (size_t i = program.size(); i > 0; --i) { 138 const sock_filter& insn = program.at(i - 1); 139 Hash& hash = prog_hashes.at(i - 1); 140 141 if (BPF_CLASS(insn.code) == BPF_JMP) { 142 if (BPF_OP(insn.code) == BPF_JA) { 143 // The compiler adds JA instructions as needed, so skip them. 144 hash = prog_hashes.at(i + insn.k); 145 } else { 146 hash = Hash(insn.code, insn.k, prog_hashes.at(i + insn.jt), 147 prog_hashes.at(i + insn.jf)); 148 } 149 } else if (BPF_CLASS(insn.code) == BPF_RET) { 150 hash = Hash(insn.code, insn.k); 151 } else { 152 hash = Hash(insn.code, insn.k, prog_hashes.at(i)); 153 } 154 } 155 156 EXPECT_EQ(Lookup(head), prog_hashes.at(0)); 157 } 158 159 private: 160 const Hash& Lookup(CodeGen::Node next) const { 161 if (next == CodeGen::kNullNode) { 162 return Hash::kZero; 163 } 164 auto it = node_hashes_.find(next); 165 if (it == node_hashes_.end()) { 166 ADD_FAILURE() << "No hash found for node " << next; 167 return Hash::kZero; 168 } 169 return it->second; 170 } 171 172 CodeGen gen_; 173 std::map<CodeGen::Node, Hash> node_hashes_; 174 175 DISALLOW_COPY_AND_ASSIGN(ProgramTest); 176}; 177 178TEST_F(ProgramTest, OneInstruction) { 179 // Create the most basic valid BPF program: 180 // RET 0 181 CodeGen::Node head = MakeInstruction(BPF_RET + BPF_K, 0); 182 RunTest(head); 183} 184 185TEST_F(ProgramTest, SimpleBranch) { 186 // Create a program with a single branch: 187 // JUMP if eq 42 then $0 else $1 188 // 0: RET 1 189 // 1: RET 0 190 CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, 191 MakeInstruction(BPF_RET + BPF_K, 1), 192 MakeInstruction(BPF_RET + BPF_K, 0)); 193 RunTest(head); 194} 195 196TEST_F(ProgramTest, AtypicalBranch) { 197 // Create a program with a single branch: 198 // JUMP if eq 42 then $0 else $0 199 // 0: RET 0 200 201 CodeGen::Node ret = MakeInstruction(BPF_RET + BPF_K, 0); 202 CodeGen::Node head = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, ret, ret); 203 204 // N.B.: As the instructions in both sides of the branch are already 205 // the same object, we do not actually have any "mergeable" branches. 206 // This needs to be reflected in our choice of "flags". 207 RunTest(head); 208} 209 210TEST_F(ProgramTest, Complex) { 211 // Creates a basic BPF program that we'll use to test some of the code: 212 // JUMP if eq 42 the $0 else $1 (insn6) 213 // 0: LD 23 (insn5) 214 // 1: JUMP if eq 42 then $2 else $4 (insn4) 215 // 2: JUMP to $3 (insn2) 216 // 3: LD 42 (insn1) 217 // RET 42 (insn0) 218 // 4: LD 42 (insn3) 219 // RET 42 (insn3+) 220 CodeGen::Node insn0 = MakeInstruction(BPF_RET + BPF_K, 42); 221 CodeGen::Node insn1 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, insn0); 222 CodeGen::Node insn2 = insn1; // Implicit JUMP 223 224 // We explicitly duplicate instructions to test that they're merged. 225 CodeGen::Node insn3 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 42, 226 MakeInstruction(BPF_RET + BPF_K, 42)); 227 EXPECT_EQ(insn2, insn3); 228 229 CodeGen::Node insn4 = 230 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn2, insn3); 231 CodeGen::Node insn5 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 23, insn4); 232 233 // Force a basic block that ends in neither a jump instruction nor a return 234 // instruction. It only contains "insn5". This exercises one of the less 235 // common code paths in the topo-sort algorithm. 236 // This also gives us a diamond-shaped pattern in our graph, which stresses 237 // another aspect of the topo-sort algorithm (namely, the ability to 238 // correctly count the incoming branches for subtrees that are not disjunct). 239 CodeGen::Node insn6 = 240 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 42, insn5, insn4); 241 242 RunTest(insn6); 243} 244 245TEST_F(ProgramTest, ConfusingTails) { 246 // This simple program demonstrates https://crbug.com/351103/ 247 // The two "LOAD 0" instructions are blocks of their own. MergeTails() could 248 // be tempted to merge them since they are the same. However, they are 249 // not mergeable because they fall-through to non semantically equivalent 250 // blocks. 251 // Without the fix for this bug, this program should trigger the check in 252 // CompileAndCompare: the serialized graphs from the program and its compiled 253 // version will differ. 254 // 255 // 0) LOAD 1 // ??? 256 // 1) if A == 0x1; then JMP 2 else JMP 3 257 // 2) LOAD 0 // System call number 258 // 3) if A == 0x2; then JMP 4 else JMP 5 259 // 4) LOAD 0 // System call number 260 // 5) if A == 0x1; then JMP 6 else JMP 7 261 // 6) RET 0 262 // 7) RET 1 263 264 CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1); 265 CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0); 266 CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); 267 CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); 268 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); 269 CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); 270 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); 271 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); 272 273 RunTest(i0); 274} 275 276TEST_F(ProgramTest, ConfusingTailsBasic) { 277 // Without the fix for https://crbug.com/351103/, (see 278 // SampleProgramConfusingTails()), this would generate a cyclic graph and 279 // crash as the two "LOAD 0" instructions would get merged. 280 // 281 // 0) LOAD 1 // ??? 282 // 1) if A == 0x1; then JMP 2 else JMP 3 283 // 2) LOAD 0 // System call number 284 // 3) if A == 0x2; then JMP 4 else JMP 5 285 // 4) LOAD 0 // System call number 286 // 5) RET 1 287 288 CodeGen::Node i5 = MakeInstruction(BPF_RET + BPF_K, 1); 289 CodeGen::Node i4 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i5); 290 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); 291 CodeGen::Node i2 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, i3); 292 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); 293 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); 294 295 RunTest(i0); 296} 297 298TEST_F(ProgramTest, ConfusingTailsMergeable) { 299 // This is similar to SampleProgramConfusingTails(), except that 300 // instructions 2 and 4 are now RET instructions. 301 // In PointerCompare(), this exercises the path where two blocks are of the 302 // same length and identical and the last instruction is a JMP or RET, so the 303 // following blocks don't need to be looked at and the blocks are mergeable. 304 // 305 // 0) LOAD 1 // ??? 306 // 1) if A == 0x1; then JMP 2 else JMP 3 307 // 2) RET 42 308 // 3) if A == 0x2; then JMP 4 else JMP 5 309 // 4) RET 42 310 // 5) if A == 0x1; then JMP 6 else JMP 7 311 // 6) RET 0 312 // 7) RET 1 313 314 CodeGen::Node i7 = MakeInstruction(BPF_RET + BPF_K, 1); 315 CodeGen::Node i6 = MakeInstruction(BPF_RET + BPF_K, 0); 316 CodeGen::Node i5 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i6, i7); 317 CodeGen::Node i4 = MakeInstruction(BPF_RET + BPF_K, 42); 318 CodeGen::Node i3 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 2, i4, i5); 319 CodeGen::Node i2 = MakeInstruction(BPF_RET + BPF_K, 42); 320 CodeGen::Node i1 = MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, i2, i3); 321 CodeGen::Node i0 = MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 1, i1); 322 323 RunTest(i0); 324} 325 326TEST_F(ProgramTest, InstructionFolding) { 327 // Check that simple instructions are folded as expected. 328 CodeGen::Node a = MakeInstruction(BPF_RET + BPF_K, 0); 329 EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0)); 330 CodeGen::Node b = MakeInstruction(BPF_RET + BPF_K, 1); 331 EXPECT_EQ(a, MakeInstruction(BPF_RET + BPF_K, 0)); 332 EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1)); 333 EXPECT_EQ(b, MakeInstruction(BPF_RET + BPF_K, 1)); 334 335 // Check that complex sequences are folded too. 336 CodeGen::Node c = 337 MakeInstruction(BPF_LD + BPF_W + BPF_ABS, 0, 338 MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b)); 339 EXPECT_EQ(c, MakeInstruction( 340 BPF_LD + BPF_W + BPF_ABS, 0, 341 MakeInstruction(BPF_JMP + BPF_JSET + BPF_K, 0x100, a, b))); 342 343 RunTest(c); 344} 345 346TEST_F(ProgramTest, FarBranches) { 347 // BPF instructions use 8-bit fields for branch offsets, which means 348 // branch targets must be within 255 instructions of the branch 349 // instruction. CodeGen abstracts away this detail by inserting jump 350 // instructions as needed, which we test here by generating programs 351 // that should trigger any interesting boundary conditions. 352 353 // Populate with 260 initial instruction nodes. 354 std::vector<CodeGen::Node> nodes; 355 nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0)); 356 for (size_t i = 1; i < 260; ++i) { 357 nodes.push_back( 358 MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back())); 359 } 360 361 // Exhaustively test branch offsets near BPF's limits. 362 for (size_t jt = 250; jt < 260; ++jt) { 363 for (size_t jf = 250; jf < 260; ++jf) { 364 nodes.push_back(MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0, 365 nodes.rbegin()[jt], nodes.rbegin()[jf])); 366 RunTest(nodes.back()); 367 } 368 } 369} 370 371TEST_F(ProgramTest, JumpReuse) { 372 // As a code size optimization, we try to reuse jumps when possible 373 // instead of emitting new ones. Here we make sure that optimization 374 // is working as intended. 375 // 376 // NOTE: To simplify testing, we rely on implementation details 377 // about what CodeGen::Node values indicate (i.e., vector indices), 378 // but CodeGen users should treat them as opaque values. 379 380 // Populate with 260 initial instruction nodes. 381 std::vector<CodeGen::Node> nodes; 382 nodes.push_back(MakeInstruction(BPF_RET + BPF_K, 0)); 383 for (size_t i = 1; i < 260; ++i) { 384 nodes.push_back( 385 MakeInstruction(BPF_ALU + BPF_ADD + BPF_K, i, nodes.back())); 386 } 387 388 // Branching to nodes[0] and nodes[1] should require 3 new 389 // instructions: two far jumps plus the branch itself. 390 CodeGen::Node one = 391 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 0, nodes[0], nodes[1]); 392 EXPECT_EQ(nodes.back() + 3, one); // XXX: Implementation detail! 393 RunTest(one); 394 395 // Branching again to the same target nodes should require only one 396 // new instruction, as we can reuse the previous branch's jumps. 397 CodeGen::Node two = 398 MakeInstruction(BPF_JMP + BPF_JEQ + BPF_K, 1, nodes[0], nodes[1]); 399 EXPECT_EQ(one + 1, two); // XXX: Implementation detail! 400 RunTest(two); 401} 402 403} // namespace 404} // namespace sandbox 405