1/* 2 * Linux Socket Filter - Kernel level socket filtering 3 * 4 * Author: 5 * Jay Schulist <jschlst@samba.org> 6 * 7 * Based on the design of: 8 * - The Berkeley Packet Filter 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 * 15 * Andi Kleen - Fix a few bad bugs and races. 16 * Kris Katterjohn - Added many additional checks in sk_chk_filter() 17 */ 18 19#include <linux/module.h> 20#include <linux/types.h> 21#include <linux/mm.h> 22#include <linux/fcntl.h> 23#include <linux/socket.h> 24#include <linux/in.h> 25#include <linux/inet.h> 26#include <linux/netdevice.h> 27#include <linux/if_packet.h> 28#include <linux/gfp.h> 29#include <net/ip.h> 30#include <net/protocol.h> 31#include <net/netlink.h> 32#include <linux/skbuff.h> 33#include <net/sock.h> 34#include <linux/errno.h> 35#include <linux/timer.h> 36#include <asm/uaccess.h> 37#include <asm/unaligned.h> 38#include <linux/filter.h> 39#include <linux/reciprocal_div.h> 40#include <linux/ratelimit.h> 41#include <linux/seccomp.h> 42#include <linux/if_vlan.h> 43 44/* No hurry in this branch 45 * 46 * Exported for the bpf jit load helper. 47 */ 48void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size) 49{ 50 u8 *ptr = NULL; 51 52 if (k >= SKF_NET_OFF) 53 ptr = skb_network_header(skb) + k - SKF_NET_OFF; 54 else if (k >= SKF_LL_OFF) 55 ptr = skb_mac_header(skb) + k - SKF_LL_OFF; 56 57 if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) 58 return ptr; 59 return NULL; 60} 61 62static inline void *load_pointer(const struct sk_buff *skb, int k, 63 unsigned int size, void *buffer) 64{ 65 if (k >= 0) 66 return skb_header_pointer(skb, k, size, buffer); 67 return bpf_internal_load_pointer_neg_helper(skb, k, size); 68} 69 70/** 71 * sk_filter - run a packet through a socket filter 72 * @sk: sock associated with &sk_buff 73 * @skb: buffer to filter 74 * 75 * Run the filter code and then cut skb->data to correct size returned by 76 * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller 77 * than pkt_len we keep whole skb->data. This is the socket level 78 * wrapper to sk_run_filter. It returns 0 if the packet should 79 * be accepted or -EPERM if the packet should be tossed. 80 * 81 */ 82int sk_filter(struct sock *sk, struct sk_buff *skb) 83{ 84 int err; 85 struct sk_filter *filter; 86 87 /* 88 * If the skb was allocated from pfmemalloc reserves, only 89 * allow SOCK_MEMALLOC sockets to use it as this socket is 90 * helping free memory 91 */ 92 if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) 93 return -ENOMEM; 94 95 err = security_sock_rcv_skb(sk, skb); 96 if (err) 97 return err; 98 99 rcu_read_lock(); 100 filter = rcu_dereference(sk->sk_filter); 101 if (filter) { 102 unsigned int pkt_len = SK_RUN_FILTER(filter, skb); 103 104 err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; 105 } 106 rcu_read_unlock(); 107 108 return err; 109} 110EXPORT_SYMBOL(sk_filter); 111 112/** 113 * sk_run_filter - run a filter on a socket 114 * @skb: buffer to run the filter on 115 * @fentry: filter to apply 116 * 117 * Decode and apply filter instructions to the skb->data. 118 * Return length to keep, 0 for none. @skb is the data we are 119 * filtering, @filter is the array of filter instructions. 120 * Because all jumps are guaranteed to be before last instruction, 121 * and last instruction guaranteed to be a RET, we dont need to check 122 * flen. (We used to pass to this function the length of filter) 123 */ 124unsigned int sk_run_filter(const struct sk_buff *skb, 125 const struct sock_filter *fentry) 126{ 127 void *ptr; 128 u32 A = 0; /* Accumulator */ 129 u32 X = 0; /* Index Register */ 130 u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */ 131 u32 tmp; 132 int k; 133 134 /* 135 * Process array of filter instructions. 136 */ 137 for (;; fentry++) { 138#if defined(CONFIG_X86_32) 139#define K (fentry->k) 140#else 141 const u32 K = fentry->k; 142#endif 143 144 switch (fentry->code) { 145 case BPF_S_ALU_ADD_X: 146 A += X; 147 continue; 148 case BPF_S_ALU_ADD_K: 149 A += K; 150 continue; 151 case BPF_S_ALU_SUB_X: 152 A -= X; 153 continue; 154 case BPF_S_ALU_SUB_K: 155 A -= K; 156 continue; 157 case BPF_S_ALU_MUL_X: 158 A *= X; 159 continue; 160 case BPF_S_ALU_MUL_K: 161 A *= K; 162 continue; 163 case BPF_S_ALU_DIV_X: 164 if (X == 0) 165 return 0; 166 A /= X; 167 continue; 168 case BPF_S_ALU_DIV_K: 169 A = reciprocal_divide(A, K); 170 continue; 171 case BPF_S_ALU_MOD_X: 172 if (X == 0) 173 return 0; 174 A %= X; 175 continue; 176 case BPF_S_ALU_MOD_K: 177 A %= K; 178 continue; 179 case BPF_S_ALU_AND_X: 180 A &= X; 181 continue; 182 case BPF_S_ALU_AND_K: 183 A &= K; 184 continue; 185 case BPF_S_ALU_OR_X: 186 A |= X; 187 continue; 188 case BPF_S_ALU_OR_K: 189 A |= K; 190 continue; 191 case BPF_S_ANC_ALU_XOR_X: 192 case BPF_S_ALU_XOR_X: 193 A ^= X; 194 continue; 195 case BPF_S_ALU_XOR_K: 196 A ^= K; 197 continue; 198 case BPF_S_ALU_LSH_X: 199 A <<= X; 200 continue; 201 case BPF_S_ALU_LSH_K: 202 A <<= K; 203 continue; 204 case BPF_S_ALU_RSH_X: 205 A >>= X; 206 continue; 207 case BPF_S_ALU_RSH_K: 208 A >>= K; 209 continue; 210 case BPF_S_ALU_NEG: 211 A = -A; 212 continue; 213 case BPF_S_JMP_JA: 214 fentry += K; 215 continue; 216 case BPF_S_JMP_JGT_K: 217 fentry += (A > K) ? fentry->jt : fentry->jf; 218 continue; 219 case BPF_S_JMP_JGE_K: 220 fentry += (A >= K) ? fentry->jt : fentry->jf; 221 continue; 222 case BPF_S_JMP_JEQ_K: 223 fentry += (A == K) ? fentry->jt : fentry->jf; 224 continue; 225 case BPF_S_JMP_JSET_K: 226 fentry += (A & K) ? fentry->jt : fentry->jf; 227 continue; 228 case BPF_S_JMP_JGT_X: 229 fentry += (A > X) ? fentry->jt : fentry->jf; 230 continue; 231 case BPF_S_JMP_JGE_X: 232 fentry += (A >= X) ? fentry->jt : fentry->jf; 233 continue; 234 case BPF_S_JMP_JEQ_X: 235 fentry += (A == X) ? fentry->jt : fentry->jf; 236 continue; 237 case BPF_S_JMP_JSET_X: 238 fentry += (A & X) ? fentry->jt : fentry->jf; 239 continue; 240 case BPF_S_LD_W_ABS: 241 k = K; 242load_w: 243 ptr = load_pointer(skb, k, 4, &tmp); 244 if (ptr != NULL) { 245 A = get_unaligned_be32(ptr); 246 continue; 247 } 248 return 0; 249 case BPF_S_LD_H_ABS: 250 k = K; 251load_h: 252 ptr = load_pointer(skb, k, 2, &tmp); 253 if (ptr != NULL) { 254 A = get_unaligned_be16(ptr); 255 continue; 256 } 257 return 0; 258 case BPF_S_LD_B_ABS: 259 k = K; 260load_b: 261 ptr = load_pointer(skb, k, 1, &tmp); 262 if (ptr != NULL) { 263 A = *(u8 *)ptr; 264 continue; 265 } 266 return 0; 267 case BPF_S_LD_W_LEN: 268 A = skb->len; 269 continue; 270 case BPF_S_LDX_W_LEN: 271 X = skb->len; 272 continue; 273 case BPF_S_LD_W_IND: 274 k = X + K; 275 goto load_w; 276 case BPF_S_LD_H_IND: 277 k = X + K; 278 goto load_h; 279 case BPF_S_LD_B_IND: 280 k = X + K; 281 goto load_b; 282 case BPF_S_LDX_B_MSH: 283 ptr = load_pointer(skb, K, 1, &tmp); 284 if (ptr != NULL) { 285 X = (*(u8 *)ptr & 0xf) << 2; 286 continue; 287 } 288 return 0; 289 case BPF_S_LD_IMM: 290 A = K; 291 continue; 292 case BPF_S_LDX_IMM: 293 X = K; 294 continue; 295 case BPF_S_LD_MEM: 296 A = mem[K]; 297 continue; 298 case BPF_S_LDX_MEM: 299 X = mem[K]; 300 continue; 301 case BPF_S_MISC_TAX: 302 X = A; 303 continue; 304 case BPF_S_MISC_TXA: 305 A = X; 306 continue; 307 case BPF_S_RET_K: 308 return K; 309 case BPF_S_RET_A: 310 return A; 311 case BPF_S_ST: 312 mem[K] = A; 313 continue; 314 case BPF_S_STX: 315 mem[K] = X; 316 continue; 317 case BPF_S_ANC_PROTOCOL: 318 A = ntohs(skb->protocol); 319 continue; 320 case BPF_S_ANC_PKTTYPE: 321 A = skb->pkt_type; 322 continue; 323 case BPF_S_ANC_IFINDEX: 324 if (!skb->dev) 325 return 0; 326 A = skb->dev->ifindex; 327 continue; 328 case BPF_S_ANC_MARK: 329 A = skb->mark; 330 continue; 331 case BPF_S_ANC_QUEUE: 332 A = skb->queue_mapping; 333 continue; 334 case BPF_S_ANC_HATYPE: 335 if (!skb->dev) 336 return 0; 337 A = skb->dev->type; 338 continue; 339 case BPF_S_ANC_RXHASH: 340 A = skb->rxhash; 341 continue; 342 case BPF_S_ANC_CPU: 343 A = raw_smp_processor_id(); 344 continue; 345 case BPF_S_ANC_VLAN_TAG: 346 A = vlan_tx_tag_get(skb); 347 continue; 348 case BPF_S_ANC_VLAN_TAG_PRESENT: 349 A = !!vlan_tx_tag_present(skb); 350 continue; 351 case BPF_S_ANC_PAY_OFFSET: 352 A = __skb_get_poff(skb); 353 continue; 354 case BPF_S_ANC_NLATTR: { 355 struct nlattr *nla; 356 357 if (skb_is_nonlinear(skb)) 358 return 0; 359 if (A > skb->len - sizeof(struct nlattr)) 360 return 0; 361 362 nla = nla_find((struct nlattr *)&skb->data[A], 363 skb->len - A, X); 364 if (nla) 365 A = (void *)nla - (void *)skb->data; 366 else 367 A = 0; 368 continue; 369 } 370 case BPF_S_ANC_NLATTR_NEST: { 371 struct nlattr *nla; 372 373 if (skb_is_nonlinear(skb)) 374 return 0; 375 if (A > skb->len - sizeof(struct nlattr)) 376 return 0; 377 378 nla = (struct nlattr *)&skb->data[A]; 379 if (nla->nla_len > A - skb->len) 380 return 0; 381 382 nla = nla_find_nested(nla, X); 383 if (nla) 384 A = (void *)nla - (void *)skb->data; 385 else 386 A = 0; 387 continue; 388 } 389#ifdef CONFIG_SECCOMP_FILTER 390 case BPF_S_ANC_SECCOMP_LD_W: 391 A = seccomp_bpf_load(fentry->k); 392 continue; 393#endif 394 default: 395 WARN_RATELIMIT(1, "Unknown code:%u jt:%u tf:%u k:%u\n", 396 fentry->code, fentry->jt, 397 fentry->jf, fentry->k); 398 return 0; 399 } 400 } 401 402 return 0; 403} 404EXPORT_SYMBOL(sk_run_filter); 405 406/* 407 * Security : 408 * A BPF program is able to use 16 cells of memory to store intermediate 409 * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()) 410 * As we dont want to clear mem[] array for each packet going through 411 * sk_run_filter(), we check that filter loaded by user never try to read 412 * a cell if not previously written, and we check all branches to be sure 413 * a malicious user doesn't try to abuse us. 414 */ 415static int check_load_and_stores(struct sock_filter *filter, int flen) 416{ 417 u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */ 418 int pc, ret = 0; 419 420 BUILD_BUG_ON(BPF_MEMWORDS > 16); 421 masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL); 422 if (!masks) 423 return -ENOMEM; 424 memset(masks, 0xff, flen * sizeof(*masks)); 425 426 for (pc = 0; pc < flen; pc++) { 427 memvalid &= masks[pc]; 428 429 switch (filter[pc].code) { 430 case BPF_S_ST: 431 case BPF_S_STX: 432 memvalid |= (1 << filter[pc].k); 433 break; 434 case BPF_S_LD_MEM: 435 case BPF_S_LDX_MEM: 436 if (!(memvalid & (1 << filter[pc].k))) { 437 ret = -EINVAL; 438 goto error; 439 } 440 break; 441 case BPF_S_JMP_JA: 442 /* a jump must set masks on target */ 443 masks[pc + 1 + filter[pc].k] &= memvalid; 444 memvalid = ~0; 445 break; 446 case BPF_S_JMP_JEQ_K: 447 case BPF_S_JMP_JEQ_X: 448 case BPF_S_JMP_JGE_K: 449 case BPF_S_JMP_JGE_X: 450 case BPF_S_JMP_JGT_K: 451 case BPF_S_JMP_JGT_X: 452 case BPF_S_JMP_JSET_X: 453 case BPF_S_JMP_JSET_K: 454 /* a jump must set masks on targets */ 455 masks[pc + 1 + filter[pc].jt] &= memvalid; 456 masks[pc + 1 + filter[pc].jf] &= memvalid; 457 memvalid = ~0; 458 break; 459 } 460 } 461error: 462 kfree(masks); 463 return ret; 464} 465 466/** 467 * sk_chk_filter - verify socket filter code 468 * @filter: filter to verify 469 * @flen: length of filter 470 * 471 * Check the user's filter code. If we let some ugly 472 * filter code slip through kaboom! The filter must contain 473 * no references or jumps that are out of range, no illegal 474 * instructions, and must end with a RET instruction. 475 * 476 * All jumps are forward as they are not signed. 477 * 478 * Returns 0 if the rule set is legal or -EINVAL if not. 479 */ 480int sk_chk_filter(struct sock_filter *filter, unsigned int flen) 481{ 482 /* 483 * Valid instructions are initialized to non-0. 484 * Invalid instructions are initialized to 0. 485 */ 486 static const u8 codes[] = { 487 [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K, 488 [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X, 489 [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K, 490 [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X, 491 [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K, 492 [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X, 493 [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X, 494 [BPF_ALU|BPF_MOD|BPF_K] = BPF_S_ALU_MOD_K, 495 [BPF_ALU|BPF_MOD|BPF_X] = BPF_S_ALU_MOD_X, 496 [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K, 497 [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X, 498 [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K, 499 [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X, 500 [BPF_ALU|BPF_XOR|BPF_K] = BPF_S_ALU_XOR_K, 501 [BPF_ALU|BPF_XOR|BPF_X] = BPF_S_ALU_XOR_X, 502 [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K, 503 [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X, 504 [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K, 505 [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X, 506 [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG, 507 [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS, 508 [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS, 509 [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS, 510 [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN, 511 [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND, 512 [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND, 513 [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND, 514 [BPF_LD|BPF_IMM] = BPF_S_LD_IMM, 515 [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN, 516 [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH, 517 [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM, 518 [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX, 519 [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA, 520 [BPF_RET|BPF_K] = BPF_S_RET_K, 521 [BPF_RET|BPF_A] = BPF_S_RET_A, 522 [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K, 523 [BPF_LD|BPF_MEM] = BPF_S_LD_MEM, 524 [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM, 525 [BPF_ST] = BPF_S_ST, 526 [BPF_STX] = BPF_S_STX, 527 [BPF_JMP|BPF_JA] = BPF_S_JMP_JA, 528 [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K, 529 [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X, 530 [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K, 531 [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X, 532 [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K, 533 [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X, 534 [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K, 535 [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X, 536 }; 537 int pc; 538 bool anc_found; 539 540 if (flen == 0 || flen > BPF_MAXINSNS) 541 return -EINVAL; 542 543 /* check the filter code now */ 544 for (pc = 0; pc < flen; pc++) { 545 struct sock_filter *ftest = &filter[pc]; 546 u16 code = ftest->code; 547 548 if (code >= ARRAY_SIZE(codes)) 549 return -EINVAL; 550 code = codes[code]; 551 if (!code) 552 return -EINVAL; 553 /* Some instructions need special checks */ 554 switch (code) { 555 case BPF_S_ALU_DIV_K: 556 /* check for division by zero */ 557 if (ftest->k == 0) 558 return -EINVAL; 559 ftest->k = reciprocal_value(ftest->k); 560 break; 561 case BPF_S_ALU_MOD_K: 562 /* check for division by zero */ 563 if (ftest->k == 0) 564 return -EINVAL; 565 break; 566 case BPF_S_LD_MEM: 567 case BPF_S_LDX_MEM: 568 case BPF_S_ST: 569 case BPF_S_STX: 570 /* check for invalid memory addresses */ 571 if (ftest->k >= BPF_MEMWORDS) 572 return -EINVAL; 573 break; 574 case BPF_S_JMP_JA: 575 /* 576 * Note, the large ftest->k might cause loops. 577 * Compare this with conditional jumps below, 578 * where offsets are limited. --ANK (981016) 579 */ 580 if (ftest->k >= (unsigned int)(flen-pc-1)) 581 return -EINVAL; 582 break; 583 case BPF_S_JMP_JEQ_K: 584 case BPF_S_JMP_JEQ_X: 585 case BPF_S_JMP_JGE_K: 586 case BPF_S_JMP_JGE_X: 587 case BPF_S_JMP_JGT_K: 588 case BPF_S_JMP_JGT_X: 589 case BPF_S_JMP_JSET_X: 590 case BPF_S_JMP_JSET_K: 591 /* for conditionals both must be safe */ 592 if (pc + ftest->jt + 1 >= flen || 593 pc + ftest->jf + 1 >= flen) 594 return -EINVAL; 595 break; 596 case BPF_S_LD_W_ABS: 597 case BPF_S_LD_H_ABS: 598 case BPF_S_LD_B_ABS: 599 anc_found = false; 600#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \ 601 code = BPF_S_ANC_##CODE; \ 602 anc_found = true; \ 603 break 604 switch (ftest->k) { 605 ANCILLARY(PROTOCOL); 606 ANCILLARY(PKTTYPE); 607 ANCILLARY(IFINDEX); 608 ANCILLARY(NLATTR); 609 ANCILLARY(NLATTR_NEST); 610 ANCILLARY(MARK); 611 ANCILLARY(QUEUE); 612 ANCILLARY(HATYPE); 613 ANCILLARY(RXHASH); 614 ANCILLARY(CPU); 615 ANCILLARY(ALU_XOR_X); 616 ANCILLARY(VLAN_TAG); 617 ANCILLARY(VLAN_TAG_PRESENT); 618 ANCILLARY(PAY_OFFSET); 619 } 620 621 /* ancillary operation unknown or unsupported */ 622 if (anc_found == false && ftest->k >= SKF_AD_OFF) 623 return -EINVAL; 624 } 625 ftest->code = code; 626 } 627 628 /* last instruction must be a RET code */ 629 switch (filter[flen - 1].code) { 630 case BPF_S_RET_K: 631 case BPF_S_RET_A: 632 return check_load_and_stores(filter, flen); 633 } 634 return -EINVAL; 635} 636EXPORT_SYMBOL(sk_chk_filter); 637 638/** 639 * sk_filter_release_rcu - Release a socket filter by rcu_head 640 * @rcu: rcu_head that contains the sk_filter to free 641 */ 642void sk_filter_release_rcu(struct rcu_head *rcu) 643{ 644 struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); 645 646 bpf_jit_free(fp); 647 kfree(fp); 648} 649EXPORT_SYMBOL(sk_filter_release_rcu); 650 651static int __sk_prepare_filter(struct sk_filter *fp) 652{ 653 int err; 654 655 fp->bpf_func = sk_run_filter; 656 657 err = sk_chk_filter(fp->insns, fp->len); 658 if (err) 659 return err; 660 661 bpf_jit_compile(fp); 662 return 0; 663} 664 665/** 666 * sk_unattached_filter_create - create an unattached filter 667 * @fprog: the filter program 668 * @pfp: the unattached filter that is created 669 * 670 * Create a filter independent of any socket. We first run some 671 * sanity checks on it to make sure it does not explode on us later. 672 * If an error occurs or there is insufficient memory for the filter 673 * a negative errno code is returned. On success the return is zero. 674 */ 675int sk_unattached_filter_create(struct sk_filter **pfp, 676 struct sock_fprog *fprog) 677{ 678 struct sk_filter *fp; 679 unsigned int fsize = sizeof(struct sock_filter) * fprog->len; 680 int err; 681 682 /* Make sure new filter is there and in the right amounts. */ 683 if (fprog->filter == NULL) 684 return -EINVAL; 685 686 fp = kmalloc(fsize + sizeof(*fp), GFP_KERNEL); 687 if (!fp) 688 return -ENOMEM; 689 memcpy(fp->insns, fprog->filter, fsize); 690 691 atomic_set(&fp->refcnt, 1); 692 fp->len = fprog->len; 693 694 err = __sk_prepare_filter(fp); 695 if (err) 696 goto free_mem; 697 698 *pfp = fp; 699 return 0; 700free_mem: 701 kfree(fp); 702 return err; 703} 704EXPORT_SYMBOL_GPL(sk_unattached_filter_create); 705 706void sk_unattached_filter_destroy(struct sk_filter *fp) 707{ 708 sk_filter_release(fp); 709} 710EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy); 711 712/** 713 * sk_attach_filter - attach a socket filter 714 * @fprog: the filter program 715 * @sk: the socket to use 716 * 717 * Attach the user's filter code. We first run some sanity checks on 718 * it to make sure it does not explode on us later. If an error 719 * occurs or there is insufficient memory for the filter a negative 720 * errno code is returned. On success the return is zero. 721 */ 722int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) 723{ 724 struct sk_filter *fp, *old_fp; 725 unsigned int fsize = sizeof(struct sock_filter) * fprog->len; 726 int err; 727 728 if (sock_flag(sk, SOCK_FILTER_LOCKED)) 729 return -EPERM; 730 731 /* Make sure new filter is there and in the right amounts. */ 732 if (fprog->filter == NULL) 733 return -EINVAL; 734 735 fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL); 736 if (!fp) 737 return -ENOMEM; 738 if (copy_from_user(fp->insns, fprog->filter, fsize)) { 739 sock_kfree_s(sk, fp, fsize+sizeof(*fp)); 740 return -EFAULT; 741 } 742 743 atomic_set(&fp->refcnt, 1); 744 fp->len = fprog->len; 745 746 err = __sk_prepare_filter(fp); 747 if (err) { 748 sk_filter_uncharge(sk, fp); 749 return err; 750 } 751 752 old_fp = rcu_dereference_protected(sk->sk_filter, 753 sock_owned_by_user(sk)); 754 rcu_assign_pointer(sk->sk_filter, fp); 755 756 if (old_fp) 757 sk_filter_uncharge(sk, old_fp); 758 return 0; 759} 760EXPORT_SYMBOL_GPL(sk_attach_filter); 761 762int sk_detach_filter(struct sock *sk) 763{ 764 int ret = -ENOENT; 765 struct sk_filter *filter; 766 767 if (sock_flag(sk, SOCK_FILTER_LOCKED)) 768 return -EPERM; 769 770 filter = rcu_dereference_protected(sk->sk_filter, 771 sock_owned_by_user(sk)); 772 if (filter) { 773 RCU_INIT_POINTER(sk->sk_filter, NULL); 774 sk_filter_uncharge(sk, filter); 775 ret = 0; 776 } 777 return ret; 778} 779EXPORT_SYMBOL_GPL(sk_detach_filter); 780 781void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) 782{ 783 static const u16 decodes[] = { 784 [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K, 785 [BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X, 786 [BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K, 787 [BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X, 788 [BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K, 789 [BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X, 790 [BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X, 791 [BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K, 792 [BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X, 793 [BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K, 794 [BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X, 795 [BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K, 796 [BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X, 797 [BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K, 798 [BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X, 799 [BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K, 800 [BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X, 801 [BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K, 802 [BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X, 803 [BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG, 804 [BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS, 805 [BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS, 806 [BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS, 807 [BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS, 808 [BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS, 809 [BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS, 810 [BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS, 811 [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS, 812 [BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS, 813 [BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS, 814 [BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS, 815 [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS, 816 [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS, 817 [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS, 818 [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS, 819 [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, 820 [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, 821 [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS, 822 [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, 823 [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, 824 [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, 825 [BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND, 826 [BPF_S_LD_IMM] = BPF_LD|BPF_IMM, 827 [BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN, 828 [BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH, 829 [BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM, 830 [BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX, 831 [BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA, 832 [BPF_S_RET_K] = BPF_RET|BPF_K, 833 [BPF_S_RET_A] = BPF_RET|BPF_A, 834 [BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K, 835 [BPF_S_LD_MEM] = BPF_LD|BPF_MEM, 836 [BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM, 837 [BPF_S_ST] = BPF_ST, 838 [BPF_S_STX] = BPF_STX, 839 [BPF_S_JMP_JA] = BPF_JMP|BPF_JA, 840 [BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K, 841 [BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X, 842 [BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K, 843 [BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X, 844 [BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K, 845 [BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X, 846 [BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K, 847 [BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X, 848 }; 849 u16 code; 850 851 code = filt->code; 852 853 to->code = decodes[code]; 854 to->jt = filt->jt; 855 to->jf = filt->jf; 856 857 if (code == BPF_S_ALU_DIV_K) { 858 /* 859 * When loaded this rule user gave us X, which was 860 * translated into R = r(X). Now we calculate the 861 * RR = r(R) and report it back. If next time this 862 * value is loaded and RRR = r(RR) is calculated 863 * then the R == RRR will be true. 864 * 865 * One exception. X == 1 translates into R == 0 and 866 * we can't calculate RR out of it with r(). 867 */ 868 869 if (filt->k == 0) 870 to->k = 1; 871 else 872 to->k = reciprocal_value(filt->k); 873 874 BUG_ON(reciprocal_value(to->k) != filt->k); 875 } else 876 to->k = filt->k; 877} 878 879int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) 880{ 881 struct sk_filter *filter; 882 int i, ret; 883 884 lock_sock(sk); 885 filter = rcu_dereference_protected(sk->sk_filter, 886 sock_owned_by_user(sk)); 887 ret = 0; 888 if (!filter) 889 goto out; 890 ret = filter->len; 891 if (!len) 892 goto out; 893 ret = -EINVAL; 894 if (len < filter->len) 895 goto out; 896 897 ret = -EFAULT; 898 for (i = 0; i < filter->len; i++) { 899 struct sock_filter fb; 900 901 sk_decode_filter(&filter->insns[i], &fb); 902 if (copy_to_user(&ubuf[i], &fb, sizeof(fb))) 903 goto out; 904 } 905 906 ret = filter->len; 907out: 908 release_sock(sk); 909 return ret; 910} 911