filter.c revision d59577b6ffd313d0ab3be39cb1ab47e29bdc9182
1/* 2 * Linux Socket Filter - Kernel level socket filtering 3 * 4 * Author: 5 * Jay Schulist <jschlst@samba.org> 6 * 7 * Based on the design of: 8 * - The Berkeley Packet Filter 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 * 15 * Andi Kleen - Fix a few bad bugs and races. 16 * Kris Katterjohn - Added many additional checks in sk_chk_filter() 17 */ 18 19#include <linux/module.h> 20#include <linux/types.h> 21#include <linux/mm.h> 22#include <linux/fcntl.h> 23#include <linux/socket.h> 24#include <linux/in.h> 25#include <linux/inet.h> 26#include <linux/netdevice.h> 27#include <linux/if_packet.h> 28#include <linux/gfp.h> 29#include <net/ip.h> 30#include <net/protocol.h> 31#include <net/netlink.h> 32#include <linux/skbuff.h> 33#include <net/sock.h> 34#include <linux/errno.h> 35#include <linux/timer.h> 36#include <asm/uaccess.h> 37#include <asm/unaligned.h> 38#include <linux/filter.h> 39#include <linux/reciprocal_div.h> 40#include <linux/ratelimit.h> 41#include <linux/seccomp.h> 42#include <linux/if_vlan.h> 43 44/* No hurry in this branch 45 * 46 * Exported for the bpf jit load helper. 
47 */ 48void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size) 49{ 50 u8 *ptr = NULL; 51 52 if (k >= SKF_NET_OFF) 53 ptr = skb_network_header(skb) + k - SKF_NET_OFF; 54 else if (k >= SKF_LL_OFF) 55 ptr = skb_mac_header(skb) + k - SKF_LL_OFF; 56 57 if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) 58 return ptr; 59 return NULL; 60} 61 62static inline void *load_pointer(const struct sk_buff *skb, int k, 63 unsigned int size, void *buffer) 64{ 65 if (k >= 0) 66 return skb_header_pointer(skb, k, size, buffer); 67 return bpf_internal_load_pointer_neg_helper(skb, k, size); 68} 69 70/** 71 * sk_filter - run a packet through a socket filter 72 * @sk: sock associated with &sk_buff 73 * @skb: buffer to filter 74 * 75 * Run the filter code and then cut skb->data to correct size returned by 76 * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller 77 * than pkt_len we keep whole skb->data. This is the socket level 78 * wrapper to sk_run_filter. It returns 0 if the packet should 79 * be accepted or -EPERM if the packet should be tossed. 80 * 81 */ 82int sk_filter(struct sock *sk, struct sk_buff *skb) 83{ 84 int err; 85 struct sk_filter *filter; 86 87 /* 88 * If the skb was allocated from pfmemalloc reserves, only 89 * allow SOCK_MEMALLOC sockets to use it as this socket is 90 * helping free memory 91 */ 92 if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) 93 return -ENOMEM; 94 95 err = security_sock_rcv_skb(sk, skb); 96 if (err) 97 return err; 98 99 rcu_read_lock(); 100 filter = rcu_dereference(sk->sk_filter); 101 if (filter) { 102 unsigned int pkt_len = SK_RUN_FILTER(filter, skb); 103 104 err = pkt_len ? 
pskb_trim(skb, pkt_len) : -EPERM; 105 } 106 rcu_read_unlock(); 107 108 return err; 109} 110EXPORT_SYMBOL(sk_filter); 111 112/** 113 * sk_run_filter - run a filter on a socket 114 * @skb: buffer to run the filter on 115 * @fentry: filter to apply 116 * 117 * Decode and apply filter instructions to the skb->data. 118 * Return length to keep, 0 for none. @skb is the data we are 119 * filtering, @filter is the array of filter instructions. 120 * Because all jumps are guaranteed to be before last instruction, 121 * and last instruction guaranteed to be a RET, we dont need to check 122 * flen. (We used to pass to this function the length of filter) 123 */ 124unsigned int sk_run_filter(const struct sk_buff *skb, 125 const struct sock_filter *fentry) 126{ 127 void *ptr; 128 u32 A = 0; /* Accumulator */ 129 u32 X = 0; /* Index Register */ 130 u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */ 131 u32 tmp; 132 int k; 133 134 /* 135 * Process array of filter instructions. 136 */ 137 for (;; fentry++) { 138#if defined(CONFIG_X86_32) 139#define K (fentry->k) 140#else 141 const u32 K = fentry->k; 142#endif 143 144 switch (fentry->code) { 145 case BPF_S_ALU_ADD_X: 146 A += X; 147 continue; 148 case BPF_S_ALU_ADD_K: 149 A += K; 150 continue; 151 case BPF_S_ALU_SUB_X: 152 A -= X; 153 continue; 154 case BPF_S_ALU_SUB_K: 155 A -= K; 156 continue; 157 case BPF_S_ALU_MUL_X: 158 A *= X; 159 continue; 160 case BPF_S_ALU_MUL_K: 161 A *= K; 162 continue; 163 case BPF_S_ALU_DIV_X: 164 if (X == 0) 165 return 0; 166 A /= X; 167 continue; 168 case BPF_S_ALU_DIV_K: 169 A = reciprocal_divide(A, K); 170 continue; 171 case BPF_S_ALU_MOD_X: 172 if (X == 0) 173 return 0; 174 A %= X; 175 continue; 176 case BPF_S_ALU_MOD_K: 177 A %= K; 178 continue; 179 case BPF_S_ALU_AND_X: 180 A &= X; 181 continue; 182 case BPF_S_ALU_AND_K: 183 A &= K; 184 continue; 185 case BPF_S_ALU_OR_X: 186 A |= X; 187 continue; 188 case BPF_S_ALU_OR_K: 189 A |= K; 190 continue; 191 case BPF_S_ANC_ALU_XOR_X: 192 case 
BPF_S_ALU_XOR_X: 193 A ^= X; 194 continue; 195 case BPF_S_ALU_XOR_K: 196 A ^= K; 197 continue; 198 case BPF_S_ALU_LSH_X: 199 A <<= X; 200 continue; 201 case BPF_S_ALU_LSH_K: 202 A <<= K; 203 continue; 204 case BPF_S_ALU_RSH_X: 205 A >>= X; 206 continue; 207 case BPF_S_ALU_RSH_K: 208 A >>= K; 209 continue; 210 case BPF_S_ALU_NEG: 211 A = -A; 212 continue; 213 case BPF_S_JMP_JA: 214 fentry += K; 215 continue; 216 case BPF_S_JMP_JGT_K: 217 fentry += (A > K) ? fentry->jt : fentry->jf; 218 continue; 219 case BPF_S_JMP_JGE_K: 220 fentry += (A >= K) ? fentry->jt : fentry->jf; 221 continue; 222 case BPF_S_JMP_JEQ_K: 223 fentry += (A == K) ? fentry->jt : fentry->jf; 224 continue; 225 case BPF_S_JMP_JSET_K: 226 fentry += (A & K) ? fentry->jt : fentry->jf; 227 continue; 228 case BPF_S_JMP_JGT_X: 229 fentry += (A > X) ? fentry->jt : fentry->jf; 230 continue; 231 case BPF_S_JMP_JGE_X: 232 fentry += (A >= X) ? fentry->jt : fentry->jf; 233 continue; 234 case BPF_S_JMP_JEQ_X: 235 fentry += (A == X) ? fentry->jt : fentry->jf; 236 continue; 237 case BPF_S_JMP_JSET_X: 238 fentry += (A & X) ? 
fentry->jt : fentry->jf; 239 continue; 240 case BPF_S_LD_W_ABS: 241 k = K; 242load_w: 243 ptr = load_pointer(skb, k, 4, &tmp); 244 if (ptr != NULL) { 245 A = get_unaligned_be32(ptr); 246 continue; 247 } 248 return 0; 249 case BPF_S_LD_H_ABS: 250 k = K; 251load_h: 252 ptr = load_pointer(skb, k, 2, &tmp); 253 if (ptr != NULL) { 254 A = get_unaligned_be16(ptr); 255 continue; 256 } 257 return 0; 258 case BPF_S_LD_B_ABS: 259 k = K; 260load_b: 261 ptr = load_pointer(skb, k, 1, &tmp); 262 if (ptr != NULL) { 263 A = *(u8 *)ptr; 264 continue; 265 } 266 return 0; 267 case BPF_S_LD_W_LEN: 268 A = skb->len; 269 continue; 270 case BPF_S_LDX_W_LEN: 271 X = skb->len; 272 continue; 273 case BPF_S_LD_W_IND: 274 k = X + K; 275 goto load_w; 276 case BPF_S_LD_H_IND: 277 k = X + K; 278 goto load_h; 279 case BPF_S_LD_B_IND: 280 k = X + K; 281 goto load_b; 282 case BPF_S_LDX_B_MSH: 283 ptr = load_pointer(skb, K, 1, &tmp); 284 if (ptr != NULL) { 285 X = (*(u8 *)ptr & 0xf) << 2; 286 continue; 287 } 288 return 0; 289 case BPF_S_LD_IMM: 290 A = K; 291 continue; 292 case BPF_S_LDX_IMM: 293 X = K; 294 continue; 295 case BPF_S_LD_MEM: 296 A = mem[K]; 297 continue; 298 case BPF_S_LDX_MEM: 299 X = mem[K]; 300 continue; 301 case BPF_S_MISC_TAX: 302 X = A; 303 continue; 304 case BPF_S_MISC_TXA: 305 A = X; 306 continue; 307 case BPF_S_RET_K: 308 return K; 309 case BPF_S_RET_A: 310 return A; 311 case BPF_S_ST: 312 mem[K] = A; 313 continue; 314 case BPF_S_STX: 315 mem[K] = X; 316 continue; 317 case BPF_S_ANC_PROTOCOL: 318 A = ntohs(skb->protocol); 319 continue; 320 case BPF_S_ANC_PKTTYPE: 321 A = skb->pkt_type; 322 continue; 323 case BPF_S_ANC_IFINDEX: 324 if (!skb->dev) 325 return 0; 326 A = skb->dev->ifindex; 327 continue; 328 case BPF_S_ANC_MARK: 329 A = skb->mark; 330 continue; 331 case BPF_S_ANC_QUEUE: 332 A = skb->queue_mapping; 333 continue; 334 case BPF_S_ANC_HATYPE: 335 if (!skb->dev) 336 return 0; 337 A = skb->dev->type; 338 continue; 339 case BPF_S_ANC_RXHASH: 340 A = skb->rxhash; 341 
continue; 342 case BPF_S_ANC_CPU: 343 A = raw_smp_processor_id(); 344 continue; 345 case BPF_S_ANC_VLAN_TAG: 346 A = vlan_tx_tag_get(skb); 347 continue; 348 case BPF_S_ANC_VLAN_TAG_PRESENT: 349 A = !!vlan_tx_tag_present(skb); 350 continue; 351 case BPF_S_ANC_NLATTR: { 352 struct nlattr *nla; 353 354 if (skb_is_nonlinear(skb)) 355 return 0; 356 if (A > skb->len - sizeof(struct nlattr)) 357 return 0; 358 359 nla = nla_find((struct nlattr *)&skb->data[A], 360 skb->len - A, X); 361 if (nla) 362 A = (void *)nla - (void *)skb->data; 363 else 364 A = 0; 365 continue; 366 } 367 case BPF_S_ANC_NLATTR_NEST: { 368 struct nlattr *nla; 369 370 if (skb_is_nonlinear(skb)) 371 return 0; 372 if (A > skb->len - sizeof(struct nlattr)) 373 return 0; 374 375 nla = (struct nlattr *)&skb->data[A]; 376 if (nla->nla_len > A - skb->len) 377 return 0; 378 379 nla = nla_find_nested(nla, X); 380 if (nla) 381 A = (void *)nla - (void *)skb->data; 382 else 383 A = 0; 384 continue; 385 } 386#ifdef CONFIG_SECCOMP_FILTER 387 case BPF_S_ANC_SECCOMP_LD_W: 388 A = seccomp_bpf_load(fentry->k); 389 continue; 390#endif 391 default: 392 WARN_RATELIMIT(1, "Unknown code:%u jt:%u tf:%u k:%u\n", 393 fentry->code, fentry->jt, 394 fentry->jf, fentry->k); 395 return 0; 396 } 397 } 398 399 return 0; 400} 401EXPORT_SYMBOL(sk_run_filter); 402 403/* 404 * Security : 405 * A BPF program is able to use 16 cells of memory to store intermediate 406 * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()) 407 * As we dont want to clear mem[] array for each packet going through 408 * sk_run_filter(), we check that filter loaded by user never try to read 409 * a cell if not previously written, and we check all branches to be sure 410 * a malicious user doesn't try to abuse us. 
 */
static int check_load_and_stores(struct sock_filter *filter, int flen)
{
	u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
	int pc, ret = 0;

	BUILD_BUG_ON(BPF_MEMWORDS > 16);
	/* masks[pc] tracks which scratch cells are guaranteed written on
	 * every path reaching pc; start "all valid" and narrow at jumps.
	 */
	masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return -ENOMEM;
	memset(masks, 0xff, flen * sizeof(*masks));

	for (pc = 0; pc < flen; pc++) {
		memvalid &= masks[pc];

		switch (filter[pc].code) {
		case BPF_S_ST:
		case BPF_S_STX:
			/* store marks cell k as readable from here on */
			memvalid |= (1 << filter[pc].k);
			break;
		case BPF_S_LD_MEM:
		case BPF_S_LDX_MEM:
			/* reject a load from a never-written cell */
			if (!(memvalid & (1 << filter[pc].k))) {
				ret = -EINVAL;
				goto error;
			}
			break;
		case BPF_S_JMP_JA:
			/* a jump must set masks on target */
			masks[pc + 1 + filter[pc].k] &= memvalid;
			memvalid = ~0;
			break;
		case BPF_S_JMP_JEQ_K:
		case BPF_S_JMP_JEQ_X:
		case BPF_S_JMP_JGE_K:
		case BPF_S_JMP_JGE_X:
		case BPF_S_JMP_JGT_K:
		case BPF_S_JMP_JGT_X:
		case BPF_S_JMP_JSET_X:
		case BPF_S_JMP_JSET_K:
			/* a jump must set masks on targets */
			masks[pc + 1 + filter[pc].jt] &= memvalid;
			masks[pc + 1 + filter[pc].jf] &= memvalid;
			memvalid = ~0;
			break;
		}
	}
error:
	kfree(masks);
	return ret;
}

/**
 *	sk_chk_filter - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
{
	/*
	 * Valid instructions are initialized to non-0.
	 * Invalid instructions are initialized to 0.
	 * Index is the raw BPF_CLASS|BPF_OP|BPF_SRC opcode; value is the
	 * decoded BPF_S_* virtual opcode used by sk_run_filter().
	 */
	static const u8 codes[] = {
		[BPF_ALU|BPF_ADD|BPF_K]  = BPF_S_ALU_ADD_K,
		[BPF_ALU|BPF_ADD|BPF_X]  = BPF_S_ALU_ADD_X,
		[BPF_ALU|BPF_SUB|BPF_K]  = BPF_S_ALU_SUB_K,
		[BPF_ALU|BPF_SUB|BPF_X]  = BPF_S_ALU_SUB_X,
		[BPF_ALU|BPF_MUL|BPF_K]  = BPF_S_ALU_MUL_K,
		[BPF_ALU|BPF_MUL|BPF_X]  = BPF_S_ALU_MUL_X,
		[BPF_ALU|BPF_DIV|BPF_X]  = BPF_S_ALU_DIV_X,
		[BPF_ALU|BPF_MOD|BPF_K]  = BPF_S_ALU_MOD_K,
		[BPF_ALU|BPF_MOD|BPF_X]  = BPF_S_ALU_MOD_X,
		[BPF_ALU|BPF_AND|BPF_K]  = BPF_S_ALU_AND_K,
		[BPF_ALU|BPF_AND|BPF_X]  = BPF_S_ALU_AND_X,
		[BPF_ALU|BPF_OR|BPF_K]   = BPF_S_ALU_OR_K,
		[BPF_ALU|BPF_OR|BPF_X]   = BPF_S_ALU_OR_X,
		[BPF_ALU|BPF_XOR|BPF_K]  = BPF_S_ALU_XOR_K,
		[BPF_ALU|BPF_XOR|BPF_X]  = BPF_S_ALU_XOR_X,
		[BPF_ALU|BPF_LSH|BPF_K]  = BPF_S_ALU_LSH_K,
		[BPF_ALU|BPF_LSH|BPF_X]  = BPF_S_ALU_LSH_X,
		[BPF_ALU|BPF_RSH|BPF_K]  = BPF_S_ALU_RSH_K,
		[BPF_ALU|BPF_RSH|BPF_X]  = BPF_S_ALU_RSH_X,
		[BPF_ALU|BPF_NEG]        = BPF_S_ALU_NEG,
		[BPF_LD|BPF_W|BPF_ABS]   = BPF_S_LD_W_ABS,
		[BPF_LD|BPF_H|BPF_ABS]   = BPF_S_LD_H_ABS,
		[BPF_LD|BPF_B|BPF_ABS]   = BPF_S_LD_B_ABS,
		[BPF_LD|BPF_W|BPF_LEN]   = BPF_S_LD_W_LEN,
		[BPF_LD|BPF_W|BPF_IND]   = BPF_S_LD_W_IND,
		[BPF_LD|BPF_H|BPF_IND]   = BPF_S_LD_H_IND,
		[BPF_LD|BPF_B|BPF_IND]   = BPF_S_LD_B_IND,
		[BPF_LD|BPF_IMM]         = BPF_S_LD_IMM,
		[BPF_LDX|BPF_W|BPF_LEN]  = BPF_S_LDX_W_LEN,
		[BPF_LDX|BPF_B|BPF_MSH]  = BPF_S_LDX_B_MSH,
		[BPF_LDX|BPF_IMM]        = BPF_S_LDX_IMM,
		[BPF_MISC|BPF_TAX]       = BPF_S_MISC_TAX,
		[BPF_MISC|BPF_TXA]       = BPF_S_MISC_TXA,
		[BPF_RET|BPF_K]          = BPF_S_RET_K,
		[BPF_RET|BPF_A]          = BPF_S_RET_A,
		[BPF_ALU|BPF_DIV|BPF_K]  = BPF_S_ALU_DIV_K,
		[BPF_LD|BPF_MEM]         = BPF_S_LD_MEM,
		[BPF_LDX|BPF_MEM]        = BPF_S_LDX_MEM,
		[BPF_ST]                 = BPF_S_ST,
		[BPF_STX]                = BPF_S_STX,
		[BPF_JMP|BPF_JA]         = BPF_S_JMP_JA,
		[BPF_JMP|BPF_JEQ|BPF_K]  = BPF_S_JMP_JEQ_K,
		[BPF_JMP|BPF_JEQ|BPF_X]  = BPF_S_JMP_JEQ_X,
		[BPF_JMP|BPF_JGE|BPF_K]  = BPF_S_JMP_JGE_K,
		[BPF_JMP|BPF_JGE|BPF_X]  = BPF_S_JMP_JGE_X,
		[BPF_JMP|BPF_JGT|BPF_K]  = BPF_S_JMP_JGT_K,
		[BPF_JMP|BPF_JGT|BPF_X]  = BPF_S_JMP_JGT_X,
		[BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
		[BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
	};
	int pc;
	bool anc_found;

	if (flen == 0 || flen > BPF_MAXINSNS)
		return -EINVAL;

	/* check the filter code now */
	for (pc = 0; pc < flen; pc++) {
		struct sock_filter *ftest = &filter[pc];
		u16 code = ftest->code;

		if (code >= ARRAY_SIZE(codes))
			return -EINVAL;
		code = codes[code];
		if (!code)
			return -EINVAL;
		/* Some instructions need special checks */
		switch (code) {
		case BPF_S_ALU_DIV_K:
			/* check for division by zero */
			if (ftest->k == 0)
				return -EINVAL;
			/* store the reciprocal so the interpreter/JIT can
			 * divide by multiplying (see reciprocal_divide())
			 */
			ftest->k = reciprocal_value(ftest->k);
			break;
		case BPF_S_ALU_MOD_K:
			/* check for division by zero */
			if (ftest->k == 0)
				return -EINVAL;
			break;
		case BPF_S_LD_MEM:
		case BPF_S_LDX_MEM:
		case BPF_S_ST:
		case BPF_S_STX:
			/* check for invalid memory addresses */
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;
		case BPF_S_JMP_JA:
			/*
			 * Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited. --ANK (981016)
			 */
			if (ftest->k >= (unsigned int)(flen-pc-1))
				return -EINVAL;
			break;
		case BPF_S_JMP_JEQ_K:
		case BPF_S_JMP_JEQ_X:
		case BPF_S_JMP_JGE_K:
		case BPF_S_JMP_JGE_X:
		case BPF_S_JMP_JGT_K:
		case BPF_S_JMP_JGT_X:
		case BPF_S_JMP_JSET_X:
		case BPF_S_JMP_JSET_K:
			/* for conditionals both must be safe */
			if (pc + ftest->jt + 1 >= flen ||
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
			break;
		case BPF_S_LD_W_ABS:
		case BPF_S_LD_H_ABS:
		case BPF_S_LD_B_ABS:
			/* an absolute load with k in the SKF_AD_OFF range
			 * is really an ancillary-data access; remap it
			 */
			anc_found = false;
#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE:	\
				code = BPF_S_ANC_##CODE;	\
				anc_found = true;		\
				break
			switch (ftest->k) {
			ANCILLARY(PROTOCOL);
			ANCILLARY(PKTTYPE);
			ANCILLARY(IFINDEX);
			ANCILLARY(NLATTR);
			ANCILLARY(NLATTR_NEST);
			ANCILLARY(MARK);
			ANCILLARY(QUEUE);
			ANCILLARY(HATYPE);
			ANCILLARY(RXHASH);
			ANCILLARY(CPU);
			ANCILLARY(ALU_XOR_X);
			ANCILLARY(VLAN_TAG);
			ANCILLARY(VLAN_TAG_PRESENT);
			}

			/* ancillary operation unknown or unsupported */
			if (anc_found == false && ftest->k >= SKF_AD_OFF)
				return -EINVAL;
		}
		/* overwrite the raw opcode with the decoded BPF_S_* one */
		ftest->code = code;
	}

	/* last instruction must be a RET code */
	switch (filter[flen - 1].code) {
	case BPF_S_RET_K:
	case BPF_S_RET_A:
		return check_load_and_stores(filter, flen);
	}
	return -EINVAL;
}
EXPORT_SYMBOL(sk_chk_filter);

/**
 *	sk_filter_release_rcu - Release a socket filter by rcu_head
 *	@rcu: rcu_head that contains the sk_filter to free
 */
void sk_filter_release_rcu(struct rcu_head *rcu)
{
	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

	bpf_jit_free(fp);
	kfree(fp);
}
EXPORT_SYMBOL(sk_filter_release_rcu);

/* Validate the filter program and try to JIT-compile it.  On success
 * fp->bpf_func is either sk_run_filter or the JIT-produced image.
 * Returns 0 or a negative errno from sk_chk_filter().
 */
static int __sk_prepare_filter(struct sk_filter *fp)
{
	int err;

	fp->bpf_func = sk_run_filter;

	err = sk_chk_filter(fp->insns, fp->len);
	if (err)
		return err;

	/* bpf_jit_compile() may replace fp->bpf_func; it is a no-op
	 * when no JIT is available
	 */
	bpf_jit_compile(fp);
	return 0;
}

/**
 *	sk_unattached_filter_create - create an unattached filter
 *	@fprog: the filter program
 *	@pfp: the unattached filter that is created
 *
 * Create a filter independent of any socket. We first run some
 * sanity checks on it to make sure it does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter
 * a negative errno code is returned. On success the return is zero.
 */
int sk_unattached_filter_create(struct sk_filter **pfp,
				struct sock_fprog *fprog)
{
	struct sk_filter *fp;
	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
	int err;

	/* Make sure new filter is there and in the right amounts. */
	if (fprog->filter == NULL)
		return -EINVAL;

	/* fp->insns is a flexible array at the end of *fp */
	fp = kmalloc(fsize + sizeof(*fp), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;
	/* caller passes a kernel-space program here, unlike
	 * sk_attach_filter() which copies from user space
	 */
	memcpy(fp->insns, fprog->filter, fsize);

	atomic_set(&fp->refcnt, 1);
	fp->len = fprog->len;

	err = __sk_prepare_filter(fp);
	if (err)
		goto free_mem;

	*pfp = fp;
	return 0;
free_mem:
	kfree(fp);
	return err;
}
EXPORT_SYMBOL_GPL(sk_unattached_filter_create);

void sk_unattached_filter_destroy(struct sk_filter *fp)
{
	sk_filter_release(fp);
}
EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy);

/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct sk_filter *fp, *old_fp;
	unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
	int err;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	/* Make sure new filter is there and in the right amounts. */
	if (fprog->filter == NULL)
		return -EINVAL;

	/* charged against the socket's option memory; fp->insns is a
	 * flexible array at the end of *fp
	 */
	fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;
	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
		sock_kfree_s(sk, fp, fsize+sizeof(*fp));
		return -EFAULT;
	}

	atomic_set(&fp->refcnt, 1);
	fp->len = fprog->len;

	err = __sk_prepare_filter(fp);
	if (err) {
		sk_filter_uncharge(sk, fp);
		return err;
	}

	/* publish the new filter under RCU; the socket lock held by the
	 * caller serializes concurrent attach/detach
	 */
	old_fp = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
	rcu_assign_pointer(sk->sk_filter, fp);

	if (old_fp)
		sk_filter_uncharge(sk, old_fp);
	return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);

/* Detach the socket's filter, if any.  Returns 0 on success, -ENOENT if
 * no filter was attached, -EPERM if the filter is locked.  Caller must
 * hold the socket lock.
 */
int sk_detach_filter(struct sock *sk)
{
	int ret = -ENOENT;
	struct sk_filter *filter;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	filter = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
	if (filter) {
		RCU_INIT_POINTER(sk->sk_filter, NULL);
		sk_filter_uncharge(sk, filter);
		ret = 0;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(sk_detach_filter);

/* Translate one kernel-internal BPF_S_* instruction back to the uapi
 * BPF_* encoding for reporting through sk_get_filter().
 */
static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
{
	/* Index is the internal BPF_S_* opcode, value the uapi opcode.
	 * NOTE(review): every BPF_S_ANC_* entry decodes to
	 * BPF_LD|BPF_B|BPF_ABS regardless of the width the user
	 * originally supplied; the SKF_AD_OFF k value is preserved, so
	 * re-attaching the decoded program maps back to the same
	 * ancillary op — presumably intentional, but the byte-for-byte
	 * original width is lost. TODO confirm against userspace
	 * expectations.
	 */
	static const u16 decodes[] = {
		[BPF_S_ALU_ADD_K]	= BPF_ALU|BPF_ADD|BPF_K,
		[BPF_S_ALU_ADD_X]	= BPF_ALU|BPF_ADD|BPF_X,
		[BPF_S_ALU_SUB_K]	= BPF_ALU|BPF_SUB|BPF_K,
		[BPF_S_ALU_SUB_X]	= BPF_ALU|BPF_SUB|BPF_X,
		[BPF_S_ALU_MUL_K]	= BPF_ALU|BPF_MUL|BPF_K,
		[BPF_S_ALU_MUL_X]	= BPF_ALU|BPF_MUL|BPF_X,
		[BPF_S_ALU_DIV_X]	= BPF_ALU|BPF_DIV|BPF_X,
		[BPF_S_ALU_MOD_K]	= BPF_ALU|BPF_MOD|BPF_K,
		[BPF_S_ALU_MOD_X]	= BPF_ALU|BPF_MOD|BPF_X,
		[BPF_S_ALU_AND_K]	= BPF_ALU|BPF_AND|BPF_K,
		[BPF_S_ALU_AND_X]	= BPF_ALU|BPF_AND|BPF_X,
		[BPF_S_ALU_OR_K]	= BPF_ALU|BPF_OR|BPF_K,
		[BPF_S_ALU_OR_X]	= BPF_ALU|BPF_OR|BPF_X,
		[BPF_S_ALU_XOR_K]	= BPF_ALU|BPF_XOR|BPF_K,
		[BPF_S_ALU_XOR_X]	= BPF_ALU|BPF_XOR|BPF_X,
		[BPF_S_ALU_LSH_K]	= BPF_ALU|BPF_LSH|BPF_K,
		[BPF_S_ALU_LSH_X]	= BPF_ALU|BPF_LSH|BPF_X,
		[BPF_S_ALU_RSH_K]	= BPF_ALU|BPF_RSH|BPF_K,
		[BPF_S_ALU_RSH_X]	= BPF_ALU|BPF_RSH|BPF_X,
		[BPF_S_ALU_NEG]		= BPF_ALU|BPF_NEG,
		[BPF_S_LD_W_ABS]	= BPF_LD|BPF_W|BPF_ABS,
		[BPF_S_LD_H_ABS]	= BPF_LD|BPF_H|BPF_ABS,
		[BPF_S_LD_B_ABS]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_PROTOCOL]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_PKTTYPE]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_IFINDEX]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_NLATTR]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_NLATTR_NEST]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_MARK]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_QUEUE]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_HATYPE]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_RXHASH]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_CPU]		= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_ALU_XOR_X]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_VLAN_TAG]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_LD_W_LEN]	= BPF_LD|BPF_W|BPF_LEN,
		[BPF_S_LD_W_IND]	= BPF_LD|BPF_W|BPF_IND,
		[BPF_S_LD_H_IND]	= BPF_LD|BPF_H|BPF_IND,
		[BPF_S_LD_B_IND]	= BPF_LD|BPF_B|BPF_IND,
		[BPF_S_LD_IMM]		= BPF_LD|BPF_IMM,
		[BPF_S_LDX_W_LEN]	= BPF_LDX|BPF_W|BPF_LEN,
		[BPF_S_LDX_B_MSH]	= BPF_LDX|BPF_B|BPF_MSH,
		[BPF_S_LDX_IMM]		= BPF_LDX|BPF_IMM,
		[BPF_S_MISC_TAX]	= BPF_MISC|BPF_TAX,
		[BPF_S_MISC_TXA]	= BPF_MISC|BPF_TXA,
		[BPF_S_RET_K]		= BPF_RET|BPF_K,
		[BPF_S_RET_A]		= BPF_RET|BPF_A,
		[BPF_S_ALU_DIV_K]	= BPF_ALU|BPF_DIV|BPF_K,
		[BPF_S_LD_MEM]		= BPF_LD|BPF_MEM,
		[BPF_S_LDX_MEM]		= BPF_LDX|BPF_MEM,
		[BPF_S_ST]		= BPF_ST,
		[BPF_S_STX]		= BPF_STX,
		[BPF_S_JMP_JA]		= BPF_JMP|BPF_JA,
		[BPF_S_JMP_JEQ_K]	= BPF_JMP|BPF_JEQ|BPF_K,
		[BPF_S_JMP_JEQ_X]	= BPF_JMP|BPF_JEQ|BPF_X,
		[BPF_S_JMP_JGE_K]	= BPF_JMP|BPF_JGE|BPF_K,
		[BPF_S_JMP_JGE_X]	= BPF_JMP|BPF_JGE|BPF_X,
		[BPF_S_JMP_JGT_K]	= BPF_JMP|BPF_JGT|BPF_K,
		[BPF_S_JMP_JGT_X]	= BPF_JMP|BPF_JGT|BPF_X,
		[BPF_S_JMP_JSET_K]	= BPF_JMP|BPF_JSET|BPF_K,
		[BPF_S_JMP_JSET_X]	= BPF_JMP|BPF_JSET|BPF_X,
	};
	u16 code;

	code = filt->code;

	to->code = decodes[code];
	to->jt = filt->jt;
	to->jf = filt->jf;

	if (code == BPF_S_ALU_DIV_K) {
		/*
		 * When loaded this rule user gave us X, which was
		 * translated into R = r(X). Now we calculate the
		 * RR = r(R) and report it back. If next time this
		 * value is loaded and RRR = r(RR) is calculated
		 * then the R == RRR will be true.
		 *
		 * One exception. X == 1 translates into R == 0 and
		 * we can't calculate RR out of it with r().
		 */

		if (filt->k == 0)
			to->k = 1;
		else
			to->k = reciprocal_value(filt->k);

		BUG_ON(reciprocal_value(to->k) != filt->k);
	} else
		to->k = filt->k;
}

/* Copy the socket's attached filter (decoded back to uapi opcodes) into
 * @ubuf.  With @len == 0 just report the program length; otherwise
 * returns the length copied, -EINVAL if @ubuf is too small, or -EFAULT
 * on a failed copy to user space.
 */
int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len)
{
	struct sk_filter *filter;
	int i, ret;

	lock_sock(sk);
	filter = rcu_dereference_protected(sk->sk_filter,
						 sock_owned_by_user(sk));
	ret = 0;
	if (!filter)
		goto out;
	ret = filter->len;
	if (!len)
		/* length-query mode: report size without copying */
		goto out;
	ret = -EINVAL;
	if (len < filter->len)
		goto out;

	ret = -EFAULT;
	for (i = 0; i < filter->len; i++) {
		struct sock_filter fb;

		sk_decode_filter(&filter->insns[i], &fb);
		if (copy_to_user(&ubuf[i], &fb, sizeof(fb)))
			goto out;
	}

	ret = filter->len;
out:
	release_sock(sk);
	return ret;
}