/* nf_conntrack_reasm.c revision 9e903e085262ffbf1fc44a17ac06058aca03524a */
1/* 2 * IPv6 fragment reassembly for connection tracking 3 * 4 * Copyright (C)2004 USAGI/WIDE Project 5 * 6 * Author: 7 * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> 8 * 9 * Based on: net/ipv6/reassembly.c 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public License 13 * as published by the Free Software Foundation; either version 14 * 2 of the License, or (at your option) any later version. 15 */ 16 17#include <linux/errno.h> 18#include <linux/types.h> 19#include <linux/string.h> 20#include <linux/socket.h> 21#include <linux/sockios.h> 22#include <linux/jiffies.h> 23#include <linux/net.h> 24#include <linux/list.h> 25#include <linux/netdevice.h> 26#include <linux/in6.h> 27#include <linux/ipv6.h> 28#include <linux/icmpv6.h> 29#include <linux/random.h> 30#include <linux/slab.h> 31 32#include <net/sock.h> 33#include <net/snmp.h> 34#include <net/inet_frag.h> 35 36#include <net/ipv6.h> 37#include <net/protocol.h> 38#include <net/transp_v6.h> 39#include <net/rawv6.h> 40#include <net/ndisc.h> 41#include <net/addrconf.h> 42#include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 43#include <linux/sysctl.h> 44#include <linux/netfilter.h> 45#include <linux/netfilter_ipv6.h> 46#include <linux/kernel.h> 47#include <linux/module.h> 48#include <net/netfilter/ipv6/nf_defrag_ipv6.h> 49 50 51struct nf_ct_frag6_skb_cb 52{ 53 struct inet6_skb_parm h; 54 int offset; 55 struct sk_buff *orig; 56}; 57 58#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb)) 59 60struct nf_ct_frag6_queue 61{ 62 struct inet_frag_queue q; 63 64 __be32 id; /* fragment id */ 65 u32 user; 66 struct in6_addr saddr; 67 struct in6_addr daddr; 68 69 unsigned int csum; 70 __u16 nhoffset; 71}; 72 73static struct inet_frags nf_frags; 74static struct netns_frags nf_init_frags; 75 76#ifdef CONFIG_SYSCTL 77static struct ctl_table nf_ct_frag6_sysctl_table[] = { 78 { 79 .procname = "nf_conntrack_frag6_timeout", 80 .data = 
&nf_init_frags.timeout, 81 .maxlen = sizeof(unsigned int), 82 .mode = 0644, 83 .proc_handler = proc_dointvec_jiffies, 84 }, 85 { 86 .procname = "nf_conntrack_frag6_low_thresh", 87 .data = &nf_init_frags.low_thresh, 88 .maxlen = sizeof(unsigned int), 89 .mode = 0644, 90 .proc_handler = proc_dointvec, 91 }, 92 { 93 .procname = "nf_conntrack_frag6_high_thresh", 94 .data = &nf_init_frags.high_thresh, 95 .maxlen = sizeof(unsigned int), 96 .mode = 0644, 97 .proc_handler = proc_dointvec, 98 }, 99 { } 100}; 101 102static struct ctl_table_header *nf_ct_frag6_sysctl_header; 103#endif 104 105static unsigned int nf_hashfn(struct inet_frag_queue *q) 106{ 107 const struct nf_ct_frag6_queue *nq; 108 109 nq = container_of(q, struct nf_ct_frag6_queue, q); 110 return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd); 111} 112 113static void nf_skb_free(struct sk_buff *skb) 114{ 115 if (NFCT_FRAG6_CB(skb)->orig) 116 kfree_skb(NFCT_FRAG6_CB(skb)->orig); 117} 118 119/* Destruction primitives. */ 120 121static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) 122{ 123 inet_frag_put(&fq->q, &nf_frags); 124} 125 126/* Kill fq entry. It is not destroyed immediately, 127 * because caller (and someone more) holds reference count. 128 */ 129static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq) 130{ 131 inet_frag_kill(&fq->q, &nf_frags); 132} 133 134static void nf_ct_frag6_evictor(void) 135{ 136 local_bh_disable(); 137 inet_frag_evictor(&nf_init_frags, &nf_frags); 138 local_bh_enable(); 139} 140 141static void nf_ct_frag6_expire(unsigned long data) 142{ 143 struct nf_ct_frag6_queue *fq; 144 145 fq = container_of((struct inet_frag_queue *)data, 146 struct nf_ct_frag6_queue, q); 147 148 spin_lock(&fq->q.lock); 149 150 if (fq->q.last_in & INET_FRAG_COMPLETE) 151 goto out; 152 153 fq_kill(fq); 154 155out: 156 spin_unlock(&fq->q.lock); 157 fq_put(fq); 158} 159 160/* Creation primitives. 
*/ 161 162static __inline__ struct nf_ct_frag6_queue * 163fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst) 164{ 165 struct inet_frag_queue *q; 166 struct ip6_create_arg arg; 167 unsigned int hash; 168 169 arg.id = id; 170 arg.user = user; 171 arg.src = src; 172 arg.dst = dst; 173 174 read_lock_bh(&nf_frags.lock); 175 hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); 176 177 q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); 178 local_bh_enable(); 179 if (q == NULL) 180 goto oom; 181 182 return container_of(q, struct nf_ct_frag6_queue, q); 183 184oom: 185 pr_debug("Can't alloc new queue\n"); 186 return NULL; 187} 188 189 190static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, 191 const struct frag_hdr *fhdr, int nhoff) 192{ 193 struct sk_buff *prev, *next; 194 int offset, end; 195 196 if (fq->q.last_in & INET_FRAG_COMPLETE) { 197 pr_debug("Already completed\n"); 198 goto err; 199 } 200 201 offset = ntohs(fhdr->frag_off) & ~0x7; 202 end = offset + (ntohs(ipv6_hdr(skb)->payload_len) - 203 ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); 204 205 if ((unsigned int)end > IPV6_MAXPLEN) { 206 pr_debug("offset is too large.\n"); 207 return -1; 208 } 209 210 if (skb->ip_summed == CHECKSUM_COMPLETE) { 211 const unsigned char *nh = skb_network_header(skb); 212 skb->csum = csum_sub(skb->csum, 213 csum_partial(nh, (u8 *)(fhdr + 1) - nh, 214 0)); 215 } 216 217 /* Is this the final fragment? */ 218 if (!(fhdr->frag_off & htons(IP6_MF))) { 219 /* If we already have some bits beyond end 220 * or have different end, the segment is corrupted. 221 */ 222 if (end < fq->q.len || 223 ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) { 224 pr_debug("already received last fragment\n"); 225 goto err; 226 } 227 fq->q.last_in |= INET_FRAG_LAST_IN; 228 fq->q.len = end; 229 } else { 230 /* Check if the fragment is rounded to 8 bytes. 231 * Required by the RFC. 
232 */ 233 if (end & 0x7) { 234 /* RFC2460 says always send parameter problem in 235 * this case. -DaveM 236 */ 237 pr_debug("end of fragment not rounded to 8 bytes.\n"); 238 return -1; 239 } 240 if (end > fq->q.len) { 241 /* Some bits beyond end -> corruption. */ 242 if (fq->q.last_in & INET_FRAG_LAST_IN) { 243 pr_debug("last packet already reached.\n"); 244 goto err; 245 } 246 fq->q.len = end; 247 } 248 } 249 250 if (end == offset) 251 goto err; 252 253 /* Point into the IP datagram 'data' part. */ 254 if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) { 255 pr_debug("queue: message is too short.\n"); 256 goto err; 257 } 258 if (pskb_trim_rcsum(skb, end - offset)) { 259 pr_debug("Can't trim\n"); 260 goto err; 261 } 262 263 /* Find out which fragments are in front and at the back of us 264 * in the chain of fragments so far. We must know where to put 265 * this fragment, right? 266 */ 267 prev = fq->q.fragments_tail; 268 if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) { 269 next = NULL; 270 goto found; 271 } 272 prev = NULL; 273 for (next = fq->q.fragments; next != NULL; next = next->next) { 274 if (NFCT_FRAG6_CB(next)->offset >= offset) 275 break; /* bingo! */ 276 prev = next; 277 } 278 279found: 280 /* RFC5722, Section 4: 281 * When reassembling an IPv6 datagram, if 282 * one or more its constituent fragments is determined to be an 283 * overlapping fragment, the entire datagram (and any constituent 284 * fragments, including those not yet received) MUST be silently 285 * discarded. 286 */ 287 288 /* Check for overlap with preceding fragment. */ 289 if (prev && 290 (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset) 291 goto discard_fq; 292 293 /* Look for overlap with succeeding segment. */ 294 if (next && NFCT_FRAG6_CB(next)->offset < end) 295 goto discard_fq; 296 297 NFCT_FRAG6_CB(skb)->offset = offset; 298 299 /* Insert this fragment in the chain of fragments. 
*/ 300 skb->next = next; 301 if (!next) 302 fq->q.fragments_tail = skb; 303 if (prev) 304 prev->next = skb; 305 else 306 fq->q.fragments = skb; 307 308 skb->dev = NULL; 309 fq->q.stamp = skb->tstamp; 310 fq->q.meat += skb->len; 311 atomic_add(skb->truesize, &nf_init_frags.mem); 312 313 /* The first fragment. 314 * nhoffset is obtained from the first fragment, of course. 315 */ 316 if (offset == 0) { 317 fq->nhoffset = nhoff; 318 fq->q.last_in |= INET_FRAG_FIRST_IN; 319 } 320 write_lock(&nf_frags.lock); 321 list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list); 322 write_unlock(&nf_frags.lock); 323 return 0; 324 325discard_fq: 326 fq_kill(fq); 327err: 328 return -1; 329} 330 331/* 332 * Check if this packet is complete. 333 * Returns NULL on failure by any reason, and pointer 334 * to current nexthdr field in reassembled frame. 335 * 336 * It is called with locked fq, and caller must check that 337 * queue is eligible for reassembly i.e. it is not COMPLETE, 338 * the last and the first frames arrived and all the bits are here. 339 */ 340static struct sk_buff * 341nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) 342{ 343 struct sk_buff *fp, *op, *head = fq->q.fragments; 344 int payload_len; 345 346 fq_kill(fq); 347 348 WARN_ON(head == NULL); 349 WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); 350 351 /* Unfragmented part is taken from the first segment. */ 352 payload_len = ((head->data - skb_network_header(head)) - 353 sizeof(struct ipv6hdr) + fq->q.len - 354 sizeof(struct frag_hdr)); 355 if (payload_len > IPV6_MAXPLEN) { 356 pr_debug("payload len is too large.\n"); 357 goto out_oversize; 358 } 359 360 /* Head of list must not be cloned. 
*/ 361 if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) { 362 pr_debug("skb is cloned but can't expand head"); 363 goto out_oom; 364 } 365 366 /* If the first fragment is fragmented itself, we split 367 * it to two chunks: the first with data and paged part 368 * and the second, holding only fragments. */ 369 if (skb_has_frag_list(head)) { 370 struct sk_buff *clone; 371 int i, plen = 0; 372 373 if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) { 374 pr_debug("Can't alloc skb\n"); 375 goto out_oom; 376 } 377 clone->next = head->next; 378 head->next = clone; 379 skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; 380 skb_frag_list_init(head); 381 for (i = 0; i < skb_shinfo(head)->nr_frags; i++) 382 plen += skb_frag_size(&skb_shinfo(head)->frags[i]); 383 clone->len = clone->data_len = head->data_len - plen; 384 head->data_len -= clone->len; 385 head->len -= clone->len; 386 clone->csum = 0; 387 clone->ip_summed = head->ip_summed; 388 389 NFCT_FRAG6_CB(clone)->orig = NULL; 390 atomic_add(clone->truesize, &nf_init_frags.mem); 391 } 392 393 /* We have to remove fragment header from datagram and to relocate 394 * header in order to calculate ICV correctly. 
*/ 395 skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0]; 396 memmove(head->head + sizeof(struct frag_hdr), head->head, 397 (head->data - head->head) - sizeof(struct frag_hdr)); 398 head->mac_header += sizeof(struct frag_hdr); 399 head->network_header += sizeof(struct frag_hdr); 400 401 skb_shinfo(head)->frag_list = head->next; 402 skb_reset_transport_header(head); 403 skb_push(head, head->data - skb_network_header(head)); 404 405 for (fp=head->next; fp; fp = fp->next) { 406 head->data_len += fp->len; 407 head->len += fp->len; 408 if (head->ip_summed != fp->ip_summed) 409 head->ip_summed = CHECKSUM_NONE; 410 else if (head->ip_summed == CHECKSUM_COMPLETE) 411 head->csum = csum_add(head->csum, fp->csum); 412 head->truesize += fp->truesize; 413 } 414 atomic_sub(head->truesize, &nf_init_frags.mem); 415 416 head->next = NULL; 417 head->dev = dev; 418 head->tstamp = fq->q.stamp; 419 ipv6_hdr(head)->payload_len = htons(payload_len); 420 421 /* Yes, and fold redundant checksum back. 8) */ 422 if (head->ip_summed == CHECKSUM_COMPLETE) 423 head->csum = csum_partial(skb_network_header(head), 424 skb_network_header_len(head), 425 head->csum); 426 427 fq->q.fragments = NULL; 428 fq->q.fragments_tail = NULL; 429 430 /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ 431 fp = skb_shinfo(head)->frag_list; 432 if (fp && NFCT_FRAG6_CB(fp)->orig == NULL) 433 /* at above code, head skb is divided into two skbs. 
*/ 434 fp = fp->next; 435 436 op = NFCT_FRAG6_CB(head)->orig; 437 for (; fp; fp = fp->next) { 438 struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig; 439 440 op->next = orig; 441 op = orig; 442 NFCT_FRAG6_CB(fp)->orig = NULL; 443 } 444 445 return head; 446 447out_oversize: 448 if (net_ratelimit()) 449 printk(KERN_DEBUG "nf_ct_frag6_reasm: payload len = %d\n", payload_len); 450 goto out_fail; 451out_oom: 452 if (net_ratelimit()) 453 printk(KERN_DEBUG "nf_ct_frag6_reasm: no memory for reassembly\n"); 454out_fail: 455 return NULL; 456} 457 458/* 459 * find the header just before Fragment Header. 460 * 461 * if success return 0 and set ... 462 * (*prevhdrp): the value of "Next Header Field" in the header 463 * just before Fragment Header. 464 * (*prevhoff): the offset of "Next Header Field" in the header 465 * just before Fragment Header. 466 * (*fhoff) : the offset of Fragment Header. 467 * 468 * Based on ipv6_skip_hdr() in net/ipv6/exthdr.c 469 * 470 */ 471static int 472find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) 473{ 474 u8 nexthdr = ipv6_hdr(skb)->nexthdr; 475 const int netoff = skb_network_offset(skb); 476 u8 prev_nhoff = netoff + offsetof(struct ipv6hdr, nexthdr); 477 int start = netoff + sizeof(struct ipv6hdr); 478 int len = skb->len - start; 479 u8 prevhdr = NEXTHDR_IPV6; 480 481 while (nexthdr != NEXTHDR_FRAGMENT) { 482 struct ipv6_opt_hdr hdr; 483 int hdrlen; 484 485 if (!ipv6_ext_hdr(nexthdr)) { 486 return -1; 487 } 488 if (nexthdr == NEXTHDR_NONE) { 489 pr_debug("next header is none\n"); 490 return -1; 491 } 492 if (len < (int)sizeof(struct ipv6_opt_hdr)) { 493 pr_debug("too short\n"); 494 return -1; 495 } 496 if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) 497 BUG(); 498 if (nexthdr == NEXTHDR_AUTH) 499 hdrlen = (hdr.hdrlen+2)<<2; 500 else 501 hdrlen = ipv6_optlen(&hdr); 502 503 prevhdr = nexthdr; 504 prev_nhoff = start; 505 506 nexthdr = hdr.nexthdr; 507 len -= hdrlen; 508 start += hdrlen; 509 } 510 511 if (len < 0) 512 
return -1; 513 514 *prevhdrp = prevhdr; 515 *prevhoff = prev_nhoff; 516 *fhoff = start; 517 518 return 0; 519} 520 521struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) 522{ 523 struct sk_buff *clone; 524 struct net_device *dev = skb->dev; 525 struct frag_hdr *fhdr; 526 struct nf_ct_frag6_queue *fq; 527 struct ipv6hdr *hdr; 528 int fhoff, nhoff; 529 u8 prevhdr; 530 struct sk_buff *ret_skb = NULL; 531 532 /* Jumbo payload inhibits frag. header */ 533 if (ipv6_hdr(skb)->payload_len == 0) { 534 pr_debug("payload len = 0\n"); 535 return skb; 536 } 537 538 if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0) 539 return skb; 540 541 clone = skb_clone(skb, GFP_ATOMIC); 542 if (clone == NULL) { 543 pr_debug("Can't clone skb\n"); 544 return skb; 545 } 546 547 NFCT_FRAG6_CB(clone)->orig = skb; 548 549 if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) { 550 pr_debug("message is too short.\n"); 551 goto ret_orig; 552 } 553 554 skb_set_transport_header(clone, fhoff); 555 hdr = ipv6_hdr(clone); 556 fhdr = (struct frag_hdr *)skb_transport_header(clone); 557 558 if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh) 559 nf_ct_frag6_evictor(); 560 561 fq = fq_find(fhdr->identification, user, &hdr->saddr, &hdr->daddr); 562 if (fq == NULL) { 563 pr_debug("Can't find and can't create new queue\n"); 564 goto ret_orig; 565 } 566 567 spin_lock_bh(&fq->q.lock); 568 569 if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { 570 spin_unlock_bh(&fq->q.lock); 571 pr_debug("Can't insert skb to queue\n"); 572 fq_put(fq); 573 goto ret_orig; 574 } 575 576 if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && 577 fq->q.meat == fq->q.len) { 578 ret_skb = nf_ct_frag6_reasm(fq, dev); 579 if (ret_skb == NULL) 580 pr_debug("Can't reassemble fragmented packets\n"); 581 } 582 spin_unlock_bh(&fq->q.lock); 583 584 fq_put(fq); 585 return ret_skb; 586 587ret_orig: 588 kfree_skb(clone); 589 return skb; 590} 591 592void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff 
*skb, 593 struct net_device *in, struct net_device *out, 594 int (*okfn)(struct sk_buff *)) 595{ 596 struct sk_buff *s, *s2; 597 598 for (s = NFCT_FRAG6_CB(skb)->orig; s;) { 599 nf_conntrack_put_reasm(s->nfct_reasm); 600 nf_conntrack_get_reasm(skb); 601 s->nfct_reasm = skb; 602 603 s2 = s->next; 604 s->next = NULL; 605 606 NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, in, out, okfn, 607 NF_IP6_PRI_CONNTRACK_DEFRAG + 1); 608 s = s2; 609 } 610 nf_conntrack_put_reasm(skb); 611} 612 613int nf_ct_frag6_init(void) 614{ 615 nf_frags.hashfn = nf_hashfn; 616 nf_frags.constructor = ip6_frag_init; 617 nf_frags.destructor = NULL; 618 nf_frags.skb_free = nf_skb_free; 619 nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); 620 nf_frags.match = ip6_frag_match; 621 nf_frags.frag_expire = nf_ct_frag6_expire; 622 nf_frags.secret_interval = 10 * 60 * HZ; 623 nf_init_frags.timeout = IPV6_FRAG_TIMEOUT; 624 nf_init_frags.high_thresh = IPV6_FRAG_HIGH_THRESH; 625 nf_init_frags.low_thresh = IPV6_FRAG_LOW_THRESH; 626 inet_frags_init_net(&nf_init_frags); 627 inet_frags_init(&nf_frags); 628 629#ifdef CONFIG_SYSCTL 630 nf_ct_frag6_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path, 631 nf_ct_frag6_sysctl_table); 632 if (!nf_ct_frag6_sysctl_header) { 633 inet_frags_fini(&nf_frags); 634 return -ENOMEM; 635 } 636#endif 637 638 return 0; 639} 640 641void nf_ct_frag6_cleanup(void) 642{ 643#ifdef CONFIG_SYSCTL 644 unregister_sysctl_table(nf_ct_frag6_sysctl_header); 645 nf_ct_frag6_sysctl_header = NULL; 646#endif 647 inet_frags_fini(&nf_frags); 648 649 nf_init_frags.low_thresh = 0; 650 nf_ct_frag6_evictor(); 651} 652