nf_conntrack_core.c revision 78f3648601fdc7a8166748bbd6d0555a88efa24a
/* Connection state tracking for netfilter.  This is separated from,
   but required by, the NAT layer; it can also be used by an iptables
   extension. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/socket.h>
#include <linux/mm.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>

#define NF_CONNTRACK_VERSION	"0.5.0"

int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
				      enum nf_nat_manip_type manip,
				      struct nlattr *attr) __read_mostly;
EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook);

DEFINE_SPINLOCK(nf_conntrack_lock);
EXPORT_SYMBOL_GPL(nf_conntrack_lock);

unsigned int nf_conntrack_htable_size __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);

unsigned int nf_conntrack_max __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_max);

struct nf_conn nf_conntrack_untracked __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_untracked);

static struct kmem_cache *nf_conntrack_cachep __read_mostly;

static int nf_conntrack_hash_rnd_initted;
static unsigned int nf_conntrack_hash_rnd;

static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
				  unsigned int size, unsigned int rnd)
{
	unsigned int n;
	u_int32_t h;

	/* The direction must be ignored, so we hash everything up to the
	 * destination ports (which is a multiple of 4) and treat the last
	 * three bytes manually.
	 */
	n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
	h = jhash2((u32 *)tuple, n,
		   rnd ^ (((__force __u16)tuple->dst.u.all << 16) |
			  tuple->dst.protonum));

	return ((u64)h * size) >> 32;
}
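/* Worked example of the scaling above (illustrative, not part of the
 * original source): the final line maps the 32-bit hash h onto a bucket
 * as (h * size) / 2^32.  For h = 0x80000000 and size = 16384 that gives
 * bucket (2^31 * 2^14) >> 32 = 8192, i.e. the hash space is scaled
 * linearly onto the table without the division a "h % size" would cost. */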
static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
{
	return __hash_conntrack(tuple, nf_conntrack_htable_size,
				nf_conntrack_hash_rnd);
}

bool
nf_ct_get_tuple(const struct sk_buff *skb,
		unsigned int nhoff,
		unsigned int dataoff,
		u_int16_t l3num,
		u_int8_t protonum,
		struct nf_conntrack_tuple *tuple,
		const struct nf_conntrack_l3proto *l3proto,
		const struct nf_conntrack_l4proto *l4proto)
{
	memset(tuple, 0, sizeof(*tuple));

	tuple->src.l3num = l3num;
	if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
		return false;

	tuple->dst.protonum = protonum;
	tuple->dst.dir = IP_CT_DIR_ORIGINAL;

	return l4proto->pkt_to_tuple(skb, dataoff, tuple);
}
EXPORT_SYMBOL_GPL(nf_ct_get_tuple);

bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
		       u_int16_t l3num, struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_l3proto *l3proto;
	struct nf_conntrack_l4proto *l4proto;
	unsigned int protoff;
	u_int8_t protonum;
	int ret;

	rcu_read_lock();

	l3proto = __nf_ct_l3proto_find(l3num);
	ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum);
	if (ret != NF_ACCEPT) {
		rcu_read_unlock();
		return false;
	}

	l4proto = __nf_ct_l4proto_find(l3num, protonum);

	ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, tuple,
			      l3proto, l4proto);

	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);

bool
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
		   const struct nf_conntrack_tuple *orig,
		   const struct nf_conntrack_l3proto *l3proto,
		   const struct nf_conntrack_l4proto *l4proto)
{
	memset(inverse, 0, sizeof(*inverse));

	inverse->src.l3num = orig->src.l3num;
	if (l3proto->invert_tuple(inverse, orig) == 0)
		return false;

	inverse->dst.dir = !orig->dst.dir;

	inverse->dst.protonum = orig->dst.protonum;
	return l4proto->invert_tuple(inverse, orig);
}
EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
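/* Usage sketch (illustrative; the caller and its return handling are
 * assumptions, not code from this file): a caller that only has an skb
 * and the layer-3 family can fill a tuple via the wrapper above:
 *
 *	struct nf_conntrack_tuple tuple;
 *
 *	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
 *			       PF_INET, &tuple))
 *		return false;	(header too short or malformed)
 *
 * nf_ct_get_tuplepr() looks up the l3/l4 protocol handlers itself,
 * while nf_ct_get_tuple() expects the caller to pass them in. */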
static void
clean_from_lists(struct nf_conn *ct)
{
	pr_debug("clean_from_lists(%p)\n", ct);
	hlist_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
	hlist_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode);

	/* Destroy all pending expectations */
	nf_ct_remove_expectations(ct);
}

static void
destroy_conntrack(struct nf_conntrack *nfct)
{
	struct nf_conn *ct = (struct nf_conn *)nfct;
	struct net *net = nf_ct_net(ct);
	struct nf_conntrack_l4proto *l4proto;

	pr_debug("destroy_conntrack(%p)\n", ct);
	NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
	NF_CT_ASSERT(!timer_pending(&ct->timeout));

	if (!test_bit(IPS_DYING_BIT, &ct->status))
		nf_conntrack_event(IPCT_DESTROY, ct);
	set_bit(IPS_DYING_BIT, &ct->status);

	/* To make sure we don't get any weird locking issues here:
	 * destroy_conntrack() MUST NOT be called with a write lock
	 * to nf_conntrack_lock!!! -HW */
	rcu_read_lock();
	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
	if (l4proto && l4proto->destroy)
		l4proto->destroy(ct);

	rcu_read_unlock();

	spin_lock_bh(&nf_conntrack_lock);
	/* Expectations will have been removed in clean_from_lists,
	 * except TFTP can create an expectation on the first packet,
	 * before connection is in the list, so we need to clean here,
	 * too. */
	nf_ct_remove_expectations(ct);

	/* We overload first tuple to link into unconfirmed list. */
	if (!nf_ct_is_confirmed(ct)) {
		BUG_ON(hlist_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode));
		hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);
	}

	NF_CT_STAT_INC(net, delete);
	spin_unlock_bh(&nf_conntrack_lock);

	if (ct->master)
		nf_ct_put(ct->master);

	pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
	nf_conntrack_free(ct);
}

static void death_by_timeout(unsigned long ul_conntrack)
{
	struct nf_conn *ct = (void *)ul_conntrack;
	struct net *net = nf_ct_net(ct);
	struct nf_conn_help *help = nfct_help(ct);
	struct nf_conntrack_helper *helper;

	if (help) {
		rcu_read_lock();
		helper = rcu_dereference(help->helper);
		if (helper && helper->destroy)
			helper->destroy(ct);
		rcu_read_unlock();
	}

	spin_lock_bh(&nf_conntrack_lock);
	/* Inside lock so preempt is disabled on module removal path.
	 * Otherwise we can get spurious warnings. */
	NF_CT_STAT_INC(net, delete_list);
	clean_from_lists(ct);
	spin_unlock_bh(&nf_conntrack_lock);
	nf_ct_put(ct);
}

struct nf_conntrack_tuple_hash *
__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_tuple_hash *h;
	struct hlist_node *n;
	unsigned int hash = hash_conntrack(tuple);

	/* Disable BHs the entire time since we normally need to disable them
	 * at least once for the stats anyway.
	 */
	local_bh_disable();
	hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) {
		if (nf_ct_tuple_equal(tuple, &h->tuple)) {
			NF_CT_STAT_INC(net, found);
			local_bh_enable();
			return h;
		}
		NF_CT_STAT_INC(net, searched);
	}
	local_bh_enable();

	return NULL;
}
EXPORT_SYMBOL_GPL(__nf_conntrack_find);

/* Find a connection corresponding to a tuple. */
struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;

	rcu_read_lock();
	h = __nf_conntrack_find(net, tuple);
	if (h) {
		ct = nf_ct_tuplehash_to_ctrack(h);
		if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
			h = NULL;
	}
	rcu_read_unlock();

	return h;
}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
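/* Lookup sketch (illustrative, assumed caller code): users of
 * nf_conntrack_find_get() receive a reference-counted entry and must
 * drop the reference themselves:
 *
 *	h = nf_conntrack_find_get(net, &tuple);
 *	if (h) {
 *		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
 *		... use ct ...
 *		nf_ct_put(ct);
 *	}
 *
 * The atomic_inc_not_zero() above is what makes the RCU lookup safe:
 * an entry whose refcount has already hit zero is reported as a miss
 * rather than resurrected. */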
static void __nf_conntrack_hash_insert(struct nf_conn *ct,
				       unsigned int hash,
				       unsigned int repl_hash)
{
	struct net *net = nf_ct_net(ct);

	hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode,
			   &net->ct.hash[hash]);
	hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode,
			   &net->ct.hash[repl_hash]);
}

void nf_conntrack_hash_insert(struct nf_conn *ct)
{
	unsigned int hash, repl_hash;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	__nf_conntrack_hash_insert(ct, hash, repl_hash);
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert);

/* Confirm a connection given skb; places it in hash table */
int
__nf_conntrack_confirm(struct sk_buff *skb)
{
	unsigned int hash, repl_hash;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
	struct nf_conn_help *help;
	struct hlist_node *n;
	enum ip_conntrack_info ctinfo;
	struct net *net;

	ct = nf_ct_get(skb, &ctinfo);
	net = nf_ct_net(ct);

	/* ipt_REJECT uses nf_conntrack_attach to attach related
	   ICMP/TCP RST packets in other direction.  Actual packet
	   which created connection will be IP_CT_NEW or for an
	   expected connection, IP_CT_RELATED. */
	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
		return NF_ACCEPT;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	/* We're not in hash table, and we refuse to set up related
	   connections for unconfirmed conns.  But packet copies and
	   REJECT will give spurious warnings here. */
	/* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

	/* No external references means no one else could have
	   confirmed us. */
	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
	pr_debug("Confirming conntrack %p\n", ct);

	spin_lock_bh(&nf_conntrack_lock);

	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash.  If there is, we lost race. */
	hlist_for_each_entry(h, n, &net->ct.hash[hash], hnode)
		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				      &h->tuple))
			goto out;
	hlist_for_each_entry(h, n, &net->ct.hash[repl_hash], hnode)
		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				      &h->tuple))
			goto out;

	/* Remove from unconfirmed list */
	hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode);

	__nf_conntrack_hash_insert(ct, hash, repl_hash);
	/* Timer relative to confirmation time, not original
	   setting time, otherwise we'd get timer wrap in
	   weird delay cases. */
	ct->timeout.expires += jiffies;
	add_timer(&ct->timeout);
	atomic_inc(&ct->ct_general.use);
	set_bit(IPS_CONFIRMED_BIT, &ct->status);
	NF_CT_STAT_INC(net, insert);
	spin_unlock_bh(&nf_conntrack_lock);
	help = nfct_help(ct);
	if (help && help->helper)
		nf_conntrack_event_cache(IPCT_HELPER, ct);
#ifdef CONFIG_NF_NAT_NEEDED
	if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
	    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
		nf_conntrack_event_cache(IPCT_NATINFO, ct);
#endif
	nf_conntrack_event_cache(master_ct(ct) ?
				 IPCT_RELATED : IPCT_NEW, ct);
	return NF_ACCEPT;

out:
	NF_CT_STAT_INC(net, insert_failed);
	spin_unlock_bh(&nf_conntrack_lock);
	return NF_DROP;
}
EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);
/* Returns true if a connection corresponds to the tuple (required
   for NAT). */
int
nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
			 const struct nf_conn *ignored_conntrack)
{
	struct net *net = nf_ct_net(ignored_conntrack);
	struct nf_conntrack_tuple_hash *h;
	struct hlist_node *n;
	unsigned int hash = hash_conntrack(tuple);

	/* Disable BHs the entire time since we need to disable them at
	 * least once for the stats anyway.
	 */
	rcu_read_lock_bh();
	hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) {
		if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
		    nf_ct_tuple_equal(tuple, &h->tuple)) {
			NF_CT_STAT_INC(net, found);
			rcu_read_unlock_bh();
			return 1;
		}
		NF_CT_STAT_INC(net, searched);
	}
	rcu_read_unlock_bh();

	return 0;
}
EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);

#define NF_CT_EVICTION_RANGE	8
/* There's a small race here where we may free a just-assured
   connection.  Too bad: we're in trouble anyway. */
static noinline int early_drop(struct net *net, unsigned int hash)
{
	/* Use oldest entry, which is roughly LRU */
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct = NULL, *tmp;
	struct hlist_node *n;
	unsigned int i, cnt = 0;
	int dropped = 0;

	rcu_read_lock();
	for (i = 0; i < nf_conntrack_htable_size; i++) {
		hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash],
					 hnode) {
			tmp = nf_ct_tuplehash_to_ctrack(h);
			if (!test_bit(IPS_ASSURED_BIT, &tmp->status))
				ct = tmp;
			cnt++;
		}

		if (ct && unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
			ct = NULL;
		if (ct || cnt >= NF_CT_EVICTION_RANGE)
			break;
		hash = (hash + 1) % nf_conntrack_htable_size;
	}
	rcu_read_unlock();

	if (!ct)
		return dropped;

	if (del_timer(&ct->timeout)) {
		death_by_timeout((unsigned long)ct);
		dropped = 1;
		NF_CT_STAT_INC_ATOMIC(net, early_drop);
	}
	nf_ct_put(ct);
	return dropped;
}

struct nf_conn *nf_conntrack_alloc(struct net *net,
				   const struct nf_conntrack_tuple *orig,
				   const struct nf_conntrack_tuple *repl,
				   gfp_t gfp)
{
	struct nf_conn *ct;

	if (unlikely(!nf_conntrack_hash_rnd_initted)) {
		get_random_bytes(&nf_conntrack_hash_rnd,
				 sizeof(nf_conntrack_hash_rnd));
		nf_conntrack_hash_rnd_initted = 1;
	}

	/* We don't want any race condition at early drop stage */
	atomic_inc(&net->ct.count);

	if (nf_conntrack_max &&
	    unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
		unsigned int hash = hash_conntrack(orig);
		if (!early_drop(net, hash)) {
			atomic_dec(&net->ct.count);
			if (net_ratelimit())
				printk(KERN_WARNING
				       "nf_conntrack: table full, dropping"
				       " packet.\n");
			return ERR_PTR(-ENOMEM);
		}
	}

	ct = kmem_cache_zalloc(nf_conntrack_cachep, gfp);
	if (ct == NULL) {
		pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n");
		atomic_dec(&net->ct.count);
		return ERR_PTR(-ENOMEM);
	}

	atomic_set(&ct->ct_general.use, 1);
	ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
	ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
	/* Don't set timer yet: wait for confirmation */
	setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
#ifdef CONFIG_NET_NS
	ct->ct_net = net;
#endif
	INIT_RCU_HEAD(&ct->rcu);

	return ct;
}
EXPORT_SYMBOL_GPL(nf_conntrack_alloc);

static void nf_conntrack_free_rcu(struct rcu_head *head)
{
	struct nf_conn *ct = container_of(head, struct nf_conn, rcu);

	nf_ct_ext_free(ct);
	kmem_cache_free(nf_conntrack_cachep, ct);
}

void nf_conntrack_free(struct nf_conn *ct)
{
	struct net *net = nf_ct_net(ct);

	nf_ct_ext_destroy(ct);
	atomic_dec(&net->ct.count);
	call_rcu(&ct->rcu, nf_conntrack_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_conntrack_free);
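/* Error-handling sketch (illustrative): nf_conntrack_alloc() signals
 * table pressure with ERR_PTR(-ENOMEM), not NULL, so callers must test
 * with IS_ERR() rather than a NULL check:
 *
 *	ct = nf_conntrack_alloc(net, &orig, &repl, GFP_ATOMIC);
 *	if (IS_ERR(ct))
 *		return PTR_ERR(ct);
 *	...
 *	nf_conntrack_free(ct);	(on any later failure path)
 *
 * init_conntrack() below follows exactly this pattern. */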
/* Allocate a new conntrack: we return -ENOMEM if classification
   failed due to stress.  Otherwise it really is unclassifiable. */
static struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net,
	       const struct nf_conntrack_tuple *tuple,
	       struct nf_conntrack_l3proto *l3proto,
	       struct nf_conntrack_l4proto *l4proto,
	       struct sk_buff *skb,
	       unsigned int dataoff)
{
	struct nf_conn *ct;
	struct nf_conn_help *help;
	struct nf_conntrack_tuple repl_tuple;
	struct nf_conntrack_expect *exp;

	if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
		pr_debug("Can't invert tuple.\n");
		return NULL;
	}

	ct = nf_conntrack_alloc(net, tuple, &repl_tuple, GFP_ATOMIC);
	if (IS_ERR(ct)) {
		pr_debug("Can't allocate conntrack.\n");
		return (struct nf_conntrack_tuple_hash *)ct;
	}

	if (!l4proto->new(ct, skb, dataoff)) {
		nf_conntrack_free(ct);
		pr_debug("init conntrack: can't track with proto module\n");
		return NULL;
	}

	nf_ct_acct_ext_add(ct, GFP_ATOMIC);

	spin_lock_bh(&nf_conntrack_lock);
	exp = nf_ct_find_expectation(net, tuple);
	if (exp) {
		pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
			 ct, exp);
		/* Welcome, Mr. Bond.  We've been expecting you... */
		__set_bit(IPS_EXPECTED_BIT, &ct->status);
		ct->master = exp->master;
		if (exp->helper) {
			help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
			if (help)
				rcu_assign_pointer(help->helper, exp->helper);
		}

#ifdef CONFIG_NF_CONNTRACK_MARK
		ct->mark = exp->master->mark;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
		ct->secmark = exp->master->secmark;
#endif
		nf_conntrack_get(&ct->master->ct_general);
		NF_CT_STAT_INC(net, expect_new);
	} else {
		__nf_ct_try_assign_helper(ct, GFP_ATOMIC);
		NF_CT_STAT_INC(net, new);
	}

	/* Overload tuple linked list to put us in unconfirmed list. */
	hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode,
		       &net->ct.unconfirmed);

	spin_unlock_bh(&nf_conntrack_lock);

	if (exp) {
		if (exp->expectfn)
			exp->expectfn(ct, exp);
		nf_ct_expect_put(exp);
	}

	return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
}

/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
static inline struct nf_conn *
resolve_normal_ct(struct net *net,
		  struct sk_buff *skb,
		  unsigned int dataoff,
		  u_int16_t l3num,
		  u_int8_t protonum,
		  struct nf_conntrack_l3proto *l3proto,
		  struct nf_conntrack_l4proto *l4proto,
		  int *set_reply,
		  enum ip_conntrack_info *ctinfo)
{
	struct nf_conntrack_tuple tuple;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;

	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
			     dataoff, l3num, protonum, &tuple, l3proto,
			     l4proto)) {
		pr_debug("resolve_normal_ct: Can't get tuple\n");
		return NULL;
	}

	/* look for tuple match */
	h = nf_conntrack_find_get(net, &tuple);
	if (!h) {
		h = init_conntrack(net, &tuple, l3proto, l4proto, skb, dataoff);
		if (!h)
			return NULL;
		if (IS_ERR(h))
			return (void *)h;
	}
	ct = nf_ct_tuplehash_to_ctrack(h);

	/* It exists; we have (non-exclusive) reference. */
	if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
		*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
		/* Please set reply bit if this packet OK */
		*set_reply = 1;
	} else {
		/* Once we've had two way comms, always ESTABLISHED. */
		if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
			pr_debug("nf_conntrack_in: normal packet for %p\n", ct);
			*ctinfo = IP_CT_ESTABLISHED;
		} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
			pr_debug("nf_conntrack_in: related packet for %p\n",
				 ct);
			*ctinfo = IP_CT_RELATED;
		} else {
			pr_debug("nf_conntrack_in: new packet for %p\n", ct);
			*ctinfo = IP_CT_NEW;
		}
		*set_reply = 0;
	}
	skb->nfct = &ct->ct_general;
	skb->nfctinfo = *ctinfo;
	return ct;
}

unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
		struct sk_buff *skb)
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	struct nf_conntrack_l3proto *l3proto;
	struct nf_conntrack_l4proto *l4proto;
	unsigned int dataoff;
	u_int8_t protonum;
	int set_reply = 0;
	int ret;

	/* Previously seen (loopback or untracked)?  Ignore. */
	if (skb->nfct) {
		NF_CT_STAT_INC_ATOMIC(net, ignore);
		return NF_ACCEPT;
	}

	/* rcu_read_lock()ed by nf_hook_slow */
	l3proto = __nf_ct_l3proto_find(pf);
	ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
				   &dataoff, &protonum);
	if (ret <= 0) {
		pr_debug("not prepared to track yet or error occurred\n");
		NF_CT_STAT_INC_ATOMIC(net, error);
		NF_CT_STAT_INC_ATOMIC(net, invalid);
		return -ret;
	}

	l4proto = __nf_ct_l4proto_find(pf, protonum);

	/* It may be a special packet, error, unclean...
	 * inverse of the return code tells the netfilter
	 * core what to do with the packet. */
	if (l4proto->error != NULL) {
		ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum);
		if (ret <= 0) {
			NF_CT_STAT_INC_ATOMIC(net, error);
			NF_CT_STAT_INC_ATOMIC(net, invalid);
			return -ret;
		}
	}

	ct = resolve_normal_ct(net, skb, dataoff, pf, protonum,
			       l3proto, l4proto, &set_reply, &ctinfo);
	if (!ct) {
		/* Not valid part of a connection */
		NF_CT_STAT_INC_ATOMIC(net, invalid);
		return NF_ACCEPT;
	}

	if (IS_ERR(ct)) {
		/* Too stressed to deal. */
		NF_CT_STAT_INC_ATOMIC(net, drop);
		return NF_DROP;
	}

	NF_CT_ASSERT(skb->nfct);

	ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum);
	if (ret < 0) {
		/* Invalid: inverse of the return code tells
		 * the netfilter core what to do */
		pr_debug("nf_conntrack_in: Can't track with proto module\n");
		nf_conntrack_put(skb->nfct);
		skb->nfct = NULL;
		NF_CT_STAT_INC_ATOMIC(net, invalid);
		if (ret == -NF_DROP)
			NF_CT_STAT_INC_ATOMIC(net, drop);
		return -ret;
	}

	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
		nf_conntrack_event_cache(IPCT_STATUS, ct);

	return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_in);

bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
			  const struct nf_conntrack_tuple *orig)
{
	bool ret;

	rcu_read_lock();
	ret = nf_ct_invert_tuple(inverse, orig,
				 __nf_ct_l3proto_find(orig->src.l3num),
				 __nf_ct_l4proto_find(orig->src.l3num,
						      orig->dst.protonum));
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr);
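/* Hook sketch (illustrative; the wrapper below is an assumption
 * modelled on the per-family hook functions in files such as
 * nf_conntrack_l3proto_ipv4.c, not code from this file): the netfilter
 * hooks feed packets to nf_conntrack_in() and pass its verdict on:
 *
 *	static unsigned int example_conntrack_in(unsigned int hooknum,
 *					struct sk_buff *skb,
 *					const struct net_device *in,
 *					const struct net_device *out,
 *					int (*okfn)(struct sk_buff *))
 *	{
 *		return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb);
 *	}
 */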
/* Alter reply tuple (maybe alter helper).  This is for NAT, and is
   implicitly racy: see __nf_conntrack_confirm */
void nf_conntrack_alter_reply(struct nf_conn *ct,
			      const struct nf_conntrack_tuple *newreply)
{
	struct nf_conn_help *help = nfct_help(ct);

	/* Should be unconfirmed, so not in hash table yet */
	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));

	pr_debug("Altering reply tuple of %p to ", ct);
	nf_ct_dump_tuple(newreply);

	ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
	if (ct->master || (help && !hlist_empty(&help->expectations)))
		return;

	rcu_read_lock();
	__nf_ct_try_assign_helper(ct, GFP_ATOMIC);
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);

/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
void __nf_ct_refresh_acct(struct nf_conn *ct,
			  enum ip_conntrack_info ctinfo,
			  const struct sk_buff *skb,
			  unsigned long extra_jiffies,
			  int do_acct)
{
	int event = 0;

	NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
	NF_CT_ASSERT(skb);

	spin_lock_bh(&nf_conntrack_lock);

	/* Only update if this is not a fixed timeout */
	if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
		goto acct;

	/* If not in hash table, timer will not be active yet */
	if (!nf_ct_is_confirmed(ct)) {
		ct->timeout.expires = extra_jiffies;
		event = IPCT_REFRESH;
	} else {
		unsigned long newtime = jiffies + extra_jiffies;

		/* Only update the timeout if the new timeout is at least
		   HZ jiffies from the old timeout.  Need del_timer for race
		   avoidance (may already be dying). */
		if (newtime - ct->timeout.expires >= HZ
		    && del_timer(&ct->timeout)) {
			ct->timeout.expires = newtime;
			add_timer(&ct->timeout);
			event = IPCT_REFRESH;
		}
	}

acct:
	if (do_acct) {
		struct nf_conn_counter *acct;

		acct = nf_conn_acct_find(ct);
		if (acct) {
			acct[CTINFO2DIR(ctinfo)].packets++;
			acct[CTINFO2DIR(ctinfo)].bytes +=
				skb->len - skb_network_offset(skb);
		}
	}

	spin_unlock_bh(&nf_conntrack_lock);

	/* must be unlocked when calling event cache */
	if (event)
		nf_conntrack_event_cache(event, ct);
}
EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);

bool __nf_ct_kill_acct(struct nf_conn *ct,
		       enum ip_conntrack_info ctinfo,
		       const struct sk_buff *skb,
		       int do_acct)
{
	if (do_acct) {
		struct nf_conn_counter *acct;

		spin_lock_bh(&nf_conntrack_lock);
		acct = nf_conn_acct_find(ct);
		if (acct) {
			acct[CTINFO2DIR(ctinfo)].packets++;
			acct[CTINFO2DIR(ctinfo)].bytes +=
				skb->len - skb_network_offset(skb);
		}
		spin_unlock_bh(&nf_conntrack_lock);
	}

	if (del_timer(&ct->timeout)) {
		ct->timeout.function((unsigned long)ct);
		return true;
	}
	return false;
}
EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
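/* Timeout sketch (illustrative; "my_timeout" is a made-up value): a
 * protocol's ->packet() handler typically re-arms the connection timer
 * through the nf_ct_refresh_acct() wrapper, which calls
 * __nf_ct_refresh_acct() with do_acct == 1:
 *
 *	nf_ct_refresh_acct(ct, ctinfo, skb, my_timeout);
 *
 * so a single call both extends ct->timeout and, when the accounting
 * extension is present, bumps the per-direction counters. */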
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)

#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
#include <linux/mutex.h>

/* Generic function for tcp/udp/sctp/dccp and alike.  This needs to be
 * in ip_conntrack_core, since we don't want the protocols to autoload
 * or depend on ctnetlink */
int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
			       const struct nf_conntrack_tuple *tuple)
{
	NLA_PUT_BE16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port);
	NLA_PUT_BE16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port);
	return 0;

nla_put_failure:
	return -1;
}
EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr);

const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = {
	[CTA_PROTO_SRC_PORT]  = { .type = NLA_U16 },
	[CTA_PROTO_DST_PORT]  = { .type = NLA_U16 },
};
EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy);

int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
			       struct nf_conntrack_tuple *t)
{
	if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT])
		return -EINVAL;

	t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]);
	t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
#endif

/* Used by ipt_REJECT and ip6t_REJECT. */
static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;

	/* This ICMP is in reverse direction to the packet which caused it */
	ct = nf_ct_get(skb, &ctinfo);
	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
		ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
	else
		ctinfo = IP_CT_RELATED;

	/* Attach to new skbuff, and increment count */
	nskb->nfct = &ct->ct_general;
	nskb->nfctinfo = ctinfo;
	nf_conntrack_get(nskb->nfct);
}

/* Bring out ya dead! */
static struct nf_conn *
get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
		void *data, unsigned int *bucket)
{
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
	struct hlist_node *n;

	spin_lock_bh(&nf_conntrack_lock);
	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
		hlist_for_each_entry(h, n, &net->ct.hash[*bucket], hnode) {
			ct = nf_ct_tuplehash_to_ctrack(h);
			if (iter(ct, data))
				goto found;
		}
	}
	hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode) {
		ct = nf_ct_tuplehash_to_ctrack(h);
		if (iter(ct, data))
			set_bit(IPS_DYING_BIT, &ct->status);
	}
	spin_unlock_bh(&nf_conntrack_lock);
	return NULL;
found:
	atomic_inc(&ct->ct_general.use);
	spin_unlock_bh(&nf_conntrack_lock);
	return ct;
}

void nf_ct_iterate_cleanup(struct net *net,
			   int (*iter)(struct nf_conn *i, void *data),
			   void *data)
{
	struct nf_conn *ct;
	unsigned int bucket = 0;

	while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
		/* Time to push up daisies... */
		if (del_timer(&ct->timeout))
			death_by_timeout((unsigned long)ct);
		/* ... else the timer will get him soon. */

		nf_ct_put(ct);
	}
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);

struct __nf_ct_flush_report {
	u32 pid;
	int report;
};

static int kill_all(struct nf_conn *i, void *data)
{
	struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;

	/* get_next_corpse sets the dying bit for us */
	nf_conntrack_event_report(IPCT_DESTROY,
				  i,
				  fr->pid,
				  fr->report);
	return 1;
}

void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int size)
{
	if (vmalloced)
		vfree(hash);
	else
		free_pages((unsigned long)hash,
			   get_order(sizeof(struct hlist_head) * size));
}
EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);

void nf_conntrack_flush(struct net *net, u32 pid, int report)
{
	struct __nf_ct_flush_report fr = {
		.pid = pid,
		.report = report,
	};
	nf_ct_iterate_cleanup(net, kill_all, &fr);
}
EXPORT_SYMBOL_GPL(nf_conntrack_flush);

static void nf_conntrack_cleanup_init_net(void)
{
	nf_conntrack_helper_fini();
	nf_conntrack_proto_fini();
	kmem_cache_destroy(nf_conntrack_cachep);
}

static void nf_conntrack_cleanup_net(struct net *net)
{
	nf_ct_event_cache_flush(net);
	nf_conntrack_ecache_fini(net);
 i_see_dead_people:
	nf_conntrack_flush(net, 0, 0);
	if (atomic_read(&net->ct.count) != 0) {
		schedule();
		goto i_see_dead_people;
	}
	/* wait until all references to nf_conntrack_untracked are dropped */
	while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
		schedule();

	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
			     nf_conntrack_htable_size);
	nf_conntrack_acct_fini(net);
	nf_conntrack_expect_fini(net);
	free_percpu(net->ct.stat);
}

/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall. */
void nf_conntrack_cleanup(struct net *net)
{
	if (net_eq(net, &init_net))
		rcu_assign_pointer(ip_ct_attach, NULL);

	/* This makes sure all current packets have passed through
	   netfilter framework.  Roll on, two-stage module
	   delete... */
	synchronize_net();

	nf_conntrack_cleanup_net(net);

	if (net_eq(net, &init_net)) {
		rcu_assign_pointer(nf_ct_destroy, NULL);
		nf_conntrack_cleanup_init_net();
	}
}

struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced)
{
	struct hlist_head *hash;
	unsigned int size, i;

	*vmalloced = 0;

	size = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_head));
	hash = (void *)__get_free_pages(GFP_KERNEL|__GFP_NOWARN,
					get_order(sizeof(struct hlist_head)
						  * size));
	if (!hash) {
		*vmalloced = 1;
		printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
		hash = vmalloc(sizeof(struct hlist_head) * size);
	}

	if (hash)
		for (i = 0; i < size; i++)
			INIT_HLIST_HEAD(&hash[i]);

	return hash;
}
EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
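/* Sizing example for nf_ct_alloc_hashtable() (illustrative numbers):
 * the bucket count is rounded up so whole pages are used.  With
 * 4096-byte pages and 8-byte hlist_heads, PAGE_SIZE / sizeof(struct
 * hlist_head) is 512, so a request for 1000 buckets comes back as
 * *sizep = 1024. */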
int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
{
	int i, bucket, vmalloced, old_vmalloced;
	unsigned int hashsize, old_size;
	int rnd;
	struct hlist_head *hash, *old_hash;
	struct nf_conntrack_tuple_hash *h;

	/* On boot, we can set this without any fancy locking. */
	if (!nf_conntrack_htable_size)
		return param_set_uint(val, kp);

	hashsize = simple_strtoul(val, NULL, 0);
	if (!hashsize)
		return -EINVAL;

	hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced);
	if (!hash)
		return -ENOMEM;

	/* We have to rehash for the new table anyway, so we also can
	 * use a new random seed */
	get_random_bytes(&rnd, sizeof(rnd));

	/* Lookups in the old hash might happen in parallel, which means we
	 * might get false negatives during connection lookup. New connections
	 * created because of a false negative won't make it into the hash
	 * though since that requires taking the lock.
	 */
	spin_lock_bh(&nf_conntrack_lock);
	for (i = 0; i < nf_conntrack_htable_size; i++) {
		while (!hlist_empty(&init_net.ct.hash[i])) {
			h = hlist_entry(init_net.ct.hash[i].first,
					struct nf_conntrack_tuple_hash, hnode);
			hlist_del_rcu(&h->hnode);
			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
			hlist_add_head_rcu(&h->hnode, &hash[bucket]);
		}
	}
	old_size = nf_conntrack_htable_size;
	old_vmalloced = init_net.ct.hash_vmalloc;
	old_hash = init_net.ct.hash;

	nf_conntrack_htable_size = hashsize;
	init_net.ct.hash_vmalloc = vmalloced;
	init_net.ct.hash = hash;
	nf_conntrack_hash_rnd = rnd;
	spin_unlock_bh(&nf_conntrack_lock);

	nf_ct_free_hashtable(old_hash, old_vmalloced, old_size);
	return 0;
}
EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);

module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
		  &nf_conntrack_htable_size, 0600);
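/* Runtime resize sketch: thanks to the module_param_call() above, the
 * table can be rebuilt on a live system through the module parameter
 * (the path assumes the usual sysfs layout):
 *
 *	echo 65536 > /sys/module/nf_conntrack/parameters/hashsize
 *
 * which lands in nf_conntrack_set_hashsize(). */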
static int nf_conntrack_init_init_net(void)
{
	int max_factor = 8;
	int ret;

	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
	 * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
	if (!nf_conntrack_htable_size) {
		nf_conntrack_htable_size
			= (((num_physpages << PAGE_SHIFT) / 16384)
			   / sizeof(struct hlist_head));
		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
			nf_conntrack_htable_size = 16384;
		if (nf_conntrack_htable_size < 32)
			nf_conntrack_htable_size = 32;

		/* Use a max. factor of four by default to get the same max as
		 * with the old struct list_heads. When a table size is given
		 * we use the old value of 8 to avoid reducing the max.
		 * entries. */
		max_factor = 4;
	}
	nf_conntrack_max = max_factor * nf_conntrack_htable_size;

	printk("nf_conntrack version %s (%u buckets, %d max)\n",
	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
	       nf_conntrack_max);

	nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
						sizeof(struct nf_conn),
						0, 0, NULL);
	if (!nf_conntrack_cachep) {
		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
		ret = -ENOMEM;
		goto err_cache;
	}

	ret = nf_conntrack_proto_init();
	if (ret < 0)
		goto err_proto;

	ret = nf_conntrack_helper_init();
	if (ret < 0)
		goto err_helper;

	return 0;

err_helper:
	nf_conntrack_proto_fini();
err_proto:
	kmem_cache_destroy(nf_conntrack_cachep);
err_cache:
	return ret;
}

static int nf_conntrack_init_net(struct net *net)
{
	int ret;

	atomic_set(&net->ct.count, 0);
	INIT_HLIST_HEAD(&net->ct.unconfirmed);
	net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
	if (!net->ct.stat) {
		ret = -ENOMEM;
		goto err_stat;
	}
	ret = nf_conntrack_ecache_init(net);
	if (ret < 0)
		goto err_ecache;
	net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
					     &net->ct.hash_vmalloc);
	if (!net->ct.hash) {
		ret = -ENOMEM;
		printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
		goto err_hash;
	}
	ret = nf_conntrack_expect_init(net);
	if (ret < 0)
		goto err_expect;
	ret = nf_conntrack_acct_init(net);
	if (ret < 0)
		goto err_acct;

	/* Set up fake conntrack:
	    - to never be deleted, not in any hashes */
#ifdef CONFIG_NET_NS
	nf_conntrack_untracked.ct_net = &init_net;
#endif
	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
	/*  - and make it look like a confirmed connection */
	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);

	return 0;

err_acct:
	nf_conntrack_expect_fini(net);
err_expect:
	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
			     nf_conntrack_htable_size);
err_hash:
	nf_conntrack_ecache_fini(net);
err_ecache:
	free_percpu(net->ct.stat);
err_stat:
	return ret;
}

int nf_conntrack_init(struct net *net)
{
	int ret;

	if (net_eq(net, &init_net)) {
		ret = nf_conntrack_init_init_net();
		if (ret < 0)
			goto out_init_net;
	}
	ret = nf_conntrack_init_net(net);
	if (ret < 0)
		goto out_net;

	if (net_eq(net, &init_net)) {
		/* For use by REJECT target */
		rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach);
		rcu_assign_pointer(nf_ct_destroy, destroy_conntrack);
	}
	return 0;

out_net:
	if (net_eq(net, &init_net))
		nf_conntrack_cleanup_init_net();
out_init_net:
	return ret;
}