nf_conntrack_core.c revision 7c9728c393dceb724d66d696cfabce82151a78e5
/* Connection state tracking for netfilter.  This is separated from,
   but required by, the NAT layer; it can also be used by an iptables
   extension. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
 *	- new API and handling of conntrack/nat helpers
 *	- now capable of multiple expectations for one master
 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
 *	- add usage/reference counts to ip_conntrack_expect
 *	- export ip_conntrack[_expect]_{find_get,put} functions
 * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
 *	- generalize L3 protocol dependent part.
 * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
 *	- add support for various sizes of conntrack structures.
 * 26 Jan 2006: Harald Welte <laforge@netfilter.org>
 *	- restructure nf_conn (introduce nf_conn_help)
 *	- redesign 'features' so they work as originally intended
 * 26 Feb 2006: Pablo Neira Ayuso <pablo@eurodev.net>
 *	- add support for L3 protocol module load on demand.
 *
 * Derived from net/ipv4/netfilter/ip_conntrack_core.c
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/socket.h>

/* This rwlock protects the main hash table, protocol/helper/expected
   registrations, conntrack timers */
#define ASSERT_READ_LOCK(x)
#define ASSERT_WRITE_LOCK(x)

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_protocol.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <linux/netfilter_ipv4/listhelp.h>

#define NF_CONNTRACK_VERSION	"0.5.0"

#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif

DEFINE_RWLOCK(nf_conntrack_lock);

/* nf_conntrack_standalone needs this */
atomic_t nf_conntrack_count = ATOMIC_INIT(0);

void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL;
LIST_HEAD(nf_conntrack_expect_list);
struct nf_conntrack_protocol **nf_ct_protos[PF_MAX];
struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX];
static LIST_HEAD(helpers);
unsigned int nf_conntrack_htable_size = 0;
int nf_conntrack_max;
struct list_head *nf_conntrack_hash;
static kmem_cache_t *nf_conntrack_expect_cachep;
struct nf_conn nf_conntrack_untracked;
unsigned int nf_ct_log_invalid;
static LIST_HEAD(unconfirmed);
static int nf_conntrack_vmalloc;

static unsigned int nf_conntrack_next_id;
static unsigned int nf_conntrack_expect_next_id;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain);
ATOMIC_NOTIFIER_HEAD(nf_conntrack_expect_chain);

DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache);

/* deliver cached events and clear cache entry - must be called with locally
 * disabled softirqs */
static inline void
__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
{
        DEBUGP("ecache: delivering events for %p\n", ecache->ct);
        if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
            && ecache->events)
                atomic_notifier_call_chain(&nf_conntrack_chain, ecache->events,
                                           ecache->ct);

        ecache->events = 0;
        nf_ct_put(ecache->ct);
        ecache->ct = NULL;
}

/* Deliver all cached events for a particular conntrack. This is called
 * by code prior to async packet handling for freeing the skb */
void nf_ct_deliver_cached_events(const struct nf_conn *ct)
{
        struct nf_conntrack_ecache *ecache;

        local_bh_disable();
        ecache = &__get_cpu_var(nf_conntrack_ecache);
        if (ecache->ct == ct)
                __nf_ct_deliver_cached_events(ecache);
        local_bh_enable();
}

/* Deliver cached events for old pending events, if current conntrack != old */
void __nf_ct_event_cache_init(struct nf_conn *ct)
{
        struct nf_conntrack_ecache *ecache;

        /* take care of delivering potentially old events */
        ecache = &__get_cpu_var(nf_conntrack_ecache);
        BUG_ON(ecache->ct == ct);
        if (ecache->ct)
                __nf_ct_deliver_cached_events(ecache);
        /* initialize for this conntrack/packet */
        ecache->ct = ct;
        nf_conntrack_get(&ct->ct_general);
}

/* flush the event cache - touches other CPU's data and must not be called
 * while packets are still passing through the code */
static void nf_ct_event_cache_flush(void)
{
        struct nf_conntrack_ecache *ecache;
        int cpu;

        for_each_possible_cpu(cpu) {
                ecache = &per_cpu(nf_conntrack_ecache, cpu);
                if (ecache->ct)
                        nf_ct_put(ecache->ct);
        }
}
#else
static inline void nf_ct_event_cache_flush(void) {}
#endif /* CONFIG_NF_CONNTRACK_EVENTS */

DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat);
EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat);
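
/*
 * Editor's sketch (not part of the original file): how the per-CPU event
 * cache above is typically driven.  nf_conntrack_event_cache() and the
 * IPCT_* event bits come from nf_conntrack.h; the call sequence below is
 * illustrative only.
 *
 *	// per-packet path, softirqs disabled:
 *	nf_conntrack_event_cache(IPCT_STATUS, skb);	// OR an event bit
 *							// into this CPU's cache
 *	...
 *	// once processing of this conntrack's packet is done:
 *	nf_ct_deliver_cached_events(ct);	// fire the notifier chain
 *						// once, with all bits batched
 */
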
/*
 * This scheme offers various sizes of "struct nf_conn" depending on
 * features (helper, nat, ...)
 */

#define NF_CT_FEATURES_NAMELEN	256
static struct {
        /* name of slab cache. printed in /proc/slabinfo */
        char *name;

        /* size of slab cache */
        size_t size;

        /* slab cache pointer */
        kmem_cache_t *cachep;

        /* allocated slab cache + modules which use this slab cache */
        int use;

} nf_ct_cache[NF_CT_F_NUM];

/* protect members of nf_ct_cache except "use" */
DEFINE_RWLOCK(nf_ct_cache_lock);

/* This avoids calling kmem_cache_create() with the same name simultaneously */
static DEFINE_MUTEX(nf_ct_cache_mutex);

extern struct nf_conntrack_protocol nf_conntrack_generic_protocol;
struct nf_conntrack_protocol *
__nf_ct_proto_find(u_int16_t l3proto, u_int8_t protocol)
{
        if (unlikely(l3proto >= AF_MAX || nf_ct_protos[l3proto] == NULL))
                return &nf_conntrack_generic_protocol;

        return nf_ct_protos[l3proto][protocol];
}

/* this is guaranteed to always return a valid protocol helper, since
 * it falls back to generic_protocol */
struct nf_conntrack_protocol *
nf_ct_proto_find_get(u_int16_t l3proto, u_int8_t protocol)
{
        struct nf_conntrack_protocol *p;

        preempt_disable();
        p = __nf_ct_proto_find(l3proto, protocol);
        if (!try_module_get(p->me))
                p = &nf_conntrack_generic_protocol;
        preempt_enable();

        return p;
}

void nf_ct_proto_put(struct nf_conntrack_protocol *p)
{
        module_put(p->me);
}

struct nf_conntrack_l3proto *
nf_ct_l3proto_find_get(u_int16_t l3proto)
{
        struct nf_conntrack_l3proto *p;

        preempt_disable();
        p = __nf_ct_l3proto_find(l3proto);
        if (!try_module_get(p->me))
                p = &nf_conntrack_generic_l3proto;
        preempt_enable();

        return p;
}

void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p)
{
        module_put(p->me);
}

int
nf_ct_l3proto_try_module_get(unsigned short l3proto)
{
        int ret;
        struct nf_conntrack_l3proto *p;

retry:	p = nf_ct_l3proto_find_get(l3proto);
        if (p == &nf_conntrack_generic_l3proto) {
                ret = request_module("nf_conntrack-%d", l3proto);
                if (!ret)
                        goto retry;

                return -EPROTOTYPE;
        }

        return 0;
}

void nf_ct_l3proto_module_put(unsigned short l3proto)
{
        struct nf_conntrack_l3proto *p;

        preempt_disable();
        p = __nf_ct_l3proto_find(l3proto);
        preempt_enable();

        module_put(p->me);
}

static int nf_conntrack_hash_rnd_initted;
static unsigned int nf_conntrack_hash_rnd;

static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
                                  unsigned int size, unsigned int rnd)
{
        unsigned int a, b;
        a = jhash((void *)tuple->src.u3.all, sizeof(tuple->src.u3.all),
                  ((tuple->src.l3num) << 16) | tuple->dst.protonum);
        b = jhash((void *)tuple->dst.u3.all, sizeof(tuple->dst.u3.all),
                  (tuple->src.u.all << 16) | tuple->dst.u.all);

        return jhash_2words(a, b, rnd) % size;
}

static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
{
        return __hash_conntrack(tuple, nf_conntrack_htable_size,
                                nf_conntrack_hash_rnd);
}
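
/*
 * Editor's note (schematic, not in the original file): the hash above folds
 * a tuple into a bucket index in three jhash steps.  For a TCP/IPv4 tuple
 * 10.0.0.1:1025 -> 10.0.0.2:80, roughly:
 *
 *	a = jhash(src addr union, sizeof(u3.all), (l3num << 16) | IPPROTO_TCP);
 *	b = jhash(dst addr union, sizeof(u3.all), (sport << 16) | dport);
 *	bucket = jhash_2words(a, b, rnd) % nf_conntrack_htable_size;
 *
 * The per-boot random seed 'rnd' makes the bucket layout unpredictable,
 * which hampers algorithmic-complexity attacks on the hash chains.
 */
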
int nf_conntrack_register_cache(u_int32_t features, const char *name,
                                size_t size)
{
        int ret = 0;
        char *cache_name;
        kmem_cache_t *cachep;

        DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n",
               features, name, size);

        if (features < NF_CT_F_BASIC || features >= NF_CT_F_NUM) {
                DEBUGP("nf_conntrack_register_cache: invalid features: 0x%x\n",
                       features);
                return -EINVAL;
        }

        mutex_lock(&nf_ct_cache_mutex);

        write_lock_bh(&nf_ct_cache_lock);
        /* e.g: multiple helpers are loaded */
        if (nf_ct_cache[features].use > 0) {
                DEBUGP("nf_conntrack_register_cache: already registered.\n");
                if ((!strncmp(nf_ct_cache[features].name, name,
                              NF_CT_FEATURES_NAMELEN))
                    && nf_ct_cache[features].size == size) {
                        DEBUGP("nf_conntrack_register_cache: reusing.\n");
                        nf_ct_cache[features].use++;
                        ret = 0;
                } else
                        ret = -EBUSY;

                write_unlock_bh(&nf_ct_cache_lock);
                mutex_unlock(&nf_ct_cache_mutex);
                return ret;
        }
        write_unlock_bh(&nf_ct_cache_lock);

        /*
         * The memory space for the name of the slab cache must stay alive
         * until the cache is destroyed.
         */
        cache_name = kmalloc(sizeof(char)*NF_CT_FEATURES_NAMELEN, GFP_ATOMIC);
        if (cache_name == NULL) {
                DEBUGP("nf_conntrack_register_cache: can't alloc cache_name\n");
                ret = -ENOMEM;
                goto out_up_mutex;
        }

        if (strlcpy(cache_name, name, NF_CT_FEATURES_NAMELEN)
                                                >= NF_CT_FEATURES_NAMELEN) {
                printk("nf_conntrack_register_cache: name too long\n");
                ret = -EINVAL;
                goto out_free_name;
        }

        cachep = kmem_cache_create(cache_name, size, 0, 0,
                                   NULL, NULL);
        if (!cachep) {
                printk("nf_conntrack_register_cache: Can't create slab cache "
                       "for the features = 0x%x\n", features);
                ret = -ENOMEM;
                goto out_free_name;
        }

        write_lock_bh(&nf_ct_cache_lock);
        nf_ct_cache[features].use = 1;
        nf_ct_cache[features].size = size;
        nf_ct_cache[features].cachep = cachep;
        nf_ct_cache[features].name = cache_name;
        write_unlock_bh(&nf_ct_cache_lock);

        goto out_up_mutex;

out_free_name:
        kfree(cache_name);
out_up_mutex:
        mutex_unlock(&nf_ct_cache_mutex);
        return ret;
}

/* FIXME: At present, only nf_conntrack_cleanup() can call this function. */
void nf_conntrack_unregister_cache(u_int32_t features)
{
        kmem_cache_t *cachep;
        char *name;

        /*
         * This assures that kmem_cache_create() isn't called before the
         * slab cache is destroyed.
         */
        DEBUGP("nf_conntrack_unregister_cache: 0x%04x\n", features);
        mutex_lock(&nf_ct_cache_mutex);

        write_lock_bh(&nf_ct_cache_lock);
        if (--nf_ct_cache[features].use > 0) {
                write_unlock_bh(&nf_ct_cache_lock);
                mutex_unlock(&nf_ct_cache_mutex);
                return;
        }
        cachep = nf_ct_cache[features].cachep;
        name = nf_ct_cache[features].name;
        nf_ct_cache[features].cachep = NULL;
        nf_ct_cache[features].name = NULL;
        nf_ct_cache[features].size = 0;
        write_unlock_bh(&nf_ct_cache_lock);

        synchronize_net();

        kmem_cache_destroy(cachep);
        kfree(name);

        mutex_unlock(&nf_ct_cache_mutex);
}

int
nf_ct_get_tuple(const struct sk_buff *skb,
                unsigned int nhoff,
                unsigned int dataoff,
                u_int16_t l3num,
                u_int8_t protonum,
                struct nf_conntrack_tuple *tuple,
                const struct nf_conntrack_l3proto *l3proto,
                const struct nf_conntrack_protocol *protocol)
{
        NF_CT_TUPLE_U_BLANK(tuple);

        tuple->src.l3num = l3num;
        if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0)
                return 0;

        tuple->dst.protonum = protonum;
        tuple->dst.dir = IP_CT_DIR_ORIGINAL;

        return protocol->pkt_to_tuple(skb, dataoff, tuple);
}

int
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
                   const struct nf_conntrack_tuple *orig,
                   const struct nf_conntrack_l3proto *l3proto,
                   const struct nf_conntrack_protocol *protocol)
{
        NF_CT_TUPLE_U_BLANK(inverse);

        inverse->src.l3num = orig->src.l3num;
        if (l3proto->invert_tuple(inverse, orig) == 0)
                return 0;

        inverse->dst.dir = !orig->dst.dir;

        inverse->dst.protonum = orig->dst.protonum;
        return protocol->invert_tuple(inverse, orig);
}
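
/*
 * Editor's sketch (illustrative): tuple inversion as performed above.
 * For an original TCP tuple
 *	src = 10.0.0.1:1025, dst = 10.0.0.2:80, dir = ORIGINAL
 * the l3proto/protocol invert_tuple() callbacks fill in
 *	src = 10.0.0.2:80, dst = 10.0.0.1:1025, dir = REPLY
 * i.e. the reply tuple describes packets flowing back towards the
 * initiator, and each direction hashes into its own bucket.
 */
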
/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
{
        struct nf_conn_help *master_help = nfct_help(exp->master);

        NF_CT_ASSERT(master_help);
        ASSERT_WRITE_LOCK(&nf_conntrack_lock);
        NF_CT_ASSERT(!timer_pending(&exp->timeout));

        list_del(&exp->list);
        NF_CT_STAT_INC(expect_delete);
        master_help->expecting--;
        nf_conntrack_expect_put(exp);
}

static void expectation_timed_out(unsigned long ul_expect)
{
        struct nf_conntrack_expect *exp = (void *)ul_expect;

        write_lock_bh(&nf_conntrack_lock);
        nf_ct_unlink_expect(exp);
        write_unlock_bh(&nf_conntrack_lock);
        nf_conntrack_expect_put(exp);
}

struct nf_conntrack_expect *
__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;

        list_for_each_entry(i, &nf_conntrack_expect_list, list) {
                if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
                        atomic_inc(&i->use);
                        return i;
                }
        }
        return NULL;
}

/* Just find an expectation corresponding to a tuple. */
struct nf_conntrack_expect *
nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;

        read_lock_bh(&nf_conntrack_lock);
        i = __nf_conntrack_expect_find(tuple);
        read_unlock_bh(&nf_conntrack_lock);

        return i;
}

/* If an expectation for this connection is found, it gets deleted from
 * the global list and returned. */
static struct nf_conntrack_expect *
find_expectation(const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;

        list_for_each_entry(i, &nf_conntrack_expect_list, list) {
        /* If master is not in hash table yet (ie. packet hasn't left
           this machine yet), how can other end know about expected?
           Hence these are not the droids you are looking for (if
           master ct never got confirmed, we'd hold a reference to it
           and weird things would happen to future packets). */
                if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
                    && nf_ct_is_confirmed(i->master)) {
                        if (i->flags & NF_CT_EXPECT_PERMANENT) {
                                atomic_inc(&i->use);
                                return i;
                        } else if (del_timer(&i->timeout)) {
                                nf_ct_unlink_expect(i);
                                return i;
                        }
                }
        }
        return NULL;
}
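
/*
 * Editor's sketch (schematic, not in the original file):
 * nf_ct_tuple_mask_cmp() compares only the fields covered by the
 * expectation's mask.  An FTP-style helper typically wildcards the
 * client's source port of the predicted data connection:
 *
 *	tuple: src = client_ip : 0,      dst = server_ip : data_port
 *	mask:  src = ff.ff.ff.ff : 0,    dst = ff.ff.ff.ff : 0xffff
 *
 * so any source port matches, while the addresses and the destination
 * port must be exact.  (Field layout here is simplified.)
 */
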
/* delete all expectations for this conntrack */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
        struct nf_conntrack_expect *i, *tmp;
        struct nf_conn_help *help = nfct_help(ct);

        /* Optimization: most connections never expect any others. */
        if (!help || help->expecting == 0)
                return;

        list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
                if (i->master == ct && del_timer(&i->timeout)) {
                        nf_ct_unlink_expect(i);
                        nf_conntrack_expect_put(i);
                }
        }
}

static void
clean_from_lists(struct nf_conn *ct)
{
        unsigned int ho, hr;

        DEBUGP("clean_from_lists(%p)\n", ct);
        ASSERT_WRITE_LOCK(&nf_conntrack_lock);

        ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
        hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
        LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
        LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);

        /* Destroy all pending expectations */
        nf_ct_remove_expectations(ct);
}

static void
destroy_conntrack(struct nf_conntrack *nfct)
{
        struct nf_conn *ct = (struct nf_conn *)nfct;
        struct nf_conntrack_l3proto *l3proto;
        struct nf_conntrack_protocol *proto;

        DEBUGP("destroy_conntrack(%p)\n", ct);
        NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
        NF_CT_ASSERT(!timer_pending(&ct->timeout));

        nf_conntrack_event(IPCT_DESTROY, ct);
        set_bit(IPS_DYING_BIT, &ct->status);

        /* To make sure we don't get any weird locking issues here:
         * destroy_conntrack() MUST NOT be called with a write lock
         * to nf_conntrack_lock!!! -HW */
        l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num);
        if (l3proto && l3proto->destroy)
                l3proto->destroy(ct);

        proto = __nf_ct_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num,
                                   ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
        if (proto && proto->destroy)
                proto->destroy(ct);

        if (nf_conntrack_destroyed)
                nf_conntrack_destroyed(ct);

        write_lock_bh(&nf_conntrack_lock);
        /* Expectations will have been removed in clean_from_lists,
         * except TFTP can create an expectation on the first packet,
         * before connection is in the list, so we need to clean here,
         * too. */
        nf_ct_remove_expectations(ct);

        /* We overload first tuple to link into unconfirmed list. */
        if (!nf_ct_is_confirmed(ct)) {
                BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
                list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
        }

        NF_CT_STAT_INC(delete);
        write_unlock_bh(&nf_conntrack_lock);

        if (ct->master)
                nf_ct_put(ct->master);

        DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
        nf_conntrack_free(ct);
}

static void death_by_timeout(unsigned long ul_conntrack)
{
        struct nf_conn *ct = (void *)ul_conntrack;

        write_lock_bh(&nf_conntrack_lock);
        /* Inside lock so preempt is disabled on module removal path.
         * Otherwise we can get spurious warnings. */
        NF_CT_STAT_INC(delete_list);
        clean_from_lists(ct);
        write_unlock_bh(&nf_conntrack_lock);
        nf_ct_put(ct);
}
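
/*
 * Editor's note (illustrative): the timeout timer owns one reference on
 * the conntrack, so a forced teardown follows this pattern:
 *
 *	if (del_timer(&ct->timeout))		// we beat the timer
 *		death_by_timeout((unsigned long)ct);	// unhash, drop its ref
 *	// else the timer is already firing and will do the same itself
 *
 * This is exactly what early_drop() and nf_ct_iterate_cleanup() do
 * further down in this file.
 */
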
static inline int
conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i,
                    const struct nf_conntrack_tuple *tuple,
                    const struct nf_conn *ignored_conntrack)
{
        ASSERT_READ_LOCK(&nf_conntrack_lock);
        return nf_ct_tuplehash_to_ctrack(i) != ignored_conntrack
                && nf_ct_tuple_equal(tuple, &i->tuple);
}

struct nf_conntrack_tuple_hash *
__nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
                    const struct nf_conn *ignored_conntrack)
{
        struct nf_conntrack_tuple_hash *h;
        unsigned int hash = hash_conntrack(tuple);

        ASSERT_READ_LOCK(&nf_conntrack_lock);
        list_for_each_entry(h, &nf_conntrack_hash[hash], list) {
                if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
                        NF_CT_STAT_INC(found);
                        return h;
                }
                NF_CT_STAT_INC(searched);
        }

        return NULL;
}

/* Find a connection corresponding to a tuple. */
struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple,
                      const struct nf_conn *ignored_conntrack)
{
        struct nf_conntrack_tuple_hash *h;

        read_lock_bh(&nf_conntrack_lock);
        h = __nf_conntrack_find(tuple, ignored_conntrack);
        if (h)
                atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
        read_unlock_bh(&nf_conntrack_lock);

        return h;
}

static void __nf_conntrack_hash_insert(struct nf_conn *ct,
                                       unsigned int hash,
                                       unsigned int repl_hash)
{
        ct->id = ++nf_conntrack_next_id;
        list_prepend(&nf_conntrack_hash[hash],
                     &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
        list_prepend(&nf_conntrack_hash[repl_hash],
                     &ct->tuplehash[IP_CT_DIR_REPLY].list);
}

void nf_conntrack_hash_insert(struct nf_conn *ct)
{
        unsigned int hash, repl_hash;

        hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
        repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

        write_lock_bh(&nf_conntrack_lock);
        __nf_conntrack_hash_insert(ct, hash, repl_hash);
        write_unlock_bh(&nf_conntrack_lock);
}

/* Confirm a connection given skb; places it in hash table */
int
__nf_conntrack_confirm(struct sk_buff **pskb)
{
        unsigned int hash, repl_hash;
        struct nf_conn *ct;
        enum ip_conntrack_info ctinfo;

        ct = nf_ct_get(*pskb, &ctinfo);

        /* ipt_REJECT uses nf_conntrack_attach to attach related
           ICMP/TCP RST packets in other direction.  Actual packet
           which created connection will be IP_CT_NEW or for an
           expected connection, IP_CT_RELATED. */
        if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
                return NF_ACCEPT;

        hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
        repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

        /* We're not in hash table, and we refuse to set up related
           connections for unconfirmed conns.  But packet copies and
           REJECT will give spurious warnings here. */
        /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

        /* No external references means no one else could have
           confirmed us. */
        NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
        DEBUGP("Confirming conntrack %p\n", ct);

        write_lock_bh(&nf_conntrack_lock);

        /* See if there's one in the list already, including reverse:
           NAT could have grabbed it without realizing, since we're
           not in the hash.  If there is, we lost the race. */
        if (!LIST_FIND(&nf_conntrack_hash[hash],
                       conntrack_tuple_cmp,
                       struct nf_conntrack_tuple_hash *,
                       &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
            && !LIST_FIND(&nf_conntrack_hash[repl_hash],
                          conntrack_tuple_cmp,
                          struct nf_conntrack_tuple_hash *,
                          &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
                struct nf_conn_help *help;
                /* Remove from unconfirmed list */
                list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);

                __nf_conntrack_hash_insert(ct, hash, repl_hash);
                /* Timer relative to confirmation time, not original
                   setting time, otherwise we'd get timer wrap in
                   weird delay cases. */
                ct->timeout.expires += jiffies;
                add_timer(&ct->timeout);
                atomic_inc(&ct->ct_general.use);
                set_bit(IPS_CONFIRMED_BIT, &ct->status);
                NF_CT_STAT_INC(insert);
                write_unlock_bh(&nf_conntrack_lock);
                help = nfct_help(ct);
                if (help && help->helper)
                        nf_conntrack_event_cache(IPCT_HELPER, *pskb);
#ifdef CONFIG_NF_NAT_NEEDED
                if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
                    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
                        nf_conntrack_event_cache(IPCT_NATINFO, *pskb);
#endif
                nf_conntrack_event_cache(master_ct(ct) ?
                                         IPCT_RELATED : IPCT_NEW, *pskb);
                return NF_ACCEPT;
        }

        NF_CT_STAT_INC(insert_failed);
        write_unlock_bh(&nf_conntrack_lock);
        return NF_DROP;
}
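
/*
 * Editor's sketch (assumption: per-family glue code, not in this file):
 * nf_conntrack_confirm() - the inline wrapper around the function above -
 * is normally reached from a late netfilter hook.  In the usual wiring
 * (e.g. nf_conntrack_l3proto_ipv4), nf_conntrack_in() runs at
 * PRE_ROUTING/LOCAL_OUT to create the unconfirmed entry, and the confirm
 * step runs at POST_ROUTING/LOCAL_IN once the packet has survived all
 * other hooks - so entries for packets that get dropped on the way never
 * enter the hash table.
 */
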
/* Returns true if a connection corresponds to the tuple (required
   by NAT). */
int
nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
                         const struct nf_conn *ignored_conntrack)
{
        struct nf_conntrack_tuple_hash *h;

        read_lock_bh(&nf_conntrack_lock);
        h = __nf_conntrack_find(tuple, ignored_conntrack);
        read_unlock_bh(&nf_conntrack_lock);

        return h != NULL;
}

/* There's a small race here where we may free a just-assured
   connection.  Too bad: we're in trouble anyway. */
static inline int unreplied(const struct nf_conntrack_tuple_hash *i)
{
        return !(test_bit(IPS_ASSURED_BIT,
                          &nf_ct_tuplehash_to_ctrack(i)->status));
}

static int early_drop(struct list_head *chain)
{
        /* Traverse backwards: gives us oldest, which is roughly LRU */
        struct nf_conntrack_tuple_hash *h;
        struct nf_conn *ct = NULL;
        int dropped = 0;

        read_lock_bh(&nf_conntrack_lock);
        h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *);
        if (h) {
                ct = nf_ct_tuplehash_to_ctrack(h);
                atomic_inc(&ct->ct_general.use);
        }
        read_unlock_bh(&nf_conntrack_lock);

        if (!ct)
                return dropped;

        if (del_timer(&ct->timeout)) {
                death_by_timeout((unsigned long)ct);
                dropped = 1;
                NF_CT_STAT_INC(early_drop);
        }
        nf_ct_put(ct);
        return dropped;
}
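
/*
 * Editor's note (illustrative): early_drop() is the table-full policy.
 * It walks a single hash chain backwards and evicts the oldest entry that
 * never saw a reply (!IPS_ASSURED), e.g. a half-open TCP connection.
 * Assured connections are never sacrificed, so a SYN flood that fills the
 * table cannot push out established traffic.
 */
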
static inline int helper_cmp(const struct nf_conntrack_helper *i,
                             const struct nf_conntrack_tuple *rtuple)
{
        return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
}

static struct nf_conntrack_helper *
__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
{
        return LIST_FIND(&helpers, helper_cmp,
                         struct nf_conntrack_helper *,
                         tuple);
}

struct nf_conntrack_helper *
nf_ct_helper_find_get(const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_helper *helper;

        /* need nf_conntrack_lock to assure that helper exists until
         * try_module_get() is called */
        read_lock_bh(&nf_conntrack_lock);

        helper = __nf_ct_helper_find(tuple);
        if (helper) {
                /* need to increase module usage count to assure helper will
                 * not go away while the caller is e.g. busy putting a
                 * conntrack in the hash that uses the helper */
                if (!try_module_get(helper->me))
                        helper = NULL;
        }

        read_unlock_bh(&nf_conntrack_lock);

        return helper;
}

void nf_ct_helper_put(struct nf_conntrack_helper *helper)
{
        module_put(helper->me);
}

static struct nf_conn *
__nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
                     const struct nf_conntrack_tuple *repl,
                     const struct nf_conntrack_l3proto *l3proto)
{
        struct nf_conn *conntrack = NULL;
        u_int32_t features = 0;
        struct nf_conntrack_helper *helper;

        if (unlikely(!nf_conntrack_hash_rnd_initted)) {
                get_random_bytes(&nf_conntrack_hash_rnd, 4);
                nf_conntrack_hash_rnd_initted = 1;
        }

        if (nf_conntrack_max
            && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) {
                unsigned int hash = hash_conntrack(orig);
                /* Try dropping from this hash chain. */
                if (!early_drop(&nf_conntrack_hash[hash])) {
                        if (net_ratelimit())
                                printk(KERN_WARNING
                                       "nf_conntrack: table full, dropping"
                                       " packet.\n");
                        return ERR_PTR(-ENOMEM);
                }
        }

        /* find features needed by this conntrack. */
        features = l3proto->get_features(orig);

        /* FIXME: protect helper list per RCU */
        read_lock_bh(&nf_conntrack_lock);
        helper = __nf_ct_helper_find(repl);
        if (helper)
                features |= NF_CT_F_HELP;
        read_unlock_bh(&nf_conntrack_lock);

        DEBUGP("nf_conntrack_alloc: features=0x%x\n", features);

        read_lock_bh(&nf_ct_cache_lock);

        if (unlikely(!nf_ct_cache[features].use)) {
                DEBUGP("nf_conntrack_alloc: unsupported features = 0x%x\n",
                       features);
                goto out;
        }

        conntrack = kmem_cache_alloc(nf_ct_cache[features].cachep, GFP_ATOMIC);
        if (conntrack == NULL) {
                DEBUGP("nf_conntrack_alloc: Can't alloc conntrack from cache\n");
                goto out;
        }

        memset(conntrack, 0, nf_ct_cache[features].size);
        conntrack->features = features;
        if (helper) {
                struct nf_conn_help *help = nfct_help(conntrack);
                NF_CT_ASSERT(help);
                help->helper = helper;
        }

        atomic_set(&conntrack->ct_general.use, 1);
        conntrack->ct_general.destroy = destroy_conntrack;
        conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
        conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
        /* Don't set timer yet: wait for confirmation */
        init_timer(&conntrack->timeout);
        conntrack->timeout.data = (unsigned long)conntrack;
        conntrack->timeout.function = death_by_timeout;

        atomic_inc(&nf_conntrack_count);
out:
        read_unlock_bh(&nf_ct_cache_lock);
        return conntrack;
}

struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
                                   const struct nf_conntrack_tuple *repl)
{
        struct nf_conntrack_l3proto *l3proto;

        l3proto = __nf_ct_l3proto_find(orig->src.l3num);
        return __nf_conntrack_alloc(orig, repl, l3proto);
}

void nf_conntrack_free(struct nf_conn *conntrack)
{
        u_int32_t features = conntrack->features;
        NF_CT_ASSERT(features >= NF_CT_F_BASIC && features < NF_CT_F_NUM);
        DEBUGP("nf_conntrack_free: features = 0x%x, conntrack=%p\n", features,
               conntrack);
        kmem_cache_free(nf_ct_cache[features].cachep, conntrack);
        atomic_dec(&nf_conntrack_count);
}
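
/*
 * Editor's sketch (grounded in this file): how the 'features' scheme above
 * is meant to be used.  A module that needs per-conntrack helper data
 * registers a fatter slab once, and later allocations carrying that
 * feature bit come from it:
 *
 *	ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
 *					  sizeof(struct nf_conn)
 *					  + sizeof(struct nf_conn_help)
 *					  + __alignof__(struct nf_conn_help));
 *	...
 *	nf_conntrack_unregister_cache(NF_CT_F_HELP);	// on teardown
 *
 * nf_conntrack_helper_register() below does exactly this.
 */
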
/* Allocate a new conntrack: we return -ENOMEM if classification
   failed due to stress.  Otherwise it really is unclassifiable. */
static struct nf_conntrack_tuple_hash *
init_conntrack(const struct nf_conntrack_tuple *tuple,
               struct nf_conntrack_l3proto *l3proto,
               struct nf_conntrack_protocol *protocol,
               struct sk_buff *skb,
               unsigned int dataoff)
{
        struct nf_conn *conntrack;
        struct nf_conntrack_tuple repl_tuple;
        struct nf_conntrack_expect *exp;

        if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, protocol)) {
                DEBUGP("Can't invert tuple.\n");
                return NULL;
        }

        conntrack = __nf_conntrack_alloc(tuple, &repl_tuple, l3proto);
        if (conntrack == NULL || IS_ERR(conntrack)) {
                DEBUGP("Can't allocate conntrack.\n");
                return (struct nf_conntrack_tuple_hash *)conntrack;
        }

        if (!protocol->new(conntrack, skb, dataoff)) {
                nf_conntrack_free(conntrack);
                DEBUGP("init conntrack: can't track with proto module\n");
                return NULL;
        }

        write_lock_bh(&nf_conntrack_lock);
        exp = find_expectation(tuple);

        if (exp) {
                DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
                       conntrack, exp);
                /* Welcome, Mr. Bond.  We've been expecting you... */
                __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
                conntrack->master = exp->master;
#ifdef CONFIG_NF_CONNTRACK_MARK
                conntrack->mark = exp->master->mark;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
                conntrack->secmark = exp->master->secmark;
#endif
                nf_conntrack_get(&conntrack->master->ct_general);
                NF_CT_STAT_INC(expect_new);
        } else
                NF_CT_STAT_INC(new);

        /* Overload tuple linked list to put us in unconfirmed list. */
        list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);

        write_unlock_bh(&nf_conntrack_lock);

        if (exp) {
                if (exp->expectfn)
                        exp->expectfn(conntrack, exp);
                nf_conntrack_expect_put(exp);
        }

        return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
}

/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
static inline struct nf_conn *
resolve_normal_ct(struct sk_buff *skb,
                  unsigned int dataoff,
                  u_int16_t l3num,
                  u_int8_t protonum,
                  struct nf_conntrack_l3proto *l3proto,
                  struct nf_conntrack_protocol *proto,
                  int *set_reply,
                  enum ip_conntrack_info *ctinfo)
{
        struct nf_conntrack_tuple tuple;
        struct nf_conntrack_tuple_hash *h;
        struct nf_conn *ct;

        if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data),
                             dataoff, l3num, protonum, &tuple, l3proto,
                             proto)) {
                DEBUGP("resolve_normal_ct: Can't get tuple\n");
                return NULL;
        }

        /* look for tuple match */
        h = nf_conntrack_find_get(&tuple, NULL);
        if (!h) {
                h = init_conntrack(&tuple, l3proto, proto, skb, dataoff);
                if (!h)
                        return NULL;
                if (IS_ERR(h))
                        return (void *)h;
        }
        ct = nf_ct_tuplehash_to_ctrack(h);

        /* It exists; we have (non-exclusive) reference. */
        if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
                *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
                /* Please set reply bit if this packet OK */
                *set_reply = 1;
        } else {
                /* Once we've had two way comms, always ESTABLISHED. */
                if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
                        DEBUGP("nf_conntrack_in: normal packet for %p\n", ct);
                        *ctinfo = IP_CT_ESTABLISHED;
                } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
                        DEBUGP("nf_conntrack_in: related packet for %p\n", ct);
                        *ctinfo = IP_CT_RELATED;
                } else {
                        DEBUGP("nf_conntrack_in: new packet for %p\n", ct);
                        *ctinfo = IP_CT_NEW;
                }
                *set_reply = 0;
        }
        skb->nfct = &ct->ct_general;
        skb->nfctinfo = *ctinfo;
        return ct;
}

unsigned int
nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
{
        struct nf_conn *ct;
        enum ip_conntrack_info ctinfo;
        struct nf_conntrack_l3proto *l3proto;
        struct nf_conntrack_protocol *proto;
        unsigned int dataoff;
        u_int8_t protonum;
        int set_reply = 0;
        int ret;

        /* Previously seen (loopback or untracked)?  Ignore. */
        if ((*pskb)->nfct) {
                NF_CT_STAT_INC(ignore);
                return NF_ACCEPT;
        }

        l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
        if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) {
                DEBUGP("not prepared to track yet or error occurred\n");
                return -ret;
        }

        proto = __nf_ct_proto_find((u_int16_t)pf, protonum);

        /* It may be a special packet, error, unclean...
         * the inverse of the return code tells the netfilter
         * core what to do with the packet. */
        if (proto->error != NULL &&
            (ret = proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) {
                NF_CT_STAT_INC(error);
                NF_CT_STAT_INC(invalid);
                return -ret;
        }

        ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, proto,
                               &set_reply, &ctinfo);
        if (!ct) {
                /* Not valid part of a connection */
                NF_CT_STAT_INC(invalid);
                return NF_ACCEPT;
        }

        if (IS_ERR(ct)) {
                /* Too stressed to deal. */
                NF_CT_STAT_INC(drop);
                return NF_DROP;
        }

        NF_CT_ASSERT((*pskb)->nfct);

        ret = proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum);
        if (ret < 0) {
                /* Invalid: inverse of the return code tells
                 * the netfilter core what to do */
                DEBUGP("nf_conntrack_in: Can't track with proto module\n");
                nf_conntrack_put((*pskb)->nfct);
                (*pskb)->nfct = NULL;
                NF_CT_STAT_INC(invalid);
                return -ret;
        }

        if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
                nf_conntrack_event_cache(IPCT_STATUS, *pskb);

        return ret;
}
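
/*
 * Editor's sketch (assumption: simplified per-family glue, not in this
 * file): how nf_conntrack_in() is typically attached to the stack.  The
 * real registration lives in the l3proto modules.
 *
 *	static unsigned int ipv4_conntrack_in(unsigned int hooknum,
 *					      struct sk_buff **pskb,
 *					      const struct net_device *in,
 *					      const struct net_device *out,
 *					      int (*okfn)(struct sk_buff *))
 *	{
 *		return nf_conntrack_in(PF_INET, hooknum, pskb);
 *	}
 *
 *	static struct nf_hook_ops ipv4_conntrack_in_ops = {
 *		.hook		= ipv4_conntrack_in,
 *		.owner		= THIS_MODULE,
 *		.pf		= PF_INET,
 *		.hooknum	= NF_IP_PRE_ROUTING,
 *		.priority	= NF_IP_PRI_CONNTRACK,
 *	};
 */
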
int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
                         const struct nf_conntrack_tuple *orig)
{
        return nf_ct_invert_tuple(inverse, orig,
                                  __nf_ct_l3proto_find(orig->src.l3num),
                                  __nf_ct_proto_find(orig->src.l3num,
                                                     orig->dst.protonum));
}

/* Would two expected things clash? */
static inline int expect_clash(const struct nf_conntrack_expect *a,
                               const struct nf_conntrack_expect *b)
{
        /* The parts covered by the intersection of the masks must be
           unequal, otherwise they clash */
        struct nf_conntrack_tuple intersect_mask;
        int count;

        intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num;
        intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
        intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all;
        intersect_mask.dst.protonum = a->mask.dst.protonum
                                        & b->mask.dst.protonum;

        for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
                intersect_mask.src.u3.all[count] =
                        a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
        }

        for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
                intersect_mask.dst.u3.all[count] =
                        a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count];
        }

        return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}

static inline int expect_matches(const struct nf_conntrack_expect *a,
                                 const struct nf_conntrack_expect *b)
{
        return a->master == b->master
                && nf_ct_tuple_equal(&a->tuple, &b->tuple)
                && nf_ct_tuple_equal(&a->mask, &b->mask);
}

/* Generally a bad idea to call this: could have matched already. */
void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
{
        struct nf_conntrack_expect *i;

        write_lock_bh(&nf_conntrack_lock);
        /* choose the oldest expectation to evict */
        list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
                if (expect_matches(i, exp) && del_timer(&i->timeout)) {
                        nf_ct_unlink_expect(i);
                        write_unlock_bh(&nf_conntrack_lock);
                        nf_conntrack_expect_put(i);
                        return;
                }
        }
        write_unlock_bh(&nf_conntrack_lock);
}

/* We don't increase the master conntrack refcount for unfulfilled
 * expectations.
 * During the conntrack destruction, the expectations are
 * always killed before the conntrack itself */
struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
{
        struct nf_conntrack_expect *new;

        new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC);
        if (!new) {
                DEBUGP("expect_related: OOM allocating expect\n");
                return NULL;
        }
        new->master = me;
        atomic_set(&new->use, 1);
        return new;
}

void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
{
        if (atomic_dec_and_test(&exp->use))
                kmem_cache_free(nf_conntrack_expect_cachep, exp);
}

static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
{
        struct nf_conn_help *master_help = nfct_help(exp->master);

        atomic_inc(&exp->use);
        master_help->expecting++;
        list_add(&exp->list, &nf_conntrack_expect_list);

        init_timer(&exp->timeout);
        exp->timeout.data = (unsigned long)exp;
        exp->timeout.function = expectation_timed_out;
        exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
        add_timer(&exp->timeout);

        exp->id = ++nf_conntrack_expect_next_id;
        atomic_inc(&exp->use);
        NF_CT_STAT_INC(expect_create);
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master)
{
        struct nf_conntrack_expect *i;

        list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
                if (i->master == master) {
                        if (del_timer(&i->timeout)) {
                                nf_ct_unlink_expect(i);
                                nf_conntrack_expect_put(i);
                        }
                        break;
                }
        }
}

static inline int refresh_timer(struct nf_conntrack_expect *i)
{
        struct nf_conn_help *master_help = nfct_help(i->master);

        if (!del_timer(&i->timeout))
                return 0;

        i->timeout.expires = jiffies + master_help->helper->timeout*HZ;
        add_timer(&i->timeout);
        return 1;
}

int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
{
        struct nf_conntrack_expect *i;
        struct nf_conn *master = expect->master;
        struct nf_conn_help *master_help = nfct_help(master);
        int ret;

        NF_CT_ASSERT(master_help);

        DEBUGP("nf_conntrack_expect_related %p\n", expect);
        DEBUGP("tuple: "); NF_CT_DUMP_TUPLE(&expect->tuple);
        DEBUGP("mask:  "); NF_CT_DUMP_TUPLE(&expect->mask);

        write_lock_bh(&nf_conntrack_lock);
        list_for_each_entry(i, &nf_conntrack_expect_list, list) {
                if (expect_matches(i, expect)) {
                        /* Refresh timer: if it's dying, ignore.. */
                        if (refresh_timer(i)) {
                                ret = 0;
                                goto out;
                        }
                } else if (expect_clash(i, expect)) {
                        ret = -EBUSY;
                        goto out;
                }
        }
        /* Will we be over the limit? */
        if (master_help->helper->max_expected &&
            master_help->expecting >= master_help->helper->max_expected)
                evict_oldest_expect(master);

        nf_conntrack_expect_insert(expect);
        nf_conntrack_expect_event(IPEXP_NEW, expect);
        ret = 0;
out:
        write_unlock_bh(&nf_conntrack_lock);
        return ret;
}
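
/*
 * Editor's sketch (schematic, modelled on how the FTP helper uses this
 * API): a helper that has parsed an address/port out of the control
 * channel sets up the expectation roughly like this:
 *
 *	exp = nf_conntrack_expect_alloc(ct);	// sets master, use = 1
 *	if (exp == NULL)
 *		return NF_DROP;
 *	// fill exp->tuple with the predicted data connection, and
 *	// exp->mask with the fields that must match exactly
 *	exp->expectfn = NULL;
 *	exp->flags = 0;
 *	ret = nf_conntrack_expect_related(exp);
 *	nf_conntrack_expect_put(exp);		// drop our allocation ref;
 *						// list and timer hold theirs
 */
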
int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
{
        int ret;
        BUG_ON(me->timeout == 0);

        ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help",
                                          sizeof(struct nf_conn)
                                          + sizeof(struct nf_conn_help)
                                          + __alignof__(struct nf_conn_help));
        if (ret < 0) {
                printk(KERN_ERR "nf_conntrack_helper_register: Unable to create slab cache for conntracks\n");
                return ret;
        }
        write_lock_bh(&nf_conntrack_lock);
        list_prepend(&helpers, me);
        write_unlock_bh(&nf_conntrack_lock);

        return 0;
}

struct nf_conntrack_helper *
__nf_conntrack_helper_find_byname(const char *name)
{
        struct nf_conntrack_helper *h;

        list_for_each_entry(h, &helpers, list) {
                if (!strcmp(h->name, name))
                        return h;
        }

        return NULL;
}

static inline int unhelp(struct nf_conntrack_tuple_hash *i,
                         const struct nf_conntrack_helper *me)
{
        struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);
        struct nf_conn_help *help = nfct_help(ct);

        if (help && help->helper == me) {
                nf_conntrack_event(IPCT_HELPER, ct);
                help->helper = NULL;
        }
        return 0;
}

void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
{
        unsigned int i;
        struct nf_conntrack_expect *exp, *tmp;

        /* Need write lock here, to delete helper. */
        write_lock_bh(&nf_conntrack_lock);
        LIST_DELETE(&helpers, me);

        /* Get rid of expectations */
        list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) {
                struct nf_conn_help *help = nfct_help(exp->master);
                if (help->helper == me && del_timer(&exp->timeout)) {
                        nf_ct_unlink_expect(exp);
                        nf_conntrack_expect_put(exp);
                }
        }

        /* Get rid of expecteds, set helpers to NULL. */
        LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me);
        for (i = 0; i < nf_conntrack_htable_size; i++)
                LIST_FIND_W(&nf_conntrack_hash[i], unhelp,
                            struct nf_conntrack_tuple_hash *, me);
        write_unlock_bh(&nf_conntrack_lock);

        /* Someone could still be looking at the helper in a bh. */
        synchronize_net();
}

/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
void __nf_ct_refresh_acct(struct nf_conn *ct,
                          enum ip_conntrack_info ctinfo,
                          const struct sk_buff *skb,
                          unsigned long extra_jiffies,
                          int do_acct)
{
        int event = 0;

        NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
        NF_CT_ASSERT(skb);

        write_lock_bh(&nf_conntrack_lock);

        /* Only update if this is not a fixed timeout */
        if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
                write_unlock_bh(&nf_conntrack_lock);
                return;
        }

        /* If not in hash table, timer will not be active yet */
        if (!nf_ct_is_confirmed(ct)) {
                ct->timeout.expires = extra_jiffies;
                event = IPCT_REFRESH;
        } else {
                /* Need del_timer for race avoidance (may already be dying). */
                if (del_timer(&ct->timeout)) {
                        ct->timeout.expires = jiffies + extra_jiffies;
                        add_timer(&ct->timeout);
                        event = IPCT_REFRESH;
                }
        }

#ifdef CONFIG_NF_CT_ACCT
        if (do_acct) {
                ct->counters[CTINFO2DIR(ctinfo)].packets++;
                ct->counters[CTINFO2DIR(ctinfo)].bytes +=
                        skb->len - (unsigned int)(skb->nh.raw - skb->data);
                if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
                    || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
                        event |= IPCT_COUNTER_FILLING;
        }
#endif

        write_unlock_bh(&nf_conntrack_lock);

        /* must be unlocked when calling event cache */
        if (event)
                nf_conntrack_event_cache(event, skb);
}
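
/*
 * Editor's note (illustrative): protocol trackers drive the function above
 * through the nf_ct_refresh{,_acct}() wrappers from nf_conntrack.h, e.g. a
 * UDP-style tracker refreshing a stream timeout on every packet:
 *
 *	nf_ct_refresh_acct(ct, ctinfo, skb, 180 * HZ);
 *
 * Unconfirmed conntracks only get their expiry recorded here; the timer is
 * armed relative to jiffies when __nf_conntrack_confirm() finally inserts
 * them into the hash.
 */
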

#if defined(CONFIG_NF_CT_NETLINK) || \
    defined(CONFIG_NF_CT_NETLINK_MODULE)

#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
#include <linux/mutex.h>


/* Generic function for tcp/udp/sctp/dccp and the like.  This needs to be
 * in ip_conntrack_core, since we don't want the protocols to autoload
 * or depend on ctnetlink */
int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb,
                               const struct nf_conntrack_tuple *tuple)
{
        NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
                &tuple->src.u.tcp.port);
        NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
                &tuple->dst.u.tcp.port);
        return 0;

nfattr_failure:
        return -1;
}

static const size_t cta_min_proto[CTA_PROTO_MAX] = {
        [CTA_PROTO_SRC_PORT-1]	= sizeof(u_int16_t),
        [CTA_PROTO_DST_PORT-1]	= sizeof(u_int16_t)
};

int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[],
                               struct nf_conntrack_tuple *t)
{
        if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
                return -EINVAL;

        if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
                return -EINVAL;

        t->src.u.tcp.port =
                *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
        t->dst.u.tcp.port =
                *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);

        return 0;
}
#endif

/* Used by ipt_REJECT and ip6t_REJECT. */
void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
{
        struct nf_conn *ct;
        enum ip_conntrack_info ctinfo;

        /* This ICMP is in reverse direction to the packet which caused it */
        ct = nf_ct_get(skb, &ctinfo);
        if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
                ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
        else
                ctinfo = IP_CT_RELATED;

        /* Attach to new skbuff, and increment count */
        nskb->nfct = &ct->ct_general;
        nskb->nfctinfo = ctinfo;
        nf_conntrack_get(nskb->nfct);
}

static inline int
do_iter(const struct nf_conntrack_tuple_hash *i,
        int (*iter)(struct nf_conn *i, void *data),
        void *data)
{
        return iter(nf_ct_tuplehash_to_ctrack(i), data);
}

/* Bring out ya dead! */
static struct nf_conntrack_tuple_hash *
get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
                void *data, unsigned int *bucket)
{
        struct nf_conntrack_tuple_hash *h = NULL;

        write_lock_bh(&nf_conntrack_lock);
        for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
                h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter,
                                struct nf_conntrack_tuple_hash *, iter, data);
                if (h)
                        break;
        }
        if (!h)
                h = LIST_FIND_W(&unconfirmed, do_iter,
                                struct nf_conntrack_tuple_hash *, iter, data);
        if (h)
                atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use);
        write_unlock_bh(&nf_conntrack_lock);

        return h;
}

void
nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data)
{
        struct nf_conntrack_tuple_hash *h;
        unsigned int bucket = 0;

        while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
                struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
                /* Time to push up daisies... */
                if (del_timer(&ct->timeout))
                        death_by_timeout((unsigned long)ct);
                /* ... else the timer will get him soon. */

                nf_ct_put(ct);
        }
}

static int kill_all(struct nf_conn *i, void *data)
{
        return 1;
}

static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
{
        if (vmalloced)
                vfree(hash);
        else
                free_pages((unsigned long)hash,
                           get_order(sizeof(struct list_head) * size));
}

void nf_conntrack_flush(void)
{
        nf_ct_iterate_cleanup(kill_all, NULL);
}

/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall. */
void nf_conntrack_cleanup(void)
{
        int i;

        ip_ct_attach = NULL;

        /* This makes sure all current packets have passed through
           the netfilter framework.  Roll on, two-stage module
           delete... */
        synchronize_net();

        nf_ct_event_cache_flush();
 i_see_dead_people:
        nf_conntrack_flush();
        if (atomic_read(&nf_conntrack_count) != 0) {
                schedule();
                goto i_see_dead_people;
        }
        /* wait until all references to nf_conntrack_untracked are dropped */
        while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
                schedule();

        for (i = 0; i < NF_CT_F_NUM; i++) {
                if (nf_ct_cache[i].use == 0)
                        continue;

                NF_CT_ASSERT(nf_ct_cache[i].use == 1);
                nf_ct_cache[i].use = 1;
                nf_conntrack_unregister_cache(i);
        }
        kmem_cache_destroy(nf_conntrack_expect_cachep);
        free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
                            nf_conntrack_htable_size);

        /* free l3proto protocol tables */
        for (i = 0; i < PF_MAX; i++)
                if (nf_ct_protos[i]) {
                        kfree(nf_ct_protos[i]);
                        nf_ct_protos[i] = NULL;
                }
}

static struct list_head *alloc_hashtable(int size, int *vmalloced)
{
        struct list_head *hash;
        unsigned int i;

        *vmalloced = 0;
        hash = (void*)__get_free_pages(GFP_KERNEL,
                                       get_order(sizeof(struct list_head)
                                                 * size));
        if (!hash) {
                *vmalloced = 1;
                printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
                hash = vmalloc(sizeof(struct list_head) * size);
        }

        if (hash)
                for (i = 0; i < size; i++)
                        INIT_LIST_HEAD(&hash[i]);

        return hash;
}

int set_hashsize(const char *val, struct kernel_param *kp)
{
        int i, bucket, hashsize, vmalloced;
        int old_vmalloced, old_size;
        int rnd;
        struct list_head *hash, *old_hash;
        struct nf_conntrack_tuple_hash *h;

        /* On boot, we can set this without any fancy locking. */
        if (!nf_conntrack_htable_size)
                return param_set_uint(val, kp);

        hashsize = simple_strtol(val, NULL, 0);
        if (!hashsize)
                return -EINVAL;

        hash = alloc_hashtable(hashsize, &vmalloced);
        if (!hash)
                return -ENOMEM;

        /* We have to rehash for the new table anyway, so we can also
         * use a new random seed */
        get_random_bytes(&rnd, 4);

        write_lock_bh(&nf_conntrack_lock);
        for (i = 0; i < nf_conntrack_htable_size; i++) {
                while (!list_empty(&nf_conntrack_hash[i])) {
                        h = list_entry(nf_conntrack_hash[i].next,
                                       struct nf_conntrack_tuple_hash, list);
                        list_del(&h->list);
                        bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
                        list_add_tail(&h->list, &hash[bucket]);
                }
        }
        old_size = nf_conntrack_htable_size;
        old_vmalloced = nf_conntrack_vmalloc;
        old_hash = nf_conntrack_hash;

        nf_conntrack_htable_size = hashsize;
        nf_conntrack_vmalloc = vmalloced;
        nf_conntrack_hash = hash;
        nf_conntrack_hash_rnd = rnd;
        write_unlock_bh(&nf_conntrack_lock);

        free_conntrack_hash(old_hash, old_vmalloced, old_size);
        return 0;
}

module_param_call(hashsize, set_hashsize, param_get_uint,
                  &nf_conntrack_htable_size, 0600);
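
/*
 * Editor's note (illustrative): with the module_param_call() above, the
 * hash size can be changed at runtime, e.g.:
 *
 *	echo 16384 > /sys/module/nf_conntrack/parameters/hashsize
 *
 * set_hashsize() then rehashes every entry into the new table under the
 * write lock and frees the old table; nf_conntrack_max is left unchanged.
 */
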
int __init nf_conntrack_init(void)
{
        unsigned int i;
        int ret;

        /* Idea from tcp.c: use 1/16384 of memory.  On i386: a 32MB
         * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
        if (!nf_conntrack_htable_size) {
                nf_conntrack_htable_size
                        = (((num_physpages << PAGE_SHIFT) / 16384)
                           / sizeof(struct list_head));
                if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
                        nf_conntrack_htable_size = 8192;
                if (nf_conntrack_htable_size < 16)
                        nf_conntrack_htable_size = 16;
        }
        nf_conntrack_max = 8 * nf_conntrack_htable_size;

        printk("nf_conntrack version %s (%u buckets, %d max)\n",
               NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
               nf_conntrack_max);

        nf_conntrack_hash = alloc_hashtable(nf_conntrack_htable_size,
                                            &nf_conntrack_vmalloc);
        if (!nf_conntrack_hash) {
                printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
                goto err_out;
        }

        ret = nf_conntrack_register_cache(NF_CT_F_BASIC, "nf_conntrack:basic",
                                          sizeof(struct nf_conn));
        if (ret < 0) {
                printk(KERN_ERR "Unable to create nf_conn slab cache\n");
                goto err_free_hash;
        }

        nf_conntrack_expect_cachep = kmem_cache_create("nf_conntrack_expect",
                                        sizeof(struct nf_conntrack_expect),
                                        0, 0, NULL, NULL);
        if (!nf_conntrack_expect_cachep) {
                printk(KERN_ERR "Unable to create nf_expect slab cache\n");
                goto err_free_conntrack_slab;
        }

        /* Don't NEED lock here, but good form anyway. */
        write_lock_bh(&nf_conntrack_lock);
        for (i = 0; i < PF_MAX; i++)
                nf_ct_l3protos[i] = &nf_conntrack_generic_l3proto;
        write_unlock_bh(&nf_conntrack_lock);

        /* For use by REJECT target */
        ip_ct_attach = __nf_conntrack_attach;

        /* Set up fake conntrack:
            - to never be deleted, not in any hashes */
        atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
        /*  - and make it look like a confirmed connection */
        set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);

        return ret;

err_free_conntrack_slab:
        nf_conntrack_unregister_cache(NF_CT_F_BASIC);
err_free_hash:
        free_conntrack_hash(nf_conntrack_hash, nf_conntrack_vmalloc,
                            nf_conntrack_htable_size);
err_out:
        return -ENOMEM;
}