/*
 * (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2011 Patrick McHardy <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/timer.h>
#include <linux/skbuff.h>
#include <linux/gfp.h>
#include <net/xfrm.h>
#include <linux/jhash.h>
#include <linux/rtnetlink.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_l3proto.h>
#include <net/netfilter/nf_nat_l4proto.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_nat.h>

static DEFINE_SPINLOCK(nf_nat_lock);

static DEFINE_MUTEX(nf_nat_proto_mutex);
static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
						__read_mostly;
static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
						__read_mostly;


inline const struct nf_nat_l3proto *
__nf_nat_l3proto_find(u8 family)
{
	return rcu_dereference(nf_nat_l3protos[family]);
}

inline const struct nf_nat_l4proto *
__nf_nat_l4proto_find(u8 family, u8 protonum)
{
	return rcu_dereference(nf_nat_l4protos[family][protonum]);
}
EXPORT_SYMBOL_GPL(__nf_nat_l4proto_find);

#ifdef CONFIG_XFRM
static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
{
	const struct nf_nat_l3proto *l3proto;
	const struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	enum ip_conntrack_dir dir;
	unsigned long statusbit;
	u8 family;

	ct = nf_ct_get(skb, &ctinfo);
	if (ct == NULL)
		return;

	family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
	rcu_read_lock();
	l3proto = __nf_nat_l3proto_find(family);
	if (l3proto == NULL)
		goto out;

	dir = CTINFO2DIR(ctinfo);
	if (dir == IP_CT_DIR_ORIGINAL)
		statusbit = IPS_DST_NAT;
	else
		statusbit = IPS_SRC_NAT;

	l3proto->decode_session(skb, ct, dir, statusbit, fl);
out:
	rcu_read_unlock();
}

int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)
{
	struct flowi fl;
	unsigned int hh_len;
	struct dst_entry *dst;
	int err;

	err = xfrm_decode_session(skb, &fl, family);
	if (err < 0)
		return err;

	dst = skb_dst(skb);
	if (dst->xfrm)
		dst = ((struct xfrm_dst *)dst)->route;
	dst_hold(dst);

	dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0);
	if (IS_ERR(dst))
		return PTR_ERR(dst);

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	/* Change in oif may mean change in hh_len. */
	hh_len = skb_dst(skb)->dev->hard_header_len;
	if (skb_headroom(skb) < hh_len &&
	    pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
		return -ENOMEM;
	return 0;
}
EXPORT_SYMBOL(nf_xfrm_me_harder);
#endif /* CONFIG_XFRM */

/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
hash_by_src(const struct net *net, u16 zone,
	    const struct nf_conntrack_tuple *tuple)
{
	unsigned int hash;

	/* Original src, to ensure we map it consistently if poss. */
	hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
		      tuple->dst.protonum ^ zone ^ nf_conntrack_hash_rnd);
	return ((u64)hash * net->ct.nat_htable_size) >> 32;
}
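
/* Note on the bucket computation above (illustration, not part of the
 * original logic): instead of "hash % size", the multiply-shift
 * "((u64)hash * size) >> 32" maps the 32-bit hash uniformly onto
 * [0, size) without a division.  For example, with size = 4096 and
 * hash = 0x80000000, (0x80000000ULL * 4096) >> 32 == 2048, i.e. the
 * midpoint of the hash space lands in the middle bucket.
 */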

/* Is this tuple already taken? (not by us) */
int
nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
		  const struct nf_conn *ignored_conntrack)
{
	/* Conntrack tracking doesn't keep track of outgoing tuples; only
	 * incoming ones.  NAT means they don't have a fixed mapping,
	 * so we invert the tuple and look for the incoming reply.
	 *
	 * We could keep a separate hash if this proves too slow.
	 */
	struct nf_conntrack_tuple reply;

	nf_ct_invert_tuplepr(&reply, tuple);
	return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
}
EXPORT_SYMBOL(nf_nat_used_tuple);

/* If we source map this tuple so reply looks like reply_tuple, will
 * that meet the constraints of range?
 */
static int in_range(const struct nf_nat_l3proto *l3proto,
		    const struct nf_nat_l4proto *l4proto,
		    const struct nf_conntrack_tuple *tuple,
		    const struct nf_nat_range *range)
{
	/* If we are supposed to map IPs, then we must be in the
	 * range specified, otherwise let this drag us onto a new src IP.
	 */
	if (range->flags & NF_NAT_RANGE_MAP_IPS &&
	    !l3proto->in_range(tuple, range))
		return 0;

	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) ||
	    l4proto->in_range(tuple, NF_NAT_MANIP_SRC,
			      &range->min_proto, &range->max_proto))
		return 1;

	return 0;
}

static inline int
same_src(const struct nf_conn *ct,
	 const struct nf_conntrack_tuple *tuple)
{
	const struct nf_conntrack_tuple *t;

	t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
	return (t->dst.protonum == tuple->dst.protonum &&
		nf_inet_addr_cmp(&t->src.u3, &tuple->src.u3) &&
		t->src.u.all == tuple->src.u.all);
}

/* Only called for SRC manip */
static int
find_appropriate_src(struct net *net, u16 zone,
		     const struct nf_nat_l3proto *l3proto,
		     const struct nf_nat_l4proto *l4proto,
		     const struct nf_conntrack_tuple *tuple,
		     struct nf_conntrack_tuple *result,
		     const struct nf_nat_range *range)
{
	unsigned int h = hash_by_src(net, zone, tuple);
	const struct nf_conn_nat *nat;
	const struct nf_conn *ct;

	hlist_for_each_entry_rcu(nat, &net->ct.nat_bysource[h], bysource) {
		ct = nat->ct;
		if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) {
			/* Copy source part from reply tuple. */
			nf_ct_invert_tuplepr(result,
				       &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
			result->dst = tuple->dst;

			if (in_range(l3proto, l4proto, result, range))
				return 1;
		}
	}
	return 0;
}
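
/* Worked example of the lookup above (addresses illustrative only):
 * if 10.0.0.2:5060/udp has already been source-mapped to
 * 192.0.2.1:5060, a second flow from 10.0.0.2:5060 to a different
 * destination hashes into the same bysource bucket, same_src()
 * matches, and inverting the stored reply tuple reproduces the
 * 192.0.2.1:5060 source mapping -- which is then reused, provided it
 * still satisfies the caller's range.
 */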

/* For [FUTURE] fragmentation handling, we want the least-used
 * src-ip/dst-ip/proto triple.  Fairness doesn't come into it.  Thus
 * if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
 * 1-65535, we don't do pro-rata allocation based on ports; we choose
 * the ip with the lowest src-ip/dst-ip/proto usage.
 */
static void
find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
		    const struct nf_nat_range *range,
		    const struct nf_conn *ct,
		    enum nf_nat_manip_type maniptype)
{
	union nf_inet_addr *var_ipp;
	unsigned int i, max;
	/* Host order */
	u32 minip, maxip, j, dist;
	bool full_range;

	/* No IP mapping?  Do nothing. */
	if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
		return;

	if (maniptype == NF_NAT_MANIP_SRC)
		var_ipp = &tuple->src.u3;
	else
		var_ipp = &tuple->dst.u3;

	/* Fast path: only one choice. */
	if (nf_inet_addr_cmp(&range->min_addr, &range->max_addr)) {
		*var_ipp = range->min_addr;
		return;
	}

	if (nf_ct_l3num(ct) == NFPROTO_IPV4)
		max = sizeof(var_ipp->ip) / sizeof(u32) - 1;
	else
		max = sizeof(var_ipp->ip6) / sizeof(u32) - 1;

	/* Hashing source and destination IPs gives a fairly even
	 * spread in practice (if there are a small number of IPs
	 * involved, there usually aren't that many connections
	 * anyway).  The consistency means that servers see the same
	 * client coming from the same IP (some Internet Banking sites
	 * like this), even across reboots.
	 */
	j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3) / sizeof(u32),
		   range->flags & NF_NAT_RANGE_PERSISTENT ?
			0 : (__force u32)tuple->dst.u3.all[max] ^ zone);

	full_range = false;
	for (i = 0; i <= max; i++) {
		/* If first bytes of the address are at the maximum, use the
		 * distance.  Otherwise use the full range.
		 */
		if (!full_range) {
			minip = ntohl((__force __be32)range->min_addr.all[i]);
			maxip = ntohl((__force __be32)range->max_addr.all[i]);
			dist  = maxip - minip + 1;
		} else {
			minip = 0;
			dist  = ~0;
		}

		var_ipp->all[i] = (__force __u32)
			htonl(minip + (((u64)j * dist) >> 32));
		if (var_ipp->all[i] != range->max_addr.all[i])
			full_range = true;

		if (!(range->flags & NF_NAT_RANGE_PERSISTENT))
			j ^= (__force u32)tuple->dst.u3.all[i];
	}
}
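
/* Worked example for the loop above (IPv4, so max == 0; values
 * illustrative): with range 10.0.0.1 - 10.0.0.5, dist = 5 and the
 * chosen address is 10.0.0.1 + (((u64)j * 5) >> 32), i.e. the hash j
 * scaled onto the five candidates; j = 0x80000000 yields offset 2,
 * selecting 10.0.0.3.  Because j is derived from the flow keys, the
 * same client consistently picks the same address.
 */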

/* Manipulate the tuple into the range given.  For NF_INET_POST_ROUTING,
 * we change the source to map into the range.  For NF_INET_PRE_ROUTING
 * and NF_INET_LOCAL_OUT, we change the destination to map into the
 * range.  It might not be possible to get a unique tuple, but we try.
 * At worst (or if we race), we will end up with a final duplicate in
 * __ip_conntrack_confirm and drop the packet. */
static void
get_unique_tuple(struct nf_conntrack_tuple *tuple,
		 const struct nf_conntrack_tuple *orig_tuple,
		 const struct nf_nat_range *range,
		 struct nf_conn *ct,
		 enum nf_nat_manip_type maniptype)
{
	const struct nf_nat_l3proto *l3proto;
	const struct nf_nat_l4proto *l4proto;
	struct net *net = nf_ct_net(ct);
	u16 zone = nf_ct_zone(ct);

	rcu_read_lock();
	l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num);
	l4proto = __nf_nat_l4proto_find(orig_tuple->src.l3num,
					orig_tuple->dst.protonum);

	/* 1) If this srcip/proto/src-proto-part is currently mapped,
	 * and that same mapping gives a unique tuple within the given
	 * range, use that.
	 *
	 * This is only required for source (ie. NAT/masq) mappings.
	 * So far, we don't do local source mappings, so multiple
	 * manips are not an issue.
	 */
	if (maniptype == NF_NAT_MANIP_SRC &&
	    !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
		/* try the original tuple first */
		if (in_range(l3proto, l4proto, orig_tuple, range)) {
			if (!nf_nat_used_tuple(orig_tuple, ct)) {
				*tuple = *orig_tuple;
				goto out;
			}
		} else if (find_appropriate_src(net, zone, l3proto, l4proto,
						orig_tuple, tuple, range)) {
			pr_debug("get_unique_tuple: Found current src map\n");
			if (!nf_nat_used_tuple(tuple, ct))
				goto out;
		}
	}

	/* 2) Select the least-used IP/proto combination in the given range */
	*tuple = *orig_tuple;
	find_best_ips_proto(zone, tuple, range, ct, maniptype);

	/* 3) The per-protocol part of the manip is made to map into
	 * the range to make a unique tuple.
	 */

	/* Only bother mapping if it's not already in range and unique */
	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
			if (l4proto->in_range(tuple, maniptype,
					      &range->min_proto,
					      &range->max_proto) &&
			    (range->min_proto.all == range->max_proto.all ||
			     !nf_nat_used_tuple(tuple, ct)))
				goto out;
		} else if (!nf_nat_used_tuple(tuple, ct)) {
			goto out;
		}
	}

	/* Last chance: get protocol to try to obtain unique tuple. */
	l4proto->unique_tuple(l3proto, tuple, range, maniptype, ct);
out:
	rcu_read_unlock();
}

unsigned int
nf_nat_setup_info(struct nf_conn *ct,
		  const struct nf_nat_range *range,
		  enum nf_nat_manip_type maniptype)
{
	struct net *net = nf_ct_net(ct);
	struct nf_conntrack_tuple curr_tuple, new_tuple;
	struct nf_conn_nat *nat;

	/* nat helper or nfctnetlink also setup binding */
	nat = nfct_nat(ct);
	if (!nat) {
		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
		if (nat == NULL) {
			pr_debug("failed to add NAT extension\n");
			return NF_ACCEPT;
		}
	}

	NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
		     maniptype == NF_NAT_MANIP_DST);
	BUG_ON(nf_nat_initialized(ct, maniptype));

	/* What we've got will look like inverse of reply.  Normally
	 * this is what is in the conntrack, except for prior
	 * manipulations (future optimization: if num_manips == 0,
	 * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
	 */
	nf_ct_invert_tuplepr(&curr_tuple,
			     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);

	if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
		struct nf_conntrack_tuple reply;

		/* Alter conntrack table so it will recognize replies. */
		nf_ct_invert_tuplepr(&reply, &new_tuple);
		nf_conntrack_alter_reply(ct, &reply);

		/* Non-atomic: we own this at the moment. */
		if (maniptype == NF_NAT_MANIP_SRC)
			ct->status |= IPS_SRC_NAT;
		else
			ct->status |= IPS_DST_NAT;
	}

	if (maniptype == NF_NAT_MANIP_SRC) {
		unsigned int srchash;

		srchash = hash_by_src(net, nf_ct_zone(ct),
				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
		spin_lock_bh(&nf_nat_lock);
		/* nf_conntrack_alter_reply might re-allocate extension area */
		nat = nfct_nat(ct);
		nat->ct = ct;
		hlist_add_head_rcu(&nat->bysource,
				   &net->ct.nat_bysource[srchash]);
		spin_unlock_bh(&nf_nat_lock);
	}

	/* It's done. */
	if (maniptype == NF_NAT_MANIP_DST)
		ct->status |= IPS_DST_NAT_DONE;
	else
		ct->status |= IPS_SRC_NAT_DONE;

	return NF_ACCEPT;
}
EXPORT_SYMBOL(nf_nat_setup_info);
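
/* Usage sketch: a NAT target typically fills an nf_nat_range and
 * hands it to nf_nat_setup_info() from its hook function.  The
 * fragment below is illustrative only (no such target lives in this
 * file); the manip type is normally derived from the hook via
 * HOOK2MANIP():
 *
 *	struct nf_nat_range range = {
 *		.flags       = NF_NAT_RANGE_MAP_IPS,
 *		.min_addr.ip = htonl(0xc0000201),	(192.0.2.1)
 *		.max_addr.ip = htonl(0xc0000201),
 *	};
 *	return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
 */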

/* Do packet manipulations according to nf_nat_setup_info. */
unsigned int nf_nat_packet(struct nf_conn *ct,
			   enum ip_conntrack_info ctinfo,
			   unsigned int hooknum,
			   struct sk_buff *skb)
{
	const struct nf_nat_l3proto *l3proto;
	const struct nf_nat_l4proto *l4proto;
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	unsigned long statusbit;
	enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);

	if (mtype == NF_NAT_MANIP_SRC)
		statusbit = IPS_SRC_NAT;
	else
		statusbit = IPS_DST_NAT;

	/* Invert if this is reply dir. */
	if (dir == IP_CT_DIR_REPLY)
		statusbit ^= IPS_NAT_MASK;

	/* Non-atomic: these bits don't change. */
	if (ct->status & statusbit) {
		struct nf_conntrack_tuple target;

		/* We are aiming to look like inverse of other direction. */
		nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);

		l3proto = __nf_nat_l3proto_find(target.src.l3num);
		l4proto = __nf_nat_l4proto_find(target.src.l3num,
						target.dst.protonum);
		if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype))
			return NF_DROP;
	}
	return NF_ACCEPT;
}
EXPORT_SYMBOL_GPL(nf_nat_packet);

struct nf_nat_proto_clean {
	u8 l3proto;
	u8 l4proto;
};

/* Kill conntracks with affected NAT section */
static int nf_nat_proto_remove(struct nf_conn *i, void *data)
{
	const struct nf_nat_proto_clean *clean = data;
	struct nf_conn_nat *nat = nfct_nat(i);

	if (!nat)
		return 0;

	if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) ||
	    (clean->l4proto && nf_ct_protonum(i) != clean->l4proto))
		return 0;

	return i->status & IPS_NAT_MASK ? 1 : 0;
}

static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
{
	struct nf_nat_proto_clean clean = {
		.l3proto = l3proto,
		.l4proto = l4proto,
	};
	struct net *net;

	rtnl_lock();
	for_each_net(net)
		nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean);
	rtnl_unlock();
}

static void nf_nat_l3proto_clean(u8 l3proto)
{
	struct nf_nat_proto_clean clean = {
		.l3proto = l3proto,
	};
	struct net *net;

	rtnl_lock();
	for_each_net(net)
		nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean);
	rtnl_unlock();
}
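
/* Note on the two cleanup helpers above: they walk every network
 * namespace and kill any conntrack whose NAT state references the
 * protocol being torn down -- nf_nat_proto_remove() returning 1 asks
 * nf_ct_iterate_cleanup() to delete the entry.  This keeps stale NAT
 * mappings from outliving an unloaded l3/l4 protocol module.
 */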

/* Protocol registration. */
int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto)
{
	const struct nf_nat_l4proto **l4protos;
	unsigned int i;
	int ret = 0;

	mutex_lock(&nf_nat_proto_mutex);
	if (nf_nat_l4protos[l3proto] == NULL) {
		l4protos = kmalloc(IPPROTO_MAX * sizeof(struct nf_nat_l4proto *),
				   GFP_KERNEL);
		if (l4protos == NULL) {
			ret = -ENOMEM;
			goto out;
		}

		for (i = 0; i < IPPROTO_MAX; i++)
			RCU_INIT_POINTER(l4protos[i], &nf_nat_l4proto_unknown);

		/* Before making proto_array visible to lockless readers,
		 * we must make sure its content is committed to memory.
		 */
		smp_wmb();

		nf_nat_l4protos[l3proto] = l4protos;
	}

	if (rcu_dereference_protected(
			nf_nat_l4protos[l3proto][l4proto->l4proto],
			lockdep_is_held(&nf_nat_proto_mutex)
			) != &nf_nat_l4proto_unknown) {
		ret = -EBUSY;
		goto out;
	}
	RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], l4proto);
 out:
	mutex_unlock(&nf_nat_proto_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_l4proto_register);

/* No one stores the protocol anywhere; simply delete it. */
void nf_nat_l4proto_unregister(u8 l3proto, const struct nf_nat_l4proto *l4proto)
{
	mutex_lock(&nf_nat_proto_mutex);
	RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto],
			 &nf_nat_l4proto_unknown);
	mutex_unlock(&nf_nat_proto_mutex);
	synchronize_rcu();

	nf_nat_l4proto_clean(l3proto, l4proto->l4proto);
}
EXPORT_SYMBOL_GPL(nf_nat_l4proto_unregister);

int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
{
	int err;

	err = nf_ct_l3proto_try_module_get(l3proto->l3proto);
	if (err < 0)
		return err;

	mutex_lock(&nf_nat_proto_mutex);
	RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_TCP],
			 &nf_nat_l4proto_tcp);
	RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP],
			 &nf_nat_l4proto_udp);
	mutex_unlock(&nf_nat_proto_mutex);

	RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto);
	return 0;
}
EXPORT_SYMBOL_GPL(nf_nat_l3proto_register);

void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *l3proto)
{
	mutex_lock(&nf_nat_proto_mutex);
	RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], NULL);
	mutex_unlock(&nf_nat_proto_mutex);
	synchronize_rcu();

	nf_nat_l3proto_clean(l3proto->l3proto);
	nf_ct_l3proto_module_put(l3proto->l3proto);
}
EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
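
/* Sketch of the intended calling sequence, as seen from an
 * address-family module (symbol names follow the in-tree IPv4 module
 * and are shown for illustration; this file does not define them):
 *
 *	err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4);
 *	if (err < 0)
 *		return err;
 *	err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
 *
 * TCP and UDP need no explicit nf_nat_l4proto_register() call, since
 * nf_nat_l3proto_register() wires them up by default above.
 */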

/* No one using conntrack by the time this is called. */
static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
	struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);

	if (nat == NULL || nat->ct == NULL)
		return;

	NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE);

	spin_lock_bh(&nf_nat_lock);
	hlist_del_rcu(&nat->bysource);
	spin_unlock_bh(&nf_nat_lock);
}

static void nf_nat_move_storage(void *new, void *old)
{
	struct nf_conn_nat *new_nat = new;
	struct nf_conn_nat *old_nat = old;
	struct nf_conn *ct = old_nat->ct;

	if (!ct || !(ct->status & IPS_SRC_NAT_DONE))
		return;

	spin_lock_bh(&nf_nat_lock);
	hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
	spin_unlock_bh(&nf_nat_lock);
}

static struct nf_ct_ext_type nat_extend __read_mostly = {
	.len		= sizeof(struct nf_conn_nat),
	.align		= __alignof__(struct nf_conn_nat),
	.destroy	= nf_nat_cleanup_conntrack,
	.move		= nf_nat_move_storage,
	.id		= NF_CT_EXT_NAT,
	.flags		= NF_CT_EXT_F_PREALLOC,
};

#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)

#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>

static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
	[CTA_PROTONAT_PORT_MIN]	= { .type = NLA_U16 },
	[CTA_PROTONAT_PORT_MAX]	= { .type = NLA_U16 },
};

static int nfnetlink_parse_nat_proto(struct nlattr *attr,
				     const struct nf_conn *ct,
				     struct nf_nat_range *range)
{
	struct nlattr *tb[CTA_PROTONAT_MAX+1];
	const struct nf_nat_l4proto *l4proto;
	int err;

	err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
	if (err < 0)
		return err;

	l4proto = __nf_nat_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
	if (l4proto->nlattr_to_range)
		err = l4proto->nlattr_to_range(tb, range);

	return err;
}

static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
	[CTA_NAT_V4_MINIP]	= { .type = NLA_U32 },
	[CTA_NAT_V4_MAXIP]	= { .type = NLA_U32 },
	[CTA_NAT_V6_MINIP]	= { .len = sizeof(struct in6_addr) },
	[CTA_NAT_V6_MAXIP]	= { .len = sizeof(struct in6_addr) },
	[CTA_NAT_PROTO]		= { .type = NLA_NESTED },
};

static int
nfnetlink_parse_nat(const struct nlattr *nat,
		    const struct nf_conn *ct, struct nf_nat_range *range)
{
	const struct nf_nat_l3proto *l3proto;
	struct nlattr *tb[CTA_NAT_MAX+1];
	int err;

	memset(range, 0, sizeof(*range));

	err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy);
	if (err < 0)
		return err;

	rcu_read_lock();
	l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
	if (l3proto == NULL) {
		err = -EAGAIN;
		goto out;
	}
	err = l3proto->nlattr_to_range(tb, range);
	if (err < 0)
		goto out;

	if (!tb[CTA_NAT_PROTO])
		goto out;

	err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
out:
	rcu_read_unlock();
	return err;
}

static int
nfnetlink_parse_nat_setup(struct nf_conn *ct,
			  enum nf_nat_manip_type manip,
			  const struct nlattr *attr)
{
	struct nf_nat_range range;
	int err;

	err = nfnetlink_parse_nat(attr, ct, &range);
	if (err < 0)
		return err;
	if (nf_nat_initialized(ct, manip))
		return -EEXIST;

	return nf_nat_setup_info(ct, &range, manip);
}
#else
static int
nfnetlink_parse_nat_setup(struct nf_conn *ct,
			  enum nf_nat_manip_type manip,
			  const struct nlattr *attr)
{
	return -EOPNOTSUPP;
}
#endif
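
/* Note on the netlink path above: userspace (e.g. a ctnetlink client
 * creating a conntrack entry) supplies the NAT range as a nested
 * CTA_NAT attribute -- CTA_NAT_V4_MINIP/MAXIP or the V6 equivalents
 * for the address bounds, plus an optional nested CTA_NAT_PROTO
 * carrying CTA_PROTONAT_PORT_MIN/MAX -- which nfnetlink_parse_nat()
 * converts into the same struct nf_nat_range that
 * nf_nat_setup_info() consumes.
 */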

static int __net_init nf_nat_net_init(struct net *net)
{
	/* Leave them the same for the moment. */
	net->ct.nat_htable_size = net->ct.htable_size;
	net->ct.nat_bysource = nf_ct_alloc_hashtable(&net->ct.nat_htable_size, 0);
	if (!net->ct.nat_bysource)
		return -ENOMEM;
	return 0;
}

static void __net_exit nf_nat_net_exit(struct net *net)
{
	struct nf_nat_proto_clean clean = {};

	nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean);
	synchronize_rcu();
	nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
}

static struct pernet_operations nf_nat_net_ops = {
	.init = nf_nat_net_init,
	.exit = nf_nat_net_exit,
};

static struct nf_ct_helper_expectfn follow_master_nat = {
	.name		= "nat-follow-master",
	.expectfn	= nf_nat_follow_master,
};

static struct nfq_ct_nat_hook nfq_ct_nat = {
	.seq_adjust	= nf_nat_tcp_seq_adjust,
};

static int __init nf_nat_init(void)
{
	int ret;

	ret = nf_ct_extend_register(&nat_extend);
	if (ret < 0) {
		printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
		return ret;
	}

	ret = register_pernet_subsys(&nf_nat_net_ops);
	if (ret < 0)
		goto cleanup_extend;

	nf_ct_helper_expectfn_register(&follow_master_nat);

	/* Initialize fake conntrack so that NAT will skip it */
	nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);

	BUG_ON(nf_nat_seq_adjust_hook != NULL);
	RCU_INIT_POINTER(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
	BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
	RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook,
			 nfnetlink_parse_nat_setup);
	BUG_ON(nf_ct_nat_offset != NULL);
	RCU_INIT_POINTER(nf_ct_nat_offset, nf_nat_get_offset);
	RCU_INIT_POINTER(nfq_ct_nat_hook, &nfq_ct_nat);
#ifdef CONFIG_XFRM
	BUG_ON(nf_nat_decode_session_hook != NULL);
	RCU_INIT_POINTER(nf_nat_decode_session_hook, __nf_nat_decode_session);
#endif
	return 0;

 cleanup_extend:
	nf_ct_extend_unregister(&nat_extend);
	return ret;
}

static void __exit nf_nat_cleanup(void)
{
	unsigned int i;

	unregister_pernet_subsys(&nf_nat_net_ops);
	nf_ct_extend_unregister(&nat_extend);
	nf_ct_helper_expectfn_unregister(&follow_master_nat);
	RCU_INIT_POINTER(nf_nat_seq_adjust_hook, NULL);
	RCU_INIT_POINTER(nfnetlink_parse_nat_setup_hook, NULL);
	RCU_INIT_POINTER(nf_ct_nat_offset, NULL);
	RCU_INIT_POINTER(nfq_ct_nat_hook, NULL);
#ifdef CONFIG_XFRM
	RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL);
#endif
	for (i = 0; i < NFPROTO_NUMPROTO; i++)
		kfree(nf_nat_l4protos[i]);
	synchronize_net();
}

MODULE_LICENSE("GPL");

module_init(nf_nat_init);
module_exit(nf_nat_cleanup);