1/* 2 * (C) 1999-2001 Paul `Rusty' Russell 3 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> 4 * (C) 2011 Patrick McHardy <kaber@trash.net> 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 as 8 * published by the Free Software Foundation. 9 */ 10 11#include <linux/types.h> 12#include <linux/module.h> 13#include <linux/skbuff.h> 14#include <linux/ip.h> 15#include <linux/icmp.h> 16#include <linux/netfilter.h> 17#include <linux/netfilter_ipv4.h> 18#include <net/secure_seq.h> 19#include <net/checksum.h> 20#include <net/route.h> 21#include <net/ip.h> 22 23#include <net/netfilter/nf_conntrack_core.h> 24#include <net/netfilter/nf_conntrack.h> 25#include <net/netfilter/nf_nat_core.h> 26#include <net/netfilter/nf_nat_l3proto.h> 27#include <net/netfilter/nf_nat_l4proto.h> 28 29static const struct nf_nat_l3proto nf_nat_l3proto_ipv4; 30 31#ifdef CONFIG_XFRM 32static void nf_nat_ipv4_decode_session(struct sk_buff *skb, 33 const struct nf_conn *ct, 34 enum ip_conntrack_dir dir, 35 unsigned long statusbit, 36 struct flowi *fl) 37{ 38 const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple; 39 struct flowi4 *fl4 = &fl->u.ip4; 40 41 if (ct->status & statusbit) { 42 fl4->daddr = t->dst.u3.ip; 43 if (t->dst.protonum == IPPROTO_TCP || 44 t->dst.protonum == IPPROTO_UDP || 45 t->dst.protonum == IPPROTO_UDPLITE || 46 t->dst.protonum == IPPROTO_DCCP || 47 t->dst.protonum == IPPROTO_SCTP) 48 fl4->fl4_dport = t->dst.u.all; 49 } 50 51 statusbit ^= IPS_NAT_MASK; 52 53 if (ct->status & statusbit) { 54 fl4->saddr = t->src.u3.ip; 55 if (t->dst.protonum == IPPROTO_TCP || 56 t->dst.protonum == IPPROTO_UDP || 57 t->dst.protonum == IPPROTO_UDPLITE || 58 t->dst.protonum == IPPROTO_DCCP || 59 t->dst.protonum == IPPROTO_SCTP) 60 fl4->fl4_sport = t->src.u.all; 61 } 62} 63#endif /* CONFIG_XFRM */ 64 65static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t, 66 const struct nf_nat_range *range) 67{ 68 return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) && 69 ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip); 70} 71 72static u32 nf_nat_ipv4_secure_port(const struct nf_conntrack_tuple *t, 73 __be16 dport) 74{ 75 return secure_ipv4_port_ephemeral(t->src.u3.ip, t->dst.u3.ip, dport); 76} 77 78static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb, 79 unsigned int iphdroff, 80 const struct nf_nat_l4proto *l4proto, 81 const struct nf_conntrack_tuple *target, 82 enum nf_nat_manip_type maniptype) 83{ 84 struct iphdr *iph; 85 unsigned int hdroff; 86 87 if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) 88 return false; 89 90 iph = (void *)skb->data + iphdroff; 91 hdroff = iphdroff + iph->ihl * 4; 92 93 if (!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, hdroff, 94 target, maniptype)) 95 return false; 96 iph = (void *)skb->data + iphdroff; 97 98 if (maniptype == NF_NAT_MANIP_SRC) { 99 csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); 100 iph->saddr = target->src.u3.ip; 101 } else { 102 csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); 103 iph->daddr = target->dst.u3.ip; 104 } 105 return true; 106} 107 108static void nf_nat_ipv4_csum_update(struct sk_buff *skb, 109 unsigned int iphdroff, __sum16 *check, 110 const struct nf_conntrack_tuple *t, 111 enum nf_nat_manip_type maniptype) 112{ 113 struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); 114 __be32 oldip, newip; 115 116 if (maniptype == NF_NAT_MANIP_SRC) { 117 oldip = iph->saddr; 118 newip = t->src.u3.ip; 119 } else { 120 oldip = iph->daddr; 121 newip = t->dst.u3.ip; 122 } 123 inet_proto_csum_replace4(check, skb, oldip, newip, 1); 124} 125 126static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, 127 u8 proto, void *data, __sum16 *check, 128 int datalen, int oldlen) 129{ 130 const struct iphdr *iph = ip_hdr(skb); 131 struct rtable *rt = skb_rtable(skb); 132 133 if (skb->ip_summed != CHECKSUM_PARTIAL) { 134 if (!(rt->rt_flags & RTCF_LOCAL) && 135 (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) { 136 skb->ip_summed = CHECKSUM_PARTIAL; 137 skb->csum_start = skb_headroom(skb) + 138 skb_network_offset(skb) + 139 ip_hdrlen(skb); 140 skb->csum_offset = (void *)check - data; 141 *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 142 datalen, proto, 0); 143 } else { 144 *check = 0; 145 *check = csum_tcpudp_magic(iph->saddr, iph->daddr, 146 datalen, proto, 147 csum_partial(data, datalen, 148 0)); 149 if (proto == IPPROTO_UDP && !*check) 150 *check = CSUM_MANGLED_0; 151 } 152 } else 153 inet_proto_csum_replace2(check, skb, 154 htons(oldlen), htons(datalen), 1); 155} 156 157#if IS_ENABLED(CONFIG_NF_CT_NETLINK) 158static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[], 159 struct nf_nat_range *range) 160{ 161 if (tb[CTA_NAT_V4_MINIP]) { 162 range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]); 163 range->flags |= NF_NAT_RANGE_MAP_IPS; 164 } 165 166 if (tb[CTA_NAT_V4_MAXIP]) 167 range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]); 168 else 169 range->max_addr.ip = range->min_addr.ip; 170 171 return 0; 172} 173#endif 174 175static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = { 176 .l3proto = NFPROTO_IPV4, 177 .in_range = nf_nat_ipv4_in_range, 178 .secure_port = nf_nat_ipv4_secure_port, 179 .manip_pkt = nf_nat_ipv4_manip_pkt, 180 .csum_update = nf_nat_ipv4_csum_update, 181 .csum_recalc = nf_nat_ipv4_csum_recalc, 182#if IS_ENABLED(CONFIG_NF_CT_NETLINK) 183 .nlattr_to_range = nf_nat_ipv4_nlattr_to_range, 184#endif 185#ifdef CONFIG_XFRM 186 .decode_session = nf_nat_ipv4_decode_session, 187#endif 188}; 189 190int nf_nat_icmp_reply_translation(struct sk_buff *skb, 191 struct nf_conn *ct, 192 enum ip_conntrack_info ctinfo, 193 unsigned int hooknum) 194{ 195 struct { 196 struct icmphdr icmp; 197 struct iphdr ip; 198 } *inside; 199 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 200 enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); 201 unsigned int hdrlen = ip_hdrlen(skb); 202 const struct nf_nat_l4proto *l4proto; 203 struct nf_conntrack_tuple target; 204 unsigned long statusbit; 205 206 NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY); 207 208 if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) 209 return 0; 210 if (nf_ip_checksum(skb, hooknum, hdrlen, 0)) 211 return 0; 212 213 inside = (void *)skb->data + hdrlen; 214 if (inside->icmp.type == ICMP_REDIRECT) { 215 if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) 216 return 0; 217 if (ct->status & IPS_NAT_MASK) 218 return 0; 219 } 220 221 if (manip == NF_NAT_MANIP_SRC) 222 statusbit = IPS_SRC_NAT; 223 else 224 statusbit = IPS_DST_NAT; 225 226 /* Invert if this is reply direction */ 227 if (dir == IP_CT_DIR_REPLY) 228 statusbit ^= IPS_NAT_MASK; 229 230 if (!(ct->status & statusbit)) 231 return 1; 232 233 l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, inside->ip.protocol); 234 if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp), 235 l4proto, &ct->tuplehash[!dir].tuple, !manip)) 236 return 0; 237 238 if (skb->ip_summed != CHECKSUM_PARTIAL) { 239 /* Reloading "inside" here since manip_pkt may reallocate */ 240 inside = (void *)skb->data + hdrlen; 241 inside->icmp.checksum = 0; 242 inside->icmp.checksum = 243 csum_fold(skb_checksum(skb, hdrlen, 244 skb->len - hdrlen, 0)); 245 } 246 247 /* Change outer to look like the reply to an incoming packet */ 248 nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); 249 l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, 0); 250 if (!nf_nat_ipv4_manip_pkt(skb, 0, l4proto, &target, manip)) 251 return 0; 252 253 return 1; 254} 255EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); 256 257unsigned int 258nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, 259 const struct net_device *in, const struct net_device *out, 260 unsigned int (*do_chain)(const struct nf_hook_ops *ops, 261 struct sk_buff *skb, 262 const struct net_device *in, 263 const struct net_device *out, 264 struct nf_conn *ct)) 265{ 266 struct nf_conn *ct; 267 enum ip_conntrack_info ctinfo; 268 struct nf_conn_nat *nat; 269 /* maniptype == SRC for postrouting. */ 270 enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); 271 272 /* We never see fragments: conntrack defrags on pre-routing 273 * and local-out, and nf_nat_out protects post-routing. 274 */ 275 NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); 276 277 ct = nf_ct_get(skb, &ctinfo); 278 /* Can't track? It's not due to stress, or conntrack would 279 * have dropped it. Hence it's the user's responsibilty to 280 * packet filter it out, or implement conntrack/NAT for that 281 * protocol. 8) --RR 282 */ 283 if (!ct) 284 return NF_ACCEPT; 285 286 /* Don't try to NAT if this packet is not conntracked */ 287 if (nf_ct_is_untracked(ct)) 288 return NF_ACCEPT; 289 290 nat = nf_ct_nat_ext_add(ct); 291 if (nat == NULL) 292 return NF_ACCEPT; 293 294 switch (ctinfo) { 295 case IP_CT_RELATED: 296 case IP_CT_RELATED_REPLY: 297 if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { 298 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, 299 ops->hooknum)) 300 return NF_DROP; 301 else 302 return NF_ACCEPT; 303 } 304 /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ 305 case IP_CT_NEW: 306 /* Seen it before? This can happen for loopback, retrans, 307 * or local packets. 308 */ 309 if (!nf_nat_initialized(ct, maniptype)) { 310 unsigned int ret; 311 312 ret = do_chain(ops, skb, in, out, ct); 313 if (ret != NF_ACCEPT) 314 return ret; 315 316 if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum))) 317 break; 318 319 ret = nf_nat_alloc_null_binding(ct, ops->hooknum); 320 if (ret != NF_ACCEPT) 321 return ret; 322 } else { 323 pr_debug("Already setup manip %s for ct %p\n", 324 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", 325 ct); 326 if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) 327 goto oif_changed; 328 } 329 break; 330 331 default: 332 /* ESTABLISHED */ 333 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || 334 ctinfo == IP_CT_ESTABLISHED_REPLY); 335 if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) 336 goto oif_changed; 337 } 338 339 return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); 340 341oif_changed: 342 nf_ct_kill_acct(ct, ctinfo, skb); 343 return NF_DROP; 344} 345EXPORT_SYMBOL_GPL(nf_nat_ipv4_fn); 346 347unsigned int 348nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, 349 const struct net_device *in, const struct net_device *out, 350 unsigned int (*do_chain)(const struct nf_hook_ops *ops, 351 struct sk_buff *skb, 352 const struct net_device *in, 353 const struct net_device *out, 354 struct nf_conn *ct)) 355{ 356 unsigned int ret; 357 __be32 daddr = ip_hdr(skb)->daddr; 358 359 ret = nf_nat_ipv4_fn(ops, skb, in, out, do_chain); 360 if (ret != NF_DROP && ret != NF_STOLEN && 361 daddr != ip_hdr(skb)->daddr) 362 skb_dst_drop(skb); 363 364 return ret; 365} 366EXPORT_SYMBOL_GPL(nf_nat_ipv4_in); 367 368unsigned int 369nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, 370 const struct net_device *in, const struct net_device *out, 371 unsigned int (*do_chain)(const struct nf_hook_ops *ops, 372 struct sk_buff *skb, 373 const struct net_device *in, 374 const struct net_device *out, 375 struct nf_conn *ct)) 376{ 377#ifdef CONFIG_XFRM 378 const struct nf_conn *ct; 379 enum ip_conntrack_info ctinfo; 380 int err; 381#endif 382 unsigned int ret; 383 384 /* root is playing with raw sockets. */ 385 if (skb->len < sizeof(struct iphdr) || 386 ip_hdrlen(skb) < sizeof(struct iphdr)) 387 return NF_ACCEPT; 388 389 ret = nf_nat_ipv4_fn(ops, skb, in, out, do_chain); 390#ifdef CONFIG_XFRM 391 if (ret != NF_DROP && ret != NF_STOLEN && 392 !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && 393 (ct = nf_ct_get(skb, &ctinfo)) != NULL) { 394 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 395 396 if ((ct->tuplehash[dir].tuple.src.u3.ip != 397 ct->tuplehash[!dir].tuple.dst.u3.ip) || 398 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && 399 ct->tuplehash[dir].tuple.src.u.all != 400 ct->tuplehash[!dir].tuple.dst.u.all)) { 401 err = nf_xfrm_me_harder(skb, AF_INET); 402 if (err < 0) 403 ret = NF_DROP_ERR(err); 404 } 405 } 406#endif 407 return ret; 408} 409EXPORT_SYMBOL_GPL(nf_nat_ipv4_out); 410 411unsigned int 412nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, 413 const struct net_device *in, const struct net_device *out, 414 unsigned int (*do_chain)(const struct nf_hook_ops *ops, 415 struct sk_buff *skb, 416 const struct net_device *in, 417 const struct net_device *out, 418 struct nf_conn *ct)) 419{ 420 const struct nf_conn *ct; 421 enum ip_conntrack_info ctinfo; 422 unsigned int ret; 423 int err; 424 425 /* root is playing with raw sockets. */ 426 if (skb->len < sizeof(struct iphdr) || 427 ip_hdrlen(skb) < sizeof(struct iphdr)) 428 return NF_ACCEPT; 429 430 ret = nf_nat_ipv4_fn(ops, skb, in, out, do_chain); 431 if (ret != NF_DROP && ret != NF_STOLEN && 432 (ct = nf_ct_get(skb, &ctinfo)) != NULL) { 433 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 434 435 if (ct->tuplehash[dir].tuple.dst.u3.ip != 436 ct->tuplehash[!dir].tuple.src.u3.ip) { 437 err = ip_route_me_harder(skb, RTN_UNSPEC); 438 if (err < 0) 439 ret = NF_DROP_ERR(err); 440 } 441#ifdef CONFIG_XFRM 442 else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && 443 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && 444 ct->tuplehash[dir].tuple.dst.u.all != 445 ct->tuplehash[!dir].tuple.src.u.all) { 446 err = nf_xfrm_me_harder(skb, AF_INET); 447 if (err < 0) 448 ret = NF_DROP_ERR(err); 449 } 450#endif 451 } 452 return ret; 453} 454EXPORT_SYMBOL_GPL(nf_nat_ipv4_local_fn); 455 456static int __init nf_nat_l3proto_ipv4_init(void) 457{ 458 int err; 459 460 err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_icmp); 461 if (err < 0) 462 goto err1; 463 err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4); 464 if (err < 0) 465 goto err2; 466 return err; 467 468err2: 469 nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp); 470err1: 471 return err; 472} 473 474static void __exit nf_nat_l3proto_ipv4_exit(void) 475{ 476 nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4); 477 nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp); 478} 479 480MODULE_LICENSE("GPL"); 481MODULE_ALIAS("nf-nat-" __stringify(AF_INET)); 482 483module_init(nf_nat_l3proto_ipv4_init); 484module_exit(nf_nat_l3proto_ipv4_exit); 485