1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
3 * (C) 2011 Patrick McHardy <kaber@trash.net>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/module.h>
11#include <linux/netfilter.h>
12#include <linux/netfilter_ipv4.h>
13#include <linux/netfilter_ipv4/ip_tables.h>
14#include <linux/ip.h>
15#include <net/ip.h>
16
17#include <net/netfilter/nf_nat.h>
18#include <net/netfilter/nf_nat_core.h>
19#include <net/netfilter/nf_nat_l3proto.h>
20
21static const struct xt_table nf_nat_ipv4_table = {
22	.name		= "nat",
23	.valid_hooks	= (1 << NF_INET_PRE_ROUTING) |
24			  (1 << NF_INET_POST_ROUTING) |
25			  (1 << NF_INET_LOCAL_OUT) |
26			  (1 << NF_INET_LOCAL_IN),
27	.me		= THIS_MODULE,
28	.af		= NFPROTO_IPV4,
29};
30
31static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
32{
33	/* Force range to this IP; let proto decide mapping for
34	 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
35	 */
36	struct nf_nat_range range;
37
38	range.flags = 0;
39	pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
40		 HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
41		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
42		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
43
44	return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
45}
46
47static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
48				     const struct net_device *in,
49				     const struct net_device *out,
50				     struct nf_conn *ct)
51{
52	struct net *net = nf_ct_net(ct);
53	unsigned int ret;
54
55	ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table);
56	if (ret == NF_ACCEPT) {
57		if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
58			ret = alloc_null_binding(ct, hooknum);
59	}
60	return ret;
61}
62
63static unsigned int
64nf_nat_ipv4_fn(unsigned int hooknum,
65	       struct sk_buff *skb,
66	       const struct net_device *in,
67	       const struct net_device *out,
68	       int (*okfn)(struct sk_buff *))
69{
70	struct nf_conn *ct;
71	enum ip_conntrack_info ctinfo;
72	struct nf_conn_nat *nat;
73	/* maniptype == SRC for postrouting. */
74	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
75
76	/* We never see fragments: conntrack defrags on pre-routing
77	 * and local-out, and nf_nat_out protects post-routing.
78	 */
79	NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
80
81	ct = nf_ct_get(skb, &ctinfo);
82	/* Can't track?  It's not due to stress, or conntrack would
83	 * have dropped it.  Hence it's the user's responsibilty to
84	 * packet filter it out, or implement conntrack/NAT for that
85	 * protocol. 8) --RR
86	 */
87	if (!ct)
88		return NF_ACCEPT;
89
90	/* Don't try to NAT if this packet is not conntracked */
91	if (nf_ct_is_untracked(ct))
92		return NF_ACCEPT;
93
94	nat = nfct_nat(ct);
95	if (!nat) {
96		/* NAT module was loaded late. */
97		if (nf_ct_is_confirmed(ct))
98			return NF_ACCEPT;
99		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
100		if (nat == NULL) {
101			pr_debug("failed to add NAT extension\n");
102			return NF_ACCEPT;
103		}
104	}
105
106	switch (ctinfo) {
107	case IP_CT_RELATED:
108	case IP_CT_RELATED_REPLY:
109		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
110			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
111							   hooknum))
112				return NF_DROP;
113			else
114				return NF_ACCEPT;
115		}
116		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
117	case IP_CT_NEW:
118		/* Seen it before?  This can happen for loopback, retrans,
119		 * or local packets.
120		 */
121		if (!nf_nat_initialized(ct, maniptype)) {
122			unsigned int ret;
123
124			ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
125			if (ret != NF_ACCEPT)
126				return ret;
127		} else {
128			pr_debug("Already setup manip %s for ct %p\n",
129				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
130				 ct);
131			if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
132				goto oif_changed;
133		}
134		break;
135
136	default:
137		/* ESTABLISHED */
138		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
139			     ctinfo == IP_CT_ESTABLISHED_REPLY);
140		if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
141			goto oif_changed;
142	}
143
144	return nf_nat_packet(ct, ctinfo, hooknum, skb);
145
146oif_changed:
147	nf_ct_kill_acct(ct, ctinfo, skb);
148	return NF_DROP;
149}
150
151static unsigned int
152nf_nat_ipv4_in(unsigned int hooknum,
153	       struct sk_buff *skb,
154	       const struct net_device *in,
155	       const struct net_device *out,
156	       int (*okfn)(struct sk_buff *))
157{
158	unsigned int ret;
159	__be32 daddr = ip_hdr(skb)->daddr;
160
161	ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
162	if (ret != NF_DROP && ret != NF_STOLEN &&
163	    daddr != ip_hdr(skb)->daddr)
164		skb_dst_drop(skb);
165
166	return ret;
167}
168
169static unsigned int
170nf_nat_ipv4_out(unsigned int hooknum,
171		struct sk_buff *skb,
172		const struct net_device *in,
173		const struct net_device *out,
174		int (*okfn)(struct sk_buff *))
175{
176#ifdef CONFIG_XFRM
177	const struct nf_conn *ct;
178	enum ip_conntrack_info ctinfo;
179	int err;
180#endif
181	unsigned int ret;
182
183	/* root is playing with raw sockets. */
184	if (skb->len < sizeof(struct iphdr) ||
185	    ip_hdrlen(skb) < sizeof(struct iphdr))
186		return NF_ACCEPT;
187
188	ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
189#ifdef CONFIG_XFRM
190	if (ret != NF_DROP && ret != NF_STOLEN &&
191	    !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
192	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
193		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
194
195		if ((ct->tuplehash[dir].tuple.src.u3.ip !=
196		     ct->tuplehash[!dir].tuple.dst.u3.ip) ||
197		    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
198		     ct->tuplehash[dir].tuple.src.u.all !=
199		     ct->tuplehash[!dir].tuple.dst.u.all)) {
200			err = nf_xfrm_me_harder(skb, AF_INET);
201			if (err < 0)
202				ret = NF_DROP_ERR(err);
203		}
204	}
205#endif
206	return ret;
207}
208
209static unsigned int
210nf_nat_ipv4_local_fn(unsigned int hooknum,
211		     struct sk_buff *skb,
212		     const struct net_device *in,
213		     const struct net_device *out,
214		     int (*okfn)(struct sk_buff *))
215{
216	const struct nf_conn *ct;
217	enum ip_conntrack_info ctinfo;
218	unsigned int ret;
219	int err;
220
221	/* root is playing with raw sockets. */
222	if (skb->len < sizeof(struct iphdr) ||
223	    ip_hdrlen(skb) < sizeof(struct iphdr))
224		return NF_ACCEPT;
225
226	ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
227	if (ret != NF_DROP && ret != NF_STOLEN &&
228	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
229		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
230
231		if (ct->tuplehash[dir].tuple.dst.u3.ip !=
232		    ct->tuplehash[!dir].tuple.src.u3.ip) {
233			err = ip_route_me_harder(skb, RTN_UNSPEC);
234			if (err < 0)
235				ret = NF_DROP_ERR(err);
236		}
237#ifdef CONFIG_XFRM
238		else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
239			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
240			 ct->tuplehash[dir].tuple.dst.u.all !=
241			 ct->tuplehash[!dir].tuple.src.u.all) {
242			err = nf_xfrm_me_harder(skb, AF_INET);
243			if (err < 0)
244				ret = NF_DROP_ERR(err);
245		}
246#endif
247	}
248	return ret;
249}
250
251static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
252	/* Before packet filtering, change destination */
253	{
254		.hook		= nf_nat_ipv4_in,
255		.owner		= THIS_MODULE,
256		.pf		= NFPROTO_IPV4,
257		.hooknum	= NF_INET_PRE_ROUTING,
258		.priority	= NF_IP_PRI_NAT_DST,
259	},
260	/* After packet filtering, change source */
261	{
262		.hook		= nf_nat_ipv4_out,
263		.owner		= THIS_MODULE,
264		.pf		= NFPROTO_IPV4,
265		.hooknum	= NF_INET_POST_ROUTING,
266		.priority	= NF_IP_PRI_NAT_SRC,
267	},
268	/* Before packet filtering, change destination */
269	{
270		.hook		= nf_nat_ipv4_local_fn,
271		.owner		= THIS_MODULE,
272		.pf		= NFPROTO_IPV4,
273		.hooknum	= NF_INET_LOCAL_OUT,
274		.priority	= NF_IP_PRI_NAT_DST,
275	},
276	/* After packet filtering, change source */
277	{
278		.hook		= nf_nat_ipv4_fn,
279		.owner		= THIS_MODULE,
280		.pf		= NFPROTO_IPV4,
281		.hooknum	= NF_INET_LOCAL_IN,
282		.priority	= NF_IP_PRI_NAT_SRC,
283	},
284};
285
286static int __net_init iptable_nat_net_init(struct net *net)
287{
288	struct ipt_replace *repl;
289
290	repl = ipt_alloc_initial_table(&nf_nat_ipv4_table);
291	if (repl == NULL)
292		return -ENOMEM;
293	net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl);
294	kfree(repl);
295	return PTR_RET(net->ipv4.nat_table);
296}
297
298static void __net_exit iptable_nat_net_exit(struct net *net)
299{
300	ipt_unregister_table(net, net->ipv4.nat_table);
301}
302
303static struct pernet_operations iptable_nat_net_ops = {
304	.init	= iptable_nat_net_init,
305	.exit	= iptable_nat_net_exit,
306};
307
308static int __init iptable_nat_init(void)
309{
310	int err;
311
312	err = register_pernet_subsys(&iptable_nat_net_ops);
313	if (err < 0)
314		goto err1;
315
316	err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
317	if (err < 0)
318		goto err2;
319	return 0;
320
321err2:
322	unregister_pernet_subsys(&iptable_nat_net_ops);
323err1:
324	return err;
325}
326
327static void __exit iptable_nat_exit(void)
328{
329	nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
330	unregister_pernet_subsys(&iptable_nat_net_ops);
331}
332
333module_init(iptable_nat_init);
334module_exit(iptable_nat_exit);
335
336MODULE_LICENSE("GPL");
337