1/*
2 * Stateless NAT actions
3 *
4 * Copyright (c) 2007 Herbert Xu <herbert@gondor.apana.org.au>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 */
11
12#include <linux/errno.h>
13#include <linux/init.h>
14#include <linux/kernel.h>
15#include <linux/module.h>
16#include <linux/netfilter.h>
17#include <linux/rtnetlink.h>
18#include <linux/skbuff.h>
19#include <linux/slab.h>
20#include <linux/spinlock.h>
21#include <linux/string.h>
22#include <linux/tc_act/tc_nat.h>
23#include <net/act_api.h>
24#include <net/icmp.h>
25#include <net/ip.h>
26#include <net/netlink.h>
27#include <net/tc_act/tc_nat.h>
28#include <net/tcp.h>
29#include <net/udp.h>
30
31
32#define NAT_TAB_MASK	15
33static struct tcf_common *tcf_nat_ht[NAT_TAB_MASK + 1];
34static u32 nat_idx_gen;
35static DEFINE_RWLOCK(nat_lock);
36
37static struct tcf_hashinfo nat_hash_info = {
38	.htab	=	tcf_nat_ht,
39	.hmask	=	NAT_TAB_MASK,
40	.lock	=	&nat_lock,
41};
42
43static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
44	[TCA_NAT_PARMS]	= { .len = sizeof(struct tc_nat) },
45};
46
47static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
48			struct tc_action *a, int ovr, int bind)
49{
50	struct nlattr *tb[TCA_NAT_MAX + 1];
51	struct tc_nat *parm;
52	int ret = 0, err;
53	struct tcf_nat *p;
54	struct tcf_common *pc;
55
56	if (nla == NULL)
57		return -EINVAL;
58
59	err = nla_parse_nested(tb, TCA_NAT_MAX, nla, nat_policy);
60	if (err < 0)
61		return err;
62
63	if (tb[TCA_NAT_PARMS] == NULL)
64		return -EINVAL;
65	parm = nla_data(tb[TCA_NAT_PARMS]);
66
67	pc = tcf_hash_check(parm->index, a, bind, &nat_hash_info);
68	if (!pc) {
69		pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
70				     &nat_idx_gen, &nat_hash_info);
71		if (IS_ERR(pc))
72			return PTR_ERR(pc);
73		p = to_tcf_nat(pc);
74		ret = ACT_P_CREATED;
75	} else {
76		p = to_tcf_nat(pc);
77		if (!ovr) {
78			tcf_hash_release(pc, bind, &nat_hash_info);
79			return -EEXIST;
80		}
81	}
82
83	spin_lock_bh(&p->tcf_lock);
84	p->old_addr = parm->old_addr;
85	p->new_addr = parm->new_addr;
86	p->mask = parm->mask;
87	p->flags = parm->flags;
88
89	p->tcf_action = parm->action;
90	spin_unlock_bh(&p->tcf_lock);
91
92	if (ret == ACT_P_CREATED)
93		tcf_hash_insert(pc, &nat_hash_info);
94
95	return ret;
96}
97
98static int tcf_nat_cleanup(struct tc_action *a, int bind)
99{
100	struct tcf_nat *p = a->priv;
101
102	return tcf_hash_release(&p->common, bind, &nat_hash_info);
103}
104
105static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
106		   struct tcf_result *res)
107{
108	struct tcf_nat *p = a->priv;
109	struct iphdr *iph;
110	__be32 old_addr;
111	__be32 new_addr;
112	__be32 mask;
113	__be32 addr;
114	int egress;
115	int action;
116	int ihl;
117	int noff;
118
119	spin_lock(&p->tcf_lock);
120
121	p->tcf_tm.lastuse = jiffies;
122	old_addr = p->old_addr;
123	new_addr = p->new_addr;
124	mask = p->mask;
125	egress = p->flags & TCA_NAT_FLAG_EGRESS;
126	action = p->tcf_action;
127
128	bstats_update(&p->tcf_bstats, skb);
129
130	spin_unlock(&p->tcf_lock);
131
132	if (unlikely(action == TC_ACT_SHOT))
133		goto drop;
134
135	noff = skb_network_offset(skb);
136	if (!pskb_may_pull(skb, sizeof(*iph) + noff))
137		goto drop;
138
139	iph = ip_hdr(skb);
140
141	if (egress)
142		addr = iph->saddr;
143	else
144		addr = iph->daddr;
145
146	if (!((old_addr ^ addr) & mask)) {
147		if (skb_cloned(skb) &&
148		    !skb_clone_writable(skb, sizeof(*iph) + noff) &&
149		    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
150			goto drop;
151
152		new_addr &= mask;
153		new_addr |= addr & ~mask;
154
155		/* Rewrite IP header */
156		iph = ip_hdr(skb);
157		if (egress)
158			iph->saddr = new_addr;
159		else
160			iph->daddr = new_addr;
161
162		csum_replace4(&iph->check, addr, new_addr);
163	} else if ((iph->frag_off & htons(IP_OFFSET)) ||
164		   iph->protocol != IPPROTO_ICMP) {
165		goto out;
166	}
167
168	ihl = iph->ihl * 4;
169
170	/* It would be nice to share code with stateful NAT. */
171	switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) {
172	case IPPROTO_TCP:
173	{
174		struct tcphdr *tcph;
175
176		if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) ||
177		    (skb_cloned(skb) &&
178		     !skb_clone_writable(skb, ihl + sizeof(*tcph) + noff) &&
179		     pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
180			goto drop;
181
182		tcph = (void *)(skb_network_header(skb) + ihl);
183		inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1);
184		break;
185	}
186	case IPPROTO_UDP:
187	{
188		struct udphdr *udph;
189
190		if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) ||
191		    (skb_cloned(skb) &&
192		     !skb_clone_writable(skb, ihl + sizeof(*udph) + noff) &&
193		     pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
194			goto drop;
195
196		udph = (void *)(skb_network_header(skb) + ihl);
197		if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
198			inet_proto_csum_replace4(&udph->check, skb, addr,
199						 new_addr, 1);
200			if (!udph->check)
201				udph->check = CSUM_MANGLED_0;
202		}
203		break;
204	}
205	case IPPROTO_ICMP:
206	{
207		struct icmphdr *icmph;
208
209		if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + noff))
210			goto drop;
211
212		icmph = (void *)(skb_network_header(skb) + ihl);
213
214		if ((icmph->type != ICMP_DEST_UNREACH) &&
215		    (icmph->type != ICMP_TIME_EXCEEDED) &&
216		    (icmph->type != ICMP_PARAMETERPROB))
217			break;
218
219		if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph) +
220					noff))
221			goto drop;
222
223		icmph = (void *)(skb_network_header(skb) + ihl);
224		iph = (void *)(icmph + 1);
225		if (egress)
226			addr = iph->daddr;
227		else
228			addr = iph->saddr;
229
230		if ((old_addr ^ addr) & mask)
231			break;
232
233		if (skb_cloned(skb) &&
234		    !skb_clone_writable(skb, ihl + sizeof(*icmph) +
235					     sizeof(*iph) + noff) &&
236		    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
237			goto drop;
238
239		icmph = (void *)(skb_network_header(skb) + ihl);
240		iph = (void *)(icmph + 1);
241
242		new_addr &= mask;
243		new_addr |= addr & ~mask;
244
245		/* XXX Fix up the inner checksums. */
246		if (egress)
247			iph->daddr = new_addr;
248		else
249			iph->saddr = new_addr;
250
251		inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr,
252					 0);
253		break;
254	}
255	default:
256		break;
257	}
258
259out:
260	return action;
261
262drop:
263	spin_lock(&p->tcf_lock);
264	p->tcf_qstats.drops++;
265	spin_unlock(&p->tcf_lock);
266	return TC_ACT_SHOT;
267}
268
269static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
270			int bind, int ref)
271{
272	unsigned char *b = skb_tail_pointer(skb);
273	struct tcf_nat *p = a->priv;
274	struct tc_nat opt = {
275		.old_addr = p->old_addr,
276		.new_addr = p->new_addr,
277		.mask     = p->mask,
278		.flags    = p->flags,
279
280		.index    = p->tcf_index,
281		.action   = p->tcf_action,
282		.refcnt   = p->tcf_refcnt - ref,
283		.bindcnt  = p->tcf_bindcnt - bind,
284	};
285	struct tcf_t t;
286
287	if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt))
288		goto nla_put_failure;
289	t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
290	t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
291	t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
292	if (nla_put(skb, TCA_NAT_TM, sizeof(t), &t))
293		goto nla_put_failure;
294
295	return skb->len;
296
297nla_put_failure:
298	nlmsg_trim(skb, b);
299	return -1;
300}
301
302static struct tc_action_ops act_nat_ops = {
303	.kind		=	"nat",
304	.hinfo		=	&nat_hash_info,
305	.type		=	TCA_ACT_NAT,
306	.capab		=	TCA_CAP_NONE,
307	.owner		=	THIS_MODULE,
308	.act		=	tcf_nat,
309	.dump		=	tcf_nat_dump,
310	.cleanup	=	tcf_nat_cleanup,
311	.lookup		=	tcf_hash_search,
312	.init		=	tcf_nat_init,
313	.walk		=	tcf_generic_walker
314};
315
316MODULE_DESCRIPTION("Stateless NAT actions");
317MODULE_LICENSE("GPL");
318
319static int __init nat_init_module(void)
320{
321	return tcf_register_action(&act_nat_ops);
322}
323
324static void __exit nat_cleanup_module(void)
325{
326	tcf_unregister_action(&act_nat_ops);
327}
328
329module_init(nat_init_module);
330module_exit(nat_cleanup_module);
331