/* nf_conntrack_expect.c revision e18b890bb0881bbab6f4f1a6cd20d9c60d66b003 */
1/* Expectation handling for nf_conntrack. */
2
3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/types.h>
13#include <linux/netfilter.h>
14#include <linux/skbuff.h>
15#include <linux/proc_fs.h>
16#include <linux/seq_file.h>
17#include <linux/stddef.h>
18#include <linux/slab.h>
19#include <linux/err.h>
20#include <linux/percpu.h>
21#include <linux/kernel.h>
22
23#include <net/netfilter/nf_conntrack.h>
24#include <net/netfilter/nf_conntrack_core.h>
25#include <net/netfilter/nf_conntrack_expect.h>
26#include <net/netfilter/nf_conntrack_helper.h>
27#include <net/netfilter/nf_conntrack_tuple.h>
28
/* Global list of all pending expectations; protected by nf_conntrack_lock. */
LIST_HEAD(nf_conntrack_expect_list);
EXPORT_SYMBOL_GPL(nf_conntrack_expect_list);

/* Slab cache backing struct nf_conntrack_expect allocations. */
struct kmem_cache *nf_conntrack_expect_cachep __read_mostly;
/* Monotonically increasing id stamped on each inserted expectation
 * (see nf_conntrack_expect_insert); updated under nf_conntrack_lock. */
static unsigned int nf_conntrack_expect_next_id;
34
35/* nf_conntrack_expect helper functions */
/* Remove @exp from the global expectation list and drop the list's
 * reference.  Caller must hold nf_conntrack_lock for writing and must
 * already have stopped the expectation's timeout timer (the assertion
 * below enforces this). */
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);

	NF_CT_ASSERT(master_help);
	NF_CT_ASSERT(!timer_pending(&exp->timeout));

	list_del(&exp->list);
	NF_CT_STAT_INC(expect_delete);
	master_help->expecting--;
	/* Drop the reference that was held on behalf of the list. */
	nf_conntrack_expect_put(exp);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
49
/* Timer callback: the expectation was never fulfilled in time.  Unlink
 * it (which drops the list's reference) and then drop the reference
 * that was held on behalf of the timer itself. */
static void expectation_timed_out(unsigned long ul_expect)
{
	struct nf_conntrack_expect *exp = (void *)ul_expect;

	write_lock_bh(&nf_conntrack_lock);
	nf_ct_unlink_expect(exp);
	write_unlock_bh(&nf_conntrack_lock);
	/* Release the timer's reference. */
	nf_conntrack_expect_put(exp);
}
59
60struct nf_conntrack_expect *
61__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
62{
63	struct nf_conntrack_expect *i;
64
65	list_for_each_entry(i, &nf_conntrack_expect_list, list) {
66		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
67			return i;
68	}
69	return NULL;
70}
71EXPORT_SYMBOL_GPL(__nf_conntrack_expect_find);
72
73/* Just find a expectation corresponding to a tuple. */
74struct nf_conntrack_expect *
75nf_conntrack_expect_find_get(const struct nf_conntrack_tuple *tuple)
76{
77	struct nf_conntrack_expect *i;
78
79	read_lock_bh(&nf_conntrack_lock);
80	i = __nf_conntrack_expect_find(tuple);
81	if (i)
82		atomic_inc(&i->use);
83	read_unlock_bh(&nf_conntrack_lock);
84
85	return i;
86}
87EXPORT_SYMBOL_GPL(nf_conntrack_expect_find_get);
88
/* If an expectation for this connection is found, it gets delete from
 * global list then returned. */
struct nf_conntrack_expect *
find_expectation(const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;

	list_for_each_entry(i, &nf_conntrack_expect_list, list) {
	/* If master is not in hash table yet (ie. packet hasn't left
	   this machine yet), how can other end know about expected?
	   Hence these are not the droids you are looking for (if
	   master ct never got confirmed, we'd hold a reference to it
	   and weird things would happen to future packets). */
		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
		    && nf_ct_is_confirmed(i->master)) {
			if (i->flags & NF_CT_EXPECT_PERMANENT) {
				/* Permanent expectations stay on the
				 * list; just hand out a reference. */
				atomic_inc(&i->use);
				return i;
			} else if (del_timer(&i->timeout)) {
				/* Timer successfully stopped: unlink and
				 * transfer the timer's reference to the
				 * caller.  If del_timer() failed the
				 * timeout handler is already tearing the
				 * expectation down, so skip it. */
				nf_ct_unlink_expect(i);
				return i;
			}
		}
	}
	return NULL;
}
115
/* delete all expectations for this conntrack */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
	struct nf_conntrack_expect *i, *tmp;
	struct nf_conn_help *help = nfct_help(ct);

	/* Optimization: most connection never expect any others. */
	if (!help || help->expecting == 0)
		return;

	list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
		/* Only tear down an expectation if we beat its timeout
		 * timer; otherwise the timer callback owns the cleanup. */
		if (i->master == ct && del_timer(&i->timeout)) {
			nf_ct_unlink_expect(i);
			/* Drop the reference the stopped timer held. */
			nf_conntrack_expect_put(i);
		}
	}
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
134
135/* Would two expected things clash? */
136static inline int expect_clash(const struct nf_conntrack_expect *a,
137			       const struct nf_conntrack_expect *b)
138{
139	/* Part covered by intersection of masks must be unequal,
140	   otherwise they clash */
141	struct nf_conntrack_tuple intersect_mask;
142	int count;
143
144	intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num;
145	intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
146	intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all;
147	intersect_mask.dst.protonum = a->mask.dst.protonum
148					& b->mask.dst.protonum;
149
150	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
151		intersect_mask.src.u3.all[count] =
152			a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
153	}
154
155	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
156		intersect_mask.dst.u3.all[count] =
157			a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count];
158	}
159
160	return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
161}
162
163static inline int expect_matches(const struct nf_conntrack_expect *a,
164				 const struct nf_conntrack_expect *b)
165{
166	return a->master == b->master
167		&& nf_ct_tuple_equal(&a->tuple, &b->tuple)
168		&& nf_ct_tuple_equal(&a->mask, &b->mask);
169}
170
/* Generally a bad idea to call this: could have matched already. */
void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
{
	struct nf_conntrack_expect *i;

	write_lock_bh(&nf_conntrack_lock);
	/* choose the oldest expectation to evict */
	list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
		/* Only remove it if we beat its timeout timer; otherwise
		 * the timer callback is already handling teardown. */
		if (expect_matches(i, exp) && del_timer(&i->timeout)) {
			nf_ct_unlink_expect(i);
			/* Drop the lock before the final put, then drop
			 * the reference the stopped timer held. */
			write_unlock_bh(&nf_conntrack_lock);
			nf_conntrack_expect_put(i);
			return;
		}
	}
	write_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_conntrack_unexpect_related);
189
190/* We don't increase the master conntrack refcount for non-fulfilled
191 * conntracks. During the conntrack destruction, the expectations are
192 * always killed before the conntrack itself */
193struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
194{
195	struct nf_conntrack_expect *new;
196
197	new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC);
198	if (!new)
199		return NULL;
200
201	new->master = me;
202	atomic_set(&new->use, 1);
203	return new;
204}
205EXPORT_SYMBOL_GPL(nf_conntrack_expect_alloc);
206
207void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family,
208			      union nf_conntrack_address *saddr,
209			      union nf_conntrack_address *daddr,
210			      u_int8_t proto, __be16 *src, __be16 *dst)
211{
212	int len;
213
214	if (family == AF_INET)
215		len = 4;
216	else
217		len = 16;
218
219	exp->flags = 0;
220	exp->expectfn = NULL;
221	exp->helper = NULL;
222	exp->tuple.src.l3num = family;
223	exp->tuple.dst.protonum = proto;
224	exp->mask.src.l3num = 0xFFFF;
225	exp->mask.dst.protonum = 0xFF;
226
227	if (saddr) {
228		memcpy(&exp->tuple.src.u3, saddr, len);
229		if (sizeof(exp->tuple.src.u3) > len)
230			/* address needs to be cleared for nf_ct_tuple_equal */
231			memset((void *)&exp->tuple.src.u3 + len, 0x00,
232			       sizeof(exp->tuple.src.u3) - len);
233		memset(&exp->mask.src.u3, 0xFF, len);
234		if (sizeof(exp->mask.src.u3) > len)
235			memset((void *)&exp->mask.src.u3 + len, 0x00,
236			       sizeof(exp->mask.src.u3) - len);
237	} else {
238		memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
239		memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
240	}
241
242	if (daddr) {
243		memcpy(&exp->tuple.dst.u3, daddr, len);
244		if (sizeof(exp->tuple.dst.u3) > len)
245			/* address needs to be cleared for nf_ct_tuple_equal */
246			memset((void *)&exp->tuple.dst.u3 + len, 0x00,
247			       sizeof(exp->tuple.dst.u3) - len);
248		memset(&exp->mask.dst.u3, 0xFF, len);
249		if (sizeof(exp->mask.dst.u3) > len)
250			memset((void *)&exp->mask.dst.u3 + len, 0x00,
251			       sizeof(exp->mask.dst.u3) - len);
252	} else {
253		memset(&exp->tuple.dst.u3, 0x00, sizeof(exp->tuple.dst.u3));
254		memset(&exp->mask.dst.u3, 0x00, sizeof(exp->mask.dst.u3));
255	}
256
257	if (src) {
258		exp->tuple.src.u.all = (__force u16)*src;
259		exp->mask.src.u.all = 0xFFFF;
260	} else {
261		exp->tuple.src.u.all = 0;
262		exp->mask.src.u.all = 0;
263	}
264
265	if (dst) {
266		exp->tuple.dst.u.all = (__force u16)*dst;
267		exp->mask.dst.u.all = 0xFFFF;
268	} else {
269		exp->tuple.dst.u.all = 0;
270		exp->mask.dst.u.all = 0;
271	}
272}
273EXPORT_SYMBOL_GPL(nf_conntrack_expect_init);
274
275void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
276{
277	if (atomic_dec_and_test(&exp->use))
278		kmem_cache_free(nf_conntrack_expect_cachep, exp);
279}
280EXPORT_SYMBOL_GPL(nf_conntrack_expect_put);
281
/* Add @exp to the global list and start its timeout timer.  Caller must
 * hold nf_conntrack_lock for writing.  Takes two references: one for
 * the list (dropped by nf_ct_unlink_expect) and one for the timer
 * (dropped in expectation_timed_out or by whoever stops the timer). */
static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);

	/* Reference held on behalf of the list. */
	atomic_inc(&exp->use);
	master_help->expecting++;
	list_add(&exp->list, &nf_conntrack_expect_list);

	init_timer(&exp->timeout);
	exp->timeout.data = (unsigned long)exp;
	exp->timeout.function = expectation_timed_out;
	exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
	add_timer(&exp->timeout);

	exp->id = ++nf_conntrack_expect_next_id;
	/* Reference held on behalf of the timer. */
	atomic_inc(&exp->use);
	NF_CT_STAT_INC(expect_create);
}
300
301/* Race with expectations being used means we could have none to find; OK. */
302static void evict_oldest_expect(struct nf_conn *master)
303{
304	struct nf_conntrack_expect *i;
305
306	list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
307		if (i->master == master) {
308			if (del_timer(&i->timeout)) {
309				nf_ct_unlink_expect(i);
310				nf_conntrack_expect_put(i);
311			}
312			break;
313		}
314	}
315}
316
317static inline int refresh_timer(struct nf_conntrack_expect *i)
318{
319	struct nf_conn_help *master_help = nfct_help(i->master);
320
321	if (!del_timer(&i->timeout))
322		return 0;
323
324	i->timeout.expires = jiffies + master_help->helper->timeout*HZ;
325	add_timer(&i->timeout);
326	return 1;
327}
328
/* Register @expect on the global list.  Returns 0 on success (also when
 * an identical expectation already existed and its timer was refreshed)
 * or -EBUSY when it clashes with a different existing expectation. */
int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
{
	struct nf_conntrack_expect *i;
	struct nf_conn *master = expect->master;
	struct nf_conn_help *master_help = nfct_help(master);
	int ret;

	NF_CT_ASSERT(master_help);

	write_lock_bh(&nf_conntrack_lock);
	list_for_each_entry(i, &nf_conntrack_expect_list, list) {
		if (expect_matches(i, expect)) {
			/* Refresh timer: if it's dying, ignore.. */
			if (refresh_timer(i)) {
				ret = 0;
				goto out;
			}
		} else if (expect_clash(i, expect)) {
			ret = -EBUSY;
			goto out;
		}
	}
	/* Will be over limit? */
	if (master_help->helper->max_expected &&
	    master_help->expecting >= master_help->helper->max_expected)
		evict_oldest_expect(master);

	nf_conntrack_expect_insert(expect);
	nf_conntrack_expect_event(IPEXP_NEW, expect);
	ret = 0;
out:
	write_unlock_bh(&nf_conntrack_lock);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_expect_related);
364
365#ifdef CONFIG_PROC_FS
/* seq_file start: take the read lock and walk to the entry at *pos, or
 * return NULL when the list is empty or *pos is past the end. */
static void *exp_seq_start(struct seq_file *s, loff_t *pos)
{
	struct list_head *e = &nf_conntrack_expect_list;
	loff_t i;

	/* strange seq_file api calls stop even if we fail,
	 * thus we need to grab lock since stop unlocks */
	read_lock_bh(&nf_conntrack_lock);

	if (list_empty(e))
		return NULL;

	for (i = 0; i <= *pos; i++) {
		e = e->next;
		if (e == &nf_conntrack_expect_list)
			return NULL;
	}
	return e;
}
385
386static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
387{
388	struct list_head *e = v;
389
390	++*pos;
391	e = e->next;
392
393	if (e == &nf_conntrack_expect_list)
394		return NULL;
395
396	return e;
397}
398
/* seq_file stop: release the lock taken in exp_seq_start(); called by
 * the seq_file core even when start failed. */
static void exp_seq_stop(struct seq_file *s, void *v)
{
	read_unlock_bh(&nf_conntrack_lock);
}
403
/* seq_file show: print one expectation — remaining timeout in seconds
 * (or "-" when no timer is set up), l3/l4 protocol numbers and the
 * expected tuple. */
static int exp_seq_show(struct seq_file *s, void *v)
{
	struct nf_conntrack_expect *expect = v;

	if (expect->timeout.function)
		seq_printf(s, "%ld ", timer_pending(&expect->timeout)
			   ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
	else
		seq_printf(s, "- ");
	seq_printf(s, "l3proto = %u proto=%u ",
		   expect->tuple.src.l3num,
		   expect->tuple.dst.protonum);
	print_tuple(s, &expect->tuple,
		    __nf_ct_l3proto_find(expect->tuple.src.l3num),
		    __nf_ct_l4proto_find(expect->tuple.src.l3num,
				       expect->tuple.dst.protonum));
	return seq_putc(s, '\n');
}
422
/* Iterator callbacks for the /proc expectation listing. */
static struct seq_operations exp_seq_ops = {
	.start = exp_seq_start,
	.next = exp_seq_next,
	.stop = exp_seq_stop,
	.show = exp_seq_show
};
429
/* open() handler for the proc file: attach the seq_file iterator. */
static int exp_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &exp_seq_ops);
}
434
/* File operations for the expectation proc entry; registered by the
 * conntrack proc setup code elsewhere. */
struct file_operations exp_file_ops = {
	.owner   = THIS_MODULE,
	.open    = exp_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release
};
442#endif /* CONFIG_PROC_FS */
443