nf_conntrack_expect.c revision ea781f197d6a835cbb93a0bf88ee1696296ed8aa
/* Expectation handling for nf_conntrack. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <net/net_namespace.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>

unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

static unsigned int nf_ct_expect_hash_rnd __read_mostly;
unsigned int nf_ct_expect_max __read_mostly;
static int nf_ct_expect_hash_rnd_initted __read_mostly;

static struct kmem_cache *nf_ct_expect_cachep __read_mostly;

/* nf_conntrack_expect helper functions */
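/* Unlink an expectation from the global hash and from its master's list and
 * drop the reference owned by those lists.  The caller must hold
 * nf_conntrack_lock and must already have stopped the timeout timer. */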
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	struct net *net = nf_ct_exp_net(exp);

	NF_CT_ASSERT(master_help);
	NF_CT_ASSERT(!timer_pending(&exp->timeout));

	hlist_del_rcu(&exp->hnode);
	net->ct.expect_count--;

	hlist_del(&exp->lnode);
	master_help->expecting[exp->class]--;
	nf_ct_expect_put(exp);

	NF_CT_STAT_INC(net, expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);

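/* Timer callback: the expectation was never matched within its lifetime, so
 * unlink it and drop the reference owned by the timer. */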
static void nf_ct_expectation_timed_out(unsigned long ul_expect)
{
	struct nf_conntrack_expect *exp = (void *)ul_expect;

	spin_lock_bh(&nf_conntrack_lock);
	nf_ct_unlink_expect(exp);
	spin_unlock_bh(&nf_conntrack_lock);
	nf_ct_expect_put(exp);
}

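/* Hash an expectation by the destination part of its tuple (address, port and
 * protocol numbers).  The source side may be wildcarded by the mask, so it
 * must not influence the bucket choice. */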
static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
{
	unsigned int hash;

	if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
		get_random_bytes(&nf_ct_expect_hash_rnd,
				 sizeof(nf_ct_expect_hash_rnd));
		nf_ct_expect_hash_rnd_initted = 1;
	}

	hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
		      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
		       (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
	return ((u64)hash * nf_ct_expect_hsize) >> 32;
}

struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;
	struct hlist_node *n;
	unsigned int h;

	if (!net->ct.expect_count)
		return NULL;

	h = nf_ct_expect_dst_hash(tuple);
	hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
			return i;
	}
	return NULL;
}
EXPORT_SYMBOL_GPL(__nf_ct_expect_find);

/* Just find an expectation corresponding to a tuple. */
struct nf_conntrack_expect *
nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;

	rcu_read_lock();
	i = __nf_ct_expect_find(net, tuple);
	if (i && !atomic_inc_not_zero(&i->use))
		i = NULL;
	rcu_read_unlock();

	return i;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);

/* If an expectation for this connection is found, it is deleted from the
 * global list and then returned. */
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i, *exp = NULL;
	struct hlist_node *n;
	unsigned int h;

	if (!net->ct.expect_count)
		return NULL;

	h = nf_ct_expect_dst_hash(tuple);
	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
		if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
		    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
			exp = i;
			break;
		}
	}
	if (!exp)
		return NULL;

	/* If the master is not in the hash table yet (ie. the packet hasn't
	   left this machine yet), how could the other end know about the
	   expectation?  Hence these are not the droids you are looking for
	   (if the master ct never got confirmed, we'd hold a reference to it
	   and weird things would happen to future packets). */
	if (!nf_ct_is_confirmed(exp->master))
		return NULL;

	if (exp->flags & NF_CT_EXPECT_PERMANENT) {
		atomic_inc(&exp->use);
		return exp;
	} else if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		return exp;
	}

	return NULL;
}

/* Delete all expectations for this conntrack. */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
	struct nf_conn_help *help = nfct_help(ct);
	struct nf_conntrack_expect *exp;
	struct hlist_node *n, *next;

	/* Optimization: most connections never expect any others. */
	if (!help)
		return;

	hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
		if (del_timer(&exp->timeout)) {
			nf_ct_unlink_expect(exp);
			nf_ct_expect_put(exp);
		}
	}
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);

/* Would two expected things clash? */
static inline int expect_clash(const struct nf_conntrack_expect *a,
			       const struct nf_conntrack_expect *b)
{
	/* The parts covered by the intersection of the masks must be unequal,
	   otherwise the two expectations clash. */
	struct nf_conntrack_tuple_mask intersect_mask;
	int count;

	intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;

	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
		intersect_mask.src.u3.all[count] =
			a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
	}

	return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}

static inline int expect_matches(const struct nf_conntrack_expect *a,
				 const struct nf_conntrack_expect *b)
{
	return a->master == b->master && a->class == b->class
		&& nf_ct_tuple_equal(&a->tuple, &b->tuple)
		&& nf_ct_tuple_mask_equal(&a->mask, &b->mask);
}

/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
	spin_lock_bh(&nf_conntrack_lock);
	if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		nf_ct_expect_put(exp);
	}
	spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);

/* We don't increase the master conntrack refcount for unfulfilled
 * expectations.  During conntrack destruction, the expectations are
 * always killed before the conntrack itself. */
struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
{
	struct nf_conntrack_expect *new;

	new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
	if (!new)
		return NULL;

	new->master = me;
	atomic_set(&new->use, 1);
	INIT_RCU_HEAD(&new->rcu);
	return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);

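/* Fill in the tuple and mask of an expectation.  A NULL @saddr or @src
 * wildcards that field (its mask is cleared); fields that are given are
 * matched exactly (mask set to all ones).  The destination address and port
 * are always required. */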
void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
		       u_int8_t family,
		       const union nf_inet_addr *saddr,
		       const union nf_inet_addr *daddr,
		       u_int8_t proto, const __be16 *src, const __be16 *dst)
{
	int len;

	if (family == AF_INET)
		len = 4;
	else
		len = 16;

	exp->flags = 0;
	exp->class = class;
	exp->expectfn = NULL;
	exp->helper = NULL;
	exp->tuple.src.l3num = family;
	exp->tuple.dst.protonum = proto;

	if (saddr) {
		memcpy(&exp->tuple.src.u3, saddr, len);
		if (sizeof(exp->tuple.src.u3) > len)
			/* address needs to be cleared for nf_ct_tuple_equal */
			memset((void *)&exp->tuple.src.u3 + len, 0x00,
			       sizeof(exp->tuple.src.u3) - len);
		memset(&exp->mask.src.u3, 0xFF, len);
		if (sizeof(exp->mask.src.u3) > len)
			memset((void *)&exp->mask.src.u3 + len, 0x00,
			       sizeof(exp->mask.src.u3) - len);
	} else {
		memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
		memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
	}

	if (src) {
		exp->tuple.src.u.all = *src;
		exp->mask.src.u.all = htons(0xFFFF);
	} else {
		exp->tuple.src.u.all = 0;
		exp->mask.src.u.all = 0;
	}

	memcpy(&exp->tuple.dst.u3, daddr, len);
	if (sizeof(exp->tuple.dst.u3) > len)
		/* address needs to be cleared for nf_ct_tuple_equal */
		memset((void *)&exp->tuple.dst.u3 + len, 0x00,
		       sizeof(exp->tuple.dst.u3) - len);

	exp->tuple.dst.u.all = *dst;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_init);
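
/* Usage sketch (illustrative, not part of this file): a protocol helper such
 * as the FTP helper typically allocates, initialises and registers an
 * expectation for the data connection roughly like this, where the
 * reply-direction tuple of the control connection supplies the addresses and
 * "port" carries the port parsed from the helper payload:
 *
 *	struct nf_conntrack_expect *exp;
 *
 *	exp = nf_ct_expect_alloc(ct);
 *	if (exp == NULL)
 *		return NF_DROP;
 *	nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
 *			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3,
 *			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3,
 *			  IPPROTO_TCP, NULL, &port);
 *	if (nf_ct_expect_related(exp) != 0)
 *		ret = NF_DROP;
 *	nf_ct_expect_put(exp);
 */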

static void nf_ct_expect_free_rcu(struct rcu_head *head)
{
	struct nf_conntrack_expect *exp;

	exp = container_of(head, struct nf_conntrack_expect, rcu);
	kmem_cache_free(nf_ct_expect_cachep, exp);
}

void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
	if (atomic_dec_and_test(&exp->use))
		call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);

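/* Link an expectation into the global hash and its master's list and start
 * its timeout timer.  Two references are taken: one owned by the hash/list
 * linkage (dropped in nf_ct_unlink_expect) and one owned by the timer
 * (dropped when it expires or is successfully deleted). */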
static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	struct net *net = nf_ct_exp_net(exp);
	const struct nf_conntrack_expect_policy *p;
	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);

	atomic_inc(&exp->use);

	hlist_add_head(&exp->lnode, &master_help->expectations);
	master_help->expecting[exp->class]++;

	hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
	net->ct.expect_count++;

	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
		    (unsigned long)exp);
	p = &master_help->helper->expect_policy[exp->class];
	exp->timeout.expires = jiffies + p->timeout * HZ;
	add_timer(&exp->timeout);

	atomic_inc(&exp->use);
	NF_CT_STAT_INC(net, expect_create);
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master,
				struct nf_conntrack_expect *new)
{
	struct nf_conn_help *master_help = nfct_help(master);
	struct nf_conntrack_expect *exp, *last = NULL;
	struct hlist_node *n;

	hlist_for_each_entry(exp, n, &master_help->expectations, lnode) {
		if (exp->class == new->class)
			last = exp;
	}

	if (last && del_timer(&last->timeout)) {
		nf_ct_unlink_expect(last);
		nf_ct_expect_put(last);
	}
}

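/* Restart the timeout of an identical, already registered expectation.
 * Returns 0 if the timer could not be stopped (the expectation is already
 * being torn down), 1 if the timeout was successfully refreshed. */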
static inline int refresh_timer(struct nf_conntrack_expect *i)
{
	struct nf_conn_help *master_help = nfct_help(i->master);
	const struct nf_conntrack_expect_policy *p;

	if (!del_timer(&i->timeout))
		return 0;

	p = &master_help->helper->expect_policy[i->class];
	i->timeout.expires = jiffies + p->timeout * HZ;
	add_timer(&i->timeout);
	return 1;
}

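/* Validate a new expectation against the current table: refresh an identical
 * existing one, refuse clashing entries, and enforce the per-helper and
 * global limits.  Called with nf_conntrack_lock held. */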
static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
{
	const struct nf_conntrack_expect_policy *p;
	struct nf_conntrack_expect *i;
	struct nf_conn *master = expect->master;
	struct nf_conn_help *master_help = nfct_help(master);
	struct net *net = nf_ct_exp_net(expect);
	struct hlist_node *n;
	unsigned int h;
	int ret = 0;

	if (!master_help->helper) {
		ret = -ESHUTDOWN;
		goto out;
	}
	h = nf_ct_expect_dst_hash(&expect->tuple);
	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
		if (expect_matches(i, expect)) {
			/* Refresh timer: if it's dying, ignore it. */
			if (refresh_timer(i)) {
				ret = 0;
				goto out;
			}
		} else if (expect_clash(i, expect)) {
			ret = -EBUSY;
			goto out;
		}
	}
	/* Will we be over the limit? */
	p = &master_help->helper->expect_policy[expect->class];
	if (p->max_expected &&
	    master_help->expecting[expect->class] >= p->max_expected) {
		evict_oldest_expect(master, expect);
		if (master_help->expecting[expect->class] >= p->max_expected) {
			ret = -EMFILE;
			goto out;
		}
	}

	if (net->ct.expect_count >= nf_ct_expect_max) {
		if (net_ratelimit())
			printk(KERN_WARNING
			       "nf_conntrack: expectation table full\n");
		ret = -EMFILE;
	}
out:
	return ret;
}

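/* Register an expectation.  On success the table holds its own references
 * (see nf_ct_expect_insert); the extra get/put pair below only keeps the
 * expectation alive while the IPEXP_NEW event is delivered after the lock
 * has been dropped.  The caller keeps the reference it obtained from
 * nf_ct_expect_alloc() and still drops it with nf_ct_expect_put(). */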
int nf_ct_expect_related(struct nf_conntrack_expect *expect)
{
	int ret;

	spin_lock_bh(&nf_conntrack_lock);
	ret = __nf_ct_expect_check(expect);
	if (ret < 0)
		goto out;

	nf_ct_expect_insert(expect);
	atomic_inc(&expect->use);
	spin_unlock_bh(&nf_conntrack_lock);
	nf_ct_expect_event(IPEXP_NEW, expect);
	nf_ct_expect_put(expect);
	return ret;
out:
	spin_unlock_bh(&nf_conntrack_lock);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related);

int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
				u32 pid, int report)
{
	int ret;

	spin_lock_bh(&nf_conntrack_lock);
	ret = __nf_ct_expect_check(expect);
	if (ret < 0)
		goto out;
	nf_ct_expect_insert(expect);
out:
	spin_unlock_bh(&nf_conntrack_lock);
	if (ret == 0)
		nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);

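/* /proc/net/nf_conntrack_expect: walk the expectation hash under RCU and
 * print one line per expectation (remaining timeout, protocols, tuple and
 * flags). */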
#ifdef CONFIG_PROC_FS
struct ct_expect_iter_state {
	struct seq_net_private p;
	unsigned int bucket;
};

static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
	struct net *net = seq_file_net(seq);
	struct ct_expect_iter_state *st = seq->private;
	struct hlist_node *n;

	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
		n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
		if (n)
			return n;
	}
	return NULL;
}

static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
					     struct hlist_node *head)
{
	struct net *net = seq_file_net(seq);
	struct ct_expect_iter_state *st = seq->private;

	head = rcu_dereference(head->next);
	while (head == NULL) {
		if (++st->bucket >= nf_ct_expect_hsize)
			return NULL;
		head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
	}
	return head;
}

static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
{
	struct hlist_node *head = ct_expect_get_first(seq);

	if (head)
		while (pos && (head = ct_expect_get_next(seq, head)))
			pos--;
	return pos ? NULL : head;
}

static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return ct_expect_get_idx(seq, *pos);
}

static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return ct_expect_get_next(seq, v);
}

static void exp_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static int exp_seq_show(struct seq_file *s, void *v)
{
	struct nf_conntrack_expect *expect;
	struct hlist_node *n = v;
	char *delim = "";

	expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

	if (expect->timeout.function)
		seq_printf(s, "%ld ", timer_pending(&expect->timeout)
			   ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
	else
		seq_printf(s, "- ");
	seq_printf(s, "l3proto = %u proto=%u ",
		   expect->tuple.src.l3num,
		   expect->tuple.dst.protonum);
	print_tuple(s, &expect->tuple,
		    __nf_ct_l3proto_find(expect->tuple.src.l3num),
		    __nf_ct_l4proto_find(expect->tuple.src.l3num,
					 expect->tuple.dst.protonum));

	if (expect->flags & NF_CT_EXPECT_PERMANENT) {
		seq_printf(s, "PERMANENT");
		delim = ",";
	}
	if (expect->flags & NF_CT_EXPECT_INACTIVE)
		seq_printf(s, "%sINACTIVE", delim);

	return seq_putc(s, '\n');
}

static const struct seq_operations exp_seq_ops = {
	.start = exp_seq_start,
	.next = exp_seq_next,
	.stop = exp_seq_stop,
	.show = exp_seq_show
};

static int exp_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &exp_seq_ops,
			sizeof(struct ct_expect_iter_state));
}

static const struct file_operations exp_file_ops = {
	.owner   = THIS_MODULE,
	.open    = exp_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
#endif /* CONFIG_PROC_FS */

static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	struct proc_dir_entry *proc;

	proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops);
	if (!proc)
		return -ENOMEM;
#endif /* CONFIG_PROC_FS */
	return 0;
}

static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "nf_conntrack_expect");
#endif /* CONFIG_PROC_FS */
}

module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);

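/* Per-namespace setup.  The hash size, the expectation limit and the kmem
 * cache are global and are only initialised (and torn down) together with the
 * initial network namespace. */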
int nf_conntrack_expect_init(struct net *net)
{
	int err = -ENOMEM;

	if (net_eq(net, &init_net)) {
		if (!nf_ct_expect_hsize) {
			nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
			if (!nf_ct_expect_hsize)
				nf_ct_expect_hsize = 1;
		}
		nf_ct_expect_max = nf_ct_expect_hsize * 4;
	}

	net->ct.expect_count = 0;
	net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
						  &net->ct.expect_vmalloc, 0);
	if (net->ct.expect_hash == NULL)
		goto err1;

	if (net_eq(net, &init_net)) {
		nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
					sizeof(struct nf_conntrack_expect),
					0, 0, NULL);
		if (!nf_ct_expect_cachep)
			goto err2;
	}

	err = exp_proc_init(net);
	if (err < 0)
		goto err3;

	return 0;

err3:
	if (net_eq(net, &init_net))
		kmem_cache_destroy(nf_ct_expect_cachep);
err2:
	nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
			     nf_ct_expect_hsize);
err1:
	return err;
}

void nf_conntrack_expect_fini(struct net *net)
{
	exp_proc_remove(net);
	if (net_eq(net, &init_net))
		kmem_cache_destroy(nf_ct_expect_cachep);
	nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
			     nf_ct_expect_hsize);
}